From a43477ffb43ad75dd009770fe9c1af2d6eed3ab5 Mon Sep 17 00:00:00 2001 From: Philippe Canal <pcanal@fnal.gov> Date: Thu, 4 Feb 2021 17:31:37 -0600 Subject: [PATCH] Add TTree::InPlaceClone. This speeds up the first step of merging (hadd) --- tree/tree/inc/TChain.h | 1 + tree/tree/inc/TTree.h | 1 + tree/tree/inc/TTreeCloner.h | 8 ++++ tree/tree/src/TChain.cxx | 8 ++++ tree/tree/src/TTree.cxx | 63 +++++++++++++++++++++++------ tree/tree/src/TTreeCloner.cxx | 75 ++++++++++++++++++++++++++++++----- 6 files changed, 132 insertions(+), 24 deletions(-) diff --git a/tree/tree/inc/TChain.h b/tree/tree/inc/TChain.h index b17963e1503..5b9fbb40a4b 100644 --- a/tree/tree/inc/TChain.h +++ b/tree/tree/inc/TChain.h @@ -118,6 +118,7 @@ public: Long64_t *GetTreeOffset() const { return fTreeOffset; } Int_t GetTreeOffsetLen() const { return fTreeOffsetLen; } virtual Double_t GetWeight() const; + virtual Bool_t InPlaceClone(TDirectory *newdirectory, const char *options = ""); virtual Int_t LoadBaskets(Long64_t maxmemory); virtual Long64_t LoadTree(Long64_t entry); void Lookup(Bool_t force = kFALSE); diff --git a/tree/tree/inc/TTree.h b/tree/tree/inc/TTree.h index cca8e1de223..dc9a014fb51 100644 --- a/tree/tree/inc/TTree.h +++ b/tree/tree/inc/TTree.h @@ -539,6 +539,7 @@ public: virtual Long64_t GetZipBytes() const { return fZipBytes; } virtual void IncrementTotalBuffers(Int_t nbytes) { fTotalBuffers += nbytes; } Bool_t IsFolder() const { return kTRUE; } + virtual Bool_t InPlaceClone(TDirectory *newdirectory, const char *options = ""); virtual Int_t LoadBaskets(Long64_t maxmemory = 2000000000); virtual Long64_t LoadTree(Long64_t entry); virtual Long64_t LoadTreeFriend(Long64_t entry, TTree* T); diff --git a/tree/tree/inc/TTreeCloner.h b/tree/tree/inc/TTreeCloner.h index 650be01a655..3151ec109b8 100644 --- a/tree/tree/inc/TTreeCloner.h +++ b/tree/tree/inc/TTreeCloner.h @@ -24,7 +24,9 @@ class TBranch; class TTree; +class TFile; class TFileCacheRead; +class TDirectory; class TTreeCloner { 
TString fWarningMsg; ///< Text of the error message lead to an 'invalid' state @@ -34,6 +36,8 @@ class TTreeCloner { UInt_t fOptions; TTree *fFromTree; TTree *fToTree; + TDirectory*fToDirectory; + TFile *fToFile; Option_t *fMethod; TObjArray fFromBranches; TObjArray fToBranches; @@ -88,6 +92,8 @@ private: TTreeCloner(const TTreeCloner&) = delete; TTreeCloner &operator=(const TTreeCloner&) = delete; + TTreeCloner(TTree *from, TTree *to, TDirectory *newdirectory, Option_t *method, UInt_t options = kNone); + public: enum EClonerOptions { kNone = 0, @@ -97,6 +103,7 @@ public: }; TTreeCloner(TTree *from, TTree *to, Option_t *method, UInt_t options = kNone); + TTreeCloner(TTree *from, TDirectory *newdirectory, Option_t *method, UInt_t options = kNone); virtual ~TTreeCloner(); void CloseOutWriteBaskets(); @@ -108,6 +115,7 @@ public: void CopyStreamerInfos(); void CopyProcessIds(); const char *GetWarning() const { return fWarningMsg; } + Bool_t IsInPlace() const { return fFromTree == fToTree; } Bool_t Exec(); Bool_t IsValid() { return fIsValid; } Bool_t NeedConversion() { return fNeedConversion; } diff --git a/tree/tree/src/TChain.cxx b/tree/tree/src/TChain.cxx index 08f26f61527..bf67166bcbb 100644 --- a/tree/tree/src/TChain.cxx +++ b/tree/tree/src/TChain.cxx @@ -1221,6 +1221,14 @@ Double_t TChain::GetWeight() const } } +//////////////////////////////////////////////////////////////////////////////// +/// Move content to a new file. (NOT IMPLEMENTED for TChain) +Bool_t TChain::InPlaceClone(TDirectory * /* new directory */, const char * /* options */) +{ + Error("InPlaceClone", "not implemented"); + return false; +} + //////////////////////////////////////////////////////////////////////////////// /// Set the TTree to be reloaded as soon as possible. In particular this /// is needed when adding a Friend. 
diff --git a/tree/tree/src/TTree.cxx b/tree/tree/src/TTree.cxx index 8d41333b7f5..b3f190502b2 100644 --- a/tree/tree/src/TTree.cxx +++ b/tree/tree/src/TTree.cxx @@ -6816,20 +6816,29 @@ Long64_t TTree::Merge(TCollection* li, TFileMergeInfo *info) { const char *options = info ? info->fOptions.Data() : ""; if (info && info->fIsFirst && info->fOutputDirectory && info->fOutputDirectory->GetFile() != GetCurrentFile()) { - TDirectory::TContext ctxt(info->fOutputDirectory); - TIOFeatures saved_features = fIOFeatures; - TTree *newtree = CloneTree(-1, options); - if (info->fIOFeatures) - fIOFeatures = *(info->fIOFeatures); - else - fIOFeatures = saved_features; - if (newtree) { - newtree->Write(); - delete newtree; + if (GetCurrentFile() == nullptr) { + // In memory TTree, all we need to do is ... write it. + SetDirectory(info->fOutputDirectory); + FlushBasketsImpl(); + fDirectory->WriteTObject(this); + } else if (info->fOptions.Contains("fast")) { + InPlaceClone(info->fOutputDirectory); + } else { + TDirectory::TContext ctxt(info->fOutputDirectory); + TIOFeatures saved_features = fIOFeatures; + TTree *newtree = CloneTree(-1, options); + if (info->fIOFeatures) + fIOFeatures = *(info->fIOFeatures); + else + fIOFeatures = saved_features; + if (newtree) { + newtree->Write(); + delete newtree; + } + // Make sure things are really written out to disk before attempting any reading. + info->fOutputDirectory->GetFile()->Flush(); + info->fOutputDirectory->ReadTObject(this,this->GetName()); } - // Make sure things are really written out to disk before attempting any reading. 
- info->fOutputDirectory->GetFile()->Flush(); - info->fOutputDirectory->ReadTObject(this,this->GetName()); } if (!li) return 0; Long64_t storeAutoSave = fAutoSave; @@ -6885,6 +6894,34 @@ void TTree::MoveReadCache(TFile *src, TDirectory *dir) } } +//////////////////////////////////////////////////////////////////////////////// +/// Copy the content to a new file, update this TTree with the new +/// location information and attach this TTree to the new directory. +/// +/// options: Indicates a basket sorting method, see TTreeCloner::TTreeCloner for +/// details +/// +/// If new and old directory are in the same file, the data is untouched, +/// this "just" does a call to SetDirectory. +/// Equivalent to an "in place" cloning of the TTree. +Bool_t TTree::InPlaceClone(TDirectory *newdirectory, const char *options) +{ + if (!newdirectory) { + LoadBaskets(2*fTotBytes); + SetDirectory(nullptr); + return true; + } + if (newdirectory->GetFile() == GetCurrentFile()) { + SetDirectory(newdirectory); + return true; + } + TTreeCloner cloner(this, newdirectory, options); + if (cloner.IsValid()) + return cloner.Exec(); + else + return false; +} + //////////////////////////////////////////////////////////////////////////////// /// Function called when loading a new class library. diff --git a/tree/tree/src/TTreeCloner.cxx b/tree/tree/src/TTreeCloner.cxx index 98244e822eb..98618b8ad7c 100644 --- a/tree/tree/src/TTreeCloner.cxx +++ b/tree/tree/src/TTreeCloner.cxx @@ -102,12 +102,37 @@ Bool_t TTreeCloner::CompareEntry::operator()(UInt_t i1, UInt_t i2) /// sequentially. TTreeCloner::TTreeCloner(TTree *from, TTree *to, Option_t *method, UInt_t options) : + TTreeCloner(from, to, to ? to->GetDirectory() : nullptr, method, options) +{ + +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor. In place cloning. 
+/// This object transfers the data from +/// 'from' the original location to 'to' the new directory +/// using the sorting method indicated in method. +/// It updates the 'from' TTree with the new information. +/// See TTreeCloner::TTreeCloner(TTree *from, TTree *to, Option_t *method, UInt_t options) +/// for details on the sorting methods. + +TTreeCloner::TTreeCloner(TTree *from, TDirectory *newdirectory, Option_t *method, UInt_t options) : + TTreeCloner(from, from, newdirectory, method, options) +{ + +} + +//////////////////////////////////////////////////////////////////////////////// +/// Constructor implementation. +TTreeCloner::TTreeCloner(TTree *from, TTree *to, TDirectory *newdirectory, Option_t *method, UInt_t options) : fWarningMsg(), fIsValid(kTRUE), fNeedConversion(kFALSE), fOptions(options), fFromTree(from), fToTree(to), + fToDirectory(newdirectory), + fToFile(fToDirectory ? fToDirectory->GetFile() : nullptr), fMethod(method), fFromBranches( from ? from->GetListOfLeaves()->GetEntries()+1 : 0), fToBranches( to ? 
to->GetListOfLeaves()->GetEntries()+1 : 0), @@ -156,27 +181,27 @@ TTreeCloner::TTreeCloner(TTree *from, TTree *to, Option_t *method, UInt_t option Warning("TTreeCloner::TTreeCloner", "%s", fWarningMsg.Data()); } fIsValid = kFALSE; - } else if (fToTree->GetDirectory() == nullptr) { + } else if (fToDirectory == nullptr) { fWarningMsg.Form("The output TTree (%s) must be associated with a directory.", fToTree->GetName()); if (!(fOptions & kNoWarnings)) { Warning("TTreeCloner::TTreeCloner", "%s", fWarningMsg.Data()); } fIsValid = kFALSE; - } else if (fToTree->GetCurrentFile() == nullptr) { + } else if (fToFile == nullptr) { fWarningMsg.Form("The output TTree (%s) must be associated with a directory (%s) that is in a file.", - fToTree->GetName(),fToTree->GetDirectory()->GetName()); + fToTree->GetName(),fToDirectory->GetName()); if (!(fOptions & kNoWarnings)) { Warning("TTreeCloner::TTreeCloner", "%s", fWarningMsg.Data()); } fIsValid = kFALSE; - } else if (! fToTree->GetDirectory()->IsWritable()) { - if (fToTree->GetDirectory()==fToTree->GetCurrentFile()) { + } else if (! fToDirectory->IsWritable()) { + if (fToDirectory==fToFile) { fWarningMsg.Form("The output TTree (%s) must be associated with a writable file (%s).", - fToTree->GetName(),fToTree->GetCurrentFile()->GetName()); + fToTree->GetName(),fToFile->GetName()); } else { fWarningMsg.Form("The output TTree (%s) must be associated with a writable directory (%s in %s).", - fToTree->GetName(),fToTree->GetDirectory()->GetName(),fToTree->GetCurrentFile()->GetName()); + fToTree->GetName(),fToDirectory->GetName(),fToFile->GetName()); } if (!(fOptions & kNoWarnings)) { Warning("TTreeCloner::TTreeCloner", "%s", fWarningMsg.Data()); @@ -189,6 +214,8 @@ TTreeCloner::TTreeCloner(TTree *from, TTree *to, Option_t *method, UInt_t option } } + + //////////////////////////////////////////////////////////////////////////////// /// Execute the cloning. 
@@ -207,6 +234,8 @@ Bool_t TTreeCloner::Exec() WriteBaskets(); CopyMemoryBaskets(); RestoreCache(); + if (IsInPlace()) + fToTree->SetDirectory(fToDirectory); return kTRUE; } @@ -232,6 +261,9 @@ TTreeCloner::~TTreeCloner() void TTreeCloner::CloseOutWriteBaskets() { + if (IsInPlace()) + return; + for(Int_t i=0; i<fToBranches.GetEntries(); ++i) { TBranch *to = (TBranch*)fToBranches.UncheckedAt(i); to->FlushOneBasket(to->GetWriteBasket()); @@ -440,7 +472,7 @@ void TTreeCloner::CollectBaskets() void TTreeCloner::CopyStreamerInfos() { TFile *fromFile = fFromTree->GetDirectory()->GetFile(); - TFile *toFile = fToTree->GetDirectory()->GetFile(); + TFile *toFile = fToDirectory->GetFile(); TList *l = fromFile->GetStreamerInfoList(); TIter next(l); TStreamerInfo *oldInfo; @@ -480,6 +512,9 @@ void TTreeCloner::CopyStreamerInfos() void TTreeCloner::CopyMemoryBaskets() { + if (IsInPlace()) + return; + TBasket *basket = 0; for(Int_t i=0; i<fToBranches.GetEntries(); ++i) { TBranch *from = (TBranch*)fFromBranches.UncheckedAt( i ); @@ -510,7 +545,7 @@ void TTreeCloner::CopyProcessIds() // NOTE: We actually need to merge the ProcessId somehow :( TFile *fromfile = fFromTree->GetDirectory()->GetFile(); - TFile *tofile = fToTree->GetDirectory()->GetFile(); + TFile *tofile = fToFile; fPidOffset = tofile->GetNProcessIDs(); @@ -590,6 +625,9 @@ void TTreeCloner::RestoreCache() { void TTreeCloner::ImportClusterRanges() { + if (IsInPlace()) + return; + // First undo, the external call to SetEntries // We could improve the interface to optional tell the TTreeCloner that the // SetEntries was not done. 
@@ -691,13 +729,28 @@ void TTreeCloner::WriteBaskets() TBranch *from = (TBranch*)fFromBranches.UncheckedAt( fBasketBranchNum[ fBasketIndex[j] ] ); TBranch *to = (TBranch*)fToBranches.UncheckedAt( fBasketBranchNum[ fBasketIndex[j] ] ); - TFile *tofile = to->GetFile(0); + TFile *tofile = fToFile; TFile *fromfile = from->GetFile(0); Int_t index = fBasketNum[ fBasketIndex[j] ]; Long64_t pos = from->GetBasketSeek(index); - if (pos!=0) { + if (IsInPlace()) { + if (pos != 0) { + if (fFileCache && j >= notCached) { + notCached = FillCache(notCached); + } + if (from->GetBasketBytes()[index] == 0) { + from->GetBasketBytes()[index] = basket->ReadBasketBytes(pos, fromfile); + } + Int_t len = from->GetBasketBytes()[index]; + + basket->LoadBasketBuffers(pos,len,fromfile,fFromTree); + basket->IncrementPidOffset(fPidOffset); + basket->CopyTo(tofile); + to->fBasketSeek[index] = basket->GetSeekKey(); + } + } else if (pos!=0) { if (fFileCache && j >= notCached) { notCached = FillCache(notCached); } -- GitLab