// RootHistogramDataset.cxx #include "dataset_root/RootHistogramDataset.h" #include #include #include #include #include #include #include #include #include "dataset_util/Path.h" #include "dataset_util/ssystem.h" #include "dataset_util/copy_file.h" #include "dataset_util/getcwd.h" #include "dataset_util/Environment.h" #include "dataset_util/XmlElement.h" #include "dataset_util/DtdRegistry.h" #include "dataset_util/FileName.h" #include "dataset_util/FileStatus.h" #include "dataset_file/FileCatalog.h" #include "dataset_base/DatasetCreator.h" #include "dataset_base/SingleFileDataset.h" #include "TKey.h" #include "TH1.h" #include "TTree.h" #include "TChain.h" using std::string; using std::vector; using std::set; using std::ostream; using std::ostringstream; using std::cout; using std::cerr; using std::endl; using std::setw; using std::setfill; using dset::Url; using dset::Location; using dset::Dataset; using dset::DatasetCreator; using dset::GenericDataset; using dset::SingleFileDataset; using dset::RootHistogramDataset; typedef SingleFileDataset::Name Name; //********************************************************************** // Local data. //********************************************************************** namespace { // For debugging. int verbose = 0; //********************************************************************** // Helper functions. //********************************************************************** // Single-file dataset creator. const Dataset* sfcreate(Url file, Name content, DatasetId pid) { RootHistogramDataset* pdst = new RootHistogramDataset(file, content, 0, pid, true); return pdst; } // Register this creator. int RootHistogramDataset_sfreg = SingleFileDataset:: register_creator("RootHistogramDataset", sfcreate); // Generate a new file name. string make_file_name(string dir, string sid) { if ( dir.size() == 0 ) return ""; if ( ! FileStatus(dir).is_directory() ) return ""; if ( ! FileStatus(dir).is_writeable() ) return ""; // First try to use job ID plus version. // We should also include content label. Text tjid("jid"); if ( tjid.size() == 1 ) { int version = 0; string fvers = "RootHistogramDataset_version"; ifstream version_in(fvers.c_str()); version_in >> version; unlink(fvers.c_str()); ofstream version_out(fvers.c_str()); ++version; version_out << version; ostringstream ssname; ssname << dir << "/" << "dialjob_" << tjid.line(0) << "_" << setw(4) << setfill('0') << version << ".root"; return ssname.str(); // If no job ID, use dataset ID. } else { return dir + "/dataset_" + sid + ".root"; } return ""; } // Return content ID's void get_cids(TIter& next, ContentIdList& cids, string path) { string prefix = "RootHistogramDataset::get_cids"; // Loop over objects in the directory. while ( TObject* pokey = next() ) { TKey* pkey = dynamic_cast(pokey); if ( pkey == 0 ) { cerr << prefix << "Object is not a key" << endl; cout << " Object: "; pokey->Print(); continue; } assert( pkey != 0 ); TObject* pobj = pkey->ReadObj(); if ( pobj == 0 ) { cerr << prefix << "Unable to read object for key" << endl; cout << " Key: "; pkey->Print(); continue; } string classname = pobj->ClassName(); string name = pobj->GetName(); string fullname = path; if ( fullname.size() ) fullname += "/"; fullname += name; //cout << "**** " << classname << ": " << fullname << endl; TDirectory* pdir = dynamic_cast(pobj); TH1* phist = dynamic_cast(pobj); TTree* ptree = dynamic_cast(pobj); // If object is a directory, loop over its objects. if ( pdir != 0 ) { TIter next2 = pdir->GetListOfKeys(); get_cids(next2, cids, fullname); // If the object is a histogram or tree, add it to the content list. } else if ( phist != 0 || ptree != 0 ) { ContentId cid(classname, fullname); if ( ! cid.is_valid() ) { cerr << "RootHistogramDataset: Content ID type " << classname << " is invalid" << endl; } //cout << " " << cid << endl; cids.insert(cid); } else { cerr << "RootHostogramDataset::get_cids: " << "Uknown type: " << classname << endl; } } } //********************************************************************** // Register promoter. int RootHistogramDataset_prom_stat = GenericDataset:: register_promoter("RootHistogramDataset", dset::promote); //********************************************************************** // Register the content type. ContentId CID201 = ContentId::register_id(201, "TH1"); ContentId CID202 = ContentId::register_id(202, "TH1C"); ContentId CID203 = ContentId::register_id(203, "TH1D"); ContentId CID204 = ContentId::register_id(204, "TH1F"); ContentId CID205 = ContentId::register_id(205, "TH1K"); ContentId CID206 = ContentId::register_id(206, "TH1S"); ContentId CID207 = ContentId::register_id(207, "TProfile"); ContentId CID208 = ContentId::register_id(208, "TH2"); ContentId CID209 = ContentId::register_id(209, "TH2C"); ContentId CID210 = ContentId::register_id(210, "TH2D"); ContentId CID211 = ContentId::register_id(211, "TH2F"); ContentId CID212 = ContentId::register_id(212, "TH2I"); ContentId CID213 = ContentId::register_id(213, "TH2S"); ContentId CID214 = ContentId::register_id(214, "TProfile2D"); ContentId CID215 = ContentId::register_id(215, "TH3"); ContentId CID216 = ContentId::register_id(216, "TH3C"); ContentId CID217 = ContentId::register_id(217, "TH3D"); ContentId CID218 = ContentId::register_id(218, "TH3F"); ContentId CID219 = ContentId::register_id(219, "TH3I"); ContentId CID220 = ContentId::register_id(220, "TH3S"); ContentId CID221 = ContentId::register_id(221, "TTree"); //********************************************************************** // From hadd.cxx -- ROOT utility. // void MergeRootfile( TDirectory* target, TList* sourcelist ) { Bool_t noTrees = false; //cout << "Target path: " << target->GetPath() << endl; TString path( (char*)strstr( target->GetPath(), ":" ) ); path.Remove( 0, 2 ); TFile *first_source = (TFile*)sourcelist->First(); first_source->cd( path ); TDirectory *current_sourcedir = gDirectory; // loop over all keys in this directory TChain *globChain = 0; TIter nextkey( current_sourcedir->GetListOfKeys() ); TKey *key, *oldkey=0; //gain time, do not add the objects in the list in memory TH1::AddDirectory(kFALSE); while ( (key = (TKey*)nextkey())) { //keep only the highest cycle number for each key if (oldkey && !strcmp(oldkey->GetName(),key->GetName())) continue; // read object from first source file first_source->cd( path ); TObject *obj = key->ReadObj(); if ( obj->IsA()->InheritsFrom( TH1::Class() ) ) { // descendant of TH1 -> merge it //cout << "Merging histogram " << obj->GetName() << endl; TH1 *h1 = (TH1*)obj; TList listH; // loop over all source files and add the content of the // correspondant histogram to the one pointed to by "h1" TFile *nextsource = (TFile*)sourcelist->After( first_source ); while ( nextsource ) { // make sure we are at the correct directory level by cd'ing to path nextsource->cd( path ); TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(h1->GetName()); if (key2) { listH.Add( key2->ReadObj() ); h1->Merge(&listH); listH.Clear(); } nextsource = (TFile*)sourcelist->After( nextsource ); } } else if ( obj->IsA()->InheritsFrom( "TTree" ) ) { // loop over all source files create a chain of Trees "globChain" if (!noTrees) { TString obj_name; if (path.Length()) { obj_name = path + "/" + obj->GetName(); } else { obj_name = obj->GetName(); } globChain = new TChain(obj_name); globChain->Add(first_source->GetName()); TFile *nextsource = (TFile*)sourcelist->After( first_source ); // const char* file_name = nextsource->GetName(); //cout << "file name " << file_name << endl; while ( nextsource ) { globChain->Add(nextsource->GetName()); nextsource = (TFile*)sourcelist->After( nextsource ); } } } else if ( obj->IsA()->InheritsFrom( "TDirectory" ) ) { // it's a subdirectory //cout << "Found subdirectory " << obj->GetName() << endl; // create a new subdir of same name and title in the target file target->cd(); TDirectory *newdir = target->mkdir( obj->GetName(), obj->GetTitle() ); // newdir is now the starting point of another round of merging // newdir still knows its depth within the target file via // GetPath(), so we can still figure out where we are in the recursion MergeRootfile( newdir, sourcelist ); } else { // object is of no type that we know or can handle //cout << "Unknown object type, name: " // << obj->GetName() << " title: " << obj->GetTitle() << endl; } // now write the merged histogram (which is "in" obj) to the target file // note that this will just store obj in the current directory level, // which is not persistent until the complete directory itself is stored // by "target->Write()" below if ( obj ) { target->cd(); //!!if the object is a tree, it is stored in globChain... if(obj->IsA()->InheritsFrom( "TTree" )) { // DLA mar05 - The Merge line leaks one file descriptor. // Find the next file descriptor and close the file after // calling the line. This is an ugly, dangerous workaround. int fdtmp = open("/dev/null", O_RDONLY); assert( fdtmp > 0 ); close(fdtmp); if (!noTrees) globChain->Merge(target->GetFile(),0,"keep"); close(fdtmp); } else { obj->Write( key->GetName() ); } } oldkey = key; } // while ( ( TKey *key = (TKey*)nextkey() ) ) // save modifications to target file target->SaveSelf(kTRUE); } //********************************************************************** // Check for existence of a file and wait if it is not found. // Probably should make this a method of FileStatus. void wait_exist(string fname, string prefix) { FileStatus fstat(fname); if ( ! fstat.exists() ) { cerr << prefix << ": Unable to find root file:" << endl; cerr << " " << fname << endl; cerr << " " << "Wait and try a few more times:" << endl; int waittime = 1; const char* timefmt = "%c"; char chnowtime[128]; time_t nowtime = 0; for ( int itry=1; itry<8; ++itry ) { waittime *= 2; sleep(waittime); fstat.update(); nowtime = time(0); strftime(chnowtime, 128, timefmt, localtime(&nowtime)); cerr << " " << chnowtime << ": "; if ( fstat.exists() ) { cerr << "File found" << endl; break; } else { cerr << "Still not found" << endl; } } } } //********************************************************************** } // end unnamed namespace //********************************************************************** // Static member functions. //********************************************************************** // Cast. const RootHistogramDataset* RootHistogramDataset::cast(const Dataset* pdst) { if ( pdst == 0 ) return 0; return dynamic_cast(pdst); } //********************************************************************** // Private member functions. //********************************************************************** // Filename. // Stage the file for reading (FMS get) or writing (FMS copy) and // return the corrsponding URL. // If a readable file is requested and the file has is staged for // reading or writing, then the current file is returned. // The same if a writeable file is requested and the file is staged for // writing. // If a writeable file is requested when the file is staged for reading, // a new file is staged. Url RootHistogramDataset::internal_file(string dir, bool write) const { string eprefix = "RootHistogramDataset::internal_file: "; if ( ! is_valid() ) { cerr << eprefix << "Invalid dataset."; return Url(); } if ( is_locked() && write ) { cerr << eprefix << "Writeable file may not be returned for locked dataset"; return Url(); } // If the current file is staged readonly and write is requested, // release the replica. if ( m_stage_read && write ) { internal_release_staged(); } // If staged file exists, return its URL. if ( m_file.is_valid() ) { return m_file; } assert( ! m_stage_read ); assert( ! m_stage_write ); // If not, fetch the SE file. Url file = internal_se_file(); if ( ! file.is_valid() ) { cerr << eprefix << "Unable to access file!" << endl; return Url(); } // Stage. if ( write ) { Url durl("file:" + dir); m_file = m_pfms->copy(file, durl); if ( ! m_file.is_valid() ) { cerr << eprefix << "Unable to extract writeable file from FMS" << endl; cerr << " " << file << endl; return Url(); } m_stage_write = true; } else { m_file = m_pfms->get(file); if ( ! m_file.is_valid() ) { cerr << eprefix << "Unable to extract readonly file from FMS" << endl; cerr << " " << file << endl; return Url(); } m_stage_read = true; } return m_file; } //********************************************************************** // Return reference to the current SE file if existing. Url RootHistogramDataset::internal_se_file() const { if ( ! is_valid() ) return Url(); if ( location().files().size() == 0 ) return Url(); assert( location().files().size() == 1 ); return mutable_location().files().front(); } //********************************************************************** // Return a reference to a TFile describing the current replica. TFile& RootHistogramDataset::internal_root_file(string dir, bool write) const { string eprefix = "RootHistogramDataset::internal_root_file: "; static TFile bad_hfile; if ( ! is_valid() ) return bad_hfile; // If write is requested and current file is read, close it. if ( write && m_stage_read ) { internal_close_root_file(); } // Open root file if it is not already open. if ( m_phfile == 0 ) { Url purl = internal_file(dir, write); if ( ! purl.is_valid() ) { cerr << eprefix << "Unable to fetch internal file" << endl; return bad_hfile; } string ropt = "READ"; if ( m_stage_write ) { ropt = "UPDATE"; } else { assert( m_stage_read ); } // TFile::Open can handle URL's: file, dcap, rfio. // But crashes on empty files... if ( purl.prefix() == "file" ) { if ( FileStatus(purl.fullpath()).size() == 0 ) { cerr << eprefix << "Empty file" << endl; cerr << " " << purl << endl; return bad_hfile; } } TFile* prfile = TFile::Open(purl.to_string().c_str(), ropt.c_str()); if ( prfile==0 || ! prfile->IsOpen() ) { cerr << eprefix << "Unable to open root file:" << endl; cerr << " " << purl << endl; // Following line was causing crash. //delete prfile; return bad_hfile; } m_phfile = prfile; } return *m_phfile; } //********************************************************************** // Close the ROOT file. void RootHistogramDataset::internal_close_root_file() const { if ( m_phfile == 0 ) return; m_phfile->Close(); delete m_phfile; m_phfile = 0; } //********************************************************************** // Release the staged file, if existing. void RootHistogramDataset::internal_release_staged() const { internal_close_root_file(); if ( m_file.is_valid() ) { m_pfms->release(m_file); } m_file = Url(); m_stage_read = false; m_stage_write = false; } //********************************************************************** // Member functions. //********************************************************************** // Constructor from a URL. RootHistogramDataset:: RootHistogramDataset(Url file, string content_label, FileManagementSystem* pfms, DatasetId pid, bool lockit) : m_phfile(0), m_stage_read(false), m_stage_write(false), m_pfms(pfms) { set_fulltype("RootHistogramDataset"); set_id(); if ( ! file.is_valid() ) { set_error(11); return; } // Set the FMS. if ( m_pfms == 0 ) { m_pfms = &FileManagementSystem::default_instance(); } // Set the location. // If lockit it true, the file is assumed to be in the SE. // If not, the file is assumed to be readable by root and will be // copied into the SE when the dataset is locked. if ( lockit ) { Location loc; loc.files().push_back(file); set_location(loc); assert( location().files().size() == 1 ); } else { // Quick hack to give file a unique name. assert( file.prefix() == "file" ); string oldname = file.fullpath(); string dir = FileName(oldname).directory_name(); assert( dir.size() ); string newname = make_file_name(dir, id().to_string()); assert( newname.size() ); link(oldname.c_str(), newname.c_str()); unlink(oldname.c_str()); m_file = Url("file:" + newname); m_stage_write = true; } // Set content. // This requires opening the file. ContentIdList cids; TIter next = internal_root_file("", false).GetListOfKeys(); get_cids(next, cids, ""); Content con("RootHistogramDataset", content_label, cids); set_content(con); set_evstate_none(); // Set parent. set_parent_id(pid); // Lock. if ( lockit ) GenericDataset::lock(); } //********************************************************************** // Generic dataset constructor. RootHistogramDataset:: RootHistogramDataset(const GenericDataset& gen) : GenericDataset(gen), m_phfile(0), m_stage_read(false), m_stage_write(false), m_pfms(&FileManagementSystem::default_instance()) { if ( fulltype() != "RootHistogramDataset" ) { cerr << "RootHistogramDataset::ctor: " << " Generic dataset has the wrong full type." << endl; set_error(15); } } //********************************************************************** // Copy constructor. RootHistogramDataset:: RootHistogramDataset(const RootHistogramDataset& rhs) : GenericDataset(rhs), m_phfile(0), m_stage_read(false), m_stage_write(false), m_pfms(rhs.m_pfms) { } //********************************************************************** // Destructor. RootHistogramDataset::~RootHistogramDataset() { string delfile; if ( m_stage_write ) { delfile = m_file.fullpath(); } internal_release_staged(); if ( delfile.size() ) { unlink(delfile.c_str()); } } //********************************************************************** // SE file. Url RootHistogramDataset::se_file() const { return internal_se_file(); } //********************************************************************** // Staged file. Url RootHistogramDataset::staged_file() const { return internal_file("", false); } //********************************************************************** // ROOT file. TFile& RootHistogramDataset::root_file() const { return internal_root_file("", false); } //********************************************************************** // Clone. RootHistogramDataset* RootHistogramDataset::clone(string dir) const { if ( ! is_valid() ) return 0; if ( ! is_locked() ) return 0; string eprefix = "RootHistogramDataset::clone: "; // Extract the content label to copy to the clone. string clab = content().front().name(); // Create writeable replica for the clone. Url purl; if ( m_stage_write ) { purl = m_file; m_file = Url(); m_stage_write = false; } else { string cdir = dir; if ( dir == "" ) { dir = getcwd(); } Url durl("file:" + dir); assert( durl.is_valid() ); purl = m_pfms->copy(se_file(), durl); if ( ! purl.is_valid() ) { cerr << eprefix << "Unable to extract writeable file from FMS" << endl; cerr << " " << se_file() << endl; return 0; } } assert( purl.is_valid() ); // Construct new dataset from the new file. RootHistogramDataset* pdst = new RootHistogramDataset(purl, clab, m_pfms, id(), false); return pdst; } //********************************************************************** // Lock. int RootHistogramDataset::lock() { string prefix = "RootHistogramDataset::lock: "; if ( ! is_valid() ) return 71; if ( is_locked() ) return 72; // Close the staged file. internal_close_root_file(); // Put the file in the SE and update the dataset location. assert( m_file.is_valid() ); Url lurl = m_pfms->put(m_file); if ( ! lurl.is_valid() ) { cerr << prefix << "Unable to put file in FMS" << endl; cerr << m_pfms->long_error_message() << endl; return 73; } Location loc; loc.files().push_back(lurl); set_location(loc); assert( location().files().size() == 1 ); assert( location().files().front() == lurl ); // Release the staged file. // If writeable, keep for use in cloning. if ( m_stage_read ) { internal_release_staged(); } // Return. return GenericDataset::lock(); } //********************************************************************** // Merge. int RootHistogramDataset::merge(const Dataset& dst, string dir) { string prefix = "RootHistogramDataset::merge "; string line = "--------------------------------------------"; // Cast argument. if ( ! dst.is() ) return 1; const RootHistogramDataset& rdst = dst.cast(); // Check lock status. if ( is_locked() ) { cerr << prefix << "Existing dataset is locked" << endl; return 2; } // Check argument lock status. if ( ! dst.is_locked() ) { cerr << prefix << "New dataset is not locked" << endl; return 3; } // Check contents. ContentIdList cids1 = content().front().content_ids(); ContentIdList cids2 = rdst.content().front().content_ids(); if ( cids1 != cids2 ) { cerr << prefix << "Content lists differ" << endl; cerr << line << endl; cerr << " Existing:" << endl; cerr << cids1 << endl; cerr << " New" << endl; cerr << line << endl; return 4; } if ( cids1.size() == 0 ) { cerr << prefix << "Content lists are empty" << endl; return 0; } // Fetch files and list of keys. TFile& rfile1 = internal_root_file(dir, true); if ( ! rfile1.IsOpen() ) { cerr << prefix << "Unable to open existing file" << endl; return 5; } TFile& rfile2 = rdst.root_file(); if ( ! rfile2.IsOpen() ) { cerr << prefix << "Unable to open new file" << endl; return 6; } bool use_hadd = Environment::current().value("DIAL_MERGE_WITH_HADD").size(); int err = 0; if ( ! use_hadd ) { // Loop over content. set knowndirs; knowndirs.insert(""); for ( ContentIdList::const_iterator icid=cids1.begin(); icid!=cids1.end(); ++icid ) { string fullname = icid->key(); string path; string name = fullname; string::size_type ipos = name.rfind("/"); if ( ipos != string::npos ) { path = name.substr(0, ipos); name = name.substr(ipos+1); } // Fetch 1st histo. rfile1.cd(path.c_str()); TKey* pkey1 = gDirectory->GetKey(name.c_str()); if ( pkey1 == 0 ) { cerr << prefix << "Unable to find key in existing file" << endl; cerr << " File: " << rfile1.GetName() << endl; cerr << " Dir: " << path << endl; cerr << " Key: " << name << endl; err=15; break; } TObject* pobj1 = pkey1->ReadObj(); TH1* phist1 = dynamic_cast(pobj1); TTree* ptree1 = dynamic_cast(pobj1); // Fetch 2nd histo. rfile2.cd(path.c_str()); TKey* pkey2 = gDirectory->GetKey(name.c_str()); if ( pkey2 == 0 ) { cerr << prefix << "Unable to find key in new file" << endl; cerr << " File: " << rfile2.GetName() << endl; cerr << " Dir: " << path << endl; cerr << " Key: " << name << endl; err = 12; break; } TObject* pobj2 = pkey2->ReadObj(); // Merge. if ( phist1 != 0 ) { TH1* phist2 = dynamic_cast(pobj2); if ( phist2 == 0 ) { cerr << prefix << "Histogram in existing file but not in new file" << endl; cerr << " Existing: " << rfile1.GetName() << endl; cerr << " New: " << rfile2.GetName() << endl; cerr << " Dir: " << path << endl; cerr << " Key: " << name << endl; err = 13; break; } rfile1.cd(path.c_str()); phist1->Add(phist2); phist1->Write(name.c_str(), TObject::kOverwrite); } else if ( ptree1 != 0 ) { TTree* ptree2 = dynamic_cast(pobj2); if ( ptree2 == 0 ) { cerr << prefix << "Tree in existing file but not in new file" << endl; cerr << " Existing: " << rfile1.GetName() << endl; cerr << " New: " << rfile2.GetName() << endl; cerr << " Dir: " << path << endl; cerr << " Key: " << name << endl; err = 13; break; } TList tl; tl.Add(ptree2); rfile1.cd(path.c_str()); assert(ptree1->GetDirectory()->GetFile()==gFile); ptree1->Merge(&tl); tl.Clear(); ptree1->Write(name.c_str(), TObject::kOverwrite); } else { cerr << prefix << "RootHistogramDataset: unknown type"; cerr << endl; cerr << " File: " << rfile1.GetName() << endl; cerr << " Content ID: " << *icid << endl; err = 16; break; } delete pobj1; delete pobj2; //rfile1.cd(path.c_str()); //ostringstream sobjname; //sobjname << name << ";" << pkey1->GetCycle(); //string objname = sobjname.str(); //cout << objname << endl; //cout << "Purging..." << endl; //gDirectory->ls(); // Delete the old object. //gDirectory->Delete(objname.c_str()); // Delete the key for that object. //gDirectory->Purge(); //gDirectory->ls(); } if ( err == 0 ) { // Save modifications. //rfile1.SaveSelf(true); //rfile1.Write("0", TObject::kOverwrite); } } else { cerr << prefix << "Merging with hadd is no longer supported" << endl; cerr << " Please unset DIAL_MERGE_WITH_HADD and start again" << endl; assert(false); } rdst.internal_release_staged(); return err; } //**********************************************************************