// dataset_property_db.cxx // Return a datset property. #include #include #include #include #include "dataset_util/MemoryUsage.h" #include "dataset_util/FileStatus.h" #include "dataset_util/Environment.h" #include "dataset_util/DtdRegistry.h" #include "dataset_file/FileManagementSystem.h" #include "dataset_catalog/DatasetSelectionCatalog.h" #include "dataset_base/Location.h" #include "dataset_base/DatasetCreator.h" #include "dataset_xml/XmlParser.h" using std::string; using std::cout; using std::cerr; using std::ostream; using std::ofstream; using std::endl; using dset::Url; using dset::FileManagementSystem; using dset::DatasetSelectionCatalog; using dset::Location; using dset::Dataset; using dset::DatasetCreator; using dset::DatasetRepository; // Register test datasets for testing. //#include "dataset_base/Dataset_t.h" #include "dataset_base/DatasetCreator_t.h" typedef Location::UrlList UrlList; typedef string Name; //********************************************************************** // Helpers. //********************************************************************** // Return XML parser. XmlParser& parser() { static XmlParser pr; return pr; } // String representation of bool. string sbool(bool flag) { if ( flag ) { return "1"; } return "0"; } // memory report. void memrep() { string line = "--------------------------"; cerr << line << endl; cerr << MemoryUsage() << endl; cerr << Dataset::memory_report() << endl; cerr << line << endl; } //********************************************************************** int main(int argc, char* argv[]) { bool help = false; bool error = 0; string conn = ""; string property = ""; string sid = ""; string xfile = ""; string dsname = ""; string outfile = ""; int iarg = 0; string exename = argv[iarg++]; bool showmem = false; while ( iarg < argc ) { // Fetch flag and its argument. string flag = argv[iarg++]; string arg = ""; if ( iarg < argc && flag != "-m" ) { bool has_arg = false; arg = argv[iarg]; if ( arg.size() && arg[0] == '-' ) { arg = ""; } else { has_arg = true; ++iarg; } } // DB directory. if ( flag == "-c" ) { if ( arg == "" ) { conn = ""; } conn = arg; // Input file } else if ( flag == "-f" ) { if ( arg == "" ) { error = 13; break; } xfile = arg; sid = ""; dsname = ""; // Dataset ID. } else if ( flag == "-i" ) { if ( arg == "" ) { error = 14; break; } xfile = ""; sid = arg; dsname = ""; // Dataset name. } else if ( flag == "-n" ) { if ( arg == "" ) { error = 15; break; } xfile = ""; sid = ""; dsname = arg; // DB type. } else if ( flag == "-h" ) { help = true; break; // Output file } else if ( flag == "-o" ) { if ( arg == "" ) { error = 16; break; } outfile = arg; // Sow memory usage } else if ( flag == "-m" ) { showmem = true; // Property } else if ( flag.size() && flag[0]!='-' ) { property = flag; break; // error } else { error = 19; break; } } if ( error || help ) { cout << "Usage: " << exename << " [-d DB_DIR] [-o OUTFILE] [-i iii.jjj] [PROPERTY]" << endl; cout << " -h - help: this message" << endl; cout << " -c CONN - datset repository connection string [\"\"]" << endl; cout << " -f DSFILE - input dataset XML file" << endl; cout << " -i ID - ID of the dataset in format iii-jjj" << endl; cout << " -n DSNAME - dataset name in DSC" << endl; cout << " (One of -f and -i should be present)" << endl; cout << " -o OUTFILE - output file name" << endl; cout << " std out if no name is given" << endl; cout << " -m to report on memory usage" << endl; cout << " PROPERTY - property to display [print]" << endl; cout << " All is_XXX methods return 1 for true and 0 for false" << endl; cout << " exists - file exists, id in DDB or name in DSC" << endl; cout << " type - full type of the dataset" << endl; cout << " is_valid - is a valid dataset" << endl; cout << " is_empty - has no data" << endl; cout << " is_locked - is locked (must always be true)" << endl; cout << " is_virtual - has no location" << endl; cout << " is_compound - has sub-datasets" << endl; cout << " is_event - is an event dataset (has events)" << endl; cout << " id - iii-jjj" << endl; cout << " logical_file_names - list of LFN's" << endl; cout << " interface_logical_file_names - list of interface LFN's" << endl; cout << " file_count - # logical files" << endl; cout << " interface_file_count - # interface logical files" << endl; cout << " extract_files - extract replicas for all logical files" << endl; cout << " sub_datasets - list of sub-dataset ID's" << endl; cout << " sub_dataset_count - # sub-dataset ID's" << endl; cout << " event_count - # events" << endl; cout << " has_event_ids - directly holds event ID's" << endl; cout << " event_ids - list of event ID's" << endl; cout << " content_ids - content ID's for the first block" << endl; cout << " content_label - name for the first content block" << endl; cout << " print - display the dataset description" << endl; cout << " xml - XML description" << endl; cout << " xmlelement - XML descriptionn XmlElement format" << endl; if ( error ) { cerr << "Error reading command line" << endl; } return error; } // Open output file. ostream* pout = &cout; ofstream fout(outfile.c_str()); if ( fout ) pout = &fout; // Open database. DatasetRepository::set_default_instance(conn); DatasetRepository& rep = DatasetRepository::default_instance(); if ( ! rep.is_valid() ) { cerr << "Unable to access dataset repository at connection\n"; cerr << " " << '"' << conn << '"' << endl; } if ( showmem ) memrep(); // Open the dataset. const Dataset* pdst = 0; if ( xfile.size() ) { FileStatus fstat(xfile); if ( property == "exists" ) { *pout << sbool(fstat.exists()); return 0; } if ( ! fstat.exists() ) { cerr << "File does not exist:" << endl; cerr << " " << xfile << endl; return 30; } const XmlElement* pele = parser().parse(xfile); if ( pele == 0 ) { cerr << "Unable to parse dataset XML file" << endl; return 31; } pdst = DatasetCreator::create(*pele, &rep); delete pele; if ( pdst == 0 ) { int ecode = rep.error(); string msg = rep.error_message(); cerr << "Error retrieving input dataset: " << ecode << endl; cerr << " \"" << msg << "\"" << endl; return 39; } } else { DatasetId did; if ( dsname.size() != 0 ) { did = DatasetSelectionCatalog::default_instance().id(dsname); if ( ! did.is_valid() ) { cerr << "Unable to retrieve ID from DSC" << endl; return 33; } } else { did = DatasetId(sid); if ( ! did.is_valid() ) { cerr << "Unable to parse id" << endl; return 34; } } if ( property == "exists" ) { *pout << sbool(rep.has(did)); return 0; } if ( ! rep.has(did) ) { cerr << "Dataset ID " << did.to_string() << " is not in DDB" << endl; return 36; } pdst = rep.extract(did); if ( pdst == 0 ) { int ecode = rep.error(); string msg = rep.error_message(); cerr << "Error retrieving input dataset: " << ecode << endl; cerr << " \"" << msg << "\"" << endl; return 39; } } assert( pdst != 0 ); if ( showmem ) memrep(); // Aliases. if ( property == "files" ) { property = "physical_file_names"; } // Write property. bool has_endl = false; if ( property == "" ) property = "print"; if ( property == "type" ) { *pout << pdst->fulltype(); } else if ( property == "is_valid" ) { *pout << sbool(pdst->is_valid() ); } else if ( property == "is_empty" ) { *pout << sbool(pdst->is_empty() ); } else if ( property == "is_locked" ) { *pout << sbool(pdst->is_locked() ); } else if ( property == "is_virtual" ) { *pout << sbool(pdst->is_virtual() ); } else if ( property == "is_event" ) { *pout << sbool(pdst->content().has_event_content() ); } else if ( property == "is_compound" ) { *pout << sbool(pdst->constituents().size()); } else if ( property == "id" ) { *pout << pdst->id().to_string(); } else if ( property == "print" ) { *pout << *pdst; } else if ( property == "logical_file_names" ) { has_endl = true; const Location& loc = pdst->location(); const UrlList& urls = loc.files(); for ( UrlList::const_iterator iurl=urls.begin(); iurl!=urls.end(); ++iurl ) { Url url = *iurl; *pout << url << endl; } } else if ( property == "interface_logical_file_names" ) { has_endl = true; const Location& loc = pdst->interface_location(); const UrlList& urls = loc.files(); for ( UrlList::const_iterator iurl=urls.begin(); iurl!=urls.end(); ++iurl ) { Url url(*iurl); *pout << url << endl; } } else if ( property == "file_count" ) { const Location& loc = pdst->location(); *pout << loc.files().size(); } else if ( property == "interface_file_count" ) { const Location& loc = pdst->interface_location(); *pout << loc.files().size(); // Extract replicas of all the logical files and create the following // four files: // all_lfns - list of all LFN's // interface_lfns - LFN's in the interface location // all_pfns - list of replicas for the LFN's // interface_pfns - replicas for the interface LFN's // It is assumed that all interface LFN's are also in the global list. } else if ( property == "extract_files" ) { has_endl = true; unlink("all_lfns"); unlink("all_pfns"); unlink("interface_lfns"); unlink("interface_pfns"); Text tall_lfns("all_lfns"); Text tint_lfns("interface_lfns"); Text tall_pfns("all_pfns"); Text tint_pfns("interface_pfns"); const Location& loc = pdst->location(); const Location& iloc = pdst->interface_location(); // List of all logical files. const UrlList& allfiles = loc.files(); const UrlList& intfiles = iloc.files(); FileManagementSystem& fms = FileManagementSystem::default_instance(); // Extract a replicas for all files. for ( UrlList::const_iterator iurl=allfiles.begin(); iurl!=allfiles.end(); ++iurl ) { Url lurl = *iurl; Url purl = fms.get(lurl); if ( ! purl.is_valid() ) { cerr << "Unable to fetch physical file for" << endl; cerr << lurl << endl; return 52; } tall_lfns.append(lurl.to_string()); tall_pfns.append(purl.to_string()); } // Extract a replicas for interface files. for ( UrlList::const_iterator iurl=intfiles.begin(); iurl!=intfiles.end(); ++iurl ) { Url lurl = *iurl; Url purl = fms.get(lurl); if ( ! purl.is_valid() ) { cerr << "Unable to fetch physical file for" << endl; cerr << lurl << endl; return 52; } tint_lfns.append(lurl.to_string()); tint_pfns.append(purl.to_string()); } tall_lfns.write(); tall_pfns.write(); tint_lfns.write(); tint_pfns.write(); *pout << "Logical file replicas extracted. See the following:" << endl; *pout << " all_lfns - list of all LFN's" << endl; *pout << " interface_lfns - LFN's in the interface location" << endl; *pout << " all_pfns - list of replicas for the LFN's" << endl; *pout << " interface_pfns - replicas for the interface LFN's" << endl; } else if ( property == "sub_datasets" ) { has_endl = true; if ( pdst->constituents().size() ) { const DatasetIdList& dids = pdst->constituent_ids(); for ( DatasetIdList::const_iterator idid=dids.begin(); idid!=dids.end(); ++ idid ) { *pout << idid->to_string() << endl; } } } else if ( property == "sub_dataset_count" ) { if ( pdst->constituents().size() ) { const DatasetIdList& dids = pdst->constituent_ids(); *pout << dids.size(); } else { *pout << 0; } } else if ( property == "event_count" ) { if ( pdst->content().has_event_content() ) { *pout << pdst->content().front().event_count(); } else { *pout << "0"; } } else if ( property == "has_event_ids" ) { *pout << sbool(pdst->content().has_event_content()); } else if ( property == "event_ids" ) { if ( pdst->content().has_event_content() ) { *pout << pdst->content().front().event_ids(); } } else if ( property == "content_ids" ) { *pout << pdst->content().front().content_ids(); } else if ( property == "content_label" ) { *pout << pdst->content().front().name(); } else if ( property == "xmlelement" ) { *pout << *pdst->xml(); } else if ( property == "xml" ) { *pout << pdst->xml()->to_xml_text(); } else { cerr << "Invalid property: " << property << endl; return 59; } if ( ! has_endl ) *pout << endl; if ( showmem ) memrep(); return 0; }