// SimpleEventDatasetSplitter.cxx #include "dataset_split/SimpleEventDatasetSplitter.h" #include #include "dataset_util/XmlElement.h" #include "dataset_util/DtdRegistry.h" #include "dataset_id/EventIdList.h" #include "dataset_id/ContentIdList.h" #include "dataset_catalog/DatasetReplicaCatalog.h" #include "dataset_base/SimpleCompoundDataset.h" #include "dataset_base/EventMergeDataset.h" #include "dataset_base/DatasetRepository.h" using std::string; using std::ostream; using std::cerr; using std::endl; using std::auto_ptr; using dset::Dataset; using dset::SimpleCompoundDataset; using dset::EventMergeDataset; using dset::DatasetList; using dset::DatasetReplicaCatalog; using dset::DatasetSplitter; using dset::SimpleEventDatasetSplitter; using dset::DatasetRepository; //********************************************************************** // Local definitions. //********************************************************************** namespace { // Creator. const DatasetSplitter* create(const XmlElement& ele) { string prefix = "SimpleEventDatasetSplitter::create "; assert( ele.name() == SimpleEventDatasetSplitter::xml_name() ); if ( ele.name() != SimpleEventDatasetSplitter::xml_name() ) return 0; string xver = ele.attribute("xml_version"); if ( xver != "1.30" && xver != "" ) { cerr << prefix << "Unexpected XML version: " << xver << endl; return 0; } SimpleEventDatasetSplitter* psp = new SimpleEventDatasetSplitter; if ( ele.has_attribute("dataset_depth") ) { int val = ele.attribute_as_int("dataset_depth"); int sstat = psp->set("dataset_depth", val); assert(sstat == 0 ); } if ( ele.has_attribute("min_dataset") ) { int val = ele.attribute_as_int("min_dataset"); int sstat = psp->set("min_dataset", val); assert(sstat == 0 ); } if ( ele.has_attribute("min_event") ) { int val = ele.attribute_as_int("min_event"); int sstat = psp->set("min_event", val); assert(sstat == 0 ); } if ( ele.has_attribute("max_event") ) { int val = ele.attribute_as_int("max_event"); int sstat = 
psp->set("max_event", val); assert(sstat == 0 ); } return psp; } // Register creator. int SimpleEventDatasetSplitter_xml_stat = DatasetSplitter::register_creator( SimpleEventDatasetSplitter::xml_name(), create); // Register the DTD. DtdRegistry::Status SimpleEventDatasetSplitter_dtd_stat = DtdRegistry::register_dtd("dataset"); } // end unnamed namespace. //********************************************************************** // Static member functions //********************************************************************** // DTD const Text& SimpleEventDatasetSplitter::dtd() { static Text txt; if ( txt.size() == 0 ) { txt.append(""); txt.append(""); } return txt; } //********************************************************************** // Implementation class. //********************************************************************** class SimpleEventDatasetSplitter::Imp { public: // data int dataset_depth; int min_dataset; int min_event; int max_event; Imp(); }; SimpleEventDatasetSplitter::Imp::Imp() : dataset_depth(1), min_dataset(0), min_event(0), max_event(0) { } //********************************************************************** // Member functions //********************************************************************** // Constructor. SimpleEventDatasetSplitter::SimpleEventDatasetSplitter() : m_pimp(new Imp) { } //********************************************************************** // Copy constructor. SimpleEventDatasetSplitter:: SimpleEventDatasetSplitter(const SimpleEventDatasetSplitter& rhs) : m_pimp(new Imp(*rhs.m_pimp)) { } //********************************************************************** // Assignment. SimpleEventDatasetSplitter& SimpleEventDatasetSplitter:: operator=(const SimpleEventDatasetSplitter& rhs) { if ( &rhs == this ) return *this; delete m_pimp; m_pimp = new Imp(*rhs.m_pimp); } //********************************************************************** // Destructor. 
// Releases the implementation object.
SimpleEventDatasetSplitter::~SimpleEventDatasetSplitter() {
  delete m_pimp;
}

//**********************************************************************

// Split.
// Splits dstin into subdatasets and appends them to subdsts.
//   1. If dstin is virtual, the first valid, nonvirtual replica found via
//      the default replica catalog and default repository is used instead.
//   2. The dataset is expanded into constituents, for at most dataset_depth
//      passes.
//   3. Consecutive constituents are merged until a group holds at least
//      min_dataset datasets and min_event events (the last group is taken
//      as it is).
//   4. If max_event is nonzero, any group with more events than that is
//      cut into pieces of at most max_event events.
// Returns the number of datasets appended to subdsts, or 0 on any failure.
// NOTE(review): on the failure returns after step 3 has started, datasets
// already allocated and recorded in the local lists are not released;
// their ownership cannot be determined from this file.

DatasetList::size_type SimpleEventDatasetSplitter::
split_and_append(const Dataset& dstin, DatasetList& subdsts) const {
  string prefix = "SimpleEventDatasetSplitter::split_and_append: ";
  DatasetList::size_type count = 0;
  // Input must be valid.
  if ( ! dstin.is_valid() ) {
    return count;
  }
  // Input should have events.
  if ( ! dstin.content().has_event_content() ) {
    cerr << "SimpleEventDatasetSplitter:split_and_append "
         << "Allowing non-event dataset " << endl;
  }
  const Dataset* pdst = &dstin;
  // If the input dataset is virtual, then use the default dataset
  // replica catalog to find a nonvirtual replica.
  if ( pdst->is_virtual() ) {
    DatasetReplicaCatalog& drc = DatasetReplicaCatalog::default_instance();
    if ( ! drc.is_valid() ) {
      return count;
    }
    DatasetIdList reps = drc.replicas(pdst->id());
    // Loop over replicas and select the first one that is valid and
    // concrete.
    pdst = 0;
    for ( DatasetIdList::const_iterator idid=reps.begin();
          idid!=reps.end(); ++idid ) {
      DatasetId did = *idid;
      DatasetRepository& rep = DatasetRepository::default_instance();
      if ( ! rep.is_valid() ) break;
      pdst = rep.extract(did, true);
      if ( pdst != 0 && pdst->is_valid() && ! pdst->is_virtual() ) {
        break;
      }
      pdst = 0;
    }
  }
  if ( pdst == 0 ) {
    return count;
  }
  // Break into constituents.
  // We assume user is not asking to split content.
  DatasetList condsts;
  condsts.push_back(pdst);
  for ( int depth=0; depth<m_pimp->dataset_depth; ++depth ) {
    DatasetList tmpdsts = condsts;
    condsts.clear();
    for ( DatasetList::const_iterator idst=tmpdsts.begin();
          idst!=tmpdsts.end(); ++idst ) {
      pdst = *idst;
      // Use constituent ID list to determine if dataset is compound.
      const DatasetIdList& dids = pdst->constituent_ids();
      // If compound, split it.
      if ( dids.size() ) {
        const DatasetList& indsts = pdst->constituents();
        // It may not be possible to retrieve all the constituents.
        if ( indsts.size() != dids.size() ) {
          cerr << prefix
               << "Unable to find all constituents for compound dataset "
               << pdst->id() << endl;
          return count;
        }
        for ( DatasetList::const_iterator icon=indsts.begin();
              icon!=indsts.end(); ++icon ) {
          condsts.push_back(*icon);
        }
      // Otherwise use the overall dataset.
      } else {
        condsts.push_back(pdst);
      }
    }
    // If the list is unchanged, we have reached the maximum depth.
    if ( condsts.size() == tmpdsts.size() ) {
      break;
    }
  }
  // Merge constituents.
  DatasetList mrgdsts;
  bool have_events = dstin.is_event_dataset();
  DatasetIdList::size_type ndst = 0;
  Dataset* pdst_merged = 0;
  const Dataset* pdst_single = 0;
  for ( DatasetList::const_iterator idst=condsts.begin();
        idst!=condsts.end(); ++idst ) {
    const Dataset* pcon = *idst;
    // If this is first dataset to be merged, record it as the merged
    // dataset. No need to create a merged dataset for a single dataset.
    if ( pdst_single == 0 && pdst_merged == 0 ) {
      pdst_single = pcon;
      ndst = 1;
    // Otherwise, append the new dataset to the current merged dataset.
    } else {
      // If this the second dataset to be merged, then first create the
      // merged dataset from the first dataset.
      if ( pdst_single != 0 ) {
        assert( pdst_merged == 0 );
        if ( have_events ) {
          pdst_merged = new EventMergeDataset;
        } else {
          pdst_merged = new SimpleCompoundDataset;
        }
        int m1stat = pdst_merged->merge(*pdst_single);
        if ( m1stat != 0 ) {
          // Report while pdst_single still points at the dataset; it must
          // not be cleared before it is printed.
          cerr << prefix << "First merge failed with error " << m1stat
               << ":" << endl;
          cerr << *pdst_single << endl;
          // The merged dataset was allocated above and is not recorded
          // anywhere else.
          delete pdst_merged;
          return 0;
        }
        pdst_single = 0;
      }
      assert( pdst_single == 0 );
      assert( pdst_merged != 0 );
      int mstat = pdst_merged->merge(*pcon);
      ++ndst;
      if ( mstat != 0 ) {
        cerr << prefix << "Merge failed with error " << mstat
             << ":" << endl;
        cerr << *pdst_merged << endl;
        cerr << *pcon << endl;
        // Not yet recorded in mrgdsts, so release it here.
        delete pdst_merged;
        return 0;
      }
    }
    // Determine if we should use the current merged dataset.
    const Dataset* pdst_current = pdst_merged;
    if ( pdst_current == 0 ) pdst_current = pdst_single;
    DatasetList::const_iterator idst_next = idst;
    ++idst_next;
    // Yes if this is the end of the list.
    bool use_current = idst_next == condsts.end();
    // Otherwise check if any of the criteria are met.
    if ( ! use_current ) {
      EventIdList::size_type nevt = pdst_current->event_count();
      use_current = ndst >= m_pimp->min_dataset &&
                    nevt >= m_pimp->min_event;
    }
    // If indicated, record the current merged dataset.
    if ( use_current ) {
      if ( pdst_single != 0 ) {
        assert( pdst_merged == 0 );
        mrgdsts.push_back(pdst_single);
        pdst_single = 0;
      } else {
        assert( pdst_merged != 0 );
        pdst_merged->lock();
        mrgdsts.push_back(pdst_merged);
        pdst_merged = 0;
      }
      ++count;
      ndst = 0;
    }
  }
  // Split datasets.
  DatasetList spldsts;
  count = 0;
  EventIdList::size_type maxevt = m_pimp->max_event;
  for ( DatasetList::const_iterator idst=mrgdsts.begin();
        idst!=mrgdsts.end(); ++idst ) {
    const Dataset* pmrg = *idst;
    EventIdList::size_type nevt = pmrg->event_count();
    if ( maxevt!=0 && nevt>maxevt ) {
      const EventIdList& eids = pmrg->event_ids();
      EventIdList::const_iterator ieid = eids.begin();
      while ( ieid != eids.end() ) {
        EventIdList eids2;
        // NOTE(review): the body of this loop was lost in extraction and
        // is reconstructed here: take the next maxevt event IDs (fewer
        // for the last piece). It assumes EventIdList provides
        // push_back -- confirm against version control.
        for ( EventIdList::size_type i=0;
              i<maxevt && ieid!=eids.end(); ++i ) {
          eids2.push_back(*ieid);
          ++ieid;
        }
        Dataset* pdstnew = pmrg->clone();
        if ( pdstnew == 0 ) {
          cerr << prefix << "Split clone failed" << endl;
          cerr << *pmrg << endl;
          return 0;
        }
        int sstat = pdstnew->select_events(eids2);
        if ( sstat != 0 ) {
          cerr << prefix << "Split failed with error " << sstat << endl;
          cerr << *pdstnew << endl;
          cerr << *pmrg << endl;
          delete pdstnew;
          return 0;
        }
        // Check the new piece (not its parent) before locking it.
        if ( ! pdstnew->is_valid() ) {
          cerr << prefix << "Invalid split dataset before lock" << endl;
          cerr << *pdstnew << endl;
          cerr << *pmrg << endl;
          delete pdstnew;
          return 0;
        }
        pdstnew->lock();
        if ( ! pdstnew->is_valid() ) {
          cerr << prefix << "Invalid split dataset after lock" << endl;
          cerr << *pdstnew << endl;
          cerr << *pmrg << endl;
          delete pdstnew;
          return 0;
        }
        spldsts.push_back(pdstnew);
      }
      // The pieces replace the parent.
      // NOTE(review): the parent may be an unmerged constituent or an
      // extracted replica rather than a dataset allocated in this
      // function; confirm that deleting it here is intended.
      if ( pmrg != &dstin ) {
        delete pmrg;
      }
    } else {
      spldsts.push_back(pmrg);
    }
  }
  // Append the final list of subdatasets to the input list.
  for ( DatasetList::const_iterator idst=spldsts.begin();
        idst!=spldsts.end(); ++idst ) {
    subdsts.push_back(*idst);
    ++count;
  }
  assert( count > 0 );
  return count;
}

//**********************************************************************

// Write to XML.
const XmlElement* SimpleEventDatasetSplitter::xml() const { auto_ptr pele(new XmlElement(SimpleEventDatasetSplitter::xml_name())); pele->add_attribute("xml_version", "1.30"); pele->add_attribute_as_int("dataset_depth", m_pimp->dataset_depth); pele->add_attribute_as_int("min_dataset", m_pimp->min_dataset); pele->add_attribute_as_int("min_event", m_pimp->min_event); pele->add_attribute_as_int("max_event", m_pimp->max_event); return pele.release(); } //********************************************************************** // Output stream. ostream& SimpleEventDatasetSplitter::ostr(std::ostream& str) const { str << "Simple event dataset splitter"; str << "\n dataset_depth: " << m_pimp->dataset_depth; str << "\n min_dataset: " << m_pimp->min_dataset; str << "\n min_event: " << m_pimp->min_event; str << "\n max_event: " << m_pimp->max_event; return str; } //********************************************************************** // Setter. int SimpleEventDatasetSplitter::set(Name name, int value) { int rstat = 0; if ( name == "dataset_depth" ) m_pimp->dataset_depth = value; else if ( name == "min_dataset" ) m_pimp->min_dataset = value; else if ( name == "min_event" ) m_pimp->min_event = value; else if ( name == "max_event" ) m_pimp->max_event = value; else rstat = 1; return rstat; } //**********************************************************************