// TextDataset.cxx #include "dataset_base/TextDataset.h" #include #include #include #include "dataset_util/FileStatus.h" #include "dataset_util/WorkingDirectory.h" using std::string; using std::cerr; using std::ostringstream; using std::endl; using dset::GenericDataset; using dset::TextDataset; typedef Text::WordList WordList; //********************************************************************** // Local definitions. //********************************************************************** namespace { // Register creator. int SingleFileDataset_xml_stat = GenericDataset:: register_promoter("TextDataset", dset::promote); // Register ID. // Returns 0 for success. int regid(const char* name) { static int stat = 1; if ( stat != 1 ) return stat; stat = 2; for ( int i=100; i<1000; ++i ) { ContentId cid = ContentId::register_id(i, name); if ( cid.is_valid() ) { stat = 0; break; } } return stat; } // Force registration int CRSRTAT = regid("Text"); } // end unnamed namespace. //********************************************************************** // Member functions. //********************************************************************** // Constructor. TextDataset::TextDataset(string clab, string snames) { assert( regid("Text") == 0 ); set_fulltype("TextDataset"); set_id(); // No location. // But *not* a virtual dataset. set_location_implicit(); // Create a text object for each name. ContentIdList cids; // Lock the directory. WorkingDirectory wd; // Loop over names and construct a text object for each. WordList names = Text::split(snames); for ( WordList::const_iterator inam=names.begin(); inam!=names.end(); ++ inam ) { if ( ! clab.size() ) { set_error(10); break; } Text::Name name = *inam; ContentId cid("Text", name); // Make sure name is not already used. if ( cids.find(cid) != cids.end() ) { set_error(11); break; } // Make sure the filename is relative. if ( ! FileName(name).is_relative() ) { set_error(12); break; } // Create text. Text txt(name, true, true); if ( ! txt.is_valid() ) { set_error(13); break; } assert( txt.name() == name ); // All OK--insert the ID and text object. cids.insert(cid); m_texts.push_back(txt); } Content con("TextDataset", clab, cids); set_content(con); set_evstate_none(); } //********************************************************************** // Promoter. TextDataset::TextDataset(const GenericDataset& gdst) : GenericDataset(gdst) { const XmlElement* pxtra = extra_xml(); if ( pxtra != 0 ) { // Fetch XML for text objexts. const XmlElement::ElementList& xtxts = pxtra->children(Text::xml_name()); for ( XmlElement::ElementList::const_iterator iele=xtxts.begin(); iele!=xtxts.end(); ++iele ) { Text txt(**iele); m_texts.push_back(txt); } } else { cerr << "TextDataset::ctor (promoter): Unable to find extra data" << endl; } // This might be a good place to check that the content ID's and // text names are consistent. } //********************************************************************** // Merge. int TextDataset::merge(const Dataset& dst, std::string) { if ( ! is_valid() ) { return 1; } if ( is_locked() ) { return 2; } if ( ! dst.is_valid() ) { return 3; } if ( ! dst.is_locked() ) { return 4; } // Check type of input dataset. if ( dst.fulltype() != fulltype() ) { return 5; } // Content label must match. string oldclab = content().front().name(); string addclab = dst.content().front().name(); if ( addclab == oldclab ) { return 6; } // Merge content ID lists. const ContentIdList& oldcids = content().front().content_ids(); const ContentIdList& addcids = dst.content().front().content_ids(); ContentIdList newcids; newcids.insert(oldcids.begin(), oldcids.end()); newcids.insert(addcids.begin(), addcids.end()); ContentIdList::size_type sumcids = oldcids.size() + addcids.size(); // Content may not overlap. if ( newcids.size() != sumcids ) { return 7; } Content con("TextDataset", oldclab, newcids); int stat = reset_content(con); if ( stat != 0 ) { return 100 + stat; } return 0; } //********************************************************************** // Create extra XML. const XmlElement* TextDataset::create_extra_xml() const { if ( ! is_valid() ) return 0; // Create top XML elment. std::auto_ptr pele(new XmlElement("ExtraData")); // Add text files. for ( TextList::const_iterator itxt=texts().begin(); itxt!=texts().end(); ++itxt ) { XmlElement* pele_txt = itxt->xml(); if ( pele_txt == 0 ) return 0; pele->add_child(pele_txt); } // Return the XML. return pele.release(); } //********************************************************************** // Web page. Text TextDataset:: web_page(string baseurl, string repurl, string entry) const { Text wp; wp.append(""); // Display dataset. if ( entry == "" ) { string header = "TextDataset " + id().to_string(); wp.append("" + header + ""); //wp.append("

" + header + "

"); wp.append("
");
    ostringstream sout;
    ostr(sout, "", repurl);
    wp.append(sout.str());
    wp.append("
"); wp.append("Text objects:"); for ( TextList::const_iterator itxt=texts().begin(); itxt!=texts().end(); ++itxt ) { const Text& txt = *itxt; wp.append("
  " + txt.name() + ""); } } else if ( entry.size() > 5 && entry.substr(0,5) == "text=" ) { string name = entry.substr(5); string header = "TextDataset " + id().to_string() + " text " + name; wp.append("" + header + ""); //wp.append("

" + header + "

"); TextList::const_iterator itxt = texts().begin(); for ( ; itxt!=texts().end(); ++itxt ) { if ( itxt->name() == name ) break; } if ( itxt == texts().end() ) { wp.append("Dataset does not have text objeect named " + name); } else { wp.append("
");
      ostringstream sout;
      sout << *itxt;
      wp.append(sout.str());
      wp.append("
"); } } else { wp.append("Dataset::web_page: Invalid entry " + entry); } wp.append(""); return wp; } //**********************************************************************