// dataset_text.cxx // Main program to list and extract text objects in a TextDataset or // compound datset holding a TextDataset. #include #include #include "dataset_util/FileStatus.h" #include "dataset_xml/XmlParser.h" #include "dataset_base/TextDataset.h" #include "dataset_base/DatasetCreator.h" #include "dataset_base/DatasetRepository.h" using std::string; using std::cout; using std::cerr; using std::endl; using dset::Dataset; using dset::TextDataset; using dset::DatasetCreator; using dset::DatasetRepository; typedef TextDataset::TextList TextList; int main(int argc, char* argv[]) { string exename = argv[0]; string xfile; string sid; string action; string textname; int error = 0; int iarg = 1; while ( iarg < argc ) { // Action defined--this must be the text name. string flag = argv[iarg++]; if ( action.size() ) { textname = flag; break; } // Not a flag--this must be the action. if ( flag[0] != '-' ) { action = flag; continue; } // Otherwise this is a flag. string arg = ""; if ( iarg < argc ) { arg = argv[iarg]; if ( arg.size() && arg[0] == '-' ) { arg = ""; } else { ++iarg; } } // Input file if ( flag == "-h" ) { action = "help"; break; } else if ( flag == "-f" ) { if ( arg == "" ) { error = 13; break; } xfile = arg; sid = ""; } else if ( flag == "-i" ) { if ( arg == "" ) { error = 14; break; } xfile = ""; sid = arg; } else { cerr << "Invalid flag: " << flag << endl; error = 15; } } if ( error ) { cerr << "Unable to parse command line." << endl; return error; } if ( action == "help" ) { cout << "Usage: " << exename << " [-h] (-i iii.jjj | -f filename) action textname" << endl; cout << " action = help, list, extract" << endl; cout << " name is from the list or blank for all" << endl; cout << " -f DSFILE - input dataset XML file" << endl; cout << " -i ID - ID of the dataset in format iii-jjj" << endl; return 0; } // Open the repository. DatasetRepository::set_default_instance(); DatasetRepository& rep = DatasetRepository::default_instance(); if ( ! rep.is_valid() ) { cerr << "Unable to access default dataset repository"; } // Open the dataset. XmlParser parser; const Dataset* pdst = 0; if ( xfile.size() ) { FileStatus fstat(xfile); if ( ! fstat.exists() ) { cerr << "File does not exist:" << endl; cerr << " " << xfile << endl; return 31; } const XmlElement* pele = parser.parse(xfile); if ( pele == 0 ) { cerr << "Unable to parse dataset XML file" << endl; return 32; } pdst = DatasetCreator::create(*pele, &rep); delete pele; if ( pdst == 0 ) { int ecode = rep.error(); string msg = rep.error_message(); cerr << "Error retrieving input dataset: " << ecode << endl; cerr << " \"" << msg << "\"" << endl; return 39; } } else if ( sid.size() ) { DatasetId did; did = DatasetId(sid); if ( ! did.is_valid() ) { cerr << "Unable to parse id" << endl; return 34; } pdst = rep.extract(did); if ( pdst == 0 ) { int ecode = rep.error(); string msg = rep.error_message(); cerr << "Error retrieving input dataset: " << ecode << endl; cerr << " \"" << msg << "\"" << endl; return 39; } assert( pdst != 0 ); } else { cerr << "either filename or ID must be provided" << endl; return 30; } // Check type. if ( ! pdst->is() ) { cerr << "Dataset is not a TextDataset" << endl; cerr << " Full type is " << pdst->fulltype() << endl; return 41; } // Extract the text objects. const TextDataset* ptdst = dynamic_cast(pdst); TextList texts0 = ptdst->texts(); // Find texts matching name. TextList texts; for ( TextList::const_iterator itxt=texts0.begin(); itxt!=texts0.end(); ++itxt ) { if ( textname.size() == 0 || itxt->name() == textname ) { texts.push_back(*itxt); } } if ( texts.size() == 0 ) { cerr << "No matching texts" << endl; return 71; } // Handle action. if ( action == "list" || action == "extract" ) { string slist; for ( TextList::const_iterator itxt=texts.begin(); itxt!=texts.end(); ++itxt ) { if ( action == "extract" ) { int wstat = itxt->write(); if ( wstat != 0 ) { cerr << "Error " << wstat << " writing text " << itxt->name() << endl; if ( slist.size() ) cout << slist << endl; return 72; } } if ( itxt != texts.begin() ) slist += " "; slist += itxt->name(); } cout << slist << endl; } else { cerr << "Unknown action: " << action << endl; return 81; } return 0; }