// XmlParser.cxx #include "dataset_xml/XmlParser.h" #include #include #include #include #include #include "dataset_util/FileStatus.h" #include "dataset_util/WorkingDirectory.h" #include "dataset_util/ssystem.h" #include "dataset_util/get_hostname.h" #include "dataset_util/mkdir.h" #include "dataset_util/FileName.h" #include "dataset_util/DtdRegistry.h" #include "dataset_util/PThreadMutex.h" #include "xercesc/util/PlatformUtils.hpp" #include "xercesc/dom/DOM.hpp" #include "xercesc/parsers/XercesDOMParser.hpp" #include "xercesc/dom/DOMException.hpp" #include "xercesc/dom/DOMNamedNodeMap.hpp" #include "xercesc/dom/DOMAttr.hpp" using std::string; using std::ofstream; using std::cerr; using std::endl; using std::ostringstream; #ifdef XERCES_CPP_NAMESPACE using XERCES_CPP_NAMESPACE::XMLPlatformUtils; using XERCES_CPP_NAMESPACE::AbstractDOMParser; using XERCES_CPP_NAMESPACE::XercesDOMParser; using XERCES_CPP_NAMESPACE::DOMNode; using XERCES_CPP_NAMESPACE::XMLString; using XERCES_CPP_NAMESPACE::DOMNamedNodeMap; using XERCES_CPP_NAMESPACE::DOMAttr; using XERCES_CPP_NAMESPACE::DOMException; using XERCES_CPP_NAMESPACE::DOMDocument; #endif //********************************************************************** // Local definitions. //********************************************************************** namespace { //********************************************************************** // Mutex. // Leak this to make sure it is present for all of C++ cleanup. PThreadMutex& mutex() { static PThreadMutex* pmut = new PThreadMutex; return *pmut; } // Make sure mutex is created during initialization of the first thread. PThreadMutex& mutref = mutex(); //********************************************************************** // Initialize Xerces. int initialize() { XMLPlatformUtils::Initialize(); return 0; } int init = initialize(); //********************************************************************** // Reinitialize. int reinitialize() { XMLPlatformUtils::Terminate(); XMLPlatformUtils::Initialize(); return 0; } //********************************************************************** // Return the DOM parser. XercesDOMParser* pparser(bool makenew, bool dodelete =false) { mutex().lock(); static XercesDOMParser* pparser = 0; if ( makenew || pparser==0 ) { delete pparser; pparser = new XercesDOMParser; pparser->setValidationScheme(AbstractDOMParser::Val_Always); pparser->setDoSchema(true); pparser->setValidationSchemaFullChecking(true); } if ( dodelete ) { delete pparser; pparser = 0; return pparser; } mutex().unlock(); return pparser; } //********************************************************************** // Class to clean up. class Cleanup { public: ~Cleanup() { pparser(false,true); XMLPlatformUtils::Terminate(); } }; Cleanup cleanup; //********************************************************************** // Create an XmlElement from a DOM node. const XmlElement* xml_element(const DOMNode& node) { if ( node.getNodeType() != DOMNode::ELEMENT_NODE ) { cerr << "xml_element: unexpected node type: " << node.getNodeType() << endl; return 0; } char* cele = XMLString::transcode(node.getNodeName()); XmlElement* pele = new XmlElement(cele); XMLString::release(&cele); // add attributes. const DOMNamedNodeMap* patts = node.getAttributes(); if ( patts == 0 ) { cerr << "xml_element: node.getAttributes() returned null" << endl; return 0; } const DOMNamedNodeMap& atts = *patts; for ( unsigned int iatt=0; iatt(gatt); char* cname = XMLString::transcode(att.getNodeName()); char* cvalue = XMLString::transcode(att.getValue()); string name = cname; string value = cvalue; pele->add_attribute(name, value); XMLString::release(&cname); XMLString::release(&cvalue); } // Add child nodes. const DOMNode* pchild = node.getFirstChild(); while ( pchild != 0 ) { bool skip = false; skip |= pchild->getNodeType() == DOMNode::TEXT_NODE; skip |= pchild->getNodeType() == DOMNode::COMMENT_NODE; if ( ! skip ) { const XmlElement* pele_child = xml_element(*pchild); if ( pele_child == 0 ) { cerr << "xml_element: xml_element(*pchild) returned null" << endl; return 0; } pele->add_child(pele_child); } // PC data. if ( pchild->getNodeType() == DOMNode::TEXT_NODE ) { char* ctxt = XMLString::transcode(pchild->getTextContent()); string txt =ctxt; XMLString::release(&ctxt); // Strip leading whitespace. string::size_type ipos = 0; string::size_type jpos = txt.size(); while ( iposipos && isspace(txt[jpos]) ) --jpos; txt = txt.substr(ipos, jpos-ipos+1); // Check we do not already have PC data. if ( pele->pcdata().size() == 0 ) { //std::cout << "PCDATA: |" << txt << "|" << endl; pele->set_pcdata(txt); } } } pchild = pchild->getNextSibling(); } return pele; } //********************************************************************** // Write an XML string to a file. // Adds header. // Arguments are filename, XML element name, XML text. int write_sxml(string filename, string xmlname, string xtxt) { // Strip leading a trailing blanks, etc. string::size_type ipos1 = 0; string::size_type ipos2 = xtxt.size(); while( ipos1 ipos1 ) { bool skip = false; skip |= xtxt[ipos2] == ' '; skip |= xtxt[ipos2] == '\n'; if ( ! skip ) break; } string xmltext = xtxt.substr(ipos1, ipos2-ipos1+1); // Check names. if ( filename.size() == 0 ) return 11; if ( xmlname.size() == 0 ) return 12; if ( xmltext.size() == 0 ) return 13; if ( xmltext.size() == 0 ) return 14; if ( xmltext[0] != '<' ) return 15; if ( xmltext[xmltext.size()-1] != '>' ) return 16; // Fetch DTD name. DtdRegistry* preg = DtdRegistry::system(xmlname); if ( preg == 0 ) return 21; string dtdname = preg->filename(); if ( dtdname.size() == 0 ) return 22; // Open file. ofstream file(filename.c_str()); if ( ! file ) return 31; // Write XML to file. file << "\n"; file << "\n"; file << "\n"; file << xmltext << endl; return 0; } //********************************************************************** } // end unnamed namespace //********************************************************************** // Implementation class. //********************************************************************** class XmlParser::Imp { public: PThreadMutex notusedmutex; // No longer used }; //********************************************************************** // Member functions. //********************************************************************** // Constructor. XmlParser::XmlParser() : pimp(new Imp) { } //********************************************************************** // Destructor. XmlParser::~XmlParser() { delete pimp; }; //********************************************************************** // Parse a file or XML string. const XmlElement* XmlParser::parse(string arg) { if ( ! arg.size() ) { return 0; } string filename = arg; // Create directory for parsing. string tmpdir = ""; char ctmpdir[32] = "/tmp/dial_XmlParser_XXXXXX"; char* cchk = mkdtemp(ctmpdir); if ( cchk == 0 ) { cerr << "XmlParser::parse: Unable to create temporary directory" << endl; return 0; } tmpdir = ctmpdir; assert( tmpdir.size() ); filename = tmpdir + "/tmp.xml"; // Write DTD to directory. { WorkingDirectory wd(tmpdir); DtdRegistry::instance("dataset").write(); DtdRegistry::instance("dial").write(); } // If argument is an XML string, then write to the file. if ( arg[0] == '<' ) { // Find XML name. string::size_type ipos1 = 0; while ( ++ipos1 < arg.size() ) { if ( arg[ipos1] != ' ' ) break; } if ( ipos1 > arg.size() ) { return 0; } string::size_type ipos2 = 1; while ( ++ipos2 < arg.size() ) { if ( arg[ipos2] == ' ' || arg[ipos2] == '>' ) break; } string xmlname = arg.substr(ipos1, ipos2-ipos1); int stat = write_sxml(filename, xmlname, arg); if ( stat != 0 ) { cerr << "XmlParser::parse: Unable to create temporary file at" << endl; cerr << " " << filename << endl; cerr << " write_sxml returned " << stat << endl; cerr << " xmlname" << endl; cerr << "-------------- XML --------------" << endl; cerr << arg << endl; cerr << "----------- END XML ------------" << endl; return 0; } // Otherwise copy the file. } else { ssystem("cp " + arg + " " + filename); if ( ! FileStatus(filename).exists() ) { cerr << "XmlParser::parse: " << endl; cerr << " Unable to copy from source" << endl; cerr << " " << arg << endl; cerr << " to temporary file at" << endl; cerr << " " << filename << endl; return 0; } } // Lock mutex before using Xerces parser. mutex().lock(); // Check file. FileStatus fstat(filename); if ( ! fstat.is_readable() ) { mutex().unlock(); return 0; } bool use_new_parser = false; XercesDOMParser& parser = *pparser(use_new_parser); int errcount = parser.getErrorCount(); try { parser.parse(filename.c_str()); } catch (const DOMException& dex) { int code = dex.code; string line = "-----------------------------------------------------"; cerr << line << endl; cerr << "XmlParser: DOM exception " << code << endl; cerr << dex.msg << endl; cerr << "Argument to XMLParser::parse(string):" << endl; cerr << arg << endl; cerr << line << endl; mutex().unlock(); return 0; } if ( parser.getErrorCount() != errcount ) { string line = "-----------------------------------------------------"; cerr << line << endl; cerr << "XmlParser: Unable to parse XML file " << filename << endl; cerr << "Argument to XMLParser::parse(string):" << endl; cerr << arg << endl; cerr << line << endl; mutex().unlock(); return 0; } DOMDocument* pdoc = parser.getDocument(); if ( pdoc == 0 ) { string line = "-----------------------------------------------------"; cerr << line << endl; cerr << "XmlParser: getDocument returned null" << endl; cerr << "Argument to XMLParser::parse(string):" << endl; cerr << arg << endl; cerr << line << endl; mutex().unlock(); return 0; } // Read children until we find the first element. // This is assumed to describe the dataset. DOMNode* pchild = pdoc->getFirstChild(); while ( pchild != 0 ) { if ( pchild->getNodeType() == DOMNode::ELEMENT_NODE ) break; pchild = pchild->getNextSibling(); } if ( pchild == 0 ) { string line = "-----------------------------------------------------"; cerr << line << endl; cerr << "XmlParser: getFirstChild returned null" << endl; cerr << "Argument to XMLParser::parse(string):" << endl; cerr << arg << endl; cerr << line << endl; mutex().unlock(); return 0; } // Construct the XML element from the name. const XmlElement* pele = xml_element(*pchild); if ( pele == 0 ) { string line = "-----------------------------------------------------"; cerr << line << endl; cerr << "XmlParser: xml_element returned null" << endl; cerr << "Argument to XMLParser::parse(string):" << endl; cerr << arg << endl; cerr << line << endl; mutex().unlock(); return 0; } // Release resources associated with the document. //pdoc->release(); // Don't do this: causes crash in cleanup parser.resetDocumentPool(); // Remove temp files. if ( tmpdir.size() ) { string dtd = tmpdir + "/dataset.dtd"; unlink(dtd.c_str()); dtd = tmpdir + "/dial.dtd"; unlink(dtd.c_str()); unlink(filename.c_str()); rmdir(tmpdir.c_str()); } mutex().unlock(); return pele; } //********************************************************************** // Write an element to a file. int XmlParser::write(string filename, const XmlElement& ele) { return write_sxml(filename, ele.name(), ele.to_xml_text()); } //**********************************************************************