Main Page | Namespace List | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Namespace Members | Data Fields | Globals

parse.cpp

Go to the documentation of this file.
00001 // See ../../license.txt for license information.
00002 //
00003 // parse.cpp
00004 //
00005 // NOTES
00006 //              XML Parser for the persistence framework.
00007 //
00008 // 30-Jun-2003  phamilton  Created
00009 //
00010 
00011 #define PERSIST_IN_LIBRARY_SOURCE
00012 
00013 #include "parse.hpp"
00014 #include <iostream>
00015 #include <fstream>
00016 #include "expat-1.95.5/lib/expat.h"
00017 #include "boost/lexical_cast.hpp"
00018 #include "boost/format.hpp"
00019 
00020 using namespace ph::persist::xml;
00021 
// boost::format templates used when building diagnostics.
const char XMLFmt_error[] = "file: %s %s at line %d";
const char XMLFmt_expected[] = "expected %s=\"string\"";

// fixed diagnostic texts.
const char XMLErr_mismatched_end_tag[] = "mismatched end element tag.";

// PROGRESS_UNIT: number of bytes that make up one progress step.
// BUFFER_SIZE:   number of bytes read from the stream per chunk.
#define PROGRESS_UNIT   512
#define BUFFER_SIZE     1024
00031 
00032 bool parse::parse_xml(std::istream *stream, const std::string &streampath, parse *parser, parse_progress *progress)
00033 {
00034 
00035         if (progress)
00036         {
00037                 stream->seekg(0, std::ios_base::end);
00038                 long len = stream->tellg();
00039                 long count = len / PROGRESS_UNIT;
00040                 progress->total(count > 0 ? count : 1);
00041         }
00042 
00043         if (progress)
00044                 progress->progress(0);
00045 
00046         parser->startparse(streampath);
00047         int done = 0;
00048         long total = 0;
00049         bool parseresult = true;
00050         while (!done)
00051         {
00052                 char buf[BUFFER_SIZE];
00053                 stream->read(buf, sizeof(buf));
00054                 long len = stream->gcount();
00055                 done = len < (long)sizeof(buf);
00056                 int error = parser->doparse(buf, len, done);
00057                 if (error != PARSE_SUCCESS)
00058                 {
00059                         parseresult =  false;
00060                         done = 1;
00061                 }
00062                 total += len;
00063                 if (progress)
00064                 {
00065                         long p = total / PROGRESS_UNIT;
00066                         progress->progress(p);
00067                 }
00068         }
00069         parser->endparse();
00070 
00071         return parseresult;
00072 }
00073 
00074 void parse::startparse(const std::string &streamname)
00075 /*
00076         Called at the start of a parse. Set's to the expat
00077         data structures.
00078 */
00079 {
00080         assert(_parser == NULL);
00081         _parser = XML_ParserCreate(NULL);
00082         _filename = streamname; // for error messages.
00083         _error = PARSE_SUCCESS;
00084         
00085         XML_SetUserData(_parser, this);
00086         XML_SetElementHandler(_parser, sstartelement_handler, sendelement_handler);
00087         XML_SetCharacterDataHandler(_parser, scdata_handler);
00088         XML_SetCommentHandler(_parser, scomment_handler);
00089         XML_SetDefaultHandler(_parser, sdefault_handler);
00090 }
00091 
00092 int parse::doparse(char *buf, long len, int done)
00093 /*
00094         Called in the middle of a parse. Feed more XML into the
00095         parser.
00096 */
00097 {
00098         try
00099         {
00100                 if (XML_Parse(_parser, buf, len, done))
00101                         return PARSE_SUCCESS;
00102         }
00103         catch (...)
00104         {
00105                 // the only thrown exception is to end the parse.
00106                 _error = PARSE_BADXMLTYPE;
00107                 return _error;
00108         }
00109 
00110         // for some reason, the string returned is not actually UNICODE! So we convert it to unicode here.
00111 /* TBD
00112         CWStr m;
00113         m.Convert(string);
00114 
00115         XMLError(m.c_str());
00116 */
00117         return _error;
00118 }
00119 
00120 void parse::endparse()
00121 /*
00122         Called at the end of a parse, or to abort a parse.
00123 */
00124 {
00125         finish_handler();
00126         XML_ParserFree(_parser);
00127         _parser = NULL;
00128 }
00129 
00130 int parse::parsestream(std::istream *stream, const std::string &streamname)
00131 /*
00132         Wrapper function to parse a file of XML.
00133 */
00134 {
00135         startparse(streamname);
00136 
00137         int done = 0;
00138         while (!done)
00139         {
00140                 char buf[BUFFER_SIZE];
00141                 stream->read(buf, sizeof(buf));
00142                 long len = stream->gcount();
00143                 done = len < (long)sizeof(buf);
00144                 int error = doparse(buf, len, done);
00145                 if (error != PARSE_SUCCESS)
00146                         return _error;
00147         }
00148         endparse();
00149 
00150         return _error;
00151 }
00152 
00153 int parse::doparsefile(const std::string &filename)
00154 /*
00155         Parse a file given a filename of a file on disk.
00156 */
00157 {
00158         int result = PARSE_NOFILE;
00159         std::ifstream f(filename.c_str());
00160         if (f.is_open())
00161         {
00162                 result = parsestream(&f, filename);
00163                 f.close();
00164         }
00165         return result;
00166 }
00167 
00168 void parse::sstartelement_handler(void *userData, const XML_Char *name, const XML_Char **atts)
00169 {
00170         parse *me = reinterpret_cast<parse *>(userData);
00171         
00172         // push this element.
00173         me->_elementstack.push_back(name);
00174 
00175         xmlstring n(name);
00176         std::vector<xmlstring> a;
00177         if (atts)
00178                 for (int i=0; atts[i]; i++)
00179                         a.push_back(atts[i]);
00180         me->startelement_handler(n, a);
00181 }
00182 
00183 void parse::sendelement_handler(void *userData, const XML_Char *name)
00184 {
00185         parse *me = reinterpret_cast<parse *>(userData);
00186 
00187         me->endelement_handler(name);
00188 
00189         // pop the element.
00190         if (me->_elementstack.back() == name)
00191                 me->_elementstack.pop_back();
00192         else
00193                 me->error(XMLErr_mismatched_end_tag);
00194 }
00195 
00196 void parse::scdata_handler(void *userData, const XML_Char *s, int len)
00197 {
00198         parse *me = reinterpret_cast<parse *>(userData);
00199 
00200         // use the length in conversion.
00201         xmlstring ws(s, len);
00202 
00203         me->cdata_handler(ws, len);
00204 }
00205 
00206 void parse::scomment_handler(void *userData, const XML_Char *data)
00207 {
00208         parse *me = reinterpret_cast<parse *>(userData);
00209 
00210         // use the length in conversion.
00211         xmlstring ws(data);
00212 
00213         me->comment_handler(data);
00214 }
00215 
00216 void parse::sdefault_handler(void *userData,  const XML_Char *s,  int len)
00217 {
00218         parse *me = reinterpret_cast<parse *>(userData);
00219 
00220         // use the length in conversion.
00221         xmlstring ws(s, len);
00222 
00223         me->default_handler(ws, len);
00224 }
00225 
00226 xmlstring parse::attr(const std::vector<xmlstring> &attrs, int index)
00227 {
00228         if ((int)attrs.size() > (index * 2))
00229                 return attrs[index * 2];
00230         return S("");
00231 }
00232 
00233 xmlstring parse::attrval(const std::vector<xmlstring> &attrs, int index)
00234 {
00235         if ((int)attrs.size() > ((index * 2) + 1))
00236                 return attrs[(index * 2) + 1];
00237         return S("");
00238 }
00239 
00240 xmlstring parse::attr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00241 {
00242         // these are processed in pairs. So a simple for() is best here.
00243         for (int i=0; i < (int)attrs.size(); i++)
00244         {
00245                 if (token == attrs[i])
00246                         return attrs[i+1];
00247                 i++;
00248         }
00249                 
00250         return S("");
00251 }
00252 
00253 xmlstring parse::expectedattr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00254 {
00255         xmlstring a = attr(attrs, token);
00256         if (!a.empty())
00257                 return a;
00258 
00259         expected_error(token);
00260         return S("");
00261 }
00262 
00263 void parse::expected_error(const xmlstring &token)
00264 {
00265         error(boost::io::str(boost::format(XMLFmt_expected) % boost::lexical_cast<std::string>(token)));
00266 }
00267 
00268 void parse::error(const std::string &s, bool detail)
00269 /*
00270         Default error does a message box.
00271 */
00272 {
00273         if (!_silent)
00274         {
00275                 if (_errorhandler)
00276                 {
00277                         if (detail)
00278                         {
00279                                 // on debian, the direct version of this that uses a stream doesn't seem to work, so just cast to a string
00280                                 // for now.
00281                                 *_errorhandler << boost::io::str(boost::format(XMLFmt_error) % _filename % s % XML_GetCurrentLineNumber(_parser)) << std::endl;
00282                         }
00283                         else
00284                                 *_errorhandler << s << std::endl;
00285                 }
00286         }
00287 
00288         _error = PARSE_XMLERROR;
00289 }
00290 
00291 void parse::error(const std::string &format, const std::string &s1, bool detail)
00292 {
00293         error(boost::io::str(boost::format(format) % s1), detail);
00294 }
00295 
00296 void parse::error(const std::string &format, const std::string &s1, const std::string &s2, bool detail)
00297 {
00298         error(boost::io::str(boost::format(format) % s1 % s2), detail);
00299 }
00300 
00301 // we escape all data with 2 sets of this char...
00302 const char kEscapeChar = '\\';
00303 
00304 // and here are the things that we escape.
00305 static struct { char c; const char *s; } gXMLEncodingTable[] = 
00306 {
00307         { '<', S("lt") },
00308         { '>', S("gt") },
00309         { '&', S("amp") },
00310         { 0, 0 }
00311 };
00312 
00313 // some helper strings. The short header is used as a way of telling whether a particular
00314 // string contains XML or not.
00315 const xmlstring kXMLShortHeader = S("<?xml version=\"1.0\"");
00316 const xmlstring kXMLLongHeader = S("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
00317 
00318 bool parse::encodexmldata(const xmlstring &s, xmlstring *news)
00319 {
00320         *news = S("");
00321         for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00322         {
00323                 if (*i == kEscapeChar)
00324                 {
00325                         *news += kEscapeChar;
00326                         *news += kEscapeChar;
00327 
00328                 }
00329                 else
00330                 {
00331                         int j=0;
00332                         while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].c != *i)
00333                                 j++;
00334                         if (gXMLEncodingTable[j].c)
00335                         {
00336                                 *news += kEscapeChar;
00337                                 *news += gXMLEncodingTable[j].s;
00338                                 *news += kEscapeChar;
00339                         }
00340                         else
00341                                 *news += *i;
00342                 }
00343         }
00344 
00345         return s.length() != news->length();
00346 }
00347 
00348 bool parse::decodexmldata(const xmlstring &s, xmlstring *news)
00349 {
00350         // if the string is actually a chunk of XML, then we don't decode (we are already
00351         // decoded).
00352         if (s.substr(0, kXMLShortHeader.length()) == kXMLShortHeader)
00353                 return false;
00354 
00355         bool escape = false;
00356         xmlstring escdata;
00357         *news = S("");
00358         for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00359         {
00360                 if (escape)
00361                 {
00362                         if (*i == kEscapeChar)
00363                         {
00364                                 if (escdata == S(""))
00365                                         *news += kEscapeChar;
00366                                 else
00367                                 {
00368                                         // finished escaping.
00369                                         int j=0;
00370                                         while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].s != escdata)
00371                                                 j++;
00372                                         if (gXMLEncodingTable[j].c)
00373                                                 *news += gXMLEncodingTable[j].c;
00374                                         else
00375                                                 *news += escdata;
00376                                 }
00377                                 escape = false;
00378                         }
00379                         else
00380                                 escdata += *i;
00381                 }
00382                 else if (*i == kEscapeChar)
00383                 {
00384                         escdata = S("");
00385                         escape = true;
00386                 }
00387                 else
00388                         *news += *i;
00389         }
00390 
00391         if (escape)
00392                 *news += escdata;
00393 
00394         return s.length() != news->length();
00395 }
00396 

Generated on Wed Apr 5 22:03:25 2006 for cppxmlobj by  doxygen 1.4.3