00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #define PERSIST_IN_LIBRARY_SOURCE
00012
00013 #include "parse.hpp"
00014 #include <iostream>
00015 #include <fstream>
00016 #include "expat-1.95.5/lib/expat.h"
00017 #include "boost/lexical_cast.hpp"
00018 #include "boost/format.hpp"
00019
00020 using namespace ph::persist::xml;
00021
00022
// Message templates (boost::format syntax) used when reporting parse errors.

// Detailed error line: file name, message text, current line number.
const char XMLFmt_error[] = "file: %s %s at line %d";
// Reported by expected_error() for an attribute that was looked up but not found.
const char XMLFmt_expected[] = "expected %s=\"string\"";


// Reported when an end tag does not match the innermost open element.
const char XMLErr_mismatched_end_tag[] = "mismatched end element tag.";

// Typed, file-local constants instead of preprocessor macros, so they obey
// normal scoping and type checking while keeping the same names and values.

// Number of bytes that make up one progress step.
static const int PROGRESS_UNIT = 512;
// Size in bytes of the chunk read from the stream on each parse iteration.
static const int BUFFER_SIZE = 1024;
00031
00032 bool parse::parse_xml(std::istream *stream, const std::string &streampath, parse *parser, parse_progress *progress)
00033 {
00034
00035 if (progress)
00036 {
00037 stream->seekg(0, std::ios_base::end);
00038 long len = stream->tellg();
00039 long count = len / PROGRESS_UNIT;
00040 progress->total(count > 0 ? count : 1);
00041 }
00042
00043 if (progress)
00044 progress->progress(0);
00045
00046 parser->startparse(streampath);
00047 int done = 0;
00048 long total = 0;
00049 bool parseresult = true;
00050 while (!done)
00051 {
00052 char buf[BUFFER_SIZE];
00053 stream->read(buf, sizeof(buf));
00054 long len = stream->gcount();
00055 done = len < (long)sizeof(buf);
00056 int error = parser->doparse(buf, len, done);
00057 if (error != PARSE_SUCCESS)
00058 {
00059 parseresult = false;
00060 done = 1;
00061 }
00062 total += len;
00063 if (progress)
00064 {
00065 long p = total / PROGRESS_UNIT;
00066 progress->progress(p);
00067 }
00068 }
00069 parser->endparse();
00070
00071 return parseresult;
00072 }
00073
00074 void parse::startparse(const std::string &streamname)
00075
00076
00077
00078
00079 {
00080 assert(_parser == NULL);
00081 _parser = XML_ParserCreate(NULL);
00082 _filename = streamname;
00083 _error = PARSE_SUCCESS;
00084
00085 XML_SetUserData(_parser, this);
00086 XML_SetElementHandler(_parser, sstartelement_handler, sendelement_handler);
00087 XML_SetCharacterDataHandler(_parser, scdata_handler);
00088 XML_SetCommentHandler(_parser, scomment_handler);
00089 XML_SetDefaultHandler(_parser, sdefault_handler);
00090 }
00091
00092 int parse::doparse(char *buf, long len, int done)
00093
00094
00095
00096
00097 {
00098 try
00099 {
00100 if (XML_Parse(_parser, buf, len, done))
00101 return PARSE_SUCCESS;
00102 }
00103 catch (...)
00104 {
00105
00106 _error = PARSE_BADXMLTYPE;
00107 return _error;
00108 }
00109
00110
00111
00112
00113
00114
00115
00116
00117 return _error;
00118 }
00119
00120 void parse::endparse()
00121
00122
00123
00124 {
00125 finish_handler();
00126 XML_ParserFree(_parser);
00127 _parser = NULL;
00128 }
00129
00130 int parse::parsestream(std::istream *stream, const std::string &streamname)
00131
00132
00133
00134 {
00135 startparse(streamname);
00136
00137 int done = 0;
00138 while (!done)
00139 {
00140 char buf[BUFFER_SIZE];
00141 stream->read(buf, sizeof(buf));
00142 long len = stream->gcount();
00143 done = len < (long)sizeof(buf);
00144 int error = doparse(buf, len, done);
00145 if (error != PARSE_SUCCESS)
00146 return _error;
00147 }
00148 endparse();
00149
00150 return _error;
00151 }
00152
00153 int parse::doparsefile(const std::string &filename)
00154
00155
00156
00157 {
00158 int result = PARSE_NOFILE;
00159 std::ifstream f(filename.c_str());
00160 if (f.is_open())
00161 {
00162 result = parsestream(&f, filename);
00163 f.close();
00164 }
00165 return result;
00166 }
00167
00168 void parse::sstartelement_handler(void *userData, const XML_Char *name, const XML_Char **atts)
00169 {
00170 parse *me = reinterpret_cast<parse *>(userData);
00171
00172
00173 me->_elementstack.push_back(name);
00174
00175 xmlstring n(name);
00176 std::vector<xmlstring> a;
00177 if (atts)
00178 for (int i=0; atts[i]; i++)
00179 a.push_back(atts[i]);
00180 me->startelement_handler(n, a);
00181 }
00182
00183 void parse::sendelement_handler(void *userData, const XML_Char *name)
00184 {
00185 parse *me = reinterpret_cast<parse *>(userData);
00186
00187 me->endelement_handler(name);
00188
00189
00190 if (me->_elementstack.back() == name)
00191 me->_elementstack.pop_back();
00192 else
00193 me->error(XMLErr_mismatched_end_tag);
00194 }
00195
00196 void parse::scdata_handler(void *userData, const XML_Char *s, int len)
00197 {
00198 parse *me = reinterpret_cast<parse *>(userData);
00199
00200
00201 xmlstring ws(s, len);
00202
00203 me->cdata_handler(ws, len);
00204 }
00205
00206 void parse::scomment_handler(void *userData, const XML_Char *data)
00207 {
00208 parse *me = reinterpret_cast<parse *>(userData);
00209
00210
00211 xmlstring ws(data);
00212
00213 me->comment_handler(data);
00214 }
00215
00216 void parse::sdefault_handler(void *userData, const XML_Char *s, int len)
00217 {
00218 parse *me = reinterpret_cast<parse *>(userData);
00219
00220
00221 xmlstring ws(s, len);
00222
00223 me->default_handler(ws, len);
00224 }
00225
00226 xmlstring parse::attr(const std::vector<xmlstring> &attrs, int index)
00227 {
00228 if ((int)attrs.size() > (index * 2))
00229 return attrs[index * 2];
00230 return S("");
00231 }
00232
00233 xmlstring parse::attrval(const std::vector<xmlstring> &attrs, int index)
00234 {
00235 if ((int)attrs.size() > ((index * 2) + 1))
00236 return attrs[(index * 2) + 1];
00237 return S("");
00238 }
00239
00240 xmlstring parse::attr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00241 {
00242
00243 for (int i=0; i < (int)attrs.size(); i++)
00244 {
00245 if (token == attrs[i])
00246 return attrs[i+1];
00247 i++;
00248 }
00249
00250 return S("");
00251 }
00252
00253 xmlstring parse::expectedattr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00254 {
00255 xmlstring a = attr(attrs, token);
00256 if (!a.empty())
00257 return a;
00258
00259 expected_error(token);
00260 return S("");
00261 }
00262
00263 void parse::expected_error(const xmlstring &token)
00264 {
00265 error(boost::io::str(boost::format(XMLFmt_expected) % boost::lexical_cast<std::string>(token)));
00266 }
00267
00268 void parse::error(const std::string &s, bool detail)
00269
00270
00271
00272 {
00273 if (!_silent)
00274 {
00275 if (_errorhandler)
00276 {
00277 if (detail)
00278 {
00279
00280
00281 *_errorhandler << boost::io::str(boost::format(XMLFmt_error) % _filename % s % XML_GetCurrentLineNumber(_parser)) << std::endl;
00282 }
00283 else
00284 *_errorhandler << s << std::endl;
00285 }
00286 }
00287
00288 _error = PARSE_XMLERROR;
00289 }
00290
00291 void parse::error(const std::string &format, const std::string &s1, bool detail)
00292 {
00293 error(boost::io::str(boost::format(format) % s1), detail);
00294 }
00295
00296 void parse::error(const std::string &format, const std::string &s1, const std::string &s2, bool detail)
00297 {
00298 error(boost::io::str(boost::format(format) % s1 % s2), detail);
00299 }
00300
00301
00302 const char kEscapeChar = '\\';
00303
00304
00305 static struct { char c; const char *s; } gXMLEncodingTable[] =
00306 {
00307 { '<', S("lt") },
00308 { '>', S("gt") },
00309 { '&', S("amp") },
00310 { 0, 0 }
00311 };
00312
00313
00314
00315 const xmlstring kXMLShortHeader = S("<?xml version=\"1.0\"");
00316 const xmlstring kXMLLongHeader = S("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
00317
00318 bool parse::encodexmldata(const xmlstring &s, xmlstring *news)
00319 {
00320 *news = S("");
00321 for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00322 {
00323 if (*i == kEscapeChar)
00324 {
00325 *news += kEscapeChar;
00326 *news += kEscapeChar;
00327
00328 }
00329 else
00330 {
00331 int j=0;
00332 while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].c != *i)
00333 j++;
00334 if (gXMLEncodingTable[j].c)
00335 {
00336 *news += kEscapeChar;
00337 *news += gXMLEncodingTable[j].s;
00338 *news += kEscapeChar;
00339 }
00340 else
00341 *news += *i;
00342 }
00343 }
00344
00345 return s.length() != news->length();
00346 }
00347
00348 bool parse::decodexmldata(const xmlstring &s, xmlstring *news)
00349 {
00350
00351
00352 if (s.substr(0, kXMLShortHeader.length()) == kXMLShortHeader)
00353 return false;
00354
00355 bool escape = false;
00356 xmlstring escdata;
00357 *news = S("");
00358 for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00359 {
00360 if (escape)
00361 {
00362 if (*i == kEscapeChar)
00363 {
00364 if (escdata == S(""))
00365 *news += kEscapeChar;
00366 else
00367 {
00368
00369 int j=0;
00370 while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].s != escdata)
00371 j++;
00372 if (gXMLEncodingTable[j].c)
00373 *news += gXMLEncodingTable[j].c;
00374 else
00375 *news += escdata;
00376 }
00377 escape = false;
00378 }
00379 else
00380 escdata += *i;
00381 }
00382 else if (*i == kEscapeChar)
00383 {
00384 escdata = S("");
00385 escape = true;
00386 }
00387 else
00388 *news += *i;
00389 }
00390
00391 if (escape)
00392 *news += escdata;
00393
00394 return s.length() != news->length();
00395 }
00396