GDBMS 1.0
|
00001 00052 #ifndef _CRT_SECURE_NO_DEPRECATE 00053 #define _CRT_SECURE_NO_DEPRECATE 00054 #endif 00055 #include "xmlParser.h" 00056 #ifdef _XMLWINDOWS 00057 //#ifdef _DEBUG 00058 //#define _CRTDBG_MAP_ALLOC 00059 //#include <crtdbg.h> 00060 //#endif 00061 #define WIN32_LEAN_AND_MEAN 00062 #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files 00063 // to have "MessageBoxA" to display error messages for openFilHelper 00064 #endif 00065 00066 #include <memory.h> 00067 #include <assert.h> 00068 #include <stdio.h> 00069 #include <string.h> 00070 #include <stdlib.h> 00071 00072 XMLCSTR XMLNode::getVersion() { return _CXML("v2.42"); } 00073 void freeXMLString(XMLSTR t){if(t)free(t);} 00074 00075 static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; 00076 static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; 00077 00078 inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } 00079 00080 // You can modify the initialization of the variable "XMLClearTags" below 00081 // to change the clearTags that are currently recognized by the library. 00082 // The number on the second columns is the length of the string inside the 00083 // first column. 00084 // The "<!DOCTYPE" declaration must be the second in the list. 00085 // The "<!--" declaration must be the third in the list. 00086 // All ClearTag Strings must start with the '<' character. 
00087 typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag; 00088 static ALLXMLClearTag XMLClearTags[] = 00089 { 00090 { _CXML("<![CDATA["),9, _CXML("]]>") }, 00091 { _CXML("<!DOCTYPE"),9, _CXML(">") }, 00092 { _CXML("<!--") ,4, _CXML("-->") }, 00093 { _CXML("<PRE>") ,5, _CXML("</PRE>") }, 00094 // { _CXML("<Script>") ,8, _CXML("</Script>")}, 00095 { NULL ,0, NULL } 00096 }; 00097 00098 // You can modify the initialization of the variable "XMLEntities" below 00099 // to change the character entities that are currently recognized by the library. 00100 // The number on the second columns is the length of the string inside the 00101 // first column. Additionally, the syntaxes " " and " " are recognized. 00102 typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; 00103 static XMLCharacterEntity XMLEntities[] = 00104 { 00105 { _CXML("&" ), 5, _CXML('&' )}, 00106 { _CXML("<" ), 4, _CXML('<' )}, 00107 { _CXML(">" ), 4, _CXML('>' )}, 00108 { _CXML("""), 6, _CXML('\"')}, 00109 { _CXML("'"), 6, _CXML('\'')}, 00110 { NULL , 0, '\0' } 00111 }; 00112 00113 // When rendering the XMLNode to a string (using the "createXMLString" function), 00114 // you can ask for a beautiful formatting. This formatting is using the 00115 // following indentation character: 00116 #define INDENTCHAR _CXML('\t') 00117 00118 // The following function parses the XML errors into a user friendly string. 00119 // You can edit this to change the output language of the library to something else. 
00120 XMLCSTR XMLNode::getError(XMLError xerror) 00121 { 00122 switch (xerror) 00123 { 00124 case eXMLErrorNone: return _CXML("No error"); 00125 case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); 00126 case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); 00127 case eXMLErrorEmpty: return _CXML("Error: No XML data"); 00128 case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); 00129 case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); 00130 case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); 00131 case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); 00132 case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); 00133 case eXMLErrorNoElements: return _CXML("Error: No elements found"); 00134 case eXMLErrorFileNotFound: return _CXML("Error: File not found"); 00135 case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); 00136 case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); 00137 case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); 00138 case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); 00139 case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); 00140 case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); 00141 00142 case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); 00143 case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); 00144 case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); 00145 case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); 00146 }; 
00147 return _CXML("Unknown"); 00148 } 00149 00151 // Here start the abstraction layer to be OS-independent // 00153 00154 // Here is an abstraction layer to access some common string manipulation functions. 00155 // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0, 00156 // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++. 00157 // If you plan to "port" the library to a new system/compiler, all you have to do is 00158 // to edit the following lines. 00159 #ifdef XML_NO_WIDE_CHAR 00160 char myIsTextWideChar(const void *b, int len) { return FALSE; } 00161 #else 00162 #if defined (UNDER_CE) || !defined(_XMLWINDOWS) 00163 char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode 00164 { 00165 #ifdef sun 00166 // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. 00167 if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; 00168 #endif 00169 const wchar_t *s=(const wchar_t*)b; 00170 00171 // buffer too small: 00172 if (len<(int)sizeof(wchar_t)) return FALSE; 00173 00174 // odd length test 00175 if (len&1) return FALSE; 00176 00177 /* only checks the first 256 characters */ 00178 len=mmin(256,len/sizeof(wchar_t)); 00179 00180 // Check for the special byte order: 00181 if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; 00182 if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE 00183 00184 // checks for ASCII characters in the UNICODE stream 00185 int i,stats=0; 00186 for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++; 00187 if (stats>len/2) return TRUE; 00188 00189 // Check for UNICODE NULL chars 00190 for (i=0; i<len; i++) if (!s[i]) return TRUE; 00191 00192 return FALSE; 00193 } 00194 #else 00195 char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); } 00196 #endif 00197 #endif 00198 00199 #ifdef _XMLWINDOWS 00200 // for Microsoft 
Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 00201 #ifdef _XMLWIDECHAR 00202 wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) 00203 { 00204 int i; 00205 if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0); 00206 else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0); 00207 if (i<0) return NULL; 00208 wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR)); 00209 if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i); 00210 else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i); 00211 d[i]=0; 00212 return d; 00213 } 00214 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); } 00215 static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); } 00216 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);} 00217 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} 00218 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); } 00219 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } 00220 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } 00221 #else 00222 char *myWideCharToMultiByte(const wchar_t *s) 00223 { 00224 UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8; 00225 int i=(int)WideCharToMultiByte(codePage, // code page 00226 0, // performance and mapping flags 00227 s, // wide-character string 00228 -1, // number of chars in string 00229 NULL, // buffer for new string 00230 0, // size of buffer 00231 NULL, // default for unmappable chars 00232 NULL // set when default char used 00233 ); 00234 if (i<0) return NULL; 00235 char *d=(char*)malloc(i+1); 00236 WideCharToMultiByte(codePage, // code page 00237 0, // performance and mapping flags 00238 s, // wide-character string 00239 -1, // 
number of chars in string 00240 d, // buffer for new string 00241 i, // size of buffer 00242 NULL, // default for unmappable chars 00243 NULL // set when default char used 00244 ); 00245 d[i]=0; 00246 return d; 00247 } 00248 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } 00249 static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); } 00250 #ifdef __BORLANDC__ 00251 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);} 00252 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); } 00253 #else 00254 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);} 00255 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); } 00256 #endif 00257 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} 00258 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } 00259 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } 00260 #endif 00261 #else 00262 // for gcc and CC 00263 #ifdef XML_NO_WIDE_CHAR 00264 char *myWideCharToMultiByte(const wchar_t *s) { return NULL; } 00265 #else 00266 char *myWideCharToMultiByte(const wchar_t *s) 00267 { 00268 const wchar_t *ss=s; 00269 int i=(int)wcsrtombs(NULL,&ss,0,NULL); 00270 if (i<0) return NULL; 00271 char *d=(char *)malloc(i+1); 00272 wcsrtombs(d,&s,i,NULL); 00273 d[i]=0; 00274 return d; 00275 } 00276 #endif 00277 #ifdef _XMLWIDECHAR 00278 wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) 00279 { 00280 const char *ss=s; 00281 int i=(int)mbsrtowcs(NULL,&ss,0,NULL); 00282 if (i<0) return NULL; 00283 wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t)); 00284 mbsrtowcs(d,&s,i,NULL); 00285 d[i]=0; 00286 return d; 00287 } 00288 int xstrlen(XMLCSTR c) { return wcslen(c); } 00289 #ifdef sun 00290 // for CC 00291 #include <widec.h> 00292 static inline int xstrnicmp(XMLCSTR 
c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} 00293 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} 00294 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } 00295 #else 00296 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} 00297 #ifdef __linux__ 00298 // for gcc/linux 00299 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} 00300 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } 00301 #else 00302 #include <wctype.h> 00303 // for gcc/non-linux (MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 4.3.2, HP-UX 11, IRIX 6.5, OSF/1 5.1, Cygwin, mingw) 00304 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) 00305 { 00306 wchar_t left,right; 00307 do 00308 { 00309 left=towlower(*c1++); right=towlower(*c2++); 00310 } while (left&&(left==right)); 00311 return (int)left-(int)right; 00312 } 00313 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) 00314 { 00315 wchar_t left,right; 00316 while(l--) 00317 { 00318 left=towlower(*c1++); right=towlower(*c2++); 00319 if ((!left)||(left!=right)) return (int)left-(int)right; 00320 } 00321 return 0; 00322 } 00323 #endif 00324 #endif 00325 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } 00326 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } 00327 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) 00328 { 00329 char *filenameAscii=myWideCharToMultiByte(filename); 00330 FILE *f; 00331 if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); 00332 else f=fopen(filenameAscii,"wb"); 00333 free(filenameAscii); 00334 return f; 00335 } 00336 #else 00337 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } 00338 static inline int xstrlen(XMLCSTR c) { return strlen(c); } 00339 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { 
return strncasecmp(c1,c2,l);} 00340 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} 00341 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } 00342 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } 00343 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } 00344 #endif 00345 static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} 00346 #endif 00347 00348 00350 // the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions // 00352 // These 6 functions are not used inside the XMLparser. 00353 // There are only here as "convenience" functions for the user. 00354 // If you don't need them, you can delete them without any trouble. 00355 #ifdef _XMLWIDECHAR 00356 #ifdef _XMLWINDOWS 00357 // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 00358 char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)_wtoi(t); return v; } 00359 int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } 00360 long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } 00361 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/ return v; } 00362 #else 00363 #ifdef sun 00364 // for CC 00365 #include <widec.h> 00366 char xmltob(XMLCSTR t,char v){ if (t) return (char)wstol(t,NULL,10); return v; } 00367 int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } 00368 long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } 00369 #else 00370 // for gcc 00371 char xmltob(XMLCSTR t,char v){ if (t) return (char)wcstol(t,NULL,10); return v; } 00372 int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } 00373 long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } 00374 #endif 00375 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, 
L"%lf", &v); /*v=_wtof(t);*/ return v; } 00376 #endif 00377 #else 00378 char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } 00379 int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } 00380 long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } 00381 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } 00382 #endif 00383 XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v){ if (t) return t; return v; } 00384 XMLCHAR xmltoc(XMLCSTR t,const XMLCHAR v){ if (t&&(*t)) return *t; return v; } 00385 00387 // the "openFileHelper" function // 00389 00390 // Since each application has its own way to report and deal with errors, you should modify & rewrite 00391 // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. 00392 XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) 00393 { 00394 // guess the value of the global parameter "characterEncoding" 00395 // (the guess is based on the first 200 bytes of the file). 
00396 FILE *f=xfopen(filename,_CXML("rb")); 00397 if (f) 00398 { 00399 char bb[205]; 00400 int l=(int)fread(bb,1,200,f); 00401 setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); 00402 fclose(f); 00403 } 00404 00405 // parse the file 00406 XMLResults pResults; 00407 XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); 00408 00409 // display error message (if any) 00410 if (pResults.error != eXMLErrorNone) 00411 { 00412 // create message 00413 char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); 00414 if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } 00415 sprintf(message, 00416 #ifdef _XMLWIDECHAR 00417 "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" 00418 #else 00419 "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" 00420 #endif 00421 ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); 00422 00423 // display message 00424 #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) 00425 MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); 00426 #else 00427 printf("%s",message); 00428 #endif 00429 exit(255); 00430 } 00431 return xnode; 00432 } 00433 00435 // Here start the core implementation of the XMLParser library // 00437 00438 // You should normally not change anything below this point. 00439 00440 #ifndef _XMLWIDECHAR 00441 // If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte. 00442 // If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes). 00443 // If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes). 00444 // This table is used as lookup-table to know the length of a character (in byte) based on the 00445 // content of the first byte of the character. 
// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
// Each table below is indexed by the first byte of a character and gives the
// character's length in bytes; entry 0 (the NUL terminator) is 0 in all of them.
static const char XML_utf8ByteTable[256] =
{
    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
    1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
    4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
// Single-byte encodings: every non-NUL byte is a complete character.
static const char XML_legacyByteTable[256] =
{
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
static const char XML_sjisByteTable[256] =
{
    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
};
static const char XML_gb2312ByteTable[256] =
{
    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
    2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
};
static const char XML_gbk_big5_ByteTable[256] =
{
    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
};
// Table currently in effect.
static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
#endif


// Shared "empty" objects.
XMLNode XMLNode::emptyXMLNode;
XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};

// Enumeration used to decipher what type a token is
typedef enum XMLTokenTypeTag
{
    eTokenText = 0,
    eTokenQuotedText,
    eTokenTagStart,         /* "<"            */
    eTokenTagEnd,           /* "</"           */
    eTokenCloseTag,         /* ">"            */
    eTokenEquals,           /* "="            */
    eTokenDeclaration,      /* "<?"           */
    eTokenShortHandClose,   /* "/>"           */
    eTokenClear,
    eTokenError
} XMLTokenType;

// Main structure used for parsing XML
typedef struct XML
{
    XMLCSTR                lpXML;     // text being parsed
    XMLCSTR                lpszText;
    int                    nIndex,nIndexMissigEndTag;   // nIndex: current read position inside lpXML
    enum XMLError          error;     // last error recorded while parsing
    XMLCSTR                lpEndTag;
    int                    cbEndTag;
    XMLCSTR                lpNewElement;
    int                    cbNewElement;
    int                    nFirst;
} XML;

// Result of the tokenizer: where the token starts and, for a clear tag,
// which XMLClearTags entry matched.
typedef struct
{
    ALLXMLClearTag *pClr;
    XMLCSTR     pStr;
} NextToken;

// Enumeration used when parsing attributes
typedef enum Attrib
{
    eAttribName = 0,
    eAttribEquals,
    eAttribValue
} Attrib;

// Enumeration used when parsing elements to dictate whether we are currently
// inside a tag
typedef enum XMLStatus
{
    eInsideTag = 0,
    eOutsideTag
} XMLStatus;

// Renders this node with createXMLString and writes the result to "filename".
//   filename - file to create/overwrite (opened in "wb" mode)
//   encoding - name written in the XML declaration in char mode; replaced by
//              "utf-8" / "SHIFT-JIS" when the global characterEncoding is
//              UTF8 / ShiftJIS, and defaulting to "ISO-8859-1" when NULL
//   nFormat  - forwarded to createXMLString
// Returns eXMLErrorNone on success (also when the node holds no data),
// eXMLErrorCannotOpenWriteFile if the file cannot be opened, and
// eXMLErrorCannotWriteFile if a write or the final fclose fails.
XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
{
    if (!d) return eXMLErrorNone;
    FILE *f=xfopen(filename,_CXML("wb"));
    if (!f) return eXMLErrorCannotOpenWriteFile;
#ifdef _XMLWIDECHAR
    // wide-char build: the file always starts with the two bytes FF FE
    unsigned char h[2]={ 0xFF, 0xFE };
    if (!fwrite(h,2,1,f))
    {
        fclose(f);
        return eXMLErrorCannotWriteFile;
    }
    // emit an XML declaration only when the tree does not already carry one
    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
    {
        // 40 = number of characters in the declaration literal below
        if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f))
        {
            fclose(f);
            return eXMLErrorCannotWriteFile;
        }
    }
#else
    // emit an XML declaration only when the tree does not already carry one
    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
    {
        if (characterEncoding==char_encoding_UTF8)
        {
            // header so that windows recognize the file as UTF-8:
            unsigned char h[3]={0xEF,0xBB,0xBF};
            if (!fwrite(h,3,1,f))
            {
                fclose(f);
                return eXMLErrorCannotWriteFile;
            }
            encoding="utf-8";
        } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";

        if (!encoding) encoding="ISO-8859-1";
        if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0)
        {
            fclose(f);
            return eXMLErrorCannotWriteFile;
        }
    } else
    {
        // a declaration is already present: only the UTF-8 header is written
        if (characterEncoding==char_encoding_UTF8)
        {
            unsigned char h[3]={0xEF,0xBB,0xBF};
            if (!fwrite(h,3,1,f))
            {
                fclose(f);
                return eXMLErrorCannotWriteFile;
            }
        }
    }
#endif
    int i;
    XMLSTR t=createXMLString(nFormat,&i);   // i receives the length of t
    if (!fwrite(t,sizeof(XMLCHAR)*i,1,f))
    {
        free(t);
        fclose(f);
        return eXMLErrorCannotWriteFile;
    }
    if (fclose(f)!=0)
    {
        free(t);
        return eXMLErrorCannotWriteFile;
    }
    free(t);
    return eXMLErrorNone;
}

// Duplicate a given string.
00667 XMLSTR stringDup(XMLCSTR lpszData, int cbData) 00668 { 00669 if (lpszData==NULL) return NULL; 00670 00671 XMLSTR lpszNew; 00672 if (cbData==-1) cbData=(int)xstrlen(lpszData); 00673 lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); 00674 if (lpszNew) 00675 { 00676 memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); 00677 lpszNew[cbData] = (XMLCHAR)NULL; 00678 } 00679 return lpszNew; 00680 } 00681 00682 XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) 00683 { 00684 XMLSTR dd=dest; 00685 XMLCHAR ch; 00686 XMLCharacterEntity *entity; 00687 while ((ch=*source)) 00688 { 00689 entity=XMLEntities; 00690 do 00691 { 00692 if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } 00693 entity++; 00694 } while(entity->s); 00695 #ifdef _XMLWIDECHAR 00696 *(dest++)=*(source++); 00697 #else 00698 switch(XML_ByteTable[(unsigned char)ch]) 00699 { 00700 case 4: *(dest++)=*(source++); 00701 case 3: *(dest++)=*(source++); 00702 case 2: *(dest++)=*(source++); 00703 case 1: *(dest++)=*(source++); 00704 } 00705 #endif 00706 out_of_loop1: 00707 ; 00708 } 00709 *dest=0; 00710 return dd; 00711 } 00712 00713 // private (used while rendering): 00714 int ToXMLStringTool::lengthXMLString(XMLCSTR source) 00715 { 00716 int r=0; 00717 XMLCharacterEntity *entity; 00718 XMLCHAR ch; 00719 while ((ch=*source)) 00720 { 00721 entity=XMLEntities; 00722 do 00723 { 00724 if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } 00725 entity++; 00726 } while(entity->s); 00727 #ifdef _XMLWIDECHAR 00728 r++; source++; 00729 #else 00730 ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; 00731 #endif 00732 out_of_loop1: 00733 ; 00734 } 00735 return r; 00736 } 00737 00738 ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } 00739 void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } 00740 XMLSTR ToXMLStringTool::toXML(XMLCSTR source) 00741 { 00742 if (!source) 00743 { 00744 if (buflen<1) { buflen=1; 
buf=(XMLSTR)malloc(sizeof(XMLCHAR)); } 00745 *buf=0; 00746 return buf; 00747 } 00748 int l=lengthXMLString(source)+1; 00749 if (l>buflen) { freeBuffer(); buflen=l; buf=(XMLSTR)malloc(l*sizeof(XMLCHAR)); } 00750 return toXMLUnSafe(buf,source); 00751 } 00752 00753 // private: 00754 XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) 00755 { 00756 // This function is the opposite of the function "toXMLString". It decodes the escape 00757 // sequences &, ", ', <, > and replace them by the characters 00758 // &,",',<,>. This function is used internally by the XML Parser. All the calls to 00759 // the XML library will always gives you back "decoded" strings. 00760 // 00761 // in: string (s) and length (lo) of string 00762 // out: new allocated string converted from xml 00763 if (!s) return NULL; 00764 00765 int ll=0,j; 00766 XMLSTR d; 00767 XMLCSTR ss=s; 00768 XMLCharacterEntity *entity; 00769 while ((lo>0)&&(*s)) 00770 { 00771 if (*s==_CXML('&')) 00772 { 00773 if ((lo>2)&&(s[1]==_CXML('#'))) 00774 { 00775 s+=2; lo-=2; 00776 if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } 00777 while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; 00778 if (*s!=_CXML(';')) 00779 { 00780 pXML->error=eXMLErrorUnknownCharacterEntity; 00781 return NULL; 00782 } 00783 s++; lo--; 00784 } else 00785 { 00786 entity=XMLEntities; 00787 do 00788 { 00789 if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } 00790 entity++; 00791 } while(entity->s); 00792 if (!entity->s) 00793 { 00794 pXML->error=eXMLErrorUnknownCharacterEntity; 00795 return NULL; 00796 } 00797 } 00798 } else 00799 { 00800 #ifdef _XMLWIDECHAR 00801 s++; lo--; 00802 #else 00803 j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; 00804 #endif 00805 } 00806 ll++; 00807 } 00808 00809 d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); 00810 s=d; 00811 while (ll-->0) 00812 { 00813 if (*ss==_CXML('&')) 00814 { 00815 if (ss[1]==_CXML('#')) 00816 { 00817 ss+=2; j=0; 00818 if 
((*ss==_CXML('X'))||(*ss==_CXML('x'))) 00819 { 00820 ss++; 00821 while (*ss!=_CXML(';')) 00822 { 00823 if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); 00824 else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; 00825 else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; 00826 else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} 00827 ss++; 00828 } 00829 } else 00830 { 00831 while (*ss!=_CXML(';')) 00832 { 00833 if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); 00834 else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} 00835 ss++; 00836 } 00837 } 00838 #ifndef _XMLWIDECHAR 00839 if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} 00840 #endif 00841 (*d++)=(XMLCHAR)j; ss++; 00842 } else 00843 { 00844 entity=XMLEntities; 00845 do 00846 { 00847 if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } 00848 entity++; 00849 } while(entity->s); 00850 } 00851 } else 00852 { 00853 #ifdef _XMLWIDECHAR 00854 *(d++)=*(ss++); 00855 #else 00856 switch(XML_ByteTable[(unsigned char)*ss]) 00857 { 00858 case 4: *(d++)=*(ss++); ll--; 00859 case 3: *(d++)=*(ss++); ll--; 00860 case 2: *(d++)=*(ss++); ll--; 00861 case 1: *(d++)=*(ss++); 00862 } 00863 #endif 00864 } 00865 } 00866 *d=0; 00867 return (XMLSTR)s; 00868 } 00869 00870 #define XML_isSPACECHAR(ch) ((ch==_CXML('\n'))||(ch==_CXML(' '))||(ch== _CXML('\t'))||(ch==_CXML('\r'))) 00871 00872 // private: 00873 char myTagCompare(XMLCSTR cclose, XMLCSTR copen) 00874 // !!!! 
WARNING strange convention&: 00875 // return 0 if equals 00876 // return 1 if different 00877 { 00878 if (!cclose) return 1; 00879 int l=(int)xstrlen(cclose); 00880 if (xstrnicmp(cclose, copen, l)!=0) return 1; 00881 const XMLCHAR c=copen[l]; 00882 if (XML_isSPACECHAR(c)|| 00883 (c==_CXML('/' ))|| 00884 (c==_CXML('<' ))|| 00885 (c==_CXML('>' ))|| 00886 (c==_CXML('=' ))) return 0; 00887 return 1; 00888 } 00889 00890 // Obtain the next character from the string. 00891 static inline XMLCHAR getNextChar(XML *pXML) 00892 { 00893 XMLCHAR ch = pXML->lpXML[pXML->nIndex]; 00894 #ifdef _XMLWIDECHAR 00895 if (ch!=0) pXML->nIndex++; 00896 #else 00897 pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; 00898 #endif 00899 return ch; 00900 } 00901 00902 // Find the next token in a string. 00903 // pcbToken contains the number of characters that have been read. 00904 static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) 00905 { 00906 NextToken result; 00907 XMLCHAR ch; 00908 XMLCHAR chTemp; 00909 int indexStart,nFoundMatch,nIsText=FALSE; 00910 result.pClr=NULL; // prevent warning 00911 00912 // Find next non-white space character 00913 do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); 00914 00915 if (ch) 00916 { 00917 // Cache the current string pointer 00918 result.pStr = &pXML->lpXML[indexStart]; 00919 00920 // check for standard tokens 00921 switch(ch) 00922 { 00923 // Check for quotes 00924 case _CXML('\''): 00925 case _CXML('\"'): 00926 // Type of token 00927 *pType = eTokenQuotedText; 00928 chTemp = ch; 00929 00930 // Set the size 00931 nFoundMatch = FALSE; 00932 00933 // Search through the string to find a matching quote 00934 while((ch = getNextChar(pXML))) 00935 { 00936 if (ch==chTemp) { nFoundMatch = TRUE; break; } 00937 if (ch==_CXML('<')) break; 00938 } 00939 00940 // If we failed to find a matching quote 00941 if (nFoundMatch == FALSE) 00942 { 00943 pXML->nIndex=indexStart+1; 00944 nIsText=TRUE; 00945 break; 
00946 } 00947 00948 // 4.02.2002 00949 // if (FindNonWhiteSpace(pXML)) pXML->nIndex--; 00950 00951 break; 00952 00953 // Equals (used with attribute values) 00954 case _CXML('='): 00955 *pType = eTokenEquals; 00956 break; 00957 00958 // Close tag 00959 case _CXML('>'): 00960 *pType = eTokenCloseTag; 00961 break; 00962 00963 // Check for tag start and tag end 00964 case _CXML('<'): 00965 00966 { 00967 // First check whether the token is in the clear tag list (meaning it 00968 // does not need formatting). 00969 ALLXMLClearTag *ctag=XMLClearTags; 00970 do 00971 { 00972 if (!xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)) 00973 { 00974 result.pClr=ctag; 00975 pXML->nIndex+=ctag->openTagLen-1; 00976 *pType=eTokenClear; 00977 return result; 00978 } 00979 ctag++; 00980 } while(ctag->lpszOpen); 00981 00982 // Peek at the next character to see if we have an end tag '</', 00983 // or an xml declaration '<?' 00984 chTemp = pXML->lpXML[pXML->nIndex]; 00985 00986 // If we have a tag end... 00987 if (chTemp == _CXML('/')) 00988 { 00989 // Set the type and ensure we point at the next character 00990 getNextChar(pXML); 00991 *pType = eTokenTagEnd; 00992 } 00993 00994 // If we have an XML declaration tag 00995 else if (chTemp == _CXML('?')) 00996 { 00997 00998 // Set the type and ensure we point at the next character 00999 getNextChar(pXML); 01000 *pType = eTokenDeclaration; 01001 } 01002 01003 // Otherwise we must have a start tag 01004 else 01005 { 01006 *pType = eTokenTagStart; 01007 } 01008 break; 01009 } 01010 01011 // Check to see if we have a short hand type end tag ('/>'). 01012 case _CXML('/'): 01013 01014 // Peek at the next character to see if we have a short end tag '/>' 01015 chTemp = pXML->lpXML[pXML->nIndex]; 01016 01017 // If we have a short hand end tag... 
01018 if (chTemp == _CXML('>')) 01019 { 01020 // Set the type and ensure we point at the next character 01021 getNextChar(pXML); 01022 *pType = eTokenShortHandClose; 01023 break; 01024 } 01025 01026 // If we haven't found a short hand closing tag then drop into the 01027 // text process 01028 01029 // Other characters 01030 default: 01031 nIsText = TRUE; 01032 } 01033 01034 // If this is a TEXT node 01035 if (nIsText) 01036 { 01037 // Indicate we are dealing with text 01038 *pType = eTokenText; 01039 while((ch = getNextChar(pXML))) 01040 { 01041 if XML_isSPACECHAR(ch) 01042 { 01043 indexStart++; break; 01044 01045 } else if (ch==_CXML('/')) 01046 { 01047 // If we find a slash then this maybe text or a short hand end tag 01048 // Peek at the next character to see it we have short hand end tag 01049 ch=pXML->lpXML[pXML->nIndex]; 01050 // If we found a short hand end tag then we need to exit the loop 01051 if (ch==_CXML('>')) { pXML->nIndex--; break; } 01052 01053 } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('='))) 01054 { 01055 pXML->nIndex--; break; 01056 } 01057 } 01058 } 01059 *pcbToken = pXML->nIndex-indexStart; 01060 } else 01061 { 01062 // If we failed to obtain a valid character 01063 *pcbToken = 0; 01064 *pType = eTokenError; 01065 result.pStr=NULL; 01066 } 01067 01068 return result; 01069 } 01070 01071 XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) 01072 { 01073 if (!d) { free(lpszName); return NULL; } 01074 if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName); 01075 d->lpszName=lpszName; 01076 return lpszName; 01077 } 01078 01079 // private: 01080 XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; } 01081 XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) 01082 { 01083 d=(XMLNodeData*)malloc(sizeof(XMLNodeData)); 01084 d->ref_count=1; 01085 01086 d->lpszName=NULL; 01087 d->nChild= 0; 01088 d->nText = 0; 01089 d->nClear = 0; 01090 d->nAttribute = 0; 01091 01092 d->isDeclaration = 
isDeclaration; 01093 01094 d->pParent = pParent; 01095 d->pChild= NULL; 01096 d->pText= NULL; 01097 d->pClear= NULL; 01098 d->pAttribute= NULL; 01099 d->pOrder= NULL; 01100 01101 updateName_WOSD(lpszName); 01102 } 01103 01104 XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); } 01105 XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); } 01106 01107 #define MEMORYINCREASE 50 01108 01109 static inline void myFree(void *p) { if (p) free(p); } 01110 static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) 01111 { 01112 if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); } 01113 if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem); 01114 // if (!p) 01115 // { 01116 // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220); 01117 // } 01118 return p; 01119 } 01120 01121 // private: 01122 XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) 01123 { 01124 if (index<0) return -1; 01125 int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i; 01126 } 01127 01128 // private: 01129 // update "order" information when deleting a content of a XMLNode 01130 int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) 01131 { 01132 int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t); 01133 memmove(o+i, o+i+1, (n-i)*sizeof(int)); 01134 for (;i<n;i++) 01135 if ((o[i]&3)==(int)t) o[i]-=4; 01136 // We should normally do: 01137 // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int)); 01138 // but we skip reallocation because it's too time consuming. 01139 // Anyway, at the end, it will be free'd completely at once. 
01140 return i; 01141 } 01142 01143 void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype) 01144 { 01145 // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") 01146 // out: *_pos is the index inside p 01147 p=myRealloc(p,(nc+1),memoryIncrease,size); 01148 int n=d->nChild+d->nText+d->nClear; 01149 d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int)); 01150 int pos=*_pos,*o=d->pOrder; 01151 01152 if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } 01153 01154 int i=pos; 01155 memmove(o+i+1, o+i, (n-i)*sizeof(int)); 01156 01157 while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++; 01158 if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } 01159 01160 o[i]=o[pos]; 01161 for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4; 01162 01163 *_pos=pos=o[pos]>>2; 01164 memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size); 01165 01166 return p; 01167 } 01168 01169 // Add a child node to the given element. 01170 XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos) 01171 { 01172 if (!lpszName) return emptyXMLNode; 01173 d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); 01174 d->pChild[pos].d=NULL; 01175 d->pChild[pos]=XMLNode(d,lpszName,isDeclaration); 01176 d->nChild++; 01177 return d->pChild[pos]; 01178 } 01179 01180 // Add an attribute to an element. 
01181 XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev) 01182 { 01183 if (!lpszName) return &emptyXMLAttribute; 01184 if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; } 01185 int nc=d->nAttribute; 01186 d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute)); 01187 XMLAttribute *pAttr=d->pAttribute+nc; 01188 pAttr->lpszName = lpszName; 01189 pAttr->lpszValue = lpszValuev; 01190 d->nAttribute++; 01191 return pAttr; 01192 } 01193 01194 // Add text to the element. 01195 XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) 01196 { 01197 if (!lpszValue) return NULL; 01198 if (!d) { myFree(lpszValue); return NULL; } 01199 d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText); 01200 d->pText[pos]=lpszValue; 01201 d->nText++; 01202 return lpszValue; 01203 } 01204 01205 // Add clear (unformatted) text to the element. 01206 XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) 01207 { 01208 if (!lpszValue) return &emptyXMLClear; 01209 if (!d) { myFree(lpszValue); return &emptyXMLClear; } 01210 d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear); 01211 XMLClear *pNewClear=d->pClear+pos; 01212 pNewClear->lpszValue = lpszValue; 01213 if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen; 01214 if (!lpszClose) lpszClose=XMLClearTags->lpszClose; 01215 pNewClear->lpszOpenTag = lpszOpen; 01216 pNewClear->lpszCloseTag = lpszClose; 01217 d->nClear++; 01218 return pNewClear; 01219 } 01220 01221 // private: 01222 // Parse a clear (unformatted) type node. 
01223 char XMLNode::parseClearTag(void *px, void *_pClear) 01224 { 01225 XML *pXML=(XML *)px; 01226 ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear); 01227 int cbTemp=0; 01228 XMLCSTR lpszTemp=NULL; 01229 XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex]; 01230 static XMLCSTR docTypeEnd=_CXML("]>"); 01231 01232 // Find the closing tag 01233 // Seems the <!DOCTYPE need a better treatment so lets handle it 01234 if (pClear.lpszOpen==XMLClearTags[1].lpszOpen) 01235 { 01236 XMLCSTR pCh=lpXML; 01237 while (*pCh) 01238 { 01239 if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; } 01240 else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; } 01241 #ifdef _XMLWIDECHAR 01242 pCh++; 01243 #else 01244 pCh+=XML_ByteTable[(unsigned char)(*pCh)]; 01245 #endif 01246 } 01247 } else lpszTemp=xstrstr(lpXML, pClear.lpszClose); 01248 01249 if (lpszTemp) 01250 { 01251 // Cache the size and increment the index 01252 cbTemp = (int)(lpszTemp - lpXML); 01253 01254 pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose); 01255 01256 // Add the clear node to the current element 01257 addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1); 01258 return 0; 01259 } 01260 01261 // If we failed to find the end tag 01262 pXML->error = eXMLErrorUnmatchedEndClearTag; 01263 return 1; 01264 } 01265 01266 void XMLNode::exactMemory(XMLNodeData *d) 01267 { 01268 if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int)); 01269 if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode)); 01270 if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute)); 01271 if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR)); 01272 if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear)); 01273 } 01274 01275 char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) 01276 { 01277 XML *pXML=(XML *)pa; 01278 
XMLCSTR lpszText=pXML->lpszText; 01279 if (!lpszText) return 0; 01280 if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++; 01281 int cbText = (int)(tokenPStr - lpszText); 01282 if (!cbText) { pXML->lpszText=NULL; return 0; } 01283 if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; } 01284 if (!cbText) { pXML->lpszText=NULL; return 0; } 01285 XMLSTR lpt=fromXMLString(lpszText,cbText,pXML); 01286 if (!lpt) return 1; 01287 pXML->lpszText=NULL; 01288 if (removeCommentsInMiddleOfText && d->nText && d->nClear) 01289 { 01290 // if the previous insertion was a comment (<!-- -->) AND 01291 // if the previous previous insertion was a text then, delete the comment and append the text 01292 int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder; 01293 if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText)) 01294 { 01295 int i=o[n]>>2; 01296 if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen) 01297 { 01298 deleteClear(i); 01299 i=o[n-1]>>2; 01300 n=xstrlen(d->pText[i]); 01301 int n2=xstrlen(lpt)+1; 01302 d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR)); 01303 if (!d->pText[i]) return 1; 01304 memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR)); 01305 free(lpt); 01306 return 0; 01307 } 01308 } 01309 } 01310 addText_priv(MEMORYINCREASE,lpt,-1); 01311 return 0; 01312 } 01313 // private: 01314 // Recursively parse an XML element. 
// pa is the shared parser state (an XML*).  The first call ever made on a
// given state (pXML->nFirst set) starts outside of any tag; every other call
// is made on a freshly created child right after its tag name was read and
// therefore starts inside the tag, collecting attributes.
//
// Returns FALSE when parsing must stop (an error was stored in pXML->error or
// the end of the input was reached) and TRUE when this element is finished
// and the caller should carry on.  On a TRUE return, pXML->cbEndTag and
// pXML->cbNewElement tell the caller whether an end tag is still waiting to
// be matched or a new element is still waiting to be created further up.
int XMLNode::ParseXMLElement(void *pa)
{
    XML *pXML=(XML *)pa;
    int cbToken;
    enum XMLTokenTypeTag xtype;
    NextToken token;
    XMLCSTR lpszTemp=NULL;   // start of the pending attribute name / end tag name
    int cbTemp=0;            // length of the string at lpszTemp
    char nDeclaration;
    XMLNode pNew;
    enum XMLStatus status; // inside or outside a tag
    enum Attrib attrib = eAttribName;

    assert(pXML);

    // If this is the first call to the function
    if (pXML->nFirst)
    {
        // Assume we are outside of a tag definition
        pXML->nFirst = FALSE;
        status = eOutsideTag;
    } else
    {
        // If this is not the first call then we should only be called when inside a tag.
        status = eInsideTag;
    }

    // Iterate through the tokens in the document
    for(;;)
    {
        // Obtain the next token
        token = GetNextToken(pXML, &cbToken, &xtype);

        if (xtype != eTokenError)
        {
            // Check the current status
            switch(status)
            {

            // If we are outside of a tag definition
            case eOutsideTag:

                // Check what type of token we obtained
                switch(xtype)
                {
                // If we have found text or quoted text: nothing to do now,
                // the text run is flushed by maybeAddTxT() when the next tag
                // is met
                case eTokenText:
                case eTokenCloseTag:          /* '>'         */
                case eTokenShortHandClose:    /* '/>'        */
                case eTokenQuotedText:
                case eTokenEquals:
                    break;

                // If we found a start tag '<' and declarations '<?'
                case eTokenTagStart:
                case eTokenDeclaration:

                    // Cache whether this new element is a declaration or not
                    nDeclaration = (xtype == eTokenDeclaration);

                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;

                    // Find the name of the tag
                    token = GetNextToken(pXML, &cbToken, &xtype);

                    // Return an error if we couldn't obtain the next token or
                    // it wasn't text
                    if (xtype != eTokenText)
                    {
                        pXML->error = eXMLErrorMissingTagName;
                        return FALSE;
                    }

                    // If we found a new element which is the same as this
                    // element then we need to pass this back to the caller..

#ifdef APPROXIMATE_PARSING
                    if (d->lpszName &&
                        myTagCompare(d->lpszName, token.pStr) == 0)
                    {
                        // Indicate to the caller that it needs to create a
                        // new element.
                        pXML->lpNewElement = token.pStr;
                        pXML->cbNewElement = cbToken;
                        return TRUE;
                    } else
#endif
                    {
                        // If the name of the new element differs from the name of
                        // the current element we need to add the new element to
                        // the current one and recurse
                        pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);

                        while (!pNew.isEmpty())
                        {
                            // Call ourselves to process the new node.  If we
                            // return FALSE this means we don't have any more
                            // processing to do...

                            if (!pNew.ParseXMLElement(pXML)) return FALSE;
                            else
                            {
                                // If the recursive call ended on an end tag
                                // that did not match the child, we need to
                                // unwind the calls to this function until we
                                // find the appropriate node (the element name
                                // and end tag name must match)
                                if (pXML->cbEndTag)
                                {
                                    // If we are back at the root node then we
                                    // have an unmatched end tag
                                    if (!d->lpszName)
                                    {
                                        pXML->error=eXMLErrorUnmatchedEndTag;
                                        return FALSE;
                                    }

                                    // If the end tag matches the name of this
                                    // element then we only need to unwind
                                    // once more...

                                    if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
                                    {
                                        pXML->cbEndTag = 0;
                                    }

                                    return TRUE;
                                } else
                                    if (pXML->cbNewElement)
                                    {
                                        // If the call indicated a new element is to
                                        // be created on THIS element.

                                        // If the name of this element matches the
                                        // name of the element we need to create
                                        // then we need to return to the caller
                                        // and let it process the element.

                                        if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
                                        {
                                            return TRUE;
                                        }

                                        // Add the new element and recurse
                                        pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);
                                        pXML->cbNewElement = 0;
                                    }
                                    else
                                    {
                                        // If we didn't have a new element to create
                                        // (this ends the while loop)
                                        pNew = emptyXMLNode;

                                    }
                            }
                        }
                    }
                    break;

                // If we found an end tag
                case eTokenTagEnd:

                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;

                    // Find the name of the end tag
                    token = GetNextToken(pXML, &cbTemp, &xtype);

                    // The end tag should be text
                    if (xtype != eTokenText)
                    {
                        pXML->error = eXMLErrorMissingEndTagName;
                        return FALSE;
                    }
                    lpszTemp = token.pStr;

                    // After the end tag we should find a closing tag
                    token = GetNextToken(pXML, &cbToken, &xtype);
                    if (xtype != eTokenCloseTag)
                    {
                        pXML->error = eXMLErrorMissingEndTagName;
                        return FALSE;
                    }
                    pXML->lpszText=pXML->lpXML+pXML->nIndex;

                    // We need to return to the previous caller.  If the name
                    // of the tag cannot be found we need to keep returning to
                    // caller until we find a match.
                    // (The #ifdef below selects the body of this 'if'.)
                    if (myTagCompare(d->lpszName, lpszTemp) != 0)
#ifdef STRICT_PARSING
                    {
                        pXML->error=eXMLErrorUnmatchedEndTag;
                        pXML->nIndexMissigEndTag=pXML->nIndex;
                        return FALSE;
                    }
#else
                    {
                        // Record the error but keep going: the pending end
                        // tag is handed to the callers through lpEndTag /
                        // cbEndTag
                        pXML->error=eXMLErrorMissingEndTag;
                        pXML->nIndexMissigEndTag=pXML->nIndex;
                        pXML->lpEndTag = lpszTemp;
                        pXML->cbEndTag = cbTemp;
                    }
#endif

                    // Return to the caller
                    exactMemory(d);
                    return TRUE;

                // If we found a clear (unformatted) token
                case eTokenClear:
                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;
                    if (parseClearTag(pXML, token.pClr)) return FALSE;
                    pXML->lpszText=pXML->lpXML+pXML->nIndex;
                    break;

                default:
                    break;
                }
                break;

            // If we are inside a tag definition we need to search for attributes
            case eInsideTag:

                // Check what part of the attribute (name, equals, value) we
                // are looking for.
                switch(attrib)
                {
                // If we are looking for a new attribute
                case eAttribName:

                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text...
                    // Eg.  'attribute'
                    case eTokenText:
                        // Cache the token then indicate that we are next to
                        // look for the equals
                        lpszTemp = token.pStr;
                        cbTemp = cbToken;
                        attrib = eAttribEquals;
                        break;

                    // If we found a closing tag...
                    // Eg.  '>'
                    case eTokenCloseTag:
                        // We are now outside the tag
                        status = eOutsideTag;
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
                        break;

                    // If we found a short hand '/>' closing tag then we can
                    // return to the caller
                    case eTokenShortHandClose:
                        exactMemory(d);
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
                        return TRUE;

                    // Errors...
                    case eTokenQuotedText:    /* '"SomeText"'   */
                    case eTokenTagStart:      /* '<'            */
                    case eTokenTagEnd:        /* '</'           */
                    case eTokenEquals:        /* '='            */
                    case eTokenDeclaration:   /* '<?'           */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                    default: break;
                    }
                    break;

                // If we are looking for an equals
                case eAttribEquals:
                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text...
                    // Eg.  'Attribute AnotherAttribute'
                    case eTokenText:
                        // Add the unvalued attribute to the list
                        addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
                        // Cache the new token; we keep looking for the
                        // equals sign, now for this attribute
                        lpszTemp = token.pStr;
                        cbTemp = cbToken;
                        break;

                    // If we found a closing tag 'Attribute >' or a short hand
                    // closing tag 'Attribute />'
                    case eTokenShortHandClose:
                    case eTokenCloseTag:
                        // If we are a declaration element '<?' then we need
                        // to remove extra closing '?' if it exists
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;

                        if (d->isDeclaration &&
                            (lpszTemp[cbTemp-1]) == _CXML('?'))
                        {
                            cbTemp--;
                            // a declaration nested below the top level is
                            // closed by '?>' just like by '/>'
                            if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose;
                        }

                        if (cbTemp)
                        {
                            // Add the unvalued attribute to the list
                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
                        }

                        // If this is the end of the tag then return to the caller
                        if (xtype == eTokenShortHandClose)
                        {
                            exactMemory(d);
                            return TRUE;
                        }

                        // We are now outside the tag
                        status = eOutsideTag;
                        break;

                    // If we found the equals token...
                    // Eg.  'Attribute ='
                    case eTokenEquals:
                        // Indicate that we next need to search for the value
                        // for the attribute
                        attrib = eAttribValue;
                        break;

                    // Errors...
                    case eTokenQuotedText:    /* 'Attribute "InvalidAttr"'*/
                    case eTokenTagStart:      /* 'Attribute <'            */
                    case eTokenTagEnd:        /* 'Attribute </'           */
                    case eTokenDeclaration:   /* 'Attribute <?'           */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                    default: break;
                    }
                    break;

                // If we are looking for an attribute value
                case eAttribValue:
                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text or quoted text...
                    // Eg.  'Attribute = "Value"' or 'Attribute = Value' or
                    // 'Attribute = 'Value''.
                    case eTokenText:
                    case eTokenQuotedText:
                        // If we are a declaration element '<?' then we need
                        // to remove extra closing '?' if it exists
                        if (d->isDeclaration &&
                            (token.pStr[cbToken-1]) == _CXML('?'))
                        {
                            cbToken--;
                        }

                        if (cbTemp)
                        {
                            // Add the valued attribute to the list
                            // (for quoted text, skip the opening quote and
                            // drop both quotes from the length)
                            if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; }
                            XMLSTR attrVal=(XMLSTR)token.pStr;
                            if (attrVal)
                            {
                                attrVal=fromXMLString(attrVal,cbToken,pXML);
                                if (!attrVal) return FALSE;
                            }
                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal);
                        }

                        // Indicate we are searching for a new attribute
                        attrib = eAttribName;
                        break;

                    // Errors...
                    case eTokenTagStart:        /* 'Attr = <'          */
                    case eTokenTagEnd:          /* 'Attr = </'         */
                    case eTokenCloseTag:        /* 'Attr = >'          */
                    case eTokenShortHandClose:  /* "Attr = />"         */
                    case eTokenEquals:          /* 'Attr = ='          */
                    case eTokenDeclaration:     /* 'Attr = <?'         */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                        break;
                    default: break;
                    }
                }
            }
        }
        // If we failed to obtain the next token (end of the input)
        else
        {
            // Running out of input inside a regular, non top-level element
            // means its end tag is missing
            if ((!d->isDeclaration)&&(d->pParent))
            {
#ifdef STRICT_PARSING
                pXML->error=eXMLErrorUnmatchedEndTag;
#else
                pXML->error=eXMLErrorMissingEndTag;
#endif
                pXML->nIndexMissigEndTag=pXML->nIndex;
            }
            // Flush whatever text is still pending; its result is ignored
            maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex);
            return FALSE;
        }
    }
}

// Count the number of lines and columns in an XML string.
01726 static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) 01727 { 01728 XMLCHAR ch; 01729 assert(lpXML); 01730 assert(pResults); 01731 01732 struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; 01733 01734 pResults->nLine = 1; 01735 pResults->nColumn = 1; 01736 while (xml.nIndex<nUpto) 01737 { 01738 ch = getNextChar(&xml); 01739 if (ch != _CXML('\n')) pResults->nColumn++; 01740 else 01741 { 01742 pResults->nLine++; 01743 pResults->nColumn=1; 01744 } 01745 } 01746 } 01747 01748 // Parse XML and return the root element. 01749 XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) 01750 { 01751 if (!lpszXML) 01752 { 01753 if (pResults) 01754 { 01755 pResults->error=eXMLErrorNoElements; 01756 pResults->nLine=0; 01757 pResults->nColumn=0; 01758 } 01759 return emptyXMLNode; 01760 } 01761 01762 XMLNode xnode(NULL,NULL,FALSE); 01763 struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; 01764 01765 // Create header element 01766 xnode.ParseXMLElement(&xml); 01767 enum XMLError error = xml.error; 01768 if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound; 01769 if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node 01770 01771 // If no error occurred 01772 if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound)) 01773 { 01774 XMLCSTR name=xnode.getName(); 01775 if (tag&&(*tag)&&((!name)||(xstricmp(name,tag)))) 01776 { 01777 xnode=xnode.getChildNode(tag); 01778 if (xnode.isEmpty()) 01779 { 01780 if (pResults) 01781 { 01782 pResults->error=eXMLErrorFirstTagNotFound; 01783 pResults->nLine=0; 01784 pResults->nColumn=0; 01785 } 01786 return emptyXMLNode; 01787 } 01788 } 01789 } else 01790 { 01791 // Cleanup: this will destroy all the nodes 01792 xnode = emptyXMLNode; 01793 } 01794 01795 01796 // If we have been given somewhere to place results 01797 if (pResults) 01798 { 01799 
pResults->error = error; 01800 01801 // If we have an error 01802 if (error!=eXMLErrorNone) 01803 { 01804 if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag; 01805 // Find which line and column it starts on. 01806 CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); 01807 } 01808 } 01809 return xnode; 01810 } 01811 01812 XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) 01813 { 01814 if (pResults) { pResults->nLine=0; pResults->nColumn=0; } 01815 FILE *f=xfopen(filename,_CXML("rb")); 01816 if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } 01817 fseek(f,0,SEEK_END); 01818 int l=(int)ftell(f),headerSz=0; 01819 if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; } 01820 fseek(f,0,SEEK_SET); 01821 unsigned char *buf=(unsigned char*)malloc(l+4); 01822 l=(int)fread(buf,1,l,f); 01823 fclose(f); 01824 buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0; 01825 #ifdef _XMLWIDECHAR 01826 if (guessWideCharChars) 01827 { 01828 if (!myIsTextWideChar(buf,l)) 01829 { 01830 XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy; 01831 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; } 01832 XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce); 01833 free(buf); buf=(unsigned char*)b2; headerSz=0; 01834 } else 01835 { 01836 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01837 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01838 } 01839 } else 01840 { 01841 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01842 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01843 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01844 } 01845 #else 01846 if (guessWideCharChars) 01847 { 01848 if (myIsTextWideChar(buf,l)) 01849 { 01850 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01851 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01852 char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz)); 01853 
free(buf); buf=(unsigned char*)b2; headerSz=0; 01854 } else 01855 { 01856 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01857 } 01858 } else 01859 { 01860 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01861 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01862 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01863 } 01864 #endif 01865 01866 if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } 01867 XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); 01868 free(buf); 01869 return x; 01870 } 01871 01872 static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; } 01873 // private: 01874 // Creates an user friendly XML string from a given element with 01875 // appropriate white space and carriage returns. 01876 // 01877 // This recurses through all subnodes then adds contents of the nodes to the 01878 // string. 01879 int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat) 01880 { 01881 int nResult = 0; 01882 int cb=nFormat<0?0:nFormat; 01883 int cbElement; 01884 int nChildFormat=-1; 01885 int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear; 01886 int i,j; 01887 if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2; 01888 01889 assert(pEntry); 01890 01891 #define LENSTR(lpsz) (lpsz ? xstrlen(lpsz) : 0) 01892 01893 // If the element has no name then assume this is the head node. 
01894 cbElement = (int)LENSTR(pEntry->lpszName); 01895 01896 if (cbElement) 01897 { 01898 // "<elementname " 01899 if (lpszMarker) 01900 { 01901 if (cb) charmemset(lpszMarker, INDENTCHAR, cb); 01902 nResult = cb; 01903 lpszMarker[nResult++]=_CXML('<'); 01904 if (pEntry->isDeclaration) lpszMarker[nResult++]=_CXML('?'); 01905 xstrcpy(&lpszMarker[nResult], pEntry->lpszName); 01906 nResult+=cbElement; 01907 lpszMarker[nResult++]=_CXML(' '); 01908 01909 } else 01910 { 01911 nResult+=cbElement+2+cb; 01912 if (pEntry->isDeclaration) nResult++; 01913 } 01914 01915 // Enumerate attributes and add them to the string 01916 XMLAttribute *pAttr=pEntry->pAttribute; 01917 for (i=0; i<pEntry->nAttribute; i++) 01918 { 01919 // "Attrib 01920 cb = (int)LENSTR(pAttr->lpszName); 01921 if (cb) 01922 { 01923 if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName); 01924 nResult += cb; 01925 // "Attrib=Value " 01926 if (pAttr->lpszValue) 01927 { 01928 cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); 01929 if (lpszMarker) 01930 { 01931 lpszMarker[nResult]=_CXML('='); 01932 lpszMarker[nResult+1]=_CXML('"'); 01933 if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue); 01934 lpszMarker[nResult+cb+2]=_CXML('"'); 01935 } 01936 nResult+=cb+3; 01937 } 01938 if (lpszMarker) lpszMarker[nResult] = _CXML(' '); 01939 nResult++; 01940 } 01941 pAttr++; 01942 } 01943 01944 if (pEntry->isDeclaration) 01945 { 01946 if (lpszMarker) 01947 { 01948 lpszMarker[nResult-1]=_CXML('?'); 01949 lpszMarker[nResult]=_CXML('>'); 01950 } 01951 nResult++; 01952 if (nFormat!=-1) 01953 { 01954 if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); 01955 nResult++; 01956 } 01957 } else 01958 // If there are child nodes we need to terminate the start tag 01959 if (nElementI) 01960 { 01961 if (lpszMarker) lpszMarker[nResult-1]=_CXML('>'); 01962 if (nFormat>=0) 01963 { 01964 if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); 01965 nResult++; 01966 } 01967 } else nResult--; 01968 } 01969 01970 
// Calculate the child format for when we recurse. This is used to 01971 // determine the number of spaces used for prefixes. 01972 if (nFormat!=-1) 01973 { 01974 if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1; 01975 else nChildFormat=nFormat; 01976 } 01977 01978 // Enumerate through remaining children 01979 for (i=0; i<nElementI; i++) 01980 { 01981 j=pEntry->pOrder[i]; 01982 switch((XMLElementType)(j&3)) 01983 { 01984 // Text nodes 01985 case eNodeText: 01986 { 01987 // "Text" 01988 XMLCSTR pChild=pEntry->pText[j>>2]; 01989 cb = (int)ToXMLStringTool::lengthXMLString(pChild); 01990 if (cb) 01991 { 01992 if (nFormat>=0) 01993 { 01994 if (lpszMarker) 01995 { 01996 charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1); 01997 ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild); 01998 lpszMarker[nResult+nFormat+1+cb]=_CXML('\n'); 01999 } 02000 nResult+=cb+nFormat+2; 02001 } else 02002 { 02003 if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); 02004 nResult += cb; 02005 } 02006 } 02007 break; 02008 } 02009 02010 // Clear type nodes 02011 case eNodeClear: 02012 { 02013 XMLClear *pChild=pEntry->pClear+(j>>2); 02014 // "OpenTag" 02015 cb = (int)LENSTR(pChild->lpszOpenTag); 02016 if (cb) 02017 { 02018 if (nFormat!=-1) 02019 { 02020 if (lpszMarker) 02021 { 02022 charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1); 02023 xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag); 02024 } 02025 nResult+=cb+nFormat+1; 02026 } 02027 else 02028 { 02029 if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); 02030 nResult += cb; 02031 } 02032 } 02033 02034 // "OpenTag Value" 02035 cb = (int)LENSTR(pChild->lpszValue); 02036 if (cb) 02037 { 02038 if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue); 02039 nResult += cb; 02040 } 02041 02042 // "OpenTag Value CloseTag" 02043 cb = (int)LENSTR(pChild->lpszCloseTag); 02044 if (cb) 02045 { 02046 if (lpszMarker) xstrcpy(&lpszMarker[nResult], 
pChild->lpszCloseTag); 02047 nResult += cb; 02048 } 02049 02050 if (nFormat!=-1) 02051 { 02052 if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); 02053 nResult++; 02054 } 02055 break; 02056 } 02057 02058 // Element nodes 02059 case eNodeChild: 02060 { 02061 // Recursively add child nodes 02062 nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat); 02063 break; 02064 } 02065 default: break; 02066 } 02067 } 02068 02069 if ((cbElement)&&(!pEntry->isDeclaration)) 02070 { 02071 // If we have child entries we need to use long XML notation for 02072 // closing the element - "<elementname>blah blah blah</elementname>" 02073 if (nElementI) 02074 { 02075 // "</elementname>\0" 02076 if (lpszMarker) 02077 { 02078 if (nFormat >=0) 02079 { 02080 charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat); 02081 nResult+=nFormat; 02082 } 02083 02084 lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/'); 02085 nResult += 2; 02086 xstrcpy(&lpszMarker[nResult], pEntry->lpszName); 02087 nResult += cbElement; 02088 02089 lpszMarker[nResult]=_CXML('>'); 02090 if (nFormat == -1) nResult++; 02091 else 02092 { 02093 lpszMarker[nResult+1]=_CXML('\n'); 02094 nResult+=2; 02095 } 02096 } else 02097 { 02098 if (nFormat>=0) nResult+=cbElement+4+nFormat; 02099 else if (nFormat==-1) nResult+=cbElement+3; 02100 else nResult+=cbElement+4; 02101 } 02102 } else 02103 { 02104 // If there are no children we can use shorthand XML notation - 02105 // "<elementname/>" 02106 // "/>\0" 02107 if (lpszMarker) 02108 { 02109 lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>'); 02110 if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n'); 02111 } 02112 nResult += nFormat == -1 ? 
2 : 3; 02113 } 02114 } 02115 02116 return nResult; 02117 } 02118 02119 #undef LENSTR 02120 02121 // Create an XML string 02122 // @param int nFormat - 0 if no formatting is required 02123 // otherwise nonzero for formatted text 02124 // with carriage returns and indentation. 02125 // @param int *pnSize - [out] pointer to the size of the 02126 // returned string not including the 02127 // NULL terminator. 02128 // @return XMLSTR - Allocated XML string, you must free 02129 // this with free(). 02130 XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const 02131 { 02132 if (!d) { if (pnSize) *pnSize=0; return NULL; } 02133 02134 XMLSTR lpszResult = NULL; 02135 int cbStr; 02136 02137 // Recursively Calculate the size of the XML string 02138 if (!dropWhiteSpace) nFormat=0; 02139 nFormat = nFormat ? 0 : -1; 02140 cbStr = CreateXMLStringR(d, 0, nFormat); 02141 // Alllocate memory for the XML string + the NULL terminator and 02142 // create the recursively XML string. 02143 lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR)); 02144 CreateXMLStringR(d, lpszResult, nFormat); 02145 lpszResult[cbStr]=_CXML('\0'); 02146 if (pnSize) *pnSize = cbStr; 02147 return lpszResult; 02148 } 02149 02150 int XMLNode::detachFromParent(XMLNodeData *d) 02151 { 02152 XMLNode *pa=d->pParent->pChild; 02153 int i=0; 02154 while (((void*)(pa[i].d))!=((void*)d)) i++; 02155 d->pParent->nChild--; 02156 if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode)); 02157 else { free(pa); d->pParent->pChild=NULL; } 02158 return removeOrderElement(d->pParent,eNodeChild,i); 02159 } 02160 02161 XMLNode::~XMLNode() 02162 { 02163 if (!d) return; 02164 d->ref_count--; 02165 emptyTheNode(0); 02166 } 02167 void XMLNode::deleteNodeContent() 02168 { 02169 if (!d) return; 02170 if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; } 02171 emptyTheNode(1); 02172 } 02173 void XMLNode::emptyTheNode(char force) 02174 { 02175 XMLNodeData *dd=d; // warning: must stay this 
way! 02176 if ((dd->ref_count==0)||force) 02177 { 02178 if (d->pParent) detachFromParent(d); 02179 int i; 02180 XMLNode *pc; 02181 for(i=0; i<dd->nChild; i++) 02182 { 02183 pc=dd->pChild+i; 02184 pc->d->pParent=NULL; 02185 pc->d->ref_count--; 02186 pc->emptyTheNode(force); 02187 } 02188 myFree(dd->pChild); 02189 for(i=0; i<dd->nText; i++) free((void*)dd->pText[i]); 02190 myFree(dd->pText); 02191 for(i=0; i<dd->nClear; i++) free((void*)dd->pClear[i].lpszValue); 02192 myFree(dd->pClear); 02193 for(i=0; i<dd->nAttribute; i++) 02194 { 02195 free((void*)dd->pAttribute[i].lpszName); 02196 if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue); 02197 } 02198 myFree(dd->pAttribute); 02199 myFree(dd->pOrder); 02200 myFree((void*)dd->lpszName); 02201 dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0; 02202 dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL; 02203 dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL; 02204 } 02205 if (dd->ref_count==0) 02206 { 02207 free(dd); 02208 d=NULL; 02209 } 02210 } 02211 02212 XMLNode& XMLNode::operator=( const XMLNode& A ) 02213 { 02214 // shallow copy 02215 if (this != &A) 02216 { 02217 if (d) { d->ref_count--; emptyTheNode(0); } 02218 d=A.d; 02219 if (d) (d->ref_count) ++ ; 02220 } 02221 return *this; 02222 } 02223 02224 XMLNode::XMLNode(const XMLNode &A) 02225 { 02226 // shallow copy 02227 d=A.d; 02228 if (d) (d->ref_count)++ ; 02229 } 02230 02231 XMLNode XMLNode::deepCopy() const 02232 { 02233 if (!d) return XMLNode::emptyXMLNode; 02234 XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration); 02235 XMLNodeData *p=x.d; 02236 int n=d->nAttribute; 02237 if (n) 02238 { 02239 p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute)); 02240 while (n--) 02241 { 02242 p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName); 02243 p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue); 02244 } 02245 } 02246 if (d->pOrder) 02247 { 02248 
n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n); 02249 } 02250 n=d->nText; 02251 if (n) 02252 { 02253 p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR)); 02254 while(n--) p->pText[n]=stringDup(d->pText[n]); 02255 } 02256 n=d->nClear; 02257 if (n) 02258 { 02259 p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear)); 02260 while (n--) 02261 { 02262 p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag; 02263 p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag; 02264 p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue); 02265 } 02266 } 02267 n=d->nChild; 02268 if (n) 02269 { 02270 p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode)); 02271 while (n--) 02272 { 02273 p->pChild[n].d=NULL; 02274 p->pChild[n]=d->pChild[n].deepCopy(); 02275 p->pChild[n].d->pParent=p; 02276 } 02277 } 02278 return x; 02279 } 02280 02281 XMLNode XMLNode::addChild(XMLNode childNode, int pos) 02282 { 02283 XMLNodeData *dc=childNode.d; 02284 if ((!dc)||(!d)) return childNode; 02285 if (!dc->lpszName) 02286 { 02287 // this is a root node: todo: correct fix 02288 int j=pos; 02289 while (dc->nChild) 02290 { 02291 addChild(dc->pChild[0],j); 02292 if (pos>=0) j++; 02293 } 02294 return childNode; 02295 } 02296 if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++; 02297 dc->pParent=d; 02298 // int nc=d->nChild; 02299 // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); 02300 d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); 02301 d->pChild[pos].d=dc; 02302 d->nChild++; 02303 return childNode; 02304 } 02305 02306 void XMLNode::deleteAttribute(int i) 02307 { 02308 if ((!d)||(i<0)||(i>=d->nAttribute)) return; 02309 d->nAttribute--; 02310 XMLAttribute *p=d->pAttribute+i; 02311 free((void*)p->lpszName); 02312 if (p->lpszValue) free((void*)p->lpszValue); 02313 if (d->nAttribute) 
memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; } 02314 } 02315 02316 void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); } 02317 void XMLNode::deleteAttribute(XMLCSTR lpszName) 02318 { 02319 int j=0; 02320 getAttribute(lpszName,&j); 02321 if (j) deleteAttribute(j-1); 02322 } 02323 02324 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i) 02325 { 02326 if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; } 02327 if (i>=d->nAttribute) 02328 { 02329 if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); 02330 return NULL; 02331 } 02332 XMLAttribute *p=d->pAttribute+i; 02333 if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue); 02334 p->lpszValue=lpszNewValue; 02335 if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; }; 02336 return p; 02337 } 02338 02339 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) 02340 { 02341 if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName); 02342 return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue); 02343 } 02344 02345 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName) 02346 { 02347 int j=0; 02348 getAttribute(lpszOldName,&j); 02349 if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1); 02350 else 02351 { 02352 if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); 02353 else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue); 02354 } 02355 } 02356 02357 int XMLNode::indexText(XMLCSTR lpszValue) const 02358 { 02359 if (!d) return -1; 02360 int i,l=d->nText; 02361 if (!lpszValue) { if (l) return 0; return -1; } 02362 XMLCSTR *p=d->pText; 02363 for (i=0; 
i<l; i++) if (lpszValue==p[i]) return i; 02364 return -1; 02365 } 02366 02367 void XMLNode::deleteText(int i) 02368 { 02369 if ((!d)||(i<0)||(i>=d->nText)) return; 02370 d->nText--; 02371 XMLCSTR *p=d->pText+i; 02372 free((void*)*p); 02373 if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; } 02374 removeOrderElement(d,eNodeText,i); 02375 } 02376 02377 void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); } 02378 02379 XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) 02380 { 02381 if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } 02382 if (i>=d->nText) return addText_WOSD(lpszNewValue); 02383 XMLCSTR *p=d->pText+i; 02384 if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; } 02385 return lpszNewValue; 02386 } 02387 02388 XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) 02389 { 02390 if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } 02391 int i=indexText(lpszOldValue); 02392 if (i>=0) return updateText_WOSD(lpszNewValue,i); 02393 return addText_WOSD(lpszNewValue); 02394 } 02395 02396 void XMLNode::deleteClear(int i) 02397 { 02398 if ((!d)||(i<0)||(i>=d->nClear)) return; 02399 d->nClear--; 02400 XMLClear *p=d->pClear+i; 02401 free((void*)p->lpszValue); 02402 if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; } 02403 removeOrderElement(d,eNodeClear,i); 02404 } 02405 02406 int XMLNode::indexClear(XMLCSTR lpszValue) const 02407 { 02408 if (!d) return -1; 02409 int i,l=d->nClear; 02410 if (!lpszValue) { if (l) return 0; return -1; } 02411 XMLClear *p=d->pClear; 02412 for (i=0; i<l; i++) if (lpszValue==p[i].lpszValue) return i; 02413 return -1; 02414 } 02415 02416 void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); } 02417 void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); } 02418 02419 XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, 
int i) 02420 { 02421 if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } 02422 if (i>=d->nClear) return addClear_WOSD(lpszNewContent); 02423 XMLClear *p=d->pClear+i; 02424 if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; } 02425 return p; 02426 } 02427 02428 XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue) 02429 { 02430 if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } 02431 int i=indexClear(lpszOldValue); 02432 if (i>=0) return updateClear_WOSD(lpszNewContent,i); 02433 return addClear_WOSD(lpszNewContent); 02434 } 02435 02436 XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP) 02437 { 02438 if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue); 02439 return NULL; 02440 } 02441 02442 int XMLNode::nChildNode(XMLCSTR name) const 02443 { 02444 if (!d) return 0; 02445 int i,j=0,n=d->nChild; 02446 XMLNode *pc=d->pChild; 02447 for (i=0; i<n; i++) 02448 { 02449 if (xstricmp(pc->d->lpszName, name)==0) j++; 02450 pc++; 02451 } 02452 return j; 02453 } 02454 02455 XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const 02456 { 02457 if (!d) return emptyXMLNode; 02458 int i=0,n=d->nChild; 02459 if (j) i=*j; 02460 XMLNode *pc=d->pChild+i; 02461 for (; i<n; i++) 02462 { 02463 if (!xstricmp(pc->d->lpszName, name)) 02464 { 02465 if (j) *j=i+1; 02466 return *pc; 02467 } 02468 pc++; 02469 } 02470 return emptyXMLNode; 02471 } 02472 02473 XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const 02474 { 02475 if (!d) return emptyXMLNode; 02476 if (j>=0) 02477 { 02478 int i=0; 02479 while (j-->0) getChildNode(name,&i); 02480 return getChildNode(name,&i); 02481 } 02482 int i=d->nChild; 02483 while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break; 02484 if (i<0) return emptyXMLNode; 02485 return getChildNode(i); 02486 } 02487 02488 XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) 02489 { 02490 
XMLSTR path=stringDup(_path); 02491 XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep); 02492 if (path) free(path); 02493 return x; 02494 } 02495 02496 XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep) 02497 { 02498 if ((!path)||(!(*path))) return *this; 02499 XMLNode xn,xbase=*this; 02500 XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0; 02501 tend1=xstrstr(path,sepString); 02502 while(tend1) 02503 { 02504 *tend1=0; 02505 xn=xbase.getChildNode(path); 02506 if (xn.isEmpty()) 02507 { 02508 if (createIfMissing) xn=xbase.addChild(path); 02509 else { *tend1=sep; return XMLNode::emptyXMLNode; } 02510 } 02511 *tend1=sep; 02512 xbase=xn; 02513 path=tend1+1; 02514 tend1=xstrstr(path,sepString); 02515 } 02516 xn=xbase.getChildNode(path); 02517 if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path); 02518 return xn; 02519 } 02520 02521 XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); } 02522 XMLElementPosition XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); } 02523 XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); } 02524 XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); } 02525 XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); } 02526 XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); } 02527 XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const 02528 { 02529 if ((!d)||(!x.d)) return -1; 02530 XMLNodeData *dd=x.d; 02531 XMLNode *pc=d->pChild; 02532 int i=d->nChild; 02533 while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild); 02534 return -1; 02535 
} 02536 XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const 02537 { 02538 if (!name) return positionOfChildNode(count); 02539 int j=0; 02540 do { getChildNode(name,&j); if (j<0) return -1; } while (count--); 02541 return findPosition(d,j-1,eNodeChild); 02542 } 02543 02544 XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const 02545 { 02546 int i=0,j; 02547 if (k) i=*k; 02548 XMLNode x; 02549 XMLCSTR t; 02550 do 02551 { 02552 x=getChildNode(name,&i); 02553 if (!x.isEmpty()) 02554 { 02555 if (attributeValue) 02556 { 02557 j=0; 02558 do 02559 { 02560 t=x.getAttribute(attributeName,&j); 02561 if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; } 02562 } while (t); 02563 } else 02564 { 02565 if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; } 02566 } 02567 } 02568 } while (!x.isEmpty()); 02569 return emptyXMLNode; 02570 } 02571 02572 // Find an attribute on an node. 02573 XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const 02574 { 02575 if (!d) return NULL; 02576 int i=0,n=d->nAttribute; 02577 if (j) i=*j; 02578 XMLAttribute *pAttr=d->pAttribute+i; 02579 for (; i<n; i++) 02580 { 02581 if (xstricmp(pAttr->lpszName, lpszAttrib)==0) 02582 { 02583 if (j) *j=i+1; 02584 return pAttr->lpszValue; 02585 } 02586 pAttr++; 02587 } 02588 return NULL; 02589 } 02590 02591 char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const 02592 { 02593 if (!d) return FALSE; 02594 int i,n=d->nAttribute; 02595 XMLAttribute *pAttr=d->pAttribute; 02596 for (i=0; i<n; i++) 02597 { 02598 if (xstricmp(pAttr->lpszName, lpszAttrib)==0) 02599 { 02600 return TRUE; 02601 } 02602 pAttr++; 02603 } 02604 return FALSE; 02605 } 02606 02607 XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const 02608 { 02609 if (!d) return NULL; 02610 int i=0; 02611 while (j-->0) getAttribute(name,&i); 02612 return getAttribute(name,&i); 02613 } 02614 02615 XMLNodeContents XMLNode::enumContents(int i) const 
02616 { 02617 XMLNodeContents c; 02618 if (!d) { c.etype=eNodeNULL; return c; } 02619 if (i<d->nAttribute) 02620 { 02621 c.etype=eNodeAttribute; 02622 c.attrib=d->pAttribute[i]; 02623 return c; 02624 } 02625 i-=d->nAttribute; 02626 c.etype=(XMLElementType)(d->pOrder[i]&3); 02627 i=(d->pOrder[i])>>2; 02628 switch (c.etype) 02629 { 02630 case eNodeChild: c.child = d->pChild[i]; break; 02631 case eNodeText: c.text = d->pText[i]; break; 02632 case eNodeClear: c.clear = d->pClear[i]; break; 02633 default: break; 02634 } 02635 return c; 02636 } 02637 02638 XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; } 02639 int XMLNode::nText() const { if (!d) return 0; return d->nText; } 02640 int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; } 02641 int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; } 02642 int XMLNode::nClear() const { if (!d) return 0; return d->nClear; } 02643 int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; } 02644 XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; } 02645 XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; } 02646 XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; } 02647 XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; } 02648 XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; } 02649 XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; } 02650 XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); } 02651 char XMLNode::isDeclaration ( ) const { if (!d) return 
0; return d->isDeclaration; } 02652 char XMLNode::isEmpty ( ) const { return (d==NULL); } 02653 XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; } 02654 02655 XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, XMLElementPosition pos) 02656 { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); } 02657 XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos) 02658 { return addChild_priv(0,lpszName,isDeclaration,pos); } 02659 XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) 02660 { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); } 02661 XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) 02662 { return addAttribute_priv(0,lpszName,lpszValuev); } 02663 XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) 02664 { return addText_priv(0,stringDup(lpszValue),pos); } 02665 XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) 02666 { return addText_priv(0,lpszValue,pos); } 02667 XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) 02668 { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); } 02669 XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) 02670 { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); } 02671 XMLCSTR XMLNode::updateName(XMLCSTR lpszName) 02672 { return updateName_WOSD(stringDup(lpszName)); } 02673 XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) 02674 { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); } 02675 XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i) 02676 { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); } 02677 XMLAttribute 
*XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName) 02678 { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); } 02679 XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) 02680 { return updateText_WOSD(stringDup(lpszNewValue),i); } 02681 XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) 02682 { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); } 02683 XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) 02684 { return updateClear_WOSD(stringDup(lpszNewContent),i); } 02685 XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) 02686 { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); } 02687 XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP) 02688 { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); } 02689 02690 char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars, 02691 char _dropWhiteSpace, char _removeCommentsInMiddleOfText) 02692 { 02693 guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText; 02694 #ifdef _XMLWIDECHAR 02695 if (_characterEncoding) characterEncoding=_characterEncoding; 02696 #else 02697 switch(_characterEncoding) 02698 { 02699 case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break; 02700 case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break; 02701 case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break; 02702 case char_encoding_GB2312: characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break; 02703 case char_encoding_Big5: 02704 case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break; 02705 default: return 1; 02706 } 02707 #endif 
02708 return 0; 02709 } 02710 02711 XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute) 02712 { 02713 #ifdef _XMLWIDECHAR 02714 return (XMLCharEncoding)0; 02715 #else 02716 if (l<25) return (XMLCharEncoding)0; 02717 if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0; 02718 unsigned char *b=(unsigned char*)buf; 02719 if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8; 02720 02721 // Match utf-8 model ? 02722 XMLCharEncoding bestGuess=char_encoding_UTF8; 02723 int i=0; 02724 while (i<l) 02725 switch (XML_utf8ByteTable[b[i]]) 02726 { 02727 case 4: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02728 case 3: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02729 case 2: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02730 case 1: i++; break; 02731 case 0: i=l; 02732 } 02733 if (!useXMLEncodingAttribute) return bestGuess; 02734 // if encoding is specified and different from utf-8 than it's non-utf8 02735 // otherwise it's utf-8 02736 char bb[201]; 02737 l=mmin(l,200); 02738 memcpy(bb,buf,l); // copy buf into bb to be able to do "bb[l]=0" 02739 bb[l]=0; 02740 b=(unsigned char*)strstr(bb,"encoding"); 02741 if (!b) return bestGuess; 02742 b+=8; while XML_isSPACECHAR(*b) b++; if (*b!='=') return bestGuess; 02743 b++; while XML_isSPACECHAR(*b) b++; if ((*b!='\'')&&(*b!='"')) return bestGuess; 02744 b++; while XML_isSPACECHAR(*b) b++; 02745 02746 if ((xstrnicmp((char*)b,"utf-8",5)==0)|| 02747 (xstrnicmp((char*)b,"utf8",4)==0)) 02748 { 02749 if (bestGuess==char_encoding_legacy) return char_encoding_error; 02750 return char_encoding_UTF8; 02751 } 02752 02753 if ((xstrnicmp((char*)b,"shiftjis",8)==0)|| 02754 (xstrnicmp((char*)b,"shift-jis",9)==0)|| 02755 (xstrnicmp((char*)b,"sjis",4)==0)) return char_encoding_ShiftJIS; 02756 02757 if 
(xstrnicmp((char*)b,"GB2312",6)==0) return char_encoding_GB2312; 02758 if (xstrnicmp((char*)b,"Big5",4)==0) return char_encoding_Big5; 02759 if (xstrnicmp((char*)b,"GBK",3)==0) return char_encoding_GBK; 02760 02761 return char_encoding_legacy; 02762 #endif 02763 } 02764 #undef XML_isSPACECHAR 02765 02767 // Here starts the base64 conversion functions. // 02769 02770 static const char base64Fillchar = _CXML('='); // used to mark partial words at the end 02771 02772 // this lookup table defines the base64 encoding 02773 XMLCSTR base64EncodeTable=_CXML("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); 02774 02775 // Decode Table gives the index of any valid base64 character in the Base64 table] 02776 // 96: '=' - 97: space char - 98: illegal char - 99: end of string 02777 const unsigned char base64DecodeTable[] = { 02778 99,98,98,98,98,98,98,98,98,97, 97,98,98,97,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //00 -29 02779 98,98,97,98,98,98,98,98,98,98, 98,98,98,62,98,98,98,63,52,53, 54,55,56,57,58,59,60,61,98,98, //30 -59 02780 98,96,98,98,98, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24, //60 -89 02781 25,98,98,98,98,98,98,26,27,28, 29,30,31,32,33,34,35,36,37,38, 39,40,41,42,43,44,45,46,47,48, //90 -119 02782 49,50,51,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //120 -149 02783 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //150 -179 02784 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //180 -209 02785 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //210 -239 02786 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98 //240 -255 02787 }; 02788 02789 XMLParserBase64Tool::~XMLParserBase64Tool(){ freeBuffer(); } 02790 02791 void XMLParserBase64Tool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } 02792 02793 int 
XMLParserBase64Tool::encodeLength(int inlen, char formatted) 02794 { 02795 unsigned int i=((inlen-1)/3*4+4+1); 02796 if (formatted) i+=inlen/54; 02797 return i; 02798 } 02799 02800 XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, char formatted) 02801 { 02802 int i=encodeLength(inlen,formatted),k=17,eLen=inlen/3,j; 02803 alloc(i*sizeof(XMLCHAR)); 02804 XMLSTR curr=(XMLSTR)buf; 02805 for(i=0;i<eLen;i++) 02806 { 02807 // Copy next three bytes into lower 24 bits of int, paying attention to sign. 02808 j=(inbuf[0]<<16)|(inbuf[1]<<8)|inbuf[2]; inbuf+=3; 02809 // Encode the int into four chars 02810 *(curr++)=base64EncodeTable[ j>>18 ]; 02811 *(curr++)=base64EncodeTable[(j>>12)&0x3f]; 02812 *(curr++)=base64EncodeTable[(j>> 6)&0x3f]; 02813 *(curr++)=base64EncodeTable[(j )&0x3f]; 02814 if (formatted) { if (!k) { *(curr++)=_CXML('\n'); k=18; } k--; } 02815 } 02816 eLen=inlen-eLen*3; // 0 - 2. 02817 if (eLen==1) 02818 { 02819 *(curr++)=base64EncodeTable[ inbuf[0]>>2 ]; 02820 *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F]; 02821 *(curr++)=base64Fillchar; 02822 *(curr++)=base64Fillchar; 02823 } else if (eLen==2) 02824 { 02825 j=(inbuf[0]<<8)|inbuf[1]; 02826 *(curr++)=base64EncodeTable[ j>>10 ]; 02827 *(curr++)=base64EncodeTable[(j>> 4)&0x3f]; 02828 *(curr++)=base64EncodeTable[(j<< 2)&0x3f]; 02829 *(curr++)=base64Fillchar; 02830 } 02831 *(curr++)=0; 02832 return (XMLSTR)buf; 02833 } 02834 02835 unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe) 02836 { 02837 if (!data) return 0; 02838 if (xe) *xe=eXMLErrorNone; 02839 int size=0; 02840 unsigned char c; 02841 //skip any extra characters (e.g. 
newlines or spaces) 02842 while (*data) 02843 { 02844 #ifdef _XMLWIDECHAR 02845 if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02846 #endif 02847 c=base64DecodeTable[(unsigned char)(*data)]; 02848 if (c<97) size++; 02849 else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02850 data++; 02851 } 02852 if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4; 02853 if (size==0) return 0; 02854 do { data--; size--; } while(*data==base64Fillchar); size++; 02855 return (unsigned int)((size*3)/4); 02856 } 02857 02858 unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe) 02859 { 02860 if (!data) return 0; 02861 if (xe) *xe=eXMLErrorNone; 02862 int i=0,p=0; 02863 unsigned char d,c; 02864 for(;;) 02865 { 02866 02867 #ifdef _XMLWIDECHAR 02868 #define BASE64DECODE_READ_NEXT_CHAR(c) \ 02869 do { \ 02870 if (data[i]>255){ c=98; break; } \ 02871 c=base64DecodeTable[(unsigned char)data[i++]]; \ 02872 }while (c==97); \ 02873 if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02874 #else 02875 #define BASE64DECODE_READ_NEXT_CHAR(c) \ 02876 do { c=base64DecodeTable[(unsigned char)data[i++]]; }while (c==97); \ 02877 if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02878 #endif 02879 02880 BASE64DECODE_READ_NEXT_CHAR(c) 02881 if (c==99) { return 2; } 02882 if (c==96) 02883 { 02884 if (p==(int)len) return 2; 02885 if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; 02886 return 1; 02887 } 02888 02889 BASE64DECODE_READ_NEXT_CHAR(d) 02890 if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02891 if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; } 02892 buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3)); 02893 02894 BASE64DECODE_READ_NEXT_CHAR(c) 02895 if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02896 if (p==(int)len) 02897 { 02898 if (c==96) return 2; 
02899 if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; 02900 return 0; 02901 } 02902 if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02903 buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf)); 02904 02905 BASE64DECODE_READ_NEXT_CHAR(d) 02906 if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02907 if (p==(int)len) 02908 { 02909 if (d==96) return 2; 02910 if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; 02911 return 0; 02912 } 02913 if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02914 buf[p++]=(unsigned char)(((c<<6)&0xc0)|d); 02915 } 02916 } 02917 #undef BASE64DECODE_READ_NEXT_CHAR 02918 02919 void XMLParserBase64Tool::alloc(int newsize) 02920 { 02921 if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; } 02922 if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } 02923 } 02924 02925 unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) 02926 { 02927 if (xe) *xe=eXMLErrorNone; 02928 if (!data) { *outlen=0; return (unsigned char*)""; } 02929 unsigned int len=decodeSize(data,xe); 02930 if (outlen) *outlen=len; 02931 if (!len) return NULL; 02932 alloc(len+1); 02933 if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; } 02934 return (unsigned char*)buf; 02935 } 02936