00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include <inttypes.h>
00029 #include <wchar.h>
00030
00031 #pragma once
00032 #ifndef __CSXML_H__
00033 #define __CSXML_H__
00034 /* Error codes (negative values); presumably passed to CSXMLProcessor::setError() -- confirm against the implementation. */
00035 #define CS_XML_ERR_OUT_OF_MEMORY -1
00036 #define CS_XML_ERR_CHAR_TOO_LARGE -2
00037 /* Reserved wchar_t value (WCHAR_MAX from <wchar.h>); presumably signals end of input -- confirm. */
00038 #define CS_XML_EOF_CHAR WCHAR_MAX
00039 /* Array capacities: CS_MAX_XML_NAME_SIZE sizes the charset, pr_tag, pr_name, pr_value and entity buffers of CSXMLProcessor; CS_XML_ERR_MSG_SIZE sizes its err_message buffer. */
00040 #define CS_MAX_XML_NAME_SIZE 48
00041 #define CS_XML_ERR_MSG_SIZE 128
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 /* Parser state codes 0..26. CSXMLParser's constructor starts with state 0 (XML_BEFORE_CDATA); presumably these are the values held in CSXMLParser::state -- confirm in parseChar(). */
00058 #define XML_BEFORE_CDATA 0
00059 #define XML_IN_CDATA 1
00060 /* Codes 2..7: by their names, positions reached after '<', '<!', '<!-' and '<![' -- confirm. */
00061 #define XML_LT 2
00062 #define XML_LT_BANG 3
00063 #define XML_LT_BANG_DASH 4
00064 #define XML_LT_BANG_SQR 5
00065 #define XML_LT_BANG_SQR_IN_NAME 6
00066 #define XML_LT_BANG_SQR_AFTER_NAME 7
00067
00068 #define XML_IN_TAG_NAME 8
00069 /* Codes 9..15: by their names, positions around an attribute name, the '=' and the quoted value -- confirm. */
00070 #define XML_BEFORE_ATTR 9
00071 #define XML_IN_ATTR 10
00072
00073 #define XML_BEFORE_EQUAL 11
00074 #define XML_AFTER_EQUAL 12
00075
00076 #define XML_QUOTE_BEFORE_VALUE 13
00077 #define XML_IN_VALUE 14
00078 #define XML_QUOTE_AFTER_VALUE 15
00079 /* Codes 16..18: by their names, a '/', '?' or ']' has just been seen -- confirm. */
00080 #define XML_SLASH 16
00081 #define XML_QMARK 17
00082 #define XML_SQR 18
00083 /* Codes 19..22: by their names, inside a comment with 0..3 consecutive dashes seen -- confirm. */
00084 #define XML_IN_COMMENT 19
00085 #define XML_IN_COMMENT_DASH 20
00086 #define XML_IN_COMMENT_DASH_DASH 21
00087 #define XML_IN_COMMENT_3_DASH 22
00088 /* Codes 23..26: by their names, inside a CDATA tag with 0..3 consecutive ']' seen -- confirm. */
00089 #define XML_IN_CDATA_TAG 23
00090 #define XML_IN_CDATA_TAG_SQR 24
00091 #define XML_IN_CDATA_TAG_SQR_SQR 25
00092 #define XML_IN_CDATA_TAG_3_SQR 26
00093 /* Capacities: PARSE_BUFFER_SIZE is the length of CSXMLParser::buffer (wchar_t elements); PARSE_STACK_SIZE is the length of CSXMLParser::end_type and the bound tested by END_TAG_TYPE. */
00094 #define PARSE_BUFFER_SIZE 20
00095 #define PARSE_STACK_SIZE 200
00096 /* Yields x->end_type[x->nesting-1] when x->nesting is within 1..PARSE_STACK_SIZE, otherwise XML_OP_1_END_UNKNOWN_TAG (so nesting <= 0 no longer indexes before the array). x is a pointer to the parser and is evaluated more than once, so it must be free of side effects. */
00097 #define END_TAG_TYPE(x) ((x)->nesting > 0 && (x)->nesting <= PARSE_STACK_SIZE ? (x)->end_type[(x)->nesting-1] : XML_OP_1_END_UNKNOWN_TAG)
00098 /* Casts ch to unsigned char, so the result is never negative even where plain char is signed. */
00099 #define TO_LONG_CHAR(ch) ((unsigned char) (ch))
00100 /* Step codes 0..4. CSXMLParser's constructor starts with step 0 (XML_STEP_NONE); presumably these are the values held in CSXMLParser::step -- confirm. */
00101 #define XML_STEP_NONE 0
00102 #define XML_STEP_TAG 1
00103 #define XML_STEP_ATTR 2
00104 #define XML_STEP_VALUE 3
00105 #define XML_STEP_NESTED 4
00106 /* Incremental XML parser core fed one wchar_t at a time through parseChar(); keeps its state, a small character buffer and a per-nesting-level end-type array as members. */
00107 class CSXMLParser {
00108 public:
00109 CSXMLParser() : /* Zero-initializes every scalar member; buffer[] and end_type[] are not initialized here. */
00110 state(0),
00111 quote(0),
00112 step(0),
00113 type(0),
00114 count(0),
00115 nesting(0) {
00116 }
00117 virtual ~CSXMLParser() { }
00118 /* parseChar() accepts the next input character and returns an int32_t result; presumably a combination of the XML_OP_1_*, XML_DATA_*, XML_OP_2_* and XML_ERROR codes of this header -- confirm in the implementation. */
00119 int32_t parseChar(wchar_t ch);
00120 void setDataType(int32_t t) { type = t; } /* Overwrites the type member with t. */
00121 int32_t getDataLen() { return count; } /* Returns the count member. */
00122 wchar_t *getDataPtr() { return buffer; } /* Returns a pointer to the internal buffer itself, not a copy. */
00123
00124 private:
00125 /* Scalar state, all set to 0 by the constructor. state and step presumably hold the XML_* state codes and XML_STEP_* codes; quote presumably remembers the opening quote character of a value -- confirm. */
00126 int32_t state;
00127 int32_t quote;
00128 int32_t step;
00129 /* Data members exposed through setDataType(), getDataLen() and getDataPtr(). */
00130 /* buffer holds PARSE_BUFFER_SIZE wchar_t elements; presumably count is the number currently in use -- confirm. */
00131 int32_t type;
00132 int32_t count;
00133 wchar_t buffer[PARSE_BUFFER_SIZE];
00134
00135 /* Nesting depth and the per-level array that END_TAG_TYPE reads at index nesting-1. */
00136 int32_t nesting;
00137 uint8_t end_type[PARSE_STACK_SIZE];
00138 /* Private helpers; their behavior is defined in the implementation file and is not visible here. */
00139 bool match_string(const char *ch);
00140 void increment_nesting(wchar_t ch);
00141 };
00142 /* Result-code bit layout: bits 0-3 = first operation (XML_OP_1_MASK), bits 4-7 = data kind (XML_DATA_MASK), bits 8-11 = second operation (XML_OP_2_MASK), bit 12 = XML_ERROR. Presumably the format returned by parseChar() -- confirm. */
00143 #define XML_OP_1_MASK 0x0000000F
00144 #define XML_ERROR 0x00001000
00145 /* Codes for the XML_OP_1_MASK field (0x0..0xE). */
00146 #define XML_OP_1_NOOP 0x00000000
00147 #define XML_OP_1_END_TAG 0x00000001
00148 #define XML_OP_1_END_CLOSE_TAG 0x00000002
00149 #define XML_OP_1_END_EMPTY_TAG 0x00000003
00150 #define XML_OP_1_END_PI_TAG 0x00000004
00151 #define XML_OP_1_END_ENTITY_TAG 0x00000005
00152 #define XML_OP_1_END_BRACKET_TAG 0x00000006
00153 #define XML_OP_1_END_UNKNOWN_TAG 0x00000007
00154 #define XML_OP_1_START_CDATA_TAG 0x00000008
00155 #define XML_OP_1_START_COMMENT 0x00000009
00156 #define XML_OP_1_START_TAG 0x0000000A
00157 #define XML_OP_1_ADD_ATTR 0x0000000B
00158 #define XML_OP_1_END_CDATA 0x0000000C
00159 #define XML_OP_1_END_CDATA_TAG 0x0000000D
00160 #define XML_OP_1_END_COMMENT 0x0000000E
00161 /* Mask selecting the data-kind field (bits 4-7). */
00162 #define XML_DATA_MASK 0x000000F0
00163 /* Codes for the XML_DATA_MASK field. XML_COMMENT (0x50) lacks the XML_DATA_ prefix but occupies the same field. */
00164 #define XML_NO_DATA 0x00000000
00165 #define XML_DATA_TAG 0x00000010
00166 #define XML_DATA_ATTR 0x00000020
00167 #define XML_DATA_CDATA 0x00000030
00168 #define XML_DATA_CDATA_TAG 0x00000040
00169 #define XML_COMMENT 0x00000050
00170 #define XML_DATA_VALUE 0x00000060
00171 /* Mask selecting the second-operation field (bits 8-11). */
00172 #define XML_OP_2_MASK 0x00000F00
00173 /* Codes for the XML_OP_2_MASK field: each equals the XML_OP_1_* code of the same name shifted left by 8; only codes 0x0..0x9 have an OP_2 form. */
00174 #define XML_OP_2_NOOP 0x00000000
00175 #define XML_OP_2_END_TAG 0x00000100
00176 #define XML_OP_2_END_CLOSE_TAG 0x00000200
00177 #define XML_OP_2_END_EMPTY_TAG 0x00000300
00178 #define XML_OP_2_END_PI_TAG 0x00000400
00179 #define XML_OP_2_END_ENTITY_TAG 0x00000500
00180 #define XML_OP_2_END_BRACKET_TAG 0x00000600
00181 #define XML_OP_2_END_UNKNOWN_TAG 0x00000700
00182 #define XML_OP_2_START_CDATA_TAG 0x00000800
00183 #define XML_OP_2_START_COMMENT 0x00000900
00184 /* XML_noop evaluates to 0: both of its operands are 0. */
00185 #define XML_noop (XML_OP_2_NOOP|XML_NO_DATA)
00186 /* Composite codes. The *_CH macros OR an optional first operation with a data kind; the remaining macros OR a first operation with a second operation. */
00187 #define XML_CDATA_CH (XML_DATA_CDATA)
00188 #define XML_end_cdata_TAG_CH (XML_OP_1_END_CDATA|XML_DATA_TAG)
00189 #define XML_start_tag_TAG_CH (XML_OP_1_START_TAG|XML_DATA_TAG)
00190 #define XML_add_attr_TAG_CH (XML_OP_1_ADD_ATTR|XML_DATA_TAG)
00191 #define XML_TAG_CH (XML_DATA_TAG)
00192 #define XML_start_tag_ATTR_CH (XML_OP_1_START_TAG|XML_DATA_ATTR)
00193 #define XML_add_attr_ATTR_CH (XML_OP_1_ADD_ATTR|XML_DATA_ATTR)
00194 #define XML_ATTR_CH (XML_DATA_ATTR)
00195 #define XML_start_tag_VALUE_CH (XML_OP_1_START_TAG|XML_DATA_VALUE)
00196 #define XML_add_attr_VALUE_CH (XML_OP_1_ADD_ATTR|XML_DATA_VALUE)
00197 #define XML_VALUE_CH (XML_DATA_VALUE)
00198 #define XML_start_tag_end_tag(x) (XML_OP_1_START_TAG|((x) << 8)) /* x is an XML_OP_1_* code; the shift by 8 moves it into the second-operation field. */
00199 #define XML_add_attr_end_tag(x) (XML_OP_1_ADD_ATTR|((x) << 8)) /* Same shift as above, with XML_OP_1_ADD_ATTR as the first operation. */
00200 #define XML_end_tag(x) (x) /* Passes x through unchanged, leaving it in the first-operation field. */
00201 #define XML_start_tag_end_empty_tag XML_start_tag_end_tag(XML_OP_1_END_EMPTY_TAG)
00202 #define XML_add_attr_end_empty_tag XML_add_attr_end_tag(XML_OP_1_END_EMPTY_TAG)
00203 #define XML_end_empty_tag XML_end_tag(XML_OP_1_END_EMPTY_TAG)
00204 #define XML_start_tag_end_pi_tag XML_start_tag_end_tag(XML_OP_1_END_PI_TAG)
00205 #define XML_add_attr_end_pi_tag XML_add_attr_end_tag(XML_OP_1_END_PI_TAG)
00206 #define XML_end_pi_tag XML_end_tag(XML_OP_1_END_PI_TAG)
00207 /* Composite codes involving the start, content and end of a CDATA tag. */
00208 #define XML_end_cdata_start_cdata_tag (XML_OP_1_END_CDATA|XML_OP_2_START_CDATA_TAG)
00209 #define XML_start_tag_start_cdata_tag (XML_OP_1_START_TAG|XML_OP_2_START_CDATA_TAG)
00210 #define XML_add_attr_start_cdata_tag (XML_OP_1_ADD_ATTR|XML_OP_2_START_CDATA_TAG)
00211 #define XML_start_cdata_tag (XML_OP_1_START_CDATA_TAG)
00212 #define XML_CDATA_TAG_CH (XML_DATA_CDATA_TAG)
00213 #define XML_end_cdata_tag (XML_OP_1_END_CDATA_TAG)
00214 /* Composite codes involving the start, content and end of a comment. */
00215 #define XML_end_cdata_start_comment (XML_OP_1_END_CDATA|XML_OP_2_START_COMMENT)
00216 #define XML_start_tag_start_comment (XML_OP_1_START_TAG|XML_OP_2_START_COMMENT)
00217 #define XML_add_attr_start_comment (XML_OP_1_ADD_ATTR|XML_OP_2_START_COMMENT)
00218 #define XML_start_comment (XML_OP_1_START_COMMENT)
00219 #define XML_COMMENT_CH (XML_COMMENT)
00220 #define XML_end_comment (XML_OP_1_END_COMMENT)
00221
00222
00223
00224 /* Charset mode codes 0..2; presumably the values held in CSXMLProcessor::charset_type -- confirm in the implementation. */
00225 #define CHARSET_STANDARD 0
00226 #define CHARSET_UTF_8 1
00227 #define CHARSET_TO_CONVERT_8_BIT 2
00228
00229 class CSXMLProcessor : public CSXMLParser {
00230 public:
00231 CSXMLProcessor() :
00232 err_no(0),
00233 ip(false),
00234 tlength(0),
00235 nlength(0),
00236 vlength(0),
00237 utf8_count(0),
00238 utf8_length(0),
00239 elength(0) {
00240 err_message[0] = 0;
00241 charset[0] = 0;
00242 pr_tag[0] = 0;
00243 pr_name[0] = 0;
00244 pr_value[0] = 0;
00245 utf8_buffer[0] = 0;
00246 entity[0] = 0;
00247 }
00248 virtual ~CSXMLProcessor() { }
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266 int32_t processChar(wchar_t ch);
00267
00268 bool getError(int32_t *err, char **msg);
00269 void setError(int32_t err, char *msg);
00270 void printError(char *prefix);
00271
00272 private:
00273 int32_t err_no;
00274 char err_message[CS_XML_ERR_MSG_SIZE];
00275
00276 private:
00277
00278
00279
00280
00281 virtual bool buildConversionTable();
00282
00283 int32_t charset_type;
00284 char charset[CS_MAX_XML_NAME_SIZE];
00285 wchar_t conversion_table[128];
00286
00287 bool ip;
00288 size_t tlength;
00289 char pr_tag[CS_MAX_XML_NAME_SIZE];
00290 size_t nlength;
00291 char pr_name[CS_MAX_XML_NAME_SIZE];
00292 size_t vlength;
00293 char pr_value[CS_MAX_XML_NAME_SIZE];
00294
00295 int32_t utf8_count;
00296 int32_t utf8_length;
00297 uint32_t utf8_buffer[6];
00298
00299 int32_t elength;
00300 char entity[CS_MAX_XML_NAME_SIZE];
00301
00302 int32_t capture_initializer(wchar_t ch);
00303 int32_t entity_translator(wchar_t ch);
00304 int32_t charset_transformer(wchar_t ch);
00305 void appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen);
00306 };
00307
00308
00309
00310 /* Flag value 1; presumably accepted in the flags argument of CSXML::parseXML() -- confirm in the implementation. */
00311 #define XML_KEEP_EMPTY_CDATA 1
00312 /* Character string with public pointer, length and size members; the add*() methods take the CSXMLProcessor as an extra argument, presumably to report errors through it -- confirm. */
00313 class CSXMLString {
00314 public:
00315 CSXMLString() : stringPtr(NULL), stringLen(0), stringSize(0) {} /* Starts with no buffer and zero length and size. */
00316 virtual ~CSXMLString() { } /* Empty body: stringPtr is not released here; ownership is not visible in this header. */
00317
00318 public:
00319 bool addChar(char ch, CSXMLProcessor *xml);
00320 bool addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml);
00321 bool addString(const char *string, CSXMLProcessor *xml);
00322 void setEmpty();
00323 void setNull();
00324 char *lastComponent();
00325 char *findTrailingComponent(const char *comp);
00326 void truncate(char *ptr);
00327 /* Public data. By their names, stringLen is the used length and stringSize the capacity of the buffer at stringPtr -- confirm. */
00328 char *stringPtr;
00329 size_t stringLen;
00330 size_t stringSize;
00331 };
00332 /* Abstract XML reader on top of CSXMLProcessor: parseXML() is the public entry point; getChar(), openNode(), closeNode() and addAttribute() are pure virtual and must be supplied by subclasses. */
00333 class CSXML : public CSXMLProcessor {
00334 public:
00335 bool parseXML(int32_t flags);
00336
00337 private:
00338
00339
00340 /* Input hook: stores a character through ch and returns bool; presumably called by parseXML() for each input character -- confirm. */
00341 virtual bool getChar(wchar_t *ch) = 0;
00342
00343
00344
00345
00346 /* Node hooks receiving a path plus a value or an attribute name and value; presumably invoked by parseXML() as the document is read -- confirm. */
00347 virtual bool openNode(char *path, char *value) = 0;
00348 virtual bool closeNode(char *path) = 0;
00349 virtual bool addAttribute(char *path, char *name, char *value) = 0;
00350
00351 private:
00352 uint32_t flags; /* NOTE(review): no constructor initializes this member; presumably it is set from parseXML()'s int32_t flags argument -- confirm. */
00353 /* Working strings; by their names, the current path, name and value -- confirm. */
00354 CSXMLString xml_path;
00355 CSXMLString xml_name;
00356 CSXMLString xml_value;
00357 /* Private helpers; their behavior is defined in the implementation file and is not visible here. */
00358 int32_t nodeType(char *name);
00359 bool internalCloseNode(const char *name, bool single);
00360 bool internalOpenNode(const char *name);
00361 };
00362 /* Overrides the three node hooks of CSXML (by the class name, presumably to print them -- confirm); getChar() is not overridden here, so this class is still abstract. */
00363 class CSXMLPrint : public CSXML {
00364 private:
00365 virtual bool openNode(char *path, char *value);
00366 virtual bool closeNode(char *path);
00367 virtual bool addAttribute(char *path, char *name, char *value);
00368 };
00369 /* Concrete CSXMLPrint subclass that overrides getChar(); presumably it reads characters from the memory block passed to parseString() or parseData() -- confirm. */
00370 class CSXMLBuffer : public CSXMLPrint {
00371 public:
00372 bool parseString(const char *data, int32_t flags);
00373 bool parseData(const char *data, size_t len, int32_t flags);
00374
00375 private:
00376 virtual bool getChar(wchar_t *ch);
00377
00378 private:
00379 const char *charData; /* NOTE(review): this class declares no constructor, so charData, dataLen and dataPos have no defined value until something assigns them; presumably parseString()/parseData() do -- confirm. */
00380 size_t dataLen;
00381 size_t dataPos;
00382 };
00383 /* Concrete CSXMLPrint subclass that overrides getChar(); presumably it reads characters from the file named in parseFile() -- confirm. */
00384 class CSXMLFile : public CSXMLPrint {
00385 public:
00386 bool parseFile(char *file_name, int32_t flags);
00387
00388 private:
00389 virtual bool getChar(wchar_t *ch);
00390
00391 private:
00392 char *fileName; /* NOTE(review): this class declares no constructor, so fileName and file have no defined value until something assigns them; presumably parseFile() does -- confirm. */
00393 FILE *file; /* NOTE(review): FILE is used, but <stdio.h> is not among the includes visible in this header -- confirm that includers provide it. */
00394 };
00395
00396 #endif /* __CSXML_H__ */