Drizzled Public API Documentation

CSXML.cc
00001 /* Copyright (C) 2010 PrimeBase Technologies GmbH, Germany
00002  *
00003  * PrimeBase Media Stream for MySQL
00004  *
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
00018  *
00019  * Paul McCullagh (H&G2JCtL)
00020  *
00021  * 2010-01-12
00022  *
00023  * CORE SYSTEM:
00024  * XML Parsing
00025  *
00026  */
00027 
00028 #include "CSConfig.h"
00029 #include <inttypes.h>
00030 
00031 
00032 #include <string.h>
00033 #include <stdlib.h>
00034 #include <ctype.h>
00035 #include <stdio.h>
00036 #include <errno.h>
00037 
00038 #ifdef DRIZZLED
00039 #include <boost/algorithm/string.hpp>
00040 #define STRCASESTR(s1, s2) boost::ifind_first(s1, s2)
00041 #else
00042 #define STRCASESTR(s1, s2) strcasestr(s1, s2)
00043 #endif
00044 
00045 #include "CSXML.h"
00046 
/*
 * Character classification helpers.
 * Every macro parameter is parenthesised so that an expression argument
 * (e.g. a conditional expression or '&obj') expands with the intended
 * precedence. NOTE: the arguments are still evaluated more than once, so
 * do not pass expressions with side effects.
 */

/* True if ch is a blank, tab, line feed or carriage return. */
#define ISSPACE(ch)     ((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')
/* True if ch is one of the punctuation characters * + ( ) , | [ ] ? / */
#define ISSINGLE(ch)    ((ch) == '*' || (ch) == '+' || (ch) == '(' || (ch) == ')' || (ch) == ',' || (ch) == '|' || (ch) == '[' || (ch) == ']' || (ch) == '?' || (ch) == '/')

/*
 * Parse buffer helpers; x must point to an object with 'buffer' and 'count'.
 * Both expand to a brace block (not an expression), so inside an unbraced
 * if/else the call must be wrapped in braces by the caller.
 */

/* Replace the buffer contents with the single character ch. */
#define SET_CHAR(x, ch)   { (x)->buffer[0] = (ch); (x)->count = 1; }
/* Append ch to the buffer. When the buffer already holds PARSE_BUFFER_SIZE
 * characters the count is left alone and the last slot is overwritten. */
#define ADD_CHAR(x, ch)   { if ((x)->count < PARSE_BUFFER_SIZE) { (x)->buffer[(x)->count] = (ch); (x)->count++; } else (x)->buffer[PARSE_BUFFER_SIZE-1] = (ch); }
00052 
00053 bool CSXMLParser::match_string(const char *ch)
00054 {
00055   int32_t i;
00056   
00057   for (i=0; i<this->count; i++) {
00058     if (this->buffer[i] != *ch)
00059       return false;
00060     ch++;
00061   }
00062   if (*ch)
00063     return false;
00064   return(i == this->count);
00065 }
00066 
00067 void CSXMLParser::increment_nesting(wchar_t ch)
00068 {
00069   if (this->nesting < PARSE_STACK_SIZE) {
00070     switch (ch) {
00071       case '/':
00072         this->end_type[this->nesting] = XML_OP_1_END_CLOSE_TAG;
00073         break;
00074       case '?':
00075         this->end_type[this->nesting] = XML_OP_1_END_PI_TAG;
00076         break;
00077       case '!':
00078         this->end_type[this->nesting] = XML_OP_1_END_ENTITY_TAG;
00079         break;
00080       case '[':
00081         this->end_type[this->nesting] = XML_OP_1_END_BRACKET_TAG;
00082         break;
00083       default:
00084         if (ISSPACE(ch))
00085           this->end_type[this->nesting] = XML_OP_1_END_UNKNOWN_TAG;
00086         else
00087           this->end_type[this->nesting] = XML_OP_1_END_TAG;
00088         break;
00089     }
00090   }
00091   this->nesting++;
00092 }
00093 
00094 int32_t CSXMLParser::parseChar(wchar_t ch)
00095 /* This function does the actual work of parsing. It expects
00096  * "complete" characters as input. This could be 4 byte characters
00097  * as long as it is able to recognize the characters that are
00098  * relevant to parsing.
00099  * The function outputs processing instructions, and indicates
00100  * how the output data is to be understood.
00101  */
00102 {
00103   switch (this->state) {
00104     case XML_BEFORE_CDATA:
00105       this->nesting = 0;
00106       /* This is the initial state! */
00107       if (ch == '<') {
00108         this->state = XML_LT;
00109         this->type = XML_noop;
00110       }
00111       else {
00112         this->state = XML_IN_CDATA;
00113         this->type = XML_CDATA_CH;
00114       }
00115       SET_CHAR(this, ch);
00116       break;
00117     case XML_IN_CDATA:
00118       if (ch == '<') {
00119         this->state = XML_LT;
00120         this->type = XML_noop;
00121       }
00122       else
00123         this->type = XML_CDATA_CH;
00124       SET_CHAR(this, ch);
00125       break;
00126     case XML_LT:
00127       if (ISSPACE(ch)) {
00128         if (this->nesting) {
00129           this->state = XML_BEFORE_ATTR;
00130           if (this->step == XML_STEP_TAG)
00131             this->type = XML_start_tag_TAG_CH;
00132           else if (this->step == XML_STEP_NESTED)
00133             this->type = XML_TAG_CH;
00134           else if (this->step == XML_STEP_NONE)
00135             this->type = XML_end_cdata_TAG_CH;
00136           else
00137             this->type = XML_add_attr_TAG_CH;
00138           this->step = XML_STEP_TAG;
00139           increment_nesting(ch);
00140           this->count = 0;
00141         }
00142         else {
00143           this->state = XML_IN_CDATA;
00144           this->type = XML_CDATA_CH;
00145           ADD_CHAR(this, ch);
00146         }
00147       }
00148       else if (ch == '!') {
00149         this->state = XML_LT_BANG;
00150         this->type = XML_noop;
00151         ADD_CHAR(this, ch);
00152       }
00153       else {
00154         this->state = XML_IN_TAG_NAME;
00155         if (this->step == XML_STEP_TAG)
00156           this->type = XML_start_tag_TAG_CH;
00157         else if (this->step == XML_STEP_NESTED)
00158           this->type = XML_TAG_CH;
00159         else if (this->step == XML_STEP_NONE)
00160           this->type = XML_end_cdata_TAG_CH;
00161         else
00162           this->type = XML_add_attr_TAG_CH;
00163         this->step = XML_STEP_TAG;
00164         increment_nesting(ch);
00165         SET_CHAR(this, ch);
00166       }
00167       break;
00168     case XML_LT_BANG:
00169       if (ch == '-') {
00170         this->state = XML_LT_BANG_DASH;
00171         this->type = XML_noop;
00172       }
00173       else if (ch == '[') {
00174         this->state = XML_LT_BANG_SQR;
00175         this->type = XML_noop;
00176       }
00177       else {
00178         this->state = XML_IN_TAG_NAME;
00179         if (this->step == XML_STEP_TAG)
00180           this->type = XML_start_tag_TAG_CH;
00181         else if (this->step == XML_STEP_NESTED)
00182           this->type = XML_TAG_CH;
00183         else if (this->step == XML_STEP_NONE)
00184           this->type = XML_end_cdata_TAG_CH;
00185         else
00186           this->type = XML_add_attr_TAG_CH;
00187         this->step = XML_STEP_TAG;
00188         increment_nesting('!');
00189         SET_CHAR(this, '!');
00190       }
00191       ADD_CHAR(this, ch);
00192       break;
00193     case XML_LT_BANG_DASH:
00194       if (ch == '-') {
00195         this->state = XML_IN_COMMENT;
00196         if (this->step == XML_STEP_TAG)
00197           this->type = XML_start_tag_start_comment;
00198         else if (this->step == XML_STEP_NESTED)
00199           this->type = XML_start_comment;
00200         else if (this->step == XML_STEP_NONE)
00201           this->type = XML_end_cdata_start_comment;
00202         else
00203           this->type = XML_add_attr_start_comment;
00204         increment_nesting(' ');
00205       }
00206       else {
00207         this->state = XML_IN_CDATA;
00208         this->type = XML_CDATA_CH;
00209         ADD_CHAR(this, ch);
00210       }
00211       break;
00212     case XML_LT_BANG_SQR:
00213       if (ISSPACE(ch))
00214         this->type = XML_noop;
00215       else if (ch == '[') {
00216         this->state = XML_BEFORE_ATTR;
00217         if (this->step == XML_STEP_TAG)
00218           this->type = XML_start_tag_TAG_CH;
00219         else if (this->step == XML_STEP_NESTED)
00220           this->type = XML_TAG_CH;
00221         else if (this->step == XML_STEP_NONE)
00222           this->type = XML_end_cdata_TAG_CH;
00223         else
00224           this->type = XML_add_attr_TAG_CH;
00225         this->step = XML_STEP_TAG;
00226         increment_nesting('[');
00227         SET_CHAR(this, '!');
00228         ADD_CHAR(this, '[');
00229       }
00230       else {
00231         this->state = XML_LT_BANG_SQR_IN_NAME;
00232         this->type = XML_noop;
00233         SET_CHAR(this, '!');
00234         ADD_CHAR(this, '[');
00235         ADD_CHAR(this, ch);
00236       }
00237       break;
00238     case XML_LT_BANG_SQR_IN_NAME:
00239       if (ISSPACE(ch)) {
00240         this->state = XML_LT_BANG_SQR_AFTER_NAME;
00241         this->type = XML_noop;
00242       }
00243       else if (ch == '[') {
00244         if (match_string("![CDATA")) {
00245           this->state = XML_IN_CDATA_TAG;
00246           if (this->step == XML_STEP_TAG)
00247             this->type = XML_start_tag_start_cdata_tag;
00248           else if (this->step == XML_STEP_NESTED)
00249             this->type = XML_start_cdata_tag;
00250           else if (this->step == XML_STEP_NONE)
00251             this->type = XML_end_cdata_start_cdata_tag;
00252           else
00253             this->type = XML_add_attr_start_cdata_tag;
00254           this->step = XML_STEP_TAG;
00255           increment_nesting('[');
00256         }
00257         else {
00258           this->state = XML_BEFORE_ATTR;
00259           if (this->step == XML_STEP_TAG)
00260             this->type = XML_start_tag_TAG_CH;
00261           else if (this->step == XML_STEP_NESTED)
00262             this->type = XML_TAG_CH;
00263           else if (this->step == XML_STEP_NONE)
00264             this->type = XML_end_cdata_TAG_CH;
00265           else
00266             this->type = XML_add_attr_TAG_CH;
00267           this->step = XML_STEP_TAG;
00268           increment_nesting('[');
00269         }
00270       }
00271       else {
00272         this->type = XML_noop;
00273         ADD_CHAR(this, ch);
00274       }
00275       break;
00276     case XML_LT_BANG_SQR_AFTER_NAME:
00277       if (ch == '[') {
00278         if (match_string("![CDATA")) {
00279           this->state = XML_IN_CDATA_TAG;
00280           if (this->step == XML_STEP_TAG)
00281             this->type = XML_start_tag_start_cdata_tag;
00282           else if (this->step == XML_STEP_NESTED)
00283             this->type = XML_start_cdata_tag;
00284           else if (this->step == XML_STEP_NONE)
00285             this->type = XML_end_cdata_start_cdata_tag;
00286           else
00287             this->type = XML_add_attr_start_cdata_tag;
00288           increment_nesting('[');
00289         }
00290         else {
00291           this->state = XML_BEFORE_ATTR;
00292           if (this->step == XML_STEP_TAG)
00293             this->type = XML_start_tag_TAG_CH;
00294           else if (this->step == XML_STEP_NESTED)
00295             this->type = XML_TAG_CH;
00296           else if (this->step == XML_STEP_NONE)
00297             this->type = XML_end_cdata_TAG_CH;
00298           else
00299             this->type = XML_add_attr_TAG_CH;
00300           this->step = XML_STEP_TAG;
00301           increment_nesting('[');
00302         }
00303       }
00304       else
00305         /* Ignore data until the '['!!! */
00306         this->type = XML_noop;
00307       break;
00308     case XML_IN_TAG_NAME:
00309       if (ISSPACE(ch)) {
00310         this->state = XML_BEFORE_ATTR;
00311         this->type = XML_noop;
00312       }
00313       else if (ch == '<') {
00314         this->state = XML_LT;
00315         this->type = XML_noop;
00316       }
00317       else if (ch == '>') {
00318         if (this->step == XML_STEP_TAG)
00319           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00320         else if (this->step == XML_STEP_NESTED)
00321           this->type = XML_end_tag(END_TAG_TYPE(this));
00322         else
00323           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00324         this->nesting--;
00325         if (this->nesting) {
00326           this->step = XML_STEP_NESTED;
00327           this->state = XML_BEFORE_ATTR;
00328         }
00329         else {
00330           this->step = XML_STEP_NONE;
00331           this->state = XML_IN_CDATA;
00332         }
00333       }
00334       else if (ch == '"' || ch == '\'') {
00335         this->state = XML_QUOTE_BEFORE_VALUE;
00336         this->quote = ch;
00337         this->type = XML_noop;
00338       }
00339       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00340         this->state = XML_SLASH;
00341         this->type = XML_noop;
00342       }
00343       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00344         this->state = XML_QMARK;
00345         this->type = XML_noop;
00346       }
00347       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00348         this->state = XML_SQR;
00349         this->type = XML_noop;
00350       }
00351       else if (ISSINGLE(ch)) {
00352         this->state = XML_BEFORE_ATTR;
00353         if (this->step == XML_STEP_TAG)
00354           this->type = XML_start_tag_ATTR_CH;
00355         else if (this->step == XML_STEP_NESTED)
00356           this->type = XML_ATTR_CH;
00357         else
00358           this->type = XML_add_attr_ATTR_CH;
00359         this->step = XML_STEP_ATTR;
00360         SET_CHAR(this, ch);
00361       }
00362       else {
00363         this->type = XML_TAG_CH;
00364         SET_CHAR(this, ch);
00365       }
00366       break;
00367     case XML_BEFORE_ATTR:
00368       if (ISSPACE(ch))
00369         this->type = XML_noop;
00370       else if (ch == '<') {
00371         this->state = XML_LT;
00372         this->type = XML_noop;
00373       }
00374       else if (ch == '>') {
00375         if (this->step == XML_STEP_TAG)
00376           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00377         else if (this->step == XML_STEP_NESTED)
00378           this->type = XML_end_tag(END_TAG_TYPE(this));
00379         else
00380           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00381         this->nesting--;
00382         if (this->nesting) {
00383           this->step = XML_STEP_NESTED;
00384           this->state = XML_BEFORE_ATTR;
00385         }
00386         else {
00387           this->step = XML_STEP_NONE;
00388           this->state = XML_IN_CDATA;
00389         }
00390       }
00391       else if (ch == '"' || ch == '\'') {
00392         this->state = XML_QUOTE_BEFORE_VALUE;
00393         this->quote = ch;
00394         this->type = XML_noop;
00395       }
00396       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00397         this->state = XML_SLASH;
00398         this->type = XML_noop;
00399       }
00400       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00401         this->state = XML_QMARK;
00402         this->type = XML_noop;
00403       }
00404       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00405         this->state = XML_SQR;
00406         this->type = XML_noop;
00407       }
00408       else if (ISSINGLE(ch)) {
00409         if (this->step == XML_STEP_TAG)
00410           this->type = XML_start_tag_ATTR_CH;
00411         else if (this->step == XML_STEP_NESTED)
00412           this->type = XML_ATTR_CH;
00413         else
00414           this->type = XML_add_attr_ATTR_CH;
00415         this->step = XML_STEP_ATTR;
00416         SET_CHAR(this, ch);
00417       }
00418       else {
00419         this->state = XML_IN_ATTR;
00420         if (this->step == XML_STEP_TAG)
00421           this->type = XML_start_tag_ATTR_CH;
00422         else if (this->step == XML_STEP_NESTED)
00423           this->type = XML_ATTR_CH;
00424         else
00425           this->type = XML_add_attr_ATTR_CH;
00426         this->step = XML_STEP_ATTR;
00427         SET_CHAR(this, ch);
00428       }
00429       break;
00430     case XML_IN_ATTR:
00431       if (ISSPACE(ch)) {
00432         this->state = XML_BEFORE_EQUAL;
00433         this->type = XML_noop;
00434       }
00435       else if (ch == '<') {
00436         this->state = XML_LT;
00437         this->type = XML_noop;
00438       }
00439       else if (ch == '>') {
00440         if (this->step == XML_STEP_TAG)
00441           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00442         else if (this->step == XML_STEP_NESTED)
00443           this->type = XML_end_tag(END_TAG_TYPE(this));
00444         else
00445           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00446         this->nesting--;
00447         if (this->nesting) {
00448           this->step = XML_STEP_NESTED;
00449           this->state = XML_BEFORE_ATTR;
00450         }
00451         else {
00452           this->step = XML_STEP_NONE;
00453           this->state = XML_IN_CDATA;
00454         }
00455       }
00456       else if (ch == '"' || ch == '\'') {
00457         this->state = XML_QUOTE_BEFORE_VALUE;
00458         this->quote = ch;
00459         this->type = XML_noop;
00460       }
00461       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00462         this->state = XML_SLASH;
00463         this->type = XML_noop;
00464       }
00465       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00466         this->state = XML_QMARK;
00467         this->type = XML_noop;
00468       }
00469       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00470         this->state = XML_SQR;
00471         this->type = XML_noop;
00472       }
00473       else if (ISSINGLE(ch)) {
00474         this->state = XML_BEFORE_ATTR;
00475         if (this->step == XML_STEP_TAG)
00476           this->type = XML_start_tag_ATTR_CH;
00477         else if (this->step == XML_STEP_NESTED)
00478           this->type = XML_ATTR_CH;
00479         else
00480           this->type = XML_add_attr_ATTR_CH;
00481         this->step = XML_STEP_ATTR;
00482         SET_CHAR(this, ch);
00483       }
00484       else if (ch == '=') {
00485         this->state = XML_AFTER_EQUAL;
00486         this->type = XML_noop;
00487       }
00488       else {
00489         this->type = XML_ATTR_CH;
00490         SET_CHAR(this, ch);
00491       }
00492       break;
00493     case XML_BEFORE_EQUAL:
00494       if (ISSPACE(ch))
00495         this->type = XML_noop;
00496       else if (ch == '<') {
00497         this->state = XML_LT;
00498         this->type = XML_noop;
00499       }
00500       else if (ch == '>') {
00501         if (this->step == XML_STEP_TAG)
00502           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00503         else if (this->step == XML_STEP_NESTED)
00504           this->type = XML_end_tag(END_TAG_TYPE(this));
00505         else
00506           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00507         this->nesting--;
00508         if (this->nesting) {
00509           this->step = XML_STEP_NESTED;
00510           this->state = XML_BEFORE_ATTR;
00511         }
00512         else {
00513           this->step = XML_STEP_NONE;
00514           this->state = XML_IN_CDATA;
00515         }
00516       }
00517       else if (ch == '"' || ch == '\'') {
00518         this->state = XML_QUOTE_BEFORE_VALUE;
00519         this->quote = ch;
00520         this->type = XML_noop;
00521       }
00522       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00523         this->state = XML_SLASH;
00524         this->type = XML_noop;
00525       }
00526       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00527         this->state = XML_QMARK;
00528         this->type = XML_noop;
00529       }
00530       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00531         this->state = XML_SQR;
00532         this->type = XML_noop;
00533       }
00534       else if (ISSINGLE(ch)) {
00535         this->state = XML_BEFORE_ATTR;
00536         if (this->step == XML_STEP_TAG)
00537           this->type = XML_start_tag_ATTR_CH;
00538         else if (this->step == XML_STEP_NESTED)
00539           this->type = XML_ATTR_CH;
00540         else
00541           this->type = XML_add_attr_ATTR_CH;
00542         this->step = XML_STEP_ATTR;
00543         SET_CHAR(this, ch);
00544       }
00545       else if (ch == '=') {
00546         this->state = XML_AFTER_EQUAL;
00547         this->type = XML_noop;
00548       }
00549       else {
00550         this->state = XML_IN_ATTR;
00551         if (this->step == XML_STEP_TAG)
00552           this->type = XML_start_tag_ATTR_CH;
00553         else if (this->step == XML_STEP_NESTED)
00554           this->type = XML_ATTR_CH;
00555         else
00556           this->type = XML_add_attr_ATTR_CH;
00557         this->step = XML_STEP_ATTR;
00558         SET_CHAR(this, ch);
00559       }
00560       break;
00561     case XML_AFTER_EQUAL:
00562       if (ISSPACE(ch)) {
00563         this->state = XML_AFTER_EQUAL;
00564         this->type = XML_noop;
00565       }
00566       else if (ch == '<') {
00567         this->state = XML_LT;
00568         this->type = XML_noop;
00569       }
00570       else if (ch == '>') {
00571         if (this->step == XML_STEP_TAG)
00572           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00573         else if (this->step == XML_STEP_NESTED)
00574           this->type = XML_end_tag(END_TAG_TYPE(this));
00575         else
00576           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00577         this->nesting--;
00578         if (this->nesting) {
00579           this->step = XML_STEP_NESTED;
00580           this->state = XML_BEFORE_ATTR;
00581         }
00582         else {
00583           this->step = XML_STEP_NONE;
00584           this->state = XML_IN_CDATA;
00585         }
00586       }
00587       else if (ch == '"' || ch == '\'') {
00588         this->state = XML_QUOTE_BEFORE_VALUE;
00589         this->quote = ch;
00590         this->type = XML_noop;
00591       }
00592       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00593         this->state = XML_SLASH;
00594         this->type = XML_noop;
00595       }
00596       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00597         this->state = XML_QMARK;
00598         this->type = XML_noop;
00599       }
00600       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00601         this->state = XML_SQR;
00602         this->type = XML_noop;
00603       }
00604       else if (ISSINGLE(ch)) {
00605         this->state = XML_BEFORE_ATTR;
00606         if (this->step == XML_STEP_TAG)
00607           this->type = XML_start_tag_ATTR_CH;
00608         else if (this->step == XML_STEP_NESTED)
00609           this->type = XML_ATTR_CH;
00610         else
00611           this->type = XML_add_attr_ATTR_CH;
00612         this->step = XML_STEP_ATTR;
00613         SET_CHAR(this, ch);
00614       }
00615       else {
00616         this->state = XML_IN_VALUE;
00617         this->quote = 0;
00618         if (this->step == XML_STEP_TAG)
00619           this->type = XML_start_tag_VALUE_CH;
00620         else if (this->step == XML_STEP_VALUE)
00621           this->type = XML_add_attr_VALUE_CH;
00622         else
00623           this->type = XML_VALUE_CH;
00624         this->step = XML_STEP_VALUE;
00625         SET_CHAR(this, ch);
00626       }
00627       break;
00628     case XML_QUOTE_BEFORE_VALUE:
00629       if (ch == this->quote) {
00630         this->state = XML_QUOTE_AFTER_VALUE;
00631         // Empty string:
00632         if (this->step == XML_STEP_TAG)
00633           this->type = XML_start_tag_VALUE_CH;
00634         else if (this->step == XML_STEP_VALUE)
00635           this->type = XML_add_attr_VALUE_CH;
00636         else
00637           this->type = XML_VALUE_CH;
00638         this->step = XML_STEP_VALUE;
00639         this->count = 0;
00640       }
00641       else {
00642         this->state = XML_IN_VALUE;
00643         if (this->step == XML_STEP_TAG)
00644           this->type = XML_start_tag_VALUE_CH;
00645         else if (this->step == XML_STEP_VALUE)
00646           this->type = XML_add_attr_VALUE_CH;
00647         else
00648           this->type = XML_VALUE_CH;
00649         this->step = XML_STEP_VALUE;
00650         SET_CHAR(this, ch);
00651       }
00652       break;
00653     case XML_IN_VALUE:
00654       if (this->quote) {
00655         if (ch == this->quote) {
00656           this->state = XML_QUOTE_AFTER_VALUE;
00657           this->type = XML_noop;
00658         }
00659         else {
00660           this->type = XML_VALUE_CH;
00661           SET_CHAR(this, ch);
00662         }
00663       }
00664       else {
00665         /* A value without quotes (for HTML!) */
00666         if (ISSPACE(ch)) {
00667           this->state = XML_BEFORE_ATTR;
00668           this->type = XML_noop;
00669         }
00670         else if (ch == '<') {
00671           this->state = XML_LT;
00672           this->type = XML_noop;
00673         }
00674         else if (ch == '>') {
00675           if (this->step == XML_STEP_TAG)
00676             this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00677           else if (this->step == XML_STEP_NESTED)
00678             this->type = XML_end_tag(END_TAG_TYPE(this));
00679           else
00680             this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00681           this->nesting--;
00682           if (this->nesting) {
00683             this->step = XML_STEP_NESTED;
00684             this->state = XML_BEFORE_ATTR;
00685           }
00686           else {
00687             this->step = XML_STEP_NONE;
00688             this->state = XML_IN_CDATA;
00689           }
00690         }
00691         else if (ch == '"' || ch == '\'') {
00692           this->state = XML_QUOTE_BEFORE_VALUE;
00693           this->quote = ch;
00694           this->type = XML_noop;
00695         }
00696         else {
00697           this->type = XML_VALUE_CH;
00698           SET_CHAR(this, ch);
00699         }
00700       }
00701       break;
00702     case XML_QUOTE_AFTER_VALUE:
00703       if (ISSPACE(ch)) {
00704         this->state = XML_BEFORE_ATTR;
00705         this->type = XML_noop;
00706       }
00707       else if (ch == '<') {
00708         this->state = XML_LT;
00709         this->type = XML_noop;
00710       }
00711       else if (ch == '>') {
00712         if (this->step == XML_STEP_TAG)
00713           this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00714         else if (this->step == XML_STEP_NESTED)
00715           this->type = XML_end_tag(END_TAG_TYPE(this));
00716         else
00717           this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00718         this->nesting--;
00719         if (this->nesting) {
00720           this->step = XML_STEP_NESTED;
00721           this->state = XML_BEFORE_ATTR;
00722         }
00723         else {
00724           this->step = XML_STEP_NONE;
00725           this->state = XML_IN_CDATA;
00726         }
00727       }
00728       else if (ch == '"' || ch == '\'') {
00729         this->state = XML_QUOTE_BEFORE_VALUE;
00730         this->quote = ch;
00731         this->type = XML_noop;
00732       }
00733       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00734         this->state = XML_SLASH;
00735         this->type = XML_noop;
00736       }
00737       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00738         this->state = XML_QMARK;
00739         this->type = XML_noop;
00740       }
00741       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00742         this->state = XML_SQR;
00743         this->type = XML_noop;
00744       }
00745       else if (ISSINGLE(ch)) {
00746         this->state = XML_BEFORE_ATTR;
00747         if (this->step == XML_STEP_TAG)
00748           this->type = XML_start_tag_ATTR_CH;
00749         else if (this->step == XML_STEP_NESTED)
00750           this->type = XML_ATTR_CH;
00751         else
00752           this->type = XML_add_attr_ATTR_CH;
00753         this->step = XML_STEP_ATTR;
00754         SET_CHAR(this, ch);
00755       }
00756       else {
00757         this->state = XML_IN_ATTR;
00758         if (this->step == XML_STEP_TAG)
00759           this->type = XML_start_tag_ATTR_CH;
00760         else if (this->step == XML_STEP_NESTED)
00761           this->type = XML_ATTR_CH;
00762         else
00763           this->type = XML_add_attr_ATTR_CH;
00764         this->step = XML_STEP_ATTR;
00765         SET_CHAR(this, ch);
00766       }
00767       break;
00768     case XML_SQR:
00769       SET_CHAR(this, ']');
00770       goto cont;
00771     case XML_SLASH:
00772       SET_CHAR(this, '/');
00773       goto cont;
00774     case XML_QMARK:
00775       SET_CHAR(this, '?');
00776       cont:
00777       if (ISSPACE(ch)) {
00778         this->state = XML_BEFORE_ATTR;
00779         if (this->step == XML_STEP_TAG)
00780           this->type = XML_start_tag_TAG_CH;
00781         else if (this->step == XML_STEP_NESTED)
00782           this->type = XML_TAG_CH;
00783         else if (this->step == XML_STEP_NONE)
00784           this->type = XML_end_cdata_TAG_CH;
00785         else
00786           this->type = XML_add_attr_TAG_CH;
00787         this->step = XML_STEP_ATTR;
00788       }
00789       else if (ch == '<') {
00790         this->state = XML_LT;
00791         if (this->step == XML_STEP_TAG)
00792           this->type = XML_start_tag_TAG_CH;
00793         else if (this->step == XML_STEP_NESTED)
00794           this->type = XML_TAG_CH;
00795         else if (this->step == XML_STEP_NONE)
00796           this->type = XML_end_cdata_TAG_CH;
00797         else
00798           this->type = XML_add_attr_TAG_CH;
00799         this->step = XML_STEP_TAG;
00800       }
00801       else if (ch == '>') {
00802         if (this->state == XML_SLASH) {
00803           if (this->step == XML_STEP_TAG)
00804             this->type = XML_start_tag_end_empty_tag;
00805           else if (this->step == XML_STEP_NESTED)
00806             this->type = XML_end_empty_tag;
00807           else
00808             this->type = XML_add_attr_end_empty_tag;
00809         }
00810         else if (this->state == XML_SQR) {
00811           if (this->step == XML_STEP_TAG)
00812             this->type = XML_start_tag_end_tag(XML_OP_1_END_BRACKET_TAG);
00813           else if (this->step == XML_STEP_NESTED)
00814             this->type = XML_end_tag(XML_OP_1_END_BRACKET_TAG);
00815           else
00816             this->type = XML_add_attr_end_tag(XML_OP_1_END_BRACKET_TAG);
00817         }
00818         else {
00819           if (this->step == XML_STEP_TAG)
00820             this->type = XML_start_tag_end_pi_tag;
00821           else if (this->step == XML_STEP_NESTED)
00822             this->type = XML_end_pi_tag;
00823           else
00824             this->type = XML_add_attr_end_pi_tag;
00825         }
00826         this->nesting--;
00827         if (this->nesting) {
00828           this->step = XML_STEP_NESTED;
00829           this->state = XML_BEFORE_ATTR;
00830         }
00831         else {
00832           this->step = XML_STEP_NONE;
00833           this->state = XML_IN_CDATA;
00834         }
00835       }
00836       else if (ch == '"' || ch == '\'') {
00837         this->state = XML_QUOTE_BEFORE_VALUE;
00838         this->quote = ch;
00839         if (this->step == XML_STEP_TAG)
00840           this->type = XML_start_tag_TAG_CH;
00841         else if (this->step == XML_STEP_NESTED)
00842           this->type = XML_TAG_CH;
00843         else if (this->step == XML_STEP_NONE)
00844           this->type = XML_end_cdata_TAG_CH;
00845         else
00846           this->type = XML_add_attr_TAG_CH;
00847         this->step = XML_STEP_ATTR;
00848       }
00849       else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00850         this->state = XML_SLASH;
00851         if (this->step == XML_STEP_TAG)
00852           this->type = XML_start_tag_TAG_CH;
00853         else if (this->step == XML_STEP_NESTED)
00854           this->type = XML_TAG_CH;
00855         else if (this->step == XML_STEP_NONE)
00856           this->type = XML_end_cdata_TAG_CH;
00857         else
00858           this->type = XML_add_attr_TAG_CH;
00859         this->step = XML_STEP_ATTR;
00860       }
00861       else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00862         this->state = XML_QMARK;
00863         if (this->step == XML_STEP_TAG)
00864           this->type = XML_start_tag_TAG_CH;
00865         else if (this->step == XML_STEP_NESTED)
00866           this->type = XML_TAG_CH;
00867         else if (this->step == XML_STEP_NONE)
00868           this->type = XML_end_cdata_TAG_CH;
00869         else
00870           this->type = XML_add_attr_TAG_CH;
00871         this->step = XML_STEP_ATTR;
00872       }
00873       else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00874         this->state = XML_SQR;
00875         if (this->step == XML_STEP_TAG)
00876           this->type = XML_start_tag_TAG_CH;
00877         else if (this->step == XML_STEP_NESTED)
00878           this->type = XML_TAG_CH;
00879         else if (this->step == XML_STEP_NONE)
00880           this->type = XML_end_cdata_TAG_CH;
00881         else
00882           this->type = XML_add_attr_TAG_CH;
00883         this->step = XML_STEP_ATTR;
00884       }
00885       else if (ISSINGLE(ch)) {
00886         this->state = XML_BEFORE_ATTR;
00887         if (this->step == XML_STEP_TAG)
00888           this->type = XML_start_tag_TAG_CH;
00889         else if (this->step == XML_STEP_NESTED)
00890           this->type = XML_TAG_CH;
00891         else if (this->step == XML_STEP_NONE)
00892           this->type = XML_end_cdata_TAG_CH;
00893         else
00894           this->type = XML_add_attr_TAG_CH;
00895         this->step = XML_STEP_ATTR;
00896         ADD_CHAR(this, ch);
00897       }
00898       else {
00899         this->state = XML_IN_ATTR;
00900         if (this->step == XML_STEP_TAG)
00901           this->type = XML_start_tag_TAG_CH;
00902         else if (this->step == XML_STEP_NESTED)
00903           this->type = XML_TAG_CH;
00904         else if (this->step == XML_STEP_NONE)
00905           this->type = XML_end_cdata_TAG_CH;
00906         else
00907           this->type = XML_add_attr_TAG_CH;
00908         this->step = XML_STEP_ATTR;
00909         ADD_CHAR(this, ch);
00910       }
00911       break;
00912     case XML_IN_COMMENT:
00913       if (ch == '-') {
00914         this->state = XML_IN_COMMENT_DASH;
00915         this->type = XML_noop;
00916       }
00917       else
00918         this->type = XML_COMMENT_CH;
00919       SET_CHAR(this, ch);
00920       break;
00921     case XML_IN_COMMENT_DASH:
00922       if (ch == '-') {
00923         this->state = XML_IN_COMMENT_DASH_DASH;
00924         this->type = XML_noop;
00925       }
00926       else {
00927         this->state = XML_IN_COMMENT;
00928         this->type = XML_COMMENT_CH;
00929       }
00930       ADD_CHAR(this, ch);
00931       break;
00932     case XML_IN_COMMENT_DASH_DASH:
00933       if (ch == '-') {
00934         this->state = XML_IN_COMMENT_3_DASH;
00935         this->type = XML_COMMENT_CH;
00936         SET_CHAR(this, ch);
00937       }
00938       else if (ch == '>') {
00939         this->type = XML_end_comment;
00940         this->nesting--;
00941         if (this->nesting) {
00942           this->step = XML_STEP_NESTED;
00943           this->state = XML_BEFORE_ATTR;
00944         }
00945         else {
00946           this->step = XML_STEP_NONE;
00947           this->state = XML_IN_CDATA;
00948         }
00949       }
00950       else {
00951         this->state = XML_IN_COMMENT;
00952         this->type = XML_COMMENT_CH;
00953         ADD_CHAR(this, ch);
00954       }
00955       break;
00956     case XML_IN_COMMENT_3_DASH:
00957       if (ch == '-') {
00958         this->type = XML_COMMENT_CH;
00959         SET_CHAR(this, ch);
00960       }
00961       else if (ch == '>') {
00962         this->type = XML_end_comment;
00963         this->nesting--;
00964         if (this->nesting) {
00965           this->step = XML_STEP_NESTED;
00966           this->state = XML_BEFORE_ATTR;
00967         }
00968         else {
00969           this->step = XML_STEP_NONE;
00970           this->state = XML_IN_CDATA;
00971         }
00972       }
00973       else {
00974         this->state = XML_IN_COMMENT;
00975         this->type = XML_COMMENT_CH;
00976         SET_CHAR(this, '-');
00977         ADD_CHAR(this, '-');
00978         ADD_CHAR(this, ch);
00979       }
00980       break;
00981     case XML_IN_CDATA_TAG:
00982       if (ch == ']') {
00983         this->state = XML_IN_CDATA_TAG_SQR;
00984         this->type = XML_noop;
00985       }
00986       else
00987         this->type = XML_CDATA_TAG_CH;
00988       SET_CHAR(this, ch);
00989       break;
00990     case XML_IN_CDATA_TAG_SQR:
00991       if (ch == ']') {
00992         this->state = XML_IN_CDATA_TAG_SQR_SQR;
00993         this->type = XML_noop;
00994       }
00995       else {
00996         this->state = XML_IN_CDATA_TAG;
00997         this->type = XML_CDATA_TAG_CH;
00998       }
00999       ADD_CHAR(this, ch);
01000       break;
01001     case XML_IN_CDATA_TAG_SQR_SQR:
01002       if (ch == ']') {
01003         this->state = XML_IN_CDATA_TAG_3_SQR;
01004         this->type = XML_CDATA_TAG_CH;
01005         SET_CHAR(this, ch);
01006       }
01007       else if (ch == '>') {
01008         this->type = XML_end_cdata_tag;
01009         this->nesting--;
01010         if (this->nesting) {
01011           this->step = XML_STEP_NESTED;
01012           this->state = XML_BEFORE_ATTR;
01013         }
01014         else {
01015           this->step = XML_STEP_NONE;
01016           this->state = XML_IN_CDATA;
01017         }
01018       }
01019       else {
01020         this->state = XML_IN_CDATA_TAG;
01021         this->type = XML_CDATA_TAG_CH;
01022         ADD_CHAR(this, ch);
01023       }
01024       break;
01025     case XML_IN_CDATA_TAG_3_SQR:
01026       if (ch == ']') {
01027         this->type = XML_CDATA_TAG_CH;
01028         SET_CHAR(this, ch);
01029       }
01030       else if (ch == '>') {
01031         this->type = XML_end_cdata_tag;
01032         this->nesting--;
01033         if (this->nesting) {
01034           this->step = XML_STEP_NESTED;
01035           this->state = XML_BEFORE_ATTR;
01036         }
01037         else {
01038           this->step = XML_STEP_NONE;
01039           this->state = XML_IN_CDATA;
01040         }
01041       }
01042       else {
01043         this->state = XML_IN_CDATA_TAG;
01044         this->type = XML_CDATA_TAG_CH;
01045         SET_CHAR(this, ']');
01046         ADD_CHAR(this, ']');
01047         ADD_CHAR(this, ch);
01048       }
01049       break;
01050   }
01051   return(this->type);
01052 }
01053 
01054 /* ------------------------------------------------------------------- */
01055 /* CSXMLProcessor */
01056 
01057 bool CSXMLProcessor::buildConversionTable()
01058 {
01059   int32_t i;
01060 
01061   /* By default we don't know how to convert any charset
01062    * other tha ISO-1 to unicode!
01063    */
01064   if (strcasecmp(charset, "ISO-8859-1") == 0) {
01065     for (i=0; i<128; i++)
01066       conversion_table[i] = (wchar_t) (i + 128);
01067   }
01068   else {
01069     for (i=0; i<128; i++)
01070       conversion_table[i] = '?';
01071   }
01072   return true;
01073 }
01074 
01075 // Private use area: E000 - F8FF
01076 
01077 int32_t CSXMLProcessor::capture_initializer(wchar_t ch)
01078 /* We capture tag and attribute data for the parsing purposes.
01079  * The buffers are initialized here (at the lowest level)
01080  * of processing after parsing.
01081  */
01082 {
01083   int32_t op;
01084 
01085   op = parseChar(ch);
01086   switch (op & XML_OP_1_MASK) {
01087     case XML_OP_1_START_TAG:
01088       this->tlength = 0;
01089       break;
01090     case XML_OP_1_ADD_ATTR:
01091       this->nlength = 0;
01092       this->vlength = 0;
01093       break;
01094   }
01095   return(op);
01096 }
01097 
01098 int32_t CSXMLProcessor::entity_translator(wchar_t ch)
01099 /* This function handles entities.
01100  * Certain entities are translated into UNICODE characters.
01101  * Strictly speaking, these enties are only recognised by HTML.
01102  * The few entities that are recognised by XML are first translated
01103  * into some reserved characters for the parser. This is to ensure
01104  * that the parser does not recognize them as characters with special
01105  * meaning! This includes '&', '<' and '>'.
01106  */
01107 {
01108   int32_t op;
01109 
01110   op = capture_initializer(ch);
01111   return(op);
01112 }
01113 
01114 /*
01115  * This function translates the input character stream into UNICODE.
01116  */
01117 int32_t CSXMLProcessor::charset_transformer(wchar_t ch)
01118 {
01119   int32_t op;
01120 
01121   // Do transformation according to the charset.
01122   switch (this->charset_type) {
01123     case CHARSET_UTF_8:
01124       if (ch > 127 && ch < 256) {
01125         uint32_t utf_value;
01126         uint8_t utf_ch = (uint8_t)ch;
01127 
01128         if ((utf_ch & 0xC0) != 0x80)
01129           this->utf8_count = 0;
01130         if ((utf_ch & 0x80) == 0x00)
01131           this->utf8_length = 1;
01132         else if ((utf_ch & 0xE0) == 0xC0)
01133           this->utf8_length = 2;
01134         else if ((utf_ch & 0xF0) == 0xE0)
01135           this->utf8_length = 3;
01136         else if ((utf_ch & 0xF8) == 0xF0)
01137           this->utf8_length = 4;
01138         else if ((utf_ch & 0xFC) == 0xF8)
01139           this->utf8_length = 5;
01140         else if ((utf_ch & 0xFE) == 0xFC)
01141           this->utf8_length = 6;
01142         this->utf8_buffer[this->utf8_count] = (uint32_t) utf_ch;
01143         this->utf8_count++;
01144         if (this->utf8_count < this->utf8_length) {
01145           // I need more bytes!
01146           setDataType(XML_noop);
01147           return(XML_noop);
01148         }
01149         utf_value = 0;
01150         switch (this->utf8_length) {
01151           case 1:
01152             utf_value = this->utf8_buffer[0] & 0x0000007F;
01153             break;
01154           case 2:
01155             utf_value = ((this->utf8_buffer[0] & 0x0000001F) << 6) |
01156                   (this->utf8_buffer[1] & 0x0000003F);
01157             if (utf_value < 0x00000080)
01158               utf_value = '?';
01159             break;
01160           case 3:
01161             utf_value = ((this->utf8_buffer[0] & 0x0000000F) << 12) |
01162                   ((this->utf8_buffer[1] & 0x0000003F) << 6) |
01163                   (this->utf8_buffer[2] & 0x0000003F);
01164             if (utf_value < 0x000000800)
01165               utf_value = '?';
01166             break;
01167           case 4:
01168             utf_value = ((this->utf8_buffer[0] & 0x00000007) << 18) |
01169                   ((this->utf8_buffer[1] & 0x0000003F) << 12) |
01170                   ((this->utf8_buffer[2] & 0x0000003F) << 6) |
01171                   (this->utf8_buffer[3] & 0x0000003F);
01172             if (utf_value < 0x00010000)
01173               utf_value = '?';
01174             break;
01175           case 5:
01176             utf_value = ((this->utf8_buffer[0] & 0x00000003) << 24) |
01177                   ((this->utf8_buffer[1] & 0x0000003F) << 18) |
01178                   ((this->utf8_buffer[2] & 0x0000003F) << 12) |
01179                   ((this->utf8_buffer[3] & 0x0000003F) << 6) |
01180                   (this->utf8_buffer[4] & 0x0000003F);
01181             if (utf_value < 0x00200000)
01182               utf_value = '?';
01183             break;
01184           case 6:
01185             utf_value = ((this->utf8_buffer[0] & 0x00000001) << 30) |
01186                   ((this->utf8_buffer[1] & 0x0000003F) << 24) |
01187                   ((this->utf8_buffer[2] & 0x0000003F) << 18) |
01188                   ((this->utf8_buffer[3] & 0x0000003F) << 12) |
01189                   ((this->utf8_buffer[4] & 0x0000003F) << 6) |
01190                   (this->utf8_buffer[5] & 0x0000003F);
01191             if (utf_value < 0x04000000)
01192               utf_value = '?';
01193             break;
01194         }
01195         if (utf_value > 0x0000FFFF)
01196           ch = '?';
01197         else
01198           ch = utf_value;
01199       }
01200       break;
01201     case CHARSET_TO_CONVERT_8_BIT:
01202       if (ch > 127 && ch < 256)
01203         ch = this->conversion_table[((unsigned char) ch) - 128];
01204       break;
01205   }
01206 
01207   op = entity_translator(ch);
01208 
01209   // Determine the characters set:
01210   switch (op & XML_OP_1_MASK) {
01211     case XML_OP_1_START_TAG:
01212       if (strcmp(this->pr_tag, "?xml") == 0)
01213         this->ip = true;
01214       else
01215         this->ip = false;
01216       break;
01217     case XML_OP_1_ADD_ATTR:
01218       if (this->ip) {
01219         if (strcasecmp(this->pr_name, "encoding") == 0) {
01220           strcpy(this->charset, this->pr_value);
01221           if (STRCASESTR(this->charset, "utf-8"))
01222             this->charset_type = CHARSET_UTF_8;
01223           else if (STRCASESTR(this->charset, "ucs-2") ||
01224             STRCASESTR(this->charset, "ucs-4") ||
01225             STRCASESTR(this->charset, "unicode"))
01226             this->charset_type = CHARSET_STANDARD;
01227           else {
01228             this->charset_type = CHARSET_TO_CONVERT_8_BIT;
01229             buildConversionTable();
01230           }
01231         }
01232       }
01233       break;
01234   }
01235   return(op);
01236 }
01237 
01238 void CSXMLProcessor::appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen)
01239 {
01240   for (size_t i=0; i < slen; i++) {
01241     if (*dlen < dsize-1) {
01242       if (*schars > 127)
01243         dstr[*dlen] = '~';
01244       else
01245         dstr[*dlen] = (char)*schars;
01246       (*dlen)++;
01247       schars++;
01248       dstr[*dlen] = 0;
01249     }
01250   }
01251 }
01252 
01253 int32_t CSXMLProcessor::processChar(wchar_t ch)
01254 {
01255   int32_t op;
01256 
01257   op = charset_transformer(ch);
01258 
01259   /*
01260    * Capture output tag and attribute data.
01261    * This must be done at the highest level, after
01262    * parsing.
01263    */
01264   switch (op & XML_DATA_MASK) {
01265     case XML_DATA_TAG:
01266       appendWCharToString(this->pr_tag, &this->tlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01267       break;
01268     case XML_DATA_ATTR:
01269       appendWCharToString(this->pr_name, &this->nlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01270       break;
01271     case XML_DATA_VALUE:
01272       appendWCharToString(this->pr_value, &this->vlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01273       break;
01274   }
01275   return(op);
01276 }
01277 
01278 bool CSXMLProcessor::getError(int32_t *err, char **msg)
01279 {
01280   *err = err_no;
01281   *msg = err_message;
01282   return err_no != 0;
01283 }
01284 
01285 void CSXMLProcessor::setError(int32_t err, char *msg)
01286 {
01287   err_no = err;
01288   if (msg) {
01289     strncpy(err_message, msg, CS_XML_ERR_MSG_SIZE);
01290     err_message[CS_XML_ERR_MSG_SIZE-1] = 0;
01291     return;
01292   }
01293 
01294   switch (err) {
01295     case CS_XML_ERR_OUT_OF_MEMORY:
01296       snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- insufficient memory");     
01297       break;
01298     case CS_XML_ERR_CHAR_TOO_LARGE:
01299       snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- UNICODE character too large to be encoded as UTF-8");      
01300       break;
01301     default:
01302       snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- %s", strerror(err));
01303       break;
01304   }
01305 }
01306 
01307 void CSXMLProcessor::printError(char *prefix)
01308 {
01309   printf("%s%s", prefix, err_message);
01310 }
01311 
01312 /* ------------------------------------------------------------------- */
01313 /* CSXMLString */
01314 
01315 #ifdef DEBUG_ALL
01316 #define EXTRA_SIZE      2
01317 #else
01318 #define EXTRA_SIZE      100
01319 #endif
01320 
01321 bool CSXMLString::addChar(char ch, CSXMLProcessor *xml)
01322 {
01323   char *ptr;
01324 
01325   if (stringLen + 2 > stringSize) {
01326     if (!(ptr = (char *) realloc(stringPtr, stringLen + 2 + EXTRA_SIZE))) {
01327       xml->setError(CS_XML_ERR_OUT_OF_MEMORY, NULL);
01328       return false;
01329     }
01330     stringPtr = ptr;
01331     stringSize = stringLen + 2 + EXTRA_SIZE;
01332   }
01333   stringPtr[stringLen] = ch;
01334   stringPtr[stringLen+1] = 0;
01335   stringLen++;
01336   return true;
01337 }
01338 
01339 bool CSXMLString::addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml)
01340 {
01341   size_t    i;
01342   uint32_t  uni_char;
01343   int32_t     shift;
01344 
01345   for (i=0; i<size; i++) {
01346     uni_char = (uint32_t) buffer[i];
01347     
01348     /* Convertion to lower only done for ASCII! */
01349     if (to_lower && uni_char <= 127)
01350       uni_char = (uint32_t) tolower((int32_t) uni_char);
01351 
01352     // Convert to UTF-8!
01353     if (uni_char <= 0x0000007F) {
01354       if (!addChar((char) uni_char, xml))
01355         return false;
01356       shift = -6;
01357     }
01358     else if (uni_char <= 0x000007FF) {
01359       if (!addChar((char) ((0x000000C0) | ((uni_char >> 6) & 0x0000001F)), xml))
01360         return false;
01361       shift = 0;
01362     }
01363     else if (uni_char <= 0x00000FFFF) {
01364       if (!addChar((char) ((0x000000E0) | ((uni_char >> 12) & 0x0000000F)), xml))
01365         return false;
01366       shift = 6;
01367     }
01368     else if (uni_char <= 0x001FFFFF) {
01369       if (!addChar((char) ((0x000000F0) | ((uni_char >> 18) & 0x00000007)), xml))
01370         return false;
01371       shift = 12;
01372     }
01373     else if (uni_char <= 0x003FFFFFF) {
01374       if (!addChar((char) ((0x000000F0) | ((uni_char >> 24) & 0x00000003)), xml))
01375         return false;
01376       shift = 18;
01377     }
01378     else if (uni_char <= 0x07FFFFFFF) {
01379       if (!addChar((char) ((0x000000F0) | ((uni_char >> 30) & 0x00000001)), xml))
01380         return false;
01381       shift = 24;
01382     }
01383     else {
01384       xml->setError(CS_XML_ERR_CHAR_TOO_LARGE, NULL);
01385       return false;
01386     }
01387 
01388     while (shift >= 0) {
01389       if (!addChar((char) ((0x00000080) | ((uni_char >> shift) & 0x0000003F)), xml))
01390         return false;
01391       shift -= 6;
01392     }
01393   }
01394   return true;
01395 }
01396 
01397 bool CSXMLString::addString(const char *string, CSXMLProcessor *xml)
01398 {
01399   bool ok = true;
01400   
01401   while (*string && ok) {
01402     ok = addChar(*string, xml);
01403     string++;
01404   }
01405   return ok;
01406 }
01407 
01408 void CSXMLString::setEmpty()
01409 {
01410   stringLen = 0;
01411   if (stringPtr)
01412     *stringPtr = 0;
01413 }
01414 
01415 void CSXMLString::setNull()
01416 {
01417   if (stringPtr)
01418     free(stringPtr);
01419   stringPtr = NULL;
01420   stringLen = 0;
01421   stringSize = 0;
01422 }
01423 
01424 char *CSXMLString::lastComponent()
01425 {
01426   char *ptr;
01427 
01428   if (stringLen == 0)
01429     return NULL;
01430 
01431   ptr = stringPtr + stringLen - 1;
01432   while (ptr > stringPtr && *ptr != '/')
01433     ptr--;
01434   return ptr;
01435 }
01436 
01437 /* We assume comp begins with a '/' */
01438 char *CSXMLString::findTrailingComponent(const char *comp)
01439 {
01440   char *ptr, *last_slash;
01441 
01442   if (stringLen == 0)
01443     return NULL;
01444 
01445   ptr = stringPtr + stringLen - 1;
01446   last_slash = NULL;
01447 
01448   do {
01449     /* Find the next '/' */
01450     while (ptr > stringPtr && *ptr != '/')
01451       ptr--;
01452     if (last_slash)
01453       *last_slash = 0;
01454     if (strcmp(ptr, comp) == 0) {
01455       if (last_slash)
01456         *last_slash = '/';
01457       return ptr;
01458     }
01459     if (last_slash)
01460       *last_slash = '/';
01461     last_slash = ptr;
01462     ptr--;
01463   }
01464   while (ptr > stringPtr);
01465   return NULL;
01466 }
01467 
01468 void CSXMLString::truncate(char *ptr)
01469 {
01470   *ptr = 0;
01471   stringLen = ptr - stringPtr;
01472 }
01473 
01474 /* ------------------------------------------------------------------- */
01475 /* CSXML */
01476 
01477 #define IS_XML_CDATA        0
01478 #define IS_XML_CDATA_TAG      1
01479 #define IS_XML_TAG          2
01480 #define IS_XML_CLOSE_TAG      3
01481 #define IS_XML_COMMENT        4
01482 #define IS_XML_DTD          5
01483 #define IS_XML_PI         6
01484 #define IS_XML_PI_XML       7
01485 #define IS_XML_IN_EX        8
01486 #define IS_XML_OPEN_BRACKET     9
01487 #define IS_XML_CLOSE_BRACKET    10
01488 
01489 int32_t CSXML::nodeType(char *name)
01490 {
01491   if (name) {
01492     switch (*name) {
01493       case 0:
01494         return IS_XML_CDATA;
01495       case '[':
01496         if (strlen(name) == 1)
01497           return IS_XML_OPEN_BRACKET;
01498         break;
01499       case ']':
01500         if (strlen(name) == 1)
01501           return IS_XML_CLOSE_BRACKET;
01502         break;
01503       case '/':
01504         return IS_XML_CLOSE_TAG;
01505       case '!':
01506         if (strlen(name) > 1) {
01507           if (strcasecmp(name, "!--") == 0)
01508             return IS_XML_COMMENT;
01509           if (name[1] == '[') {
01510             if (strcasecmp(name, "![CDATA[") == 0)
01511               return IS_XML_CDATA_TAG;
01512             return IS_XML_IN_EX;
01513           }
01514         }
01515         return IS_XML_DTD;
01516       case '?':
01517         if (strcasecmp(name, "?xml") == 0)
01518           return IS_XML_PI_XML;
01519         return IS_XML_PI;
01520     }
01521     return IS_XML_TAG;
01522   }
01523   return IS_XML_CDATA;
01524 }
01525 
01526 bool CSXML::internalCloseNode(const char *name, bool single)
01527 {
01528   bool  ok = true;
01529   char  *ptr;
01530 
01531   if (single) {
01532     if ((ptr = xml_path.lastComponent())) {
01533       ok = closeNode(xml_path.stringPtr);
01534       xml_path.truncate(ptr);
01535     }
01536   }
01537   else if ((ptr = xml_path.findTrailingComponent(name))) {
01538     /* Close the node that is named above. If the XML is
01539      * correct, then the node should be at the top of the
01540      * node stack (last element of the path).
01541      *
01542      * If not found, "ignore" the close.
01543      *
01544      * If not found on the top of the node stack, then
01545      * we close serveral nodes.
01546      */
01547     for (;;) {
01548       if (!(ptr = xml_path.lastComponent()))
01549         break;
01550       if (!(ok = closeNode(xml_path.stringPtr)))
01551         break;
01552       if (strcmp(ptr, name) == 0) {
01553         xml_path.truncate(ptr);
01554         break;
01555       }
01556       xml_path.truncate(ptr);
01557     }
01558   }
01559   return ok;
01560 }
01561 
01562 bool CSXML::internalOpenNode(const char *name)
01563 {
01564   bool ok;
01565 
01566   ok = xml_path.addString("/", this);
01567   if (!ok)
01568     return ok;
01569   ok = xml_path.addString(name, this);
01570   if (!ok)
01571     return ok;
01572   return openNode(this->xml_path.stringPtr, this->xml_value.stringPtr);
01573 }
01574 
01575 bool CSXML::parseXML(int32_t my_flags)
01576 {
01577   wchar_t ch;
01578   bool  ok = true;
01579   int32_t   op;
01580   int32_t   tagtype;
01581 
01582   this->flags = my_flags;
01583   ok = xml_path.addChars(0, NULL, false, this);
01584   if (!ok)
01585     goto exit;
01586   ok = xml_name.addChars(0, NULL, false, this);
01587   if (!ok)
01588     goto exit;
01589   ok = xml_value.addChars(0, NULL, false, this);
01590   if (!ok)
01591     goto exit;
01592 
01593   ok = getChar(&ch);
01594   while (ch != CS_XML_EOF_CHAR && ok) {
01595     op = processChar(ch);
01596     switch (op & XML_OP_1_MASK) {
01597       case XML_OP_1_NOOP:
01598         break;
01599       case XML_OP_1_END_TAG:
01600         break;
01601       case XML_OP_1_END_CLOSE_TAG:
01602         break;
01603       case XML_OP_1_END_EMPTY_TAG:
01604         ok = internalCloseNode("/>", true);
01605         break;
01606       case XML_OP_1_END_PI_TAG:
01607         ok = internalCloseNode("?>", true);
01608         break;
01609       case XML_OP_1_END_ENTITY_TAG:
01610         ok = internalCloseNode(">", true);
01611         break;
01612       case XML_OP_1_END_BRACKET_TAG:
01613         ok = internalCloseNode("]>", true);
01614         break;
01615       case XML_OP_1_END_UNKNOWN_TAG:
01616         ok = internalCloseNode(">", true);
01617         break;
01618       case XML_OP_1_START_CDATA_TAG:
01619         break;
01620       case XML_OP_1_START_COMMENT:
01621         break;
01622       case XML_OP_1_START_TAG:
01623         if (nodeType(xml_name.stringPtr) == IS_XML_CLOSE_TAG)
01624           ok = internalCloseNode(xml_name.stringPtr, false);
01625         else
01626           ok = internalOpenNode(xml_name.stringPtr);
01627         xml_name.setEmpty();
01628         xml_value.setEmpty();
01629         break;
01630       case XML_OP_1_ADD_ATTR:
01631         tagtype = nodeType(xml_name.stringPtr);
01632         if (tagtype != IS_XML_OPEN_BRACKET && tagtype != IS_XML_CLOSE_BRACKET)
01633           ok = addAttribute(xml_path.stringPtr, xml_name.stringPtr, xml_value.stringPtr);
01634         xml_name.setEmpty();
01635         xml_value.setEmpty();
01636         break;
01637       case XML_OP_1_END_CDATA:
01638         if (xml_value.stringLen || (my_flags & XML_KEEP_EMPTY_CDATA)) {
01639           ok = internalOpenNode("");
01640           xml_name.setEmpty();
01641           xml_value.setEmpty();
01642           ok = internalCloseNode("", true);
01643         }
01644         break;
01645       case XML_OP_1_END_CDATA_TAG:
01646         ok = internalOpenNode("![CDATA[");
01647         xml_name.setEmpty();
01648         xml_value.setEmpty();
01649         if (ok)
01650           ok = internalCloseNode("]]>", true);
01651         break;
01652       case XML_OP_1_END_COMMENT:
01653         ok = internalOpenNode("!--");
01654         xml_name.setEmpty();
01655         xml_value.setEmpty();
01656         if (ok)
01657           ok = internalCloseNode("-->", true);
01658         break;
01659     }
01660     if (!ok)
01661       break;
01662     switch (op & XML_DATA_MASK) {
01663       case XML_DATA_TAG:
01664       case XML_DATA_ATTR:
01665         ok = xml_name.addChars(getDataLen(), getDataPtr(), true, this);
01666         break;
01667       case XML_DATA_CDATA:
01668       case XML_DATA_CDATA_TAG:
01669       case XML_COMMENT:
01670       case XML_DATA_VALUE:
01671         ok = xml_value.addChars(getDataLen(), getDataPtr(), false, this);
01672         break;
01673     }
01674     if (!ok)
01675       break;
01676     switch (op & XML_OP_2_MASK) {
01677       case XML_OP_2_NOOP:
01678         break;
01679       case XML_OP_2_END_TAG:
01680         break;
01681       case XML_OP_2_END_CLOSE_TAG:
01682         break;
01683       case XML_OP_2_END_EMPTY_TAG:
01684         ok = internalCloseNode("/>", true);
01685         break;
01686       case XML_OP_2_END_PI_TAG:
01687         ok = internalCloseNode("?>", true);
01688         break;
01689       case XML_OP_2_END_ENTITY_TAG:
01690         ok = internalCloseNode(">", true);
01691         break;
01692       case XML_OP_2_END_BRACKET_TAG:
01693         ok = internalCloseNode("]>", true);
01694         break;
01695       case XML_OP_2_END_UNKNOWN_TAG:
01696         ok = internalCloseNode(">", true);
01697         break;
01698       case XML_OP_2_START_CDATA_TAG:
01699         break;
01700       case XML_OP_2_START_COMMENT:
01701         break;
01702     }
01703     ok = getChar(&ch);
01704   }
01705 
01706   exit:
01707   xml_path.setNull();
01708   xml_name.setNull();
01709   xml_value.setNull();
01710   return ok;
01711 }
01712 
01713 /* ------------------------------------------------------------------- */
01714 /* CSXMLPrint */
01715 
01716 bool CSXMLPrint::openNode(char *path, char *value)
01717 {
01718   printf("OPEN  %s\n", path);
01719   if (value && *value)
01720     printf("      %s\n", value);
01721   return true;
01722 }
01723 
01724 bool CSXMLPrint::closeNode(char *path)
01725 {
01726   printf("close %s\n", path);
01727   return true;
01728 }
01729 
01730 bool CSXMLPrint::addAttribute(char *path, char *name, char *value)
01731 {
01732   if (value)
01733     printf("attr  %s %s=%s\n", path, name, value);
01734   else
01735     printf("attr  %s %s\n", path, name);
01736   return true;
01737 }
01738 
01739 /* ------------------------------------------------------------------- */
01740 /* CSXMLBuffer */
01741 
01742 bool CSXMLBuffer::parseString(const char *data, int32_t my_flags)
01743 {
01744   charData = data;
01745   dataLen = strlen(data);
01746   dataPos = 0;
01747   return parseXML(my_flags);
01748 }
01749 
01750 bool CSXMLBuffer::parseData(const char *data, size_t len, int32_t my_flags)
01751 {
01752   charData = data;
01753   dataLen = len;
01754   dataPos = 0;
01755   return parseXML(my_flags);
01756 }
01757 
01758 bool CSXMLBuffer::getChar(wchar_t *ch)
01759 {
01760   if (dataPos == dataLen)
01761     *ch = CS_XML_EOF_CHAR;
01762   else {
01763     *ch = (wchar_t) (unsigned char) charData[dataPos];
01764     dataPos++;
01765   }
01766   return true;
01767 }
01768 
01769 /* ------------------------------------------------------------------- */
01770 /* CSXMLFile */
01771 
01772 bool CSXMLFile::parseFile(char *file_name, int32_t my_flags)
01773 {
01774   bool ok;
01775 
01776   if (!(this->file = fopen(file_name, "r"))) {
01777     setError(errno, NULL);
01778     return false;
01779   }
01780   ok = parseXML(my_flags);
01781   fclose(this->file);
01782   return ok;
01783 }
01784 
01785 bool CSXMLFile::getChar(wchar_t *ch)
01786 {
01787   int32_t next_ch;
01788   
01789   next_ch = fgetc(file);
01790   if (next_ch == EOF) {
01791     if (ferror(file)) {
01792       setError(errno, NULL);
01793       return false;
01794     }
01795     *ch = CS_XML_EOF_CHAR;
01796   }
01797   else
01798     *ch = (wchar_t) next_ch;
01799   return true;
01800 }
01801 
01802