/*
 * parser.c : an XML 1.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * $Id$
 */

/*
 * NOTE(review): in this copy of the file the header names of the
 * angle-bracket #include directives below are missing. They are kept
 * exactly as found and have to be restored from the upstream sources.
 */
#ifdef WIN32
#define HAVE_FCNTL_H
#include
#else
#include
#endif

#include
#include
#include /* for memset() only */
#include
#include
#ifdef HAVE_FCNTL_H
#include
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_ZLIB_H
#include
#endif

#include "tree.h"
#include "parser.h"
#include "entities.h"

/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(type, name) expands to two functions operating on the
 * context fields name##Tab (storage), name##Nr (number of entries),
 * name##Max (capacity) and name (current top of the stack):
 *
 *   int  name##Push(ctxt, value)
 *        When name##Nr has reached name##Max the capacity is doubled and
 *        name##Tab is realloc'ed; if realloc fails a message is printed
 *        on stderr and the process exits with status 1. The value is
 *        stored, becomes ctxt->name, and the index it was stored at is
 *        returned.
 *
 *   type name##Pop(ctxt)
 *        Returns 0 when the stack is empty. Otherwise removes the top
 *        entry, makes the entry below it (or NULL when none is left)
 *        the new ctxt->name and returns that NEW top, not the entry
 *        which was removed.
 *
 * The macro body ends with a line continuation, so the line following
 * it must stay blank.
 */
#define PUSH_AND_POP(type, name)					\
int name##Push(xmlParserCtxtPtr ctxt, type value) {			\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	ctxt->name##Max *= 2;						\
        ctxt->name##Tab = (void *) realloc(ctxt->name##Tab,		\
	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
        if (ctxt->name##Tab == NULL) {					\
	    fprintf(stderr, "realloc failed !\n");			\
	    exit(1);							\
	}								\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
type name##Pop(xmlParserCtxtPtr ctxt) {					\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    return(ctxt->name);							\
}									\

/* Instantiates inputPush()/inputPop() and nodePush()/nodePop(). */
PUSH_AND_POP(xmlParserInputPtr, input)
PUSH_AND_POP(xmlNodePtr, node)

/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one need to make assumption on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the current CHAR value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
* NXT(n) returns the n'th next CHAR. Same as CUR is should be used only * to compare on ASCII based substring. * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined * strings within the parser. * * Clean macros, not dependent of an ASCII context. * * CURRENT Returns the current char value, with the full decoding of * UTF-8 if we are using this mode. It returns an int. * NEXT Skip to the next character, this does the proper decoding * in UTF-8 mode. It also pop-up unfinished entities on the fly. * It returns the pointer to the current CHAR. */ #define CUR (*ctxt->input->cur) #define SKIP(val) ctxt->input->cur += (val) #define NXT(val) ctxt->input->cur[(val)] #define CUR_PTR ctxt->input->cur #define SKIP_BLANKS \ while (IS_BLANK(*(ctxt->input->cur))) NEXT #ifndef USE_UTF_8 #define CURRENT (*ctxt->input->cur) #define NEXT ((*ctxt->input->cur) ? \ (((*(ctxt->input->cur) == '\n') ? \ (ctxt->input->line++, ctxt->input->col = 1) : \ (ctxt->input->col++)), ctxt->input->cur++) : \ (xmlPopInput(ctxt), ctxt->input->cur)) #else #endif /** * xmlPopInput: * @ctxt: an XML parser context * * xmlPopInput: the current input pointed by ctxt->input came to an end * pop it and return the next char. * * TODO A deallocation of the popped Input structure is needed * return values: the current CHAR in the parser context */ CHAR xmlPopInput(xmlParserCtxtPtr ctxt) { if (ctxt->inputNr == 1) return(0); /* End of main Input */ inputPop(ctxt); return(CUR); } /** * xmlPushInput: * @ctxt: an XML parser context * @input: an XML parser input fragment (entity, XML fragment ...). * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). */ void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { if (input == NULL) return; inputPush(ctxt, input); } /** * xmlNewEntityInputStream: * @ctxt: an XML parser context * @entity: an Entity pointer * * Create a new input stream based on a memory buffer. 
* return vakues: the new input stream */ xmlParserInputPtr xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; if (entity == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "internal: xmlNewEntityInputStream entity = NULL\n"); return(NULL); } if (entity->content == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "internal: xmlNewEntityInputStream entity->input = NULL\n"); return(NULL); } input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput)); if (input == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n"); return(NULL); } input->filename = entity->SystemID; /* TODO !!! char <- CHAR */ input->base = entity->content; input->cur = entity->content; input->line = 1; input->col = 1; return(input); } /* * A few macros needed to help building the parser. */ #ifdef UNICODE /************************************************************************ * * * UNICODE version of the macros. * * * ************************************************************************/ /* * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] * | [#x10000-#x10FFFF] * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) /* * [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \ ((c) == 0x0D)) /* * [85] BaseChar ::= ... long list see REC ... * * VI is your friend ! 
* :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 */
/*
 * IS_BASECHAR(c) is non-zero when @c lies in one of the code point ranges
 * or single values listed below. The argument is expanded once per test,
 * so it must not have side effects.
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x0041) && ((c) <= 0x005A)) ||				\
     (((c) >= 0x0061) && ((c) <= 0x007A)) ||				\
     (((c) >= 0x00C0) && ((c) <= 0x00D6)) ||				\
     (((c) >= 0x00D8) && ((c) <= 0x00F6)) ||				\
     (((c) >= 0x00F8) && ((c) <= 0x00FF)) ||				\
     (((c) >= 0x0100) && ((c) <= 0x0131)) ||				\
     (((c) >= 0x0134) && ((c) <= 0x013E)) ||				\
     (((c) >= 0x0141) && ((c) <= 0x0148)) ||				\
     (((c) >= 0x014A) && ((c) <= 0x017E)) ||				\
     (((c) >= 0x0180) && ((c) <= 0x01C3)) ||				\
     (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||				\
     (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||				\
     (((c) >= 0x01FA) && ((c) <= 0x0217)) ||				\
     (((c) >= 0x0250) && ((c) <= 0x02A8)) ||				\
     (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||				\
     ((c) == 0x0386) ||							\
     (((c) >= 0x0388) && ((c) <= 0x038A)) ||				\
     ((c) == 0x038C) ||							\
     (((c) >= 0x038E) && ((c) <= 0x03A1)) ||				\
     (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||				\
     (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||				\
     ((c) == 0x03DA) ||							\
     ((c) == 0x03DC) ||							\
     ((c) == 0x03DE) ||							\
     ((c) == 0x03E0) ||							\
     (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||				\
     (((c) >= 0x0401) && ((c) <= 0x040C)) ||				\
     (((c) >= 0x040E) && ((c) <= 0x044F)) ||				\
     (((c) >= 0x0451) && ((c) <= 0x045C)) ||				\
     (((c) >= 0x045E) && ((c) <= 0x0481)) ||				\
     (((c) >= 0x0490) && ((c) <= 0x04C4)) ||				\
     (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||				\
     (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||				\
     (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||				\
     (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||				\
     (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||				\
     (((c) >= 0x0531) && ((c) <= 0x0556)) ||				\
     ((c) == 0x0559) ||							\
     (((c) >= 0x0561) && ((c) <= 0x0586)) ||				\
     (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||				\
     (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||				\
     (((c) >= 0x0621) && ((c) <= 0x063A)) ||				\
     (((c) >= 0x0641) && ((c) <= 0x064A)) ||				\
     (((c) >= 0x0671) && ((c) <= 0x06B7)) ||				\
     (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||				\
     (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||				\
     (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||				\
     ((c) == 0x06D5) ||							\
     (((c) >= 0x06E5) && ((c) <= 0x06E6)) ||				\
     (((c) >= 0x0905) && ((c) <= 0x0939)) ||				\
     ((c) == 0x093D) ||							\
     (((c) >= 0x0958) && ((c) <= 0x0961)) ||				\
     (((c) >= 0x0985) && ((c) <= 0x098C)) ||				\
     (((c) >= 0x098F) && ((c) <= 0x0990)) ||				\
     (((c) >= 0x0993) && ((c) <= 0x09A8)) ||				\
     (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||				\
     ((c) == 0x09B2) ||							\
     (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||				\
     (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||				\
     (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||				\
     (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||				\
     (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||				\
     (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||				\
     (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||				\
     (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||				\
     (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||				\
     (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||				\
     (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||				\
     (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||				\
     ((c) == 0x0A5E) ||							\
     (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||				\
     (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||				\
     ((c) == 0x0A8D) ||							\
     (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||				\
     (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||				\
     (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||				\
     (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||				\
     (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||				\
     ((c) == 0x0ABD) ||							\
     ((c) == 0x0AE0) ||							\
     (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||				\
     (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||				\
     (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||				\
     (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||				\
     (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||				\
     (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||				\
     ((c) == 0x0B3D) ||							\
     (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||				\
     (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||				\
     (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||				\
     (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||				\
     (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||				\
     (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||				\
     ((c) == 0x0B9C) ||							\
     (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||				\
     (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||				\
     (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||				\
     (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||				\
     (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||				\
     (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||				\
     (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||				\
     (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||				\
     (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||				\
     (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||				\
     (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||				\
     (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||				\
     (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||				\
     (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||				\
     (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||				\
     (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||				\
     ((c) == 0x0CDE) ||							\
     (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||				\
     (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||				\
     (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||				\
     (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||				\
     (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||				\
     (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||				\
     (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||				\
     ((c) == 0x0E30) ||							\
     (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||				\
     (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||				\
     (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||				\
     ((c) == 0x0E84) ||							\
     (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||				\
     ((c) == 0x0E8A) ||							\
     ((c) == 0x0E8D) ||							\
     (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||				\
     (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||				\
     (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||				\
     ((c) == 0x0EA5) ||							\
     ((c) == 0x0EA7) ||							\
     (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||				\
     (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||				\
     ((c) == 0x0EB0) ||							\
     (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||				\
     ((c) == 0x0EBD) ||							\
     (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||				\
     (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||				\
     (((c) >= 0x0F49) && ((c) <= 0x0F69)) ||				\
     (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||				\
     (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||				\
     ((c) == 0x1100) ||							\
     (((c) >= 0x1102) && ((c) <= 0x1103)) ||				\
     (((c) >= 0x1105) && ((c) <= 0x1107)) ||				\
     ((c) == 0x1109) ||							\
     (((c) >= 0x110B) && ((c) <= 0x110C)) ||				\
     (((c) >= 0x110E) && ((c) <= 0x1112)) ||				\
     ((c) == 0x113C) ||							\
     ((c) == 0x113E) ||							\
     ((c) == 0x1140) ||							\
     ((c) == 0x114C) ||							\
     ((c) == 0x114E) ||							\
     ((c) == 0x1150) ||							\
     (((c) >= 0x1154) && ((c) <= 0x1155)) ||				\
     ((c) == 0x1159) ||							\
     (((c) >= 0x115F) && ((c) <= 0x1161)) ||				\
     ((c) == 0x1163) ||							\
     ((c) == 0x1165) ||							\
     ((c) == 0x1167) ||							\
     ((c) == 0x1169) ||							\
     (((c) >= 0x116D) && ((c) <= 0x116E)) ||				\
     (((c) >= 0x1172) && ((c) <= 0x1173)) ||				\
     ((c) == 0x1175) ||							\
     ((c) == 0x119E) ||							\
     ((c) == 0x11A8) ||							\
     ((c) == 0x11AB) ||							\
     (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||				\
     (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||				\
     ((c) == 0x11BA) ||							\
     (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||				\
     ((c) == 0x11EB) ||							\
     ((c) == 0x11F0) ||							\
     ((c) == 0x11F9) ||							\
     (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||				\
     (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||				\
     (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||				\
     (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||				\
     (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||				\
     (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||				\
     (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||				\
     ((c) == 0x1F59) ||							\
     ((c) == 0x1F5B) ||							\
     ((c) == 0x1F5D) ||							\
     (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||				\
     (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||				\
     (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||				\
     ((c) == 0x1FBE) ||							\
     (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||				\
     (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||				\
     (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||				\
     (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||				\
     (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||				\
     (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||				\
     (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||				\
     ((c) == 0x2126) ||							\
     (((c) >= 0x212A) && ((c) <= 0x212B)) ||				\
     ((c) == 0x212E) ||							\
     (((c) >= 0x2180) && ((c) <= 0x2182)) ||				\
     (((c) >= 0x3041) && ((c) <= 0x3094)) ||				\
     (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||				\
     (((c) >= 0x3105) && ((c) <= 0x312C)) ||				\
     (((c) >= 0xAC00) && ((c) <= 0xD7A3)))

/*
 * [88] Digit ::= ... long list see REC ...
*/ #define IS_DIGIT(c) \ ((((c) >= 0x0030) && ((c) <= 0x0039)) || \ (((c) >= 0x0660) && ((c) <= 0x0669)) || \ (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \ (((c) >= 0x0966) && ((c) <= 0x096F)) || \ (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \ (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \ (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \ (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \ (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \ (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \ (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \ (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \ (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \ (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \ (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* * [87] CombiningChar ::= ... long list see REC ... */ #define IS_COMBINING(c) \ ((((c) >= 0x0300) && ((c) <= 0x0345)) || \ (((c) >= 0x0360) && ((c) <= 0x0361)) || \ (((c) >= 0x0483) && ((c) <= 0x0486)) || \ (((c) >= 0x0591) && ((c) <= 0x05A1)) || \ (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \ (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \ ((c) == 0x05BF) || \ (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \ ((c) == 0x05C4) || \ (((c) >= 0x064B) && ((c) <= 0x0652)) || \ ((c) == 0x0670) || \ (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \ (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \ (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \ (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \ (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \ (((c) >= 0x0901) && ((c) <= 0x0903)) || \ ((c) == 0x093C) || \ (((c) >= 0x093E) && ((c) <= 0x094C)) || \ ((c) == 0x094D) || \ (((c) >= 0x0951) && ((c) <= 0x0954)) || \ (((c) >= 0x0962) && ((c) <= 0x0963)) || \ (((c) >= 0x0981) && ((c) <= 0x0983)) || \ ((c) == 0x09BC) || \ ((c) == 0x09BE) || \ ((c) == 0x09BF) || \ (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \ (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \ (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \ ((c) == 0x09D7) || \ (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \ ((c) == 0x0A02) || \ ((c) == 0x0A3C) || \ ((c) == 0x0A3E) || \ ((c) == 0x0A3F) || \ (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \ (((c) >= 0x0A47) && ((c) 
<= 0x0A48)) || \ (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \ (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \ (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \ ((c) == 0x0ABC) || \ (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \ (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \ (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \ (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \ ((c) == 0x0B3C) || \ (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \ (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \ (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \ (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \ (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \ (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \ (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \ (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \ ((c) == 0x0BD7) || \ (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \ (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \ (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \ (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \ (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \ (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \ (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \ (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \ (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \ (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \ (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \ (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \ (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \ (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \ ((c) == 0x0D57) || \ ((c) == 0x0E31) || \ (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \ (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \ ((c) == 0x0EB1) || \ (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \ (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \ (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \ (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \ ((c) == 0x0F35) || \ ((c) == 0x0F37) || \ ((c) == 0x0F39) || \ ((c) == 0x0F3E) || \ ((c) == 0x0F3F) || \ (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \ (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \ (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \ ((c) == 0x0F97) || \ (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \ (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \ ((c) == 0x0FB9) || \ (((c) >= 0x20D0) && ((c) <= 
0x20DC)) || \ ((c) == 0x20E1) || \ (((c) >= 0x302A) && ((c) <= 0x302F)) || \ ((c) == 0x3099) || \ ((c) == 0x309A)) /* * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | * [#x309D-#x309E] | [#x30FC-#x30FE] */ #define IS_EXTENDER(c) \ (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \ ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \ ((c) == 0xec6) || ((c) == 0x3005) \ (((c) >= 0x3031) && ((c) <= 0x3035)) || \ (((c) >= 0x309b) && ((c) <= 0x309e)) || \ (((c) >= 0x30fc) && ((c) <= 0x30fe))) /* * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] */ #define IS_IDEOGRAPHIC(c) \ ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \ (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \ (((c) >= 0x3021) && ((c) <= 0x3029)) || \ ((c) == 0x3007)) /* * [84] Letter ::= BaseChar | Ideographic */ #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) #else #ifndef USE_UTF_8 /************************************************************************ * * * 8bits / ISO-Latin version of the macros. * * * ************************************************************************/ /* * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] * | [#x10000-#x10FFFF] * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20) ||\ ((c) == 0xa)) /* * [85] BaseChar ::= ... long list see REC ... */ #define IS_BASECHAR(c) \ ((((c) >= 0x41) && ((c) <= 0x5a)) || \ (((c) >= 0x61) && ((c) <= 0x7a)) || \ (((c) >= 0xaa) && ((c) <= 0x5b)) || \ (((c) >= 0xc0) && ((c) <= 0xd6)) || \ (((c) >= 0xd8) && ((c) <= 0xf6)) || \ (((c) >= 0xf8) && ((c) <= 0xff)) || \ ((c) == 0xba)) /* * [88] Digit ::= ... long list see REC ... */ #define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39)) /* * [84] Letter ::= BaseChar | Ideographic */ #define IS_LETTER(c) IS_BASECHAR(c) /* * [87] CombiningChar ::= ... long list see REC ... 
*/ #define IS_COMBINING(c) 0 /* * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | * [#x309D-#x309E] | [#x30FC-#x30FE] */ #define IS_EXTENDER(c) ((c) == 0xb7) #else /* USE_UTF_8 */ /************************************************************************ * * * 8bits / UTF-8 version of the macros. * * * ************************************************************************/ TODO !!! #endif /* USE_UTF_8 */ #endif /* !UNICODE */ /* * Blank chars. * * [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \ ((c) == 0x0D)) /* * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ #define IS_PUBIDCHAR(c) \ (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \ (((c) >= 'a') && ((c) <= 'z')) || \ (((c) >= 'A') && ((c) <= 'Z')) || \ (((c) >= '0') && ((c) <= '9')) || \ ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || \ ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || \ ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || \ ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || \ ((c) == '$') || ((c) == '_') || ((c) == '%')) #define SKIP_EOL(p) \ if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \ if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } #define MOVETO_ENDTAG(p) \ while (IS_CHAR(*p) && (*(p) != '>')) (p)++ #define MOVETO_STARTTAG(p) \ while (IS_CHAR(*p) && (*(p) != '<')) (p)++ /************************************************************************ * * * Commodity functions to handle CHARs * * * ************************************************************************/ /** * xmlStrndup: * @cur: the input CHAR * * @len: the len of @cur * * a strndup for array of CHAR's * return values: a new CHAR * or NULL */ CHAR * xmlStrndup(const CHAR *cur, int len) { CHAR *ret = malloc((len + 1) * sizeof(CHAR)); if (ret == NULL) { fprintf(stderr, "malloc of %d byte failed\n", (len + 1) * 
sizeof(CHAR)); return(NULL); } memcpy(ret, cur, len * sizeof(CHAR)); ret[len] = 0; return(ret); } /** * xmlStrdup: * @cur: the input CHAR * * * a strdup for array of CHAR's * return values: a new CHAR * or NULL */ CHAR * xmlStrdup(const CHAR *cur) { const CHAR *p = cur; while (IS_CHAR(*p)) p++; return(xmlStrndup(cur, p - cur)); } /** * xmlCharStrndup: * @cur: the input char * * @len: the len of @cur * * a strndup for char's to CHAR's * return values: a new CHAR * or NULL */ CHAR * xmlCharStrndup(const char *cur, int len) { int i; CHAR *ret = malloc((len + 1) * sizeof(CHAR)); if (ret == NULL) { fprintf(stderr, "malloc of %d byte failed\n", (len + 1) * sizeof(CHAR)); return(NULL); } for (i = 0;i < len;i++) ret[i] = (CHAR) cur[i]; ret[len] = 0; return(ret); } /** * xmlCharStrdup: * @cur: the input char * * @len: the len of @cur * * a strdup for char's to CHAR's * return values: a new CHAR * or NULL */ CHAR * xmlCharStrdup(const char *cur) { const char *p = cur; while (*p != '\0') p++; return(xmlCharStrndup(cur, p - cur)); } /** * xmlStrcmp: * @str1: the first CHAR * * @str2: the second CHAR * * * a strcmp for CHAR's * return values: the integer result of the comparison */ int xmlStrcmp(const CHAR *str1, const CHAR *str2) { register int tmp; do { tmp = *str1++ - *str2++; if (tmp != 0) return(tmp); } while ((*str1 != 0) && (*str2 != 0)); return (*str1 - *str2); } /** * xmlStrncmp: * @str1: the first CHAR * * @str2: the second CHAR * * @len: the max comparison length * * a strncmp for CHAR's * return values: the integer result of the comparison */ int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) { register int tmp; if (len <= 0) return(0); do { tmp = *str1++ - *str2++; if (tmp != 0) return(tmp); len--; if (len <= 0) return(0); } while ((*str1 != 0) && (*str2 != 0)); return (*str1 - *str2); } /** * xmlStrchr: * @str: the CHAR * array * @val: the CHAR to search * * a strchr for CHAR's * return values: the CHAR * for the first occurence or NULL. 
*/ CHAR * xmlStrchr(const CHAR *str, CHAR val) { while (*str != 0) { if (*str == val) return((CHAR *) str); str++; } return(NULL); } /** * xmlStrlen: * @str: the CHAR * array * * lenght of a CHAR's string * return values: the number of CHAR contained in the ARRAY. */ int xmlStrlen(const CHAR *str) { int len = 0; if (str == NULL) return(0); while (*str != 0) { str++; len++; } return(len); } /** * xmlStrncat: * @first: the original CHAR * array * @add: the CHAR * array added * @len: the length of @add * * a strncat for array of CHAR's * return values: a new CHAR * containing the concatenated string. */ CHAR * xmlStrncat(CHAR *cur, const CHAR *add, int len) { int size; CHAR *ret; if ((add == NULL) || (len == 0)) return(cur); if (cur == NULL) return(xmlStrndup(add, len)); size = xmlStrlen(cur); ret = realloc(cur, (size + len + 1) * sizeof(CHAR)); if (ret == NULL) { fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n", (size + len + 1) * sizeof(CHAR)); return(cur); } memcpy(&ret[size], add, len * sizeof(CHAR)); ret[size + len] = 0; return(ret); } /** * xmlStrcat: * @first: the original CHAR * array * @add: the CHAR * array added * * a strcat for array of CHAR's * return values: a new CHAR * containing the concatenated string. */ CHAR * xmlStrcat(CHAR *cur, const CHAR *add) { const CHAR *p = add; if (add == NULL) return(cur); if (cur == NULL) return(xmlStrdup(add)); while (IS_CHAR(*p)) p++; return(xmlStrncat(cur, add, p - add)); } /************************************************************************ * * * Commodity functions, cleanup needed ? * * * ************************************************************************/ /** * areBlanks: * @ctxt: an XML parser context * @str: a CHAR * * @len: the size of @str * * Is this a sequence of blank chars that one can ignore ? * * TODO: to be corrected accodingly to DTD information if available * return values: 1 if ignorable 0 otherwise. 
*/ static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) { int i; xmlNodePtr lastChild; for (i = 0;i < len;i++) if (!(IS_BLANK(str[i]))) return(0); if (CUR != '<') return(0); lastChild = xmlGetLastChild(ctxt->node); if (lastChild == NULL) { if (ctxt->node->content != NULL) return(0); } else if (xmlNodeIsText(lastChild)) return(0); return(1); } /** * xmlHandleEntity: * @ctxt: an XML parser context * @entity: an XML entity pointer. * * Default handling of defined entities, when should we define a new input * stream ? When do we just handle that as a set of chars ? * TODO: we should call the SAX handler here and have it resolve the issue */ void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { int len; xmlParserInputPtr input; if (entity->content == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n", entity->name); return; } len = xmlStrlen(entity->content); if (len <= 2) goto handle_as_char; /* * Redefine its content as an input stream. */ input = xmlNewEntityInputStream(ctxt, entity); xmlPushInput(ctxt, input); return; handle_as_char: /* * Just handle the content as a set of chars. */ if (ctxt->sax != NULL) ctxt->sax->characters(ctxt, entity->content, 0, len); } /* * Forward definition for recusive behaviour. */ xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt); CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt); CHAR *xmlParseReference(xmlParserCtxtPtr ctxt); /************************************************************************ * * * Extra stuff for namespace support * * Relates to http://www.w3.org/TR/WD-xml-names * * * ************************************************************************/ /** * xmlNamespaceParseNCName: * @ctxt: an XML parser context * * parse an XML namespace name. * * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* * * [NS 4] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | * CombiningChar | Extender * return values: the namespace name or NULL */ CHAR * xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { const CHAR *q; CHAR *ret = NULL; if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL); q = NEXT; while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || (CUR == '.') || (CUR == '-') || (CUR == '_') || (IS_COMBINING(CUR)) || (IS_EXTENDER(CUR))) NEXT; ret = xmlStrndup(q, CUR_PTR - q); return(ret); } /** * xmlNamespaceParseQName: * @ctxt: an XML parser context * @prefix: a CHAR ** * * parse an XML qualified name * * [NS 5] QName ::= (Prefix ':')? LocalPart * * [NS 6] Prefix ::= NCName * * [NS 7] LocalPart ::= NCName * return values: the function returns the local part, and prefix is updated * to get the Prefix if any. */ CHAR * xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) { CHAR *ret = NULL; *prefix = NULL; ret = xmlNamespaceParseNCName(ctxt); if (CUR == ':') { *prefix = ret; NEXT; ret = xmlNamespaceParseNCName(ctxt); } return(ret); } /** * xmlNamespaceParseNSDef: * @ctxt: an XML parser context * * parse a namespace prefix declaration * * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral * * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? * return values: the namespace name */ CHAR * xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) { CHAR *name = NULL; if ((CUR == 'x') && (NXT(1) == 'm') && (NXT(2) == 'l') && (NXT(3) == 'n') && (NXT(4) == 's')) { SKIP(5); if (CUR == ':') { NEXT; name = xmlNamespaceParseNCName(ctxt); } } return(name); } /** * xmlParseQuotedString: * @ctxt: an XML parser context * * [OLD] Parse and return a string between quotes or doublequotes * return values: the string parser or NULL. 
*/ CHAR * xmlParseQuotedString(xmlParserCtxtPtr ctxt) { CHAR *ret = NULL; const CHAR *q; if (CUR == '"') { NEXT; q = CUR_PTR; while (IS_CHAR(CUR) && (CUR != '"')) NEXT; if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\''){ NEXT; q = CUR_PTR; while (IS_CHAR(CUR) && (CUR != '\'')) NEXT; if (CUR != '\'') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } return(ret); } /** * xmlParseNamespace: * @ctxt: an XML parser context * * [OLD] xmlParseNamespace: parse specific PI '')) { /* * We can have "ns" or "prefix" attributes * Old encoding as 'href' or 'AS' attributes is still supported */ if ((CUR == 'n') && (NXT(1) == 's')) { garbage = 0; SKIP(2); SKIP_BLANKS; if (CUR != '=') continue; NEXT; SKIP_BLANKS; href = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((CUR == 'h') && (NXT(1) == 'r') && (NXT(2) == 'e') && (NXT(3) == 'f')) { garbage = 0; SKIP(4); SKIP_BLANKS; if (CUR != '=') continue; NEXT; SKIP_BLANKS; href = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((CUR == 'p') && (NXT(1) == 'r') && (NXT(2) == 'e') && (NXT(3) == 'f') && (NXT(4) == 'i') && (NXT(5) == 'x')) { garbage = 0; SKIP(6); SKIP_BLANKS; if (CUR != '=') continue; NEXT; SKIP_BLANKS; prefix = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((CUR == 'A') && (NXT(1) == 'S')) { garbage = 0; SKIP(2); SKIP_BLANKS; if (CUR != '=') continue; NEXT; SKIP_BLANKS; prefix = xmlParseQuotedString(ctxt); SKIP_BLANKS; } else if ((CUR == '?') && (NXT(1) == '>')) { garbage = 0; CUR_PTR ++; } else { /* * Found garbage when parsing the namespace */ if (!garbage) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n"); NEXT; } } MOVETO_ENDTAG(CUR_PTR); NEXT; /* * Register the DTD. 
*/ if (href != NULL) xmlNewGlobalNs(ctxt->doc, href, prefix); if (prefix != NULL) free(prefix); if (href != NULL) free(href); } /************************************************************************ * * * The parser itself * * Relates to http://www.w3.org/TR/REC-xml * * * ************************************************************************/ /** * xmlParseName: * @ctxt: an XML parser context * * parse an XML name. * * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | * CombiningChar | Extender * * [5] Name ::= (Letter | '_' | ':') (NameChar)* * * [6] Names ::= Name (S Name)* * return values: the Name parsed or NULL */ CHAR * xmlParseName(xmlParserCtxtPtr ctxt) { const CHAR *q; CHAR *ret = NULL; if (!IS_LETTER(CUR) && (CUR != '_') && (CUR != ':')) return(NULL); q = NEXT; while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || (CUR == '.') || (CUR == '-') || (CUR == '_') || (CUR == ':') || (IS_COMBINING(CUR)) || (IS_EXTENDER(CUR))) NEXT; ret = xmlStrndup(q, CUR_PTR - q); return(ret); } /** * xmlParseNmtoken: * @ctxt: an XML parser context * * parse an XML Nmtoken. * * [7] Nmtoken ::= (NameChar)+ * * [8] Nmtokens ::= Nmtoken (S Nmtoken)* * return values: the Nmtoken parsed or NULL */ CHAR * xmlParseNmtoken(xmlParserCtxtPtr ctxt) { const CHAR *q; CHAR *ret = NULL; q = NEXT; while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || (CUR == '.') || (CUR == '-') || (CUR == '_') || (CUR == ':') || (IS_COMBINING(CUR)) || (IS_EXTENDER(CUR))) NEXT; ret = xmlStrndup(q, CUR_PTR - q); return(ret); } /** * xmlParseEntityValue: * @ctxt: an XML parser context * * parse a value for ENTITY decl. 
* * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | * "'" ([^%&'] | PEReference | Reference)* "'" * return values: the EntityValue parsed or NULL */ CHAR * xmlParseEntityValue(xmlParserCtxtPtr ctxt) { CHAR *ret = NULL, *cur; const CHAR *q; if (CUR == '"') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '"')) { if (CUR == '%') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParsePEReference(ctxt); ret = xmlStrcat(ret, cur); q = CUR_PTR; } else if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); if (cur != NULL) { CHAR buf[2]; buf[0] = '&'; buf[1] = 0; ret = xmlStrncat(ret, buf, 1); ret = xmlStrcat(ret, cur); buf[0] = ';'; buf[1] = 0; ret = xmlStrncat(ret, buf, 1); } q = CUR_PTR; } else NEXT; } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished EntityValue\n"); } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '\'')) { if (CUR == '%') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParsePEReference(ctxt); ret = xmlStrcat(ret, cur); q = CUR_PTR; } else if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); if (cur != NULL) { CHAR buf[2]; buf[0] = '&'; buf[1] = 0; ret = xmlStrncat(ret, buf, 1); ret = xmlStrcat(ret, cur); buf[0] = ';'; buf[1] = 0; ret = xmlStrncat(ret, buf, 1); } q = CUR_PTR; } else NEXT; } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished EntityValue\n"); } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n"); } return(ret); } /** * xmlParseAttValue: * @ctxt: an XML parser context * * parse a value for an attribute * * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | * "'" ([^<&'] | Reference)* "'" * return values: the AttValue 
parsed or NULL. */ CHAR * xmlParseAttValue(xmlParserCtxtPtr ctxt) { CHAR *ret = NULL, *cur; const CHAR *q; if (CUR == '"') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '"')) { if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); if (cur != NULL) { /* * Special case for '&', we don't want to * resolve it here since it will break later * when searching entities in the string. */ if ((cur[0] == '&') && (cur[1] == 0)) { CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 }; ret = xmlStrncat(ret, buf, 5); } else ret = xmlStrcat(ret, cur); free(cur); } q = CUR_PTR; } else NEXT; /* * Pop out finished entity references. */ while ((CUR == 0) && (ctxt->inputNr > 1)) { if (CUR_PTR != q) ret = xmlStrncat(ret, q, CUR_PTR - q); xmlPopInput(ctxt); q = CUR_PTR; } } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished AttValue\n"); } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '\'')) { if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); if (cur != NULL) { /* * Special case for '&', we don't want to * resolve it here since it will break later * when searching entities in the string. */ if ((cur[0] == '&') && (cur[1] == 0)) { CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 }; ret = xmlStrncat(ret, buf, 5); } else ret = xmlStrcat(ret, cur); free(cur); } q = CUR_PTR; } else NEXT; /* * Pop out finished entity references. 
*/ while ((CUR == 0) && (ctxt->inputNr > 1)) { if (CUR_PTR != q) ret = xmlStrncat(ret, q, CUR_PTR - q); xmlPopInput(ctxt); q = CUR_PTR; } } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished AttValue\n"); } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n"); } return(ret); } /** * xmlParseSystemLiteral: * @ctxt: an XML parser context * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * return values: the SystemLiteral parsed or NULL */ CHAR * xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { const CHAR *q; CHAR *ret = NULL; if (CUR == '"') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '"')) NEXT; if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n"); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '\'')) NEXT; if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n"); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n"); } return(ret); } /** * xmlParsePubidLiteral: * @ctxt: an XML parser context * * parse an XML public literal * return values: the PubidLiteral parsed or NULL. 
*/ CHAR * xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { const CHAR *q; CHAR *ret = NULL; /* * Name ::= (Letter | '_') (NameChar)* */ if (CUR == '"') { NEXT; q = CUR_PTR; while (IS_PUBIDCHAR(CUR)) NEXT; if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n"); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_LETTER(CUR)) && (CUR != '\'')) NEXT; if (!IS_LETTER(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n"); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n"); } return(ret); } /** * xmlParseCharData: * @ctxt: an XML parser context * @cdata: int indicating whether we are within a CDATA section * * parse a CharData section. * if we are within a CDATA section ']]>' marks an end of section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) * return values: */ void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { const CHAR *q; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '<') && (CUR != '&')) { NEXT; if ((cdata) && (CUR == ']') && (NXT(1) == ']') && (NXT(2) == '>')) break; } if (q == CUR_PTR) return; /* * Ok the segment [q CUR_PTR] is to be consumed as chars. 
*/ if (ctxt->sax != NULL) { if (areBlanks(ctxt, q, CUR_PTR - q)) ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q); else ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q); } } /** * xmlParseExternalID: * @ctxt: an XML parser context * @publicID: a CHAR** receiving PubidLiteral * * Parse an External ID * * [75] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * return values: the function returns SystemLiteral and in the second * case publicID receives PubidLiteral */ CHAR * xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) { CHAR *URI = NULL; if ((CUR == 'S') && (NXT(1) == 'Y') && (NXT(2) == 'S') && (NXT(3) == 'T') && (NXT(4) == 'E') && (NXT(5) == 'M')) { SKIP(6); SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); if (URI == NULL) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: SYSTEM, no URI\n"); } else if ((CUR == 'P') && (NXT(1) == 'U') && (NXT(2) == 'B') && (NXT(3) == 'L') && (NXT(4) == 'I') && (NXT(5) == 'C')) { SKIP(6); SKIP_BLANKS; *publicID = xmlParsePubidLiteral(ctxt); if (*publicID == NULL) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: PUBLIC, no Public Identifier\n"); SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); if (URI == NULL) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: PUBLIC, no URI\n"); } return(URI); } /** * xmlParseComment: * @create: should we create a node * * Skip an XML (SGML) comment * This may or may not create a node (depending on the context) * The spec says that "For compatibility, the string "--" (double-hyphen) * must not occur within comments. " * * [15] Comment ::= '' * * TODO: this should call a SAX function which will handle (or not) the * creation of the comment ! 
* return values: */ xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) { xmlNodePtr ret = NULL; const CHAR *q, *start; const CHAR *r; CHAR *val; /* * Check that there is a comment right here. */ if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return(NULL); SKIP(4); start = q = CUR_PTR; NEXT; r = CUR_PTR; NEXT; while (IS_CHAR(CUR) && ((CUR == ':') || (CUR != '>') || (*r != '-') || (*q != '-'))) { if ((*r == '-') && (*q == '-')) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Comment must not contain '--' (double-hyphen)`\n"); NEXT;r++;q++; } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Comment not terminated \n