diff options
Diffstat (limited to 'gnulib-local/lib/libxml/parserInternals.c')
-rw-r--r-- | gnulib-local/lib/libxml/parserInternals.c | 378 |
1 files changed, 234 insertions, 144 deletions
diff --git a/gnulib-local/lib/libxml/parserInternals.c b/gnulib-local/lib/libxml/parserInternals.c index f4da58d1d..2b8646c21 100644 --- a/gnulib-local/lib/libxml/parserInternals.c +++ b/gnulib-local/lib/libxml/parserInternals.c @@ -55,6 +55,9 @@ #include <libxml/globals.h> #include <libxml/chvalid.h> +#include "buf.h" +#include "enc.h" + /* * Various global defaults for parsing */ @@ -73,15 +76,15 @@ xmlCheckVersion(int version) { xmlInitParser(); if ((myversion / 10000) != (version / 10000)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); - fprintf(stderr, + fprintf(stderr, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); } if ((myversion / 100) < (version / 100)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Warning: program compiled against libxml %d using older %d\n", (version / 100), (myversion / 100)); } @@ -90,7 +93,7 @@ xmlCheckVersion(int version) { /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -225,7 +228,7 @@ xmlIsLetter(int c) { /************************************************************************ * * - * Input handling functions for progressive parsing * + * Input handling functions for progressive parsing * * * ************************************************************************/ @@ -242,7 +245,7 @@ xmlIsLetter(int c) { static void check_buffer(xmlParserInputPtr in) { - if (in->base != in->buf->buffer->content) { + if (in->base != xmlBufContent(in->buf->buffer)) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: base mismatch problem\n"); } @@ -250,17 +253,17 @@ void check_buffer(xmlParserInputPtr in) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur < base problem\n"); } - if (in->cur > in->base + in->buf->buffer->use) { + if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur > base + use problem\n"); } - xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", - (int) in, (int) in->buf->buffer->content, in->cur - in->base, - in->buf->buffer->use, in->buf->buffer->size); + xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", + (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, + xmlBufUse(in->buf->buffer)); } #else -#define CHECK_BUFFER(in) +#define CHECK_BUFFER(in) #endif @@ -269,50 +272,13 @@ void check_buffer(xmlParserInputPtr in) { * @in: an XML parser input * @len: an indicative size for the lookahead * - * This function refresh the input for the parser. It doesn't try to - * preserve pointers to the input buffer, and discard already read data + * This function was internal and is deprecated. * - * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the - * end of this entity + * Returns -1 as this is an error to use it. */ int -xmlParserInputRead(xmlParserInputPtr in, int len) { - int ret; - int used; - int indx; - - if (in == NULL) return(-1); -#ifdef DEBUG_INPUT - xmlGenericError(xmlGenericErrorContext, "Read\n"); -#endif - if (in->buf == NULL) return(-1); - if (in->base == NULL) return(-1); - if (in->cur == NULL) return(-1); - if (in->buf->buffer == NULL) return(-1); - if (in->buf->readcallback == NULL) return(-1); - - CHECK_BUFFER(in); - - used = in->cur - in->buf->buffer->content; - ret = xmlBufferShrink(in->buf->buffer, used); - if (ret > 0) { - in->cur -= ret; - in->consumed += ret; - } - ret = xmlParserInputBufferRead(in->buf, len); - if (in->base != in->buf->buffer->content) { - /* - * the buffer has been reallocated - */ - indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; - } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; - - CHECK_BUFFER(in); - - return(ret); +xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { + return(-1); } /** @@ -323,15 +289,16 @@ xmlParserInputRead(xmlParserInputPtr in, int len) { * This function increase the input for the parser. It tries to * preserve pointers to the input buffer, and keep already read data * - * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the + * Returns the amount of char read, or -1 in case of error, 0 indicate the * end of this entity */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { - int ret; - int indx; + size_t ret; + size_t indx; + const xmlChar *content; - if (in == NULL) return(-1); + if ((in == NULL) || (len < 0)) return(-1); #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Grow\n"); #endif @@ -343,15 +310,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { CHECK_BUFFER(in); indx = in->cur - in->base; - if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { + if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { CHECK_BUFFER(in); return(0); } - if (in->buf->readcallback != NULL) + if (in->buf->readcallback != NULL) { ret = xmlParserInputBufferGrow(in->buf, len); - else + } else return(0); /* @@ -360,15 +327,17 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { * pointer arithmetic. Insure will raise it as a bug but in * that specific case, that's not ! */ - if (in->base != in->buf->buffer->content) { + + content = xmlBufContent(in->buf->buffer); + if (in->base != content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; + in->base = content; + in->cur = &content[indx]; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); CHECK_BUFFER(in); @@ -383,9 +352,10 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { */ void xmlParserInputShrink(xmlParserInputPtr in) { - int used; - int ret; - int indx; + size_t used; + size_t ret; + size_t indx; + const xmlChar *content; #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Shrink\n"); @@ -398,42 +368,43 @@ xmlParserInputShrink(xmlParserInputPtr in) { CHECK_BUFFER(in); - used = in->cur - in->buf->buffer->content; + used = in->cur - xmlBufContent(in->buf->buffer); /* * Do not shrink on large buffers whose only a tiny fraction * was consumed */ if (used > INPUT_CHUNK) { - ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); + ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); } CHECK_BUFFER(in); - if (in->buf->buffer->use > INPUT_CHUNK) { + if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); - if (in->base != in->buf->buffer->content) { + content = xmlBufContent(in->buf->buffer); + if (in->base != content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; + in->base = content; + in->cur = &content[indx]; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); CHECK_BUFFER(in); } /************************************************************************ * * - * UTF8 character input and related functions * + * UTF8 character input and related functions * * * ************************************************************************/ @@ -484,7 +455,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -494,20 +465,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt) if (c & 0x80) { if (c == 0xC0) goto encoding_error; - if (cur[1] == 0) + if (cur[1] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { unsigned int val; - if (cur[2] == 0) + if (cur[2] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) + if (cur[3] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; @@ -628,7 +605,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -638,21 +615,26 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { c = *cur; if (c & 0x80) { - if (c == 0xC0) + if (((c & 0x40) == 0) || (c == 0xC0)) goto encoding_error; - if (cur[1] == 0) + if (cur[1] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { - - if (cur[2] == 0) + if (cur[2] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) + if (cur[3] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; @@ -662,27 +644,40 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; + if (val < 0x10000) + goto encoding_error; } else { /* 3-byte code */ *len = 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; + if (val < 0x800) + goto encoding_error; } } else { /* 2-byte code */ *len = 2; val = (cur[0] & 0x1f) << 6; val |= cur[1] & 0x3f; + if (val < 0x80) + goto encoding_error; } if (!IS_CHAR(val)) { xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, "Char 0x%X out of allowed range\n", val); - } + } return(val); } else { /* 1-byte code */ *len = 1; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((*ctxt->input->cur == 0) && + (ctxt->input->end > ctxt->input->cur)) { + xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x0 out of allowed range\n", 0); + } if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { ctxt->nbChars++; @@ -735,7 +730,7 @@ encoding_error: "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); } - ctxt->charset = XML_CHAR_ENCODING_8859_1; + ctxt->charset = XML_CHAR_ENCODING_8859_1; *len = 1; return((int) *ctxt->input->cur); } @@ -764,7 +759,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -857,7 +852,7 @@ encoding_error: * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -871,7 +866,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ if (val >= 0x80) { xmlChar *savedout = out; @@ -899,7 +894,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -921,6 +916,12 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { * * ************************************************************************/ +static int +xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, + xmlCharEncodingHandlerPtr handler, int len); +static int +xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler, int len); /** * xmlSwitchEncoding: * @ctxt: the parser context @@ -935,6 +936,8 @@ int xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) { xmlCharEncodingHandlerPtr handler; + int len = -1; + int ret; if (ctxt == NULL) return(-1); switch (enc) { @@ -978,9 +981,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) (ctxt->input->cur[2] == 0xBF)) { ctxt->input->cur += 3; } - break ; - default: - break; + len = 90; + break; + case XML_CHAR_ENCODING_UCS2: + len = 90; + break; + case XML_CHAR_ENCODING_UCS4BE: + case XML_CHAR_ENCODING_UCS4LE: + case XML_CHAR_ENCODING_UCS4_2143: + case XML_CHAR_ENCODING_UCS4_3412: + len = 180; + break; + case XML_CHAR_ENCODING_EBCDIC: + case XML_CHAR_ENCODING_8859_1: + case XML_CHAR_ENCODING_8859_2: + case XML_CHAR_ENCODING_8859_3: + case XML_CHAR_ENCODING_8859_4: + case XML_CHAR_ENCODING_8859_5: + case XML_CHAR_ENCODING_8859_6: + case XML_CHAR_ENCODING_8859_7: + case XML_CHAR_ENCODING_8859_8: + case XML_CHAR_ENCODING_8859_9: + case XML_CHAR_ENCODING_ASCII: + case XML_CHAR_ENCODING_2022_JP: + case XML_CHAR_ENCODING_SHIFT_JIS: + case XML_CHAR_ENCODING_EUC_JP: + len = 45; + break; } handler = xmlGetCharEncodingHandler(enc); if (handler == NULL) { @@ -1071,7 +1098,15 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) if (handler == NULL) return(-1); ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(xmlSwitchToEncoding(ctxt, handler)); + ret = xmlSwitchToEncodingInt(ctxt, handler, len); + if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { + /* + * on encoding conversion errors, stop the parser + */ + xmlStopParser(ctxt); + ctxt->errNo = XML_I18N_CONV_FAILED; + } + return(ret); } /** @@ -1079,15 +1114,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) * @ctxt: the parser context * @input: the input stream * @handler: the encoding handler + * @len: the number of bytes to convert for the first line or -1 * * change the input functions when discovering the character encoding * of a given entity. * * Returns 0 in case of success, -1 otherwise */ -int -xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, - xmlCharEncodingHandlerPtr handler) +static int +xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler, int len) { int nbchars; @@ -1128,12 +1164,12 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, /* * Is there already some content down the pipe to convert ? */ - if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { + if (xmlBufIsEmpty(input->buf->buffer) == 0) { int processed; unsigned int use; /* - * Specific handling of the Byte Order Mark for + * Specific handling of the Byte Order Mark for * UTF-16 */ if ((handler->name != NULL) && @@ -1164,19 +1200,17 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * Move it as the raw buffer and create a new input buffer */ processed = input->cur - input->base; - xmlBufferShrink(input->buf->buffer, processed); + xmlBufShrink(input->buf->buffer, processed); input->buf->raw = input->buf->buffer; - input->buf->buffer = xmlBufferCreate(); + input->buf->buffer = xmlBufCreate(); input->buf->rawconsumed = processed; - use = input->buf->raw->use; + use = xmlBufUse(input->buf->raw); if (ctxt->html) { /* * convert as much as possible of the buffer */ - nbchars = xmlCharEncInFunc(input->buf->encoder, - input->buf->buffer, - input->buf->raw); + nbchars = xmlCharEncInput(input->buf, 1); } else { /* * convert just enough to get @@ -1184,9 +1218,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * parsed with the autodetected encoding * into the parser reading buffer. */ - nbchars = xmlCharEncFirstLine(input->buf->encoder, - input->buf->buffer, - input->buf->raw); + nbchars = xmlCharEncFirstLineInput(input->buf, len); } if (nbchars < 0) { xmlErrInternal(ctxt, @@ -1194,10 +1226,8 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, NULL); return (-1); } - input->buf->rawconsumed += use - input->buf->raw->use; - input->base = input->cur = input->buf->buffer->content; - input->end = &input->base[input->buf->buffer->use]; - + input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); + xmlBufResetInput(input->buf->buffer, input); } return (0); } else if (input->length == 0) { @@ -1212,8 +1242,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, } /** - * xmlSwitchToEncoding: + * xmlSwitchInputEncoding: * @ctxt: the parser context + * @input: the input stream * @handler: the encoding handler * * change the input functions when discovering the character encoding @@ -1222,13 +1253,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * Returns 0 in case of success, -1 otherwise */ int -xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) -{ +xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler) { + return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); +} + +/** + * xmlSwitchToEncodingInt: + * @ctxt: the parser context + * @handler: the encoding handler + * @len: the length to convert or -1 + * + * change the input functions when discovering the character encoding + * of a given entity, and convert only @len bytes of the output, this + * is needed on auto detect to allows any declared encoding later to + * convert the actual content after the xmlDecl + * + * Returns 0 in case of success, -1 otherwise + */ +static int +xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, + xmlCharEncodingHandlerPtr handler, int len) { int ret = 0; if (handler != NULL) { if (ctxt->input != NULL) { - ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler); + ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); } else { xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", NULL); @@ -1238,11 +1288,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) * The parsing is now done in UTF8 natively */ ctxt->charset = XML_CHAR_ENCODING_UTF8; - } else + } else return(-1); return(ret); } +/** + * xmlSwitchToEncoding: + * @ctxt: the parser context + * @handler: the encoding handler + * + * change the input functions when discovering the character encoding + * of a given entity. + * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) +{ + return (xmlSwitchToEncodingInt(ctxt, handler, -1)); +} + /************************************************************************ * * * Commodity functions to handle entities processing * @@ -1265,7 +1331,7 @@ xmlFreeInputStream(xmlParserInputPtr input) { if (input->version != NULL) xmlFree((char *) input->version); if ((input->free != NULL) && (input->base != NULL)) input->free((xmlChar *) input->base); - if (input->buf != NULL) + if (input->buf != NULL) xmlFreeParserInputBuffer(input->buf); xmlFree(input); } @@ -1274,13 +1340,13 @@ xmlFreeInputStream(xmlParserInputPtr input) { * xmlNewInputStream: * @ctxt: an XML parser context * - * Create a new input stream structure + * Create a new input stream structure. + * * Returns the new input stream or NULL */ xmlParserInputPtr xmlNewInputStream(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; - static int id = 0; input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); if (input == NULL) { @@ -1291,11 +1357,15 @@ xmlNewInputStream(xmlParserCtxtPtr ctxt) { input->line = 1; input->col = 1; input->standalone = -1; + /* - * we don't care about thread reentrancy unicity for a single - * parser context (and hence thread) is sufficient. + * If the context is NULL the id cannot be initialized, but that + * should not happen while parsing which is the situation where + * the id is actually needed. */ - input->id = id++; + if (ctxt != NULL) + input->id = ctxt->input_id++; + return(input); } @@ -1324,9 +1394,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, } inputStream->filename = NULL; inputStream->buf = input; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + xmlBufResetInput(inputStream->buf->buffer, inputStream); + if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } @@ -1387,8 +1456,11 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { if (input == NULL) { return(NULL); } - input->filename = (char *) entity->URI; + if (entity->URI != NULL) + input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); input->base = entity->content; + if (entity->length == 0) + entity->length = xmlStrlen(entity->content); input->cur = entity->content; input->length = entity->length; input->end = &entity->content[input->length]; @@ -1467,7 +1539,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { inputStream = xmlCheckHTTPInput(ctxt, inputStream); if (inputStream == NULL) return(NULL); - + if (inputStream->filename == NULL) URI = xmlStrdup((xmlChar *) filename); else @@ -1478,9 +1550,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { if (URI != NULL) xmlFree((char *) URI); inputStream->directory = directory; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + xmlBufResetInput(inputStream->buf->buffer, inputStream); if ((ctxt->directory == NULL) && (directory != NULL)) ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); return(inputStream); @@ -1519,6 +1589,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) xmlErrMemory(NULL, "cannot initialize parser context\n"); return(-1); } + xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); + if (ctxt->sax == NULL) ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (ctxt->sax == NULL) { @@ -1630,12 +1702,20 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->nsWellFormed = 1; ctxt->valid = 1; ctxt->loadsubset = xmlLoadExtDtdDefaultValue; + if (ctxt->loadsubset) { + ctxt->options |= XML_PARSE_DTDLOAD; + } ctxt->validate = xmlDoValidityCheckingDefaultValue; ctxt->pedantic = xmlPedanticParserDefaultValue; + if (ctxt->pedantic) { + ctxt->options |= XML_PARSE_PEDANTIC; + } ctxt->linenumbers = xmlLineNumbersDefaultValue; ctxt->keepBlanks = xmlKeepBlanksDefaultValue; - if (ctxt->keepBlanks == 0) + if (ctxt->keepBlanks == 0) { ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; + ctxt->options |= XML_PARSE_NOBLANKS; + } ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; ctxt->vctxt.userData = ctxt; @@ -1647,8 +1727,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) else ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->vctxt.nodeMax = 0; + ctxt->options |= XML_PARSE_DTDVALID; } ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; + if (ctxt->replaceEntities) { + ctxt->options |= XML_PARSE_NOENT; + } ctxt->record_info = 0; ctxt->nbChars = 0; ctxt->checkIndex = 0; @@ -1657,6 +1741,10 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->depth = 0; ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; + ctxt->nbentities = 0; + ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; + ctxt->input_id = 1; xmlInitNodeInfoSeq(&ctxt->node_seq); return(0); } @@ -1682,6 +1770,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); + if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); if (ctxt->version != NULL) xmlFree((char *) ctxt->version); if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); @@ -1701,7 +1790,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); - if (ctxt->attsDefault != NULL) + if (ctxt->attsDefault != NULL) xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); if (ctxt->attsSpecial != NULL) xmlHashFree(ctxt->attsSpecial, NULL); @@ -1801,7 +1890,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt) * @node: an XML node within the tree * * Find the parser node info struct for a given node - * + * * Returns an xmlParserNodeInfo block pointer or NULL */ const xmlParserNodeInfo * @@ -1859,7 +1948,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) * @seq: a node info sequence pointer * @node: an XML node pointer * - * + * * xmlParserFindNodeInfoIndex : Find the index that the info record for * the given node is or should be at in a sorted sequence * @@ -1916,7 +2005,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) info->node); - if ((pos < ctxt->node_seq.length) && + if ((pos < ctxt->node_seq.length) && (ctxt->node_seq.buffer != NULL) && (ctxt->node_seq.buffer[pos].node == info->node)) { ctxt->node_seq.buffer[pos] = *info; @@ -1924,7 +2013,8 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, /* Otherwise, we need to add new node to buffer */ else { - if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { + if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || + (ctxt->node_seq.buffer == NULL)) { xmlParserNodeInfo *tmp_buffer; unsigned int byte_size; @@ -1969,7 +2059,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, ************************************************************************/ /** * xmlPedanticParserDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling pedantic warnings. * @@ -1986,7 +2076,7 @@ xmlPedanticParserDefault(int val) { /** * xmlLineNumbersDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling line numbers in elements * contents. This may break on old application and is turned off by default. @@ -2004,7 +2094,7 @@ xmlLineNumbersDefault(int val) { /** * xmlSubstituteEntitiesDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default entity support. * Initially the parser always keep entity references instead of substituting @@ -2026,7 +2116,7 @@ xmlSubstituteEntitiesDefault(int val) { /** * xmlKeepBlanksDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default blanks text nodes support. * The 1.x version of the parser used an heuristic to try to detect @@ -2037,7 +2127,7 @@ xmlSubstituteEntitiesDefault(int val) { * ignorableWhitespace() are only generated when running the parser in * validating mode and when the current element doesn't allow CDATA or * mixed content. - * This function is provided as a way to force the standard behavior + * This function is provided as a way to force the standard behavior * on 1.X libs and to switch back to the old mode for compatibility when * running 1.X client code on 2.X . Upgrade of 1.X code should be done * by using xmlIsBlankNode() commodity function to detect the "empty" @@ -2053,7 +2143,7 @@ xmlKeepBlanksDefault(int val) { int old = xmlKeepBlanksDefaultValue; xmlKeepBlanksDefaultValue = val; - xmlIndentTreeOutput = !val; + if (!val) xmlIndentTreeOutput = 1; return(old); } |