summary refs log tree commit diff
path: root/gnulib-local/lib/libxml/parserInternals.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnulib-local/lib/libxml/parserInternals.c')
-rw-r--r-- gnulib-local/lib/libxml/parserInternals.c 378
1 files changed, 234 insertions, 144 deletions
diff --git a/gnulib-local/lib/libxml/parserInternals.c b/gnulib-local/lib/libxml/parserInternals.c
index f4da58d1d..2b8646c21 100644
--- a/gnulib-local/lib/libxml/parserInternals.c
+++ b/gnulib-local/lib/libxml/parserInternals.c
@@ -55,6 +55,9 @@
#include <libxml/globals.h>
#include <libxml/chvalid.h>
+#include "buf.h"
+#include "enc.h"
+
/*
* Various global defaults for parsing
*/
@@ -73,15 +76,15 @@ xmlCheckVersion(int version) {
xmlInitParser();
if ((myversion / 10000) != (version / 10000)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
- fprintf(stderr,
+ fprintf(stderr,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
}
if ((myversion / 100) < (version / 100)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Warning: program compiled against libxml %d using older %d\n",
(version / 100), (myversion / 100));
}
@@ -90,7 +93,7 @@ xmlCheckVersion(int version) {
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
@@ -225,7 +228,7 @@ xmlIsLetter(int c) {
/************************************************************************
* *
- * Input handling functions for progressive parsing *
+ * Input handling functions for progressive parsing *
* *
************************************************************************/
@@ -242,7 +245,7 @@ xmlIsLetter(int c) {
static
void check_buffer(xmlParserInputPtr in) {
- if (in->base != in->buf->buffer->content) {
+ if (in->base != xmlBufContent(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: base mismatch problem\n");
}
@@ -250,17 +253,17 @@ void check_buffer(xmlParserInputPtr in) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur < base problem\n");
}
- if (in->cur > in->base + in->buf->buffer->use) {
+ if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur > base + use problem\n");
}
- xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
- (int) in, (int) in->buf->buffer->content, in->cur - in->base,
- in->buf->buffer->use, in->buf->buffer->size);
+ xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
+ (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
+ xmlBufUse(in->buf->buffer));
}
#else
-#define CHECK_BUFFER(in)
+#define CHECK_BUFFER(in)
#endif
@@ -269,50 +272,13 @@ void check_buffer(xmlParserInputPtr in) {
* @in: an XML parser input
* @len: an indicative size for the lookahead
*
- * This function refresh the input for the parser. It doesn't try to
- * preserve pointers to the input buffer, and discard already read data
+ * This function was internal and is deprecated.
*
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
- * end of this entity
+ * Returns -1 as this is an error to use it.
*/
int
-xmlParserInputRead(xmlParserInputPtr in, int len) {
- int ret;
- int used;
- int indx;
-
- if (in == NULL) return(-1);
-#ifdef DEBUG_INPUT
- xmlGenericError(xmlGenericErrorContext, "Read\n");
-#endif
- if (in->buf == NULL) return(-1);
- if (in->base == NULL) return(-1);
- if (in->cur == NULL) return(-1);
- if (in->buf->buffer == NULL) return(-1);
- if (in->buf->readcallback == NULL) return(-1);
-
- CHECK_BUFFER(in);
-
- used = in->cur - in->buf->buffer->content;
- ret = xmlBufferShrink(in->buf->buffer, used);
- if (ret > 0) {
- in->cur -= ret;
- in->consumed += ret;
- }
- ret = xmlParserInputBufferRead(in->buf, len);
- if (in->base != in->buf->buffer->content) {
- /*
- * the buffer has been reallocated
- */
- indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
- }
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
-
- CHECK_BUFFER(in);
-
- return(ret);
+xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
+ return(-1);
}
/**
@@ -323,15 +289,16 @@ xmlParserInputRead(xmlParserInputPtr in, int len) {
* This function increase the input for the parser. It tries to
* preserve pointers to the input buffer, and keep already read data
*
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
+ * Returns the amount of char read, or -1 in case of error, 0 indicate the
* end of this entity
*/
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
- int ret;
- int indx;
+ size_t ret;
+ size_t indx;
+ const xmlChar *content;
- if (in == NULL) return(-1);
+ if ((in == NULL) || (len < 0)) return(-1);
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
@@ -343,15 +310,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
CHECK_BUFFER(in);
indx = in->cur - in->base;
- if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
+ if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
CHECK_BUFFER(in);
return(0);
}
- if (in->buf->readcallback != NULL)
+ if (in->buf->readcallback != NULL) {
ret = xmlParserInputBufferGrow(in->buf, len);
- else
+ } else
return(0);
/*
@@ -360,15 +327,17 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
* pointer arithmetic. Insure will raise it as a bug but in
* that specific case, that's not !
*/
- if (in->base != in->buf->buffer->content) {
+
+ content = xmlBufContent(in->buf->buffer);
+ if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
+ in->base = content;
+ in->cur = &content[indx];
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
@@ -383,9 +352,10 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
*/
void
xmlParserInputShrink(xmlParserInputPtr in) {
- int used;
- int ret;
- int indx;
+ size_t used;
+ size_t ret;
+ size_t indx;
+ const xmlChar *content;
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@@ -398,42 +368,43 @@ xmlParserInputShrink(xmlParserInputPtr in) {
CHECK_BUFFER(in);
- used = in->cur - in->buf->buffer->content;
+ used = in->cur - xmlBufContent(in->buf->buffer);
/*
* Do not shrink on large buffers whose only a tiny fraction
* was consumed
*/
if (used > INPUT_CHUNK) {
- ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
+ ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) {
in->cur -= ret;
in->consumed += ret;
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
}
CHECK_BUFFER(in);
- if (in->buf->buffer->use > INPUT_CHUNK) {
+ if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
return;
}
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
- if (in->base != in->buf->buffer->content) {
+ content = xmlBufContent(in->buf->buffer);
+ if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
+ in->base = content;
+ in->cur = &content[indx];
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
}
/************************************************************************
* *
- * UTF8 character input and related functions *
+ * UTF8 character input and related functions *
* *
************************************************************************/
@@ -484,7 +455,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -494,20 +465,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
if (c & 0x80) {
if (c == 0xC0)
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
unsigned int val;
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
@@ -628,7 +605,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -638,21 +615,26 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
c = *cur;
if (c & 0x80) {
- if (c == 0xC0)
+ if (((c & 0x40) == 0) || (c == 0xC0))
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
-
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
@@ -662,27 +644,40 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
val |= (cur[1] & 0x3f) << 12;
val |= (cur[2] & 0x3f) << 6;
val |= cur[3] & 0x3f;
+ if (val < 0x10000)
+ goto encoding_error;
} else {
/* 3-byte code */
*len = 3;
val = (cur[0] & 0xf) << 12;
val |= (cur[1] & 0x3f) << 6;
val |= cur[2] & 0x3f;
+ if (val < 0x800)
+ goto encoding_error;
}
} else {
/* 2-byte code */
*len = 2;
val = (cur[0] & 0x1f) << 6;
val |= cur[1] & 0x3f;
+ if (val < 0x80)
+ goto encoding_error;
}
if (!IS_CHAR(val)) {
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
"Char 0x%X out of allowed range\n", val);
- }
+ }
return(val);
} else {
/* 1-byte code */
*len = 1;
+ if (*ctxt->input->cur == 0)
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ if ((*ctxt->input->cur == 0) &&
+ (ctxt->input->end > ctxt->input->cur)) {
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x0 out of allowed range\n", 0);
+ }
if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) {
ctxt->nbChars++;
@@ -735,7 +730,7 @@ encoding_error:
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
}
- ctxt->charset = XML_CHAR_ENCODING_8859_1;
+ ctxt->charset = XML_CHAR_ENCODING_8859_1;
*len = 1;
return((int) *ctxt->input->cur);
}
@@ -764,7 +759,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -857,7 +852,7 @@ encoding_error:
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
@@ -871,7 +866,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*/
if (val >= 0x80) {
xmlChar *savedout = out;
@@ -899,7 +894,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
@@ -921,6 +916,12 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
* *
************************************************************************/
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len);
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len);
/**
* xmlSwitchEncoding:
* @ctxt: the parser context
@@ -935,6 +936,8 @@ int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
xmlCharEncodingHandlerPtr handler;
+ int len = -1;
+ int ret;
if (ctxt == NULL) return(-1);
switch (enc) {
@@ -978,9 +981,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
(ctxt->input->cur[2] == 0xBF)) {
ctxt->input->cur += 3;
}
- break ;
- default:
- break;
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS2:
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS4BE:
+ case XML_CHAR_ENCODING_UCS4LE:
+ case XML_CHAR_ENCODING_UCS4_2143:
+ case XML_CHAR_ENCODING_UCS4_3412:
+ len = 180;
+ break;
+ case XML_CHAR_ENCODING_EBCDIC:
+ case XML_CHAR_ENCODING_8859_1:
+ case XML_CHAR_ENCODING_8859_2:
+ case XML_CHAR_ENCODING_8859_3:
+ case XML_CHAR_ENCODING_8859_4:
+ case XML_CHAR_ENCODING_8859_5:
+ case XML_CHAR_ENCODING_8859_6:
+ case XML_CHAR_ENCODING_8859_7:
+ case XML_CHAR_ENCODING_8859_8:
+ case XML_CHAR_ENCODING_8859_9:
+ case XML_CHAR_ENCODING_ASCII:
+ case XML_CHAR_ENCODING_2022_JP:
+ case XML_CHAR_ENCODING_SHIFT_JIS:
+ case XML_CHAR_ENCODING_EUC_JP:
+ len = 45;
+ break;
}
handler = xmlGetCharEncodingHandler(enc);
if (handler == NULL) {
@@ -1071,7 +1098,15 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
if (handler == NULL)
return(-1);
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(xmlSwitchToEncoding(ctxt, handler));
+ ret = xmlSwitchToEncodingInt(ctxt, handler, len);
+ if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
+ /*
+ * on encoding conversion errors, stop the parser
+ */
+ xmlStopParser(ctxt);
+ ctxt->errNo = XML_I18N_CONV_FAILED;
+ }
+ return(ret);
}
/**
@@ -1079,15 +1114,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
+ * @len: the number of bytes to convert for the first line or -1
*
* change the input functions when discovering the character encoding
* of a given entity.
*
* Returns 0 in case of success, -1 otherwise
*/
-int
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
- xmlCharEncodingHandlerPtr handler)
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len)
{
int nbchars;
@@ -1128,12 +1164,12 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
/*
* Is there already some content down the pipe to convert ?
*/
- if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
+ if (xmlBufIsEmpty(input->buf->buffer) == 0) {
int processed;
unsigned int use;
/*
- * Specific handling of the Byte Order Mark for
+ * Specific handling of the Byte Order Mark for
* UTF-16
*/
if ((handler->name != NULL) &&
@@ -1164,19 +1200,17 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
- xmlBufferShrink(input->buf->buffer, processed);
+ xmlBufShrink(input->buf->buffer, processed);
input->buf->raw = input->buf->buffer;
- input->buf->buffer = xmlBufferCreate();
+ input->buf->buffer = xmlBufCreate();
input->buf->rawconsumed = processed;
- use = input->buf->raw->use;
+ use = xmlBufUse(input->buf->raw);
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
- nbchars = xmlCharEncInFunc(input->buf->encoder,
- input->buf->buffer,
- input->buf->raw);
+ nbchars = xmlCharEncInput(input->buf, 1);
} else {
/*
* convert just enough to get
@@ -1184,9 +1218,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
- nbchars = xmlCharEncFirstLine(input->buf->encoder,
- input->buf->buffer,
- input->buf->raw);
+ nbchars = xmlCharEncFirstLineInput(input->buf, len);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
@@ -1194,10 +1226,8 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
NULL);
return (-1);
}
- input->buf->rawconsumed += use - input->buf->raw->use;
- input->base = input->cur = input->buf->buffer->content;
- input->end = &input->base[input->buf->buffer->use];
-
+ input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
+ xmlBufResetInput(input->buf->buffer, input);
}
return (0);
} else if (input->length == 0) {
@@ -1212,8 +1242,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
}
/**
- * xmlSwitchToEncoding:
+ * xmlSwitchInputEncoding:
* @ctxt: the parser context
+ * @input: the input stream
* @handler: the encoding handler
*
* change the input functions when discovering the character encoding
@@ -1222,13 +1253,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Returns 0 in case of success, -1 otherwise
*/
int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
-{
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler) {
+ return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
+}
+
+/**
+ * xmlSwitchToEncodingInt:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ * @len: the length to convert or -1
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity, and convert only @len bytes of the output, this
+ * is needed on auto detect to allows any declared encoding later to
+ * convert the actual content after the xmlDecl
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len) {
int ret = 0;
if (handler != NULL) {
if (ctxt->input != NULL) {
- ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
+ ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
} else {
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
@@ -1238,11 +1288,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
* The parsing is now done in UTF8 natively
*/
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- } else
+ } else
return(-1);
return(ret);
}
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+{
+ return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+}
+
/************************************************************************
* *
* Commodity functions to handle entities processing *
@@ -1265,7 +1331,7 @@ xmlFreeInputStream(xmlParserInputPtr input) {
if (input->version != NULL) xmlFree((char *) input->version);
if ((input->free != NULL) && (input->base != NULL))
input->free((xmlChar *) input->base);
- if (input->buf != NULL)
+ if (input->buf != NULL)
xmlFreeParserInputBuffer(input->buf);
xmlFree(input);
}
@@ -1274,13 +1340,13 @@ xmlFreeInputStream(xmlParserInputPtr input) {
* xmlNewInputStream:
* @ctxt: an XML parser context
*
- * Create a new input stream structure
+ * Create a new input stream structure.
+ *
* Returns the new input stream or NULL
*/
xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
xmlParserInputPtr input;
- static int id = 0;
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
if (input == NULL) {
@@ -1291,11 +1357,15 @@ xmlNewInputStream(xmlParserCtxtPtr ctxt) {
input->line = 1;
input->col = 1;
input->standalone = -1;
+
/*
- * we don't care about thread reentrancy unicity for a single
- * parser context (and hence thread) is sufficient.
+ * If the context is NULL the id cannot be initialized, but that
+ * should not happen while parsing which is the situation where
+ * the id is actually needed.
*/
- input->id = id++;
+ if (ctxt != NULL)
+ input->id = ctxt->input_id++;
+
return(input);
}
@@ -1324,9 +1394,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
}
inputStream->filename = NULL;
inputStream->buf = input;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
+
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
@@ -1387,8 +1456,11 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
if (input == NULL) {
return(NULL);
}
- input->filename = (char *) entity->URI;
+ if (entity->URI != NULL)
+ input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
input->base = entity->content;
+ if (entity->length == 0)
+ entity->length = xmlStrlen(entity->content);
input->cur = entity->content;
input->length = entity->length;
input->end = &entity->content[input->length];
@@ -1467,7 +1539,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
if (inputStream == NULL)
return(NULL);
-
+
if (inputStream->filename == NULL)
URI = xmlStrdup((xmlChar *) filename);
else
@@ -1478,9 +1550,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
if (URI != NULL) xmlFree((char *) URI);
inputStream->directory = directory;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
if ((ctxt->directory == NULL) && (directory != NULL))
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
return(inputStream);
@@ -1519,6 +1589,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
xmlErrMemory(NULL, "cannot initialize parser context\n");
return(-1);
}
+ xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
+
if (ctxt->sax == NULL)
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
if (ctxt->sax == NULL) {
@@ -1630,12 +1702,20 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->nsWellFormed = 1;
ctxt->valid = 1;
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
+ if (ctxt->loadsubset) {
+ ctxt->options |= XML_PARSE_DTDLOAD;
+ }
ctxt->validate = xmlDoValidityCheckingDefaultValue;
ctxt->pedantic = xmlPedanticParserDefaultValue;
+ if (ctxt->pedantic) {
+ ctxt->options |= XML_PARSE_PEDANTIC;
+ }
ctxt->linenumbers = xmlLineNumbersDefaultValue;
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
- if (ctxt->keepBlanks == 0)
+ if (ctxt->keepBlanks == 0) {
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
+ ctxt->options |= XML_PARSE_NOBLANKS;
+ }
ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
ctxt->vctxt.userData = ctxt;
@@ -1647,8 +1727,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
else
ctxt->vctxt.warning = xmlParserValidityWarning;
ctxt->vctxt.nodeMax = 0;
+ ctxt->options |= XML_PARSE_DTDVALID;
}
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
+ if (ctxt->replaceEntities) {
+ ctxt->options |= XML_PARSE_NOENT;
+ }
ctxt->record_info = 0;
ctxt->nbChars = 0;
ctxt->checkIndex = 0;
@@ -1657,6 +1741,10 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->depth = 0;
ctxt->charset = XML_CHAR_ENCODING_UTF8;
ctxt->catalogs = NULL;
+ ctxt->nbentities = 0;
+ ctxt->sizeentities = 0;
+ ctxt->sizeentcopy = 0;
+ ctxt->input_id = 1;
xmlInitNodeInfoSeq(&ctxt->node_seq);
return(0);
}
@@ -1682,6 +1770,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
+ if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
@@ -1701,7 +1790,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
- if (ctxt->attsDefault != NULL)
+ if (ctxt->attsDefault != NULL)
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
if (ctxt->attsSpecial != NULL)
xmlHashFree(ctxt->attsSpecial, NULL);
@@ -1801,7 +1890,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
* @node: an XML node within the tree
*
* Find the parser node info struct for a given node
- *
+ *
* Returns an xmlParserNodeInfo block pointer or NULL
*/
const xmlParserNodeInfo *
@@ -1859,7 +1948,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
* @seq: a node info sequence pointer
* @node: an XML node pointer
*
- *
+ *
* xmlParserFindNodeInfoIndex : Find the index that the info record for
* the given node is or should be at in a sorted sequence
*
@@ -1916,7 +2005,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
info->node);
- if ((pos < ctxt->node_seq.length) &&
+ if ((pos < ctxt->node_seq.length) &&
(ctxt->node_seq.buffer != NULL) &&
(ctxt->node_seq.buffer[pos].node == info->node)) {
ctxt->node_seq.buffer[pos] = *info;
@@ -1924,7 +2013,8 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
/* Otherwise, we need to add new node to buffer */
else {
- if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
+ if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
+ (ctxt->node_seq.buffer == NULL)) {
xmlParserNodeInfo *tmp_buffer;
unsigned int byte_size;
@@ -1969,7 +2059,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
************************************************************************/
/**
* xmlPedanticParserDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling pedantic warnings.
*
@@ -1986,7 +2076,7 @@ xmlPedanticParserDefault(int val) {
/**
* xmlLineNumbersDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling line numbers in elements
* contents. This may break on old application and is turned off by default.
@@ -2004,7 +2094,7 @@ xmlLineNumbersDefault(int val) {
/**
* xmlSubstituteEntitiesDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default entity support.
* Initially the parser always keep entity references instead of substituting
@@ -2026,7 +2116,7 @@ xmlSubstituteEntitiesDefault(int val) {
/**
* xmlKeepBlanksDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default blanks text nodes support.
* The 1.x version of the parser used an heuristic to try to detect
@@ -2037,7 +2127,7 @@ xmlSubstituteEntitiesDefault(int val) {
* ignorableWhitespace() are only generated when running the parser in
* validating mode and when the current element doesn't allow CDATA or
* mixed content.
- * This function is provided as a way to force the standard behavior
+ * This function is provided as a way to force the standard behavior
* on 1.X libs and to switch back to the old mode for compatibility when
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
* by using xmlIsBlankNode() commodity function to detect the "empty"
@@ -2053,7 +2143,7 @@ xmlKeepBlanksDefault(int val) {
int old = xmlKeepBlanksDefaultValue;
xmlKeepBlanksDefaultValue = val;
- xmlIndentTreeOutput = !val;
+ if (!val) xmlIndentTreeOutput = 1;
return(old);
}