summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--parserInternals.c172
-rw-r--r--result/HTML/758606.html2
-rw-r--r--result/HTML/758606.html.err16
-rw-r--r--result/HTML/758606.html.sax10
-rw-r--r--result/HTML/758606_2.html2
-rw-r--r--result/HTML/758606_2.html.err16
-rw-r--r--result/HTML/758606_2.html.sax17
-rw-r--r--test/HTML/758606.html1
-rw-r--r--test/HTML/758606_2.html1
9 files changed, 154 insertions, 83 deletions
diff --git a/parserInternals.c b/parserInternals.c
index 8c796788..bfc778ac 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -55,6 +55,10 @@
#include <libxml/globals.h>
#include <libxml/chvalid.h>
+#define CUR(ctxt) ctxt->input->cur
+#define END(ctxt) ctxt->input->end
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
+
#include "buf.h"
#include "enc.h"
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
(ctxt->input == NULL))
return;
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
- if ((*ctxt->input->cur == 0) &&
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
- (ctxt->instate != XML_PARSER_COMMENT)) {
- /*
- * If we are at the end of the current entity and
- * the context allows it, we pop consumed entities
- * automatically.
- * the auto closing should be blocked in other cases
- */
+ if (!(VALID_CTXT(ctxt))) {
+ xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
+ ctxt->errNo = XML_ERR_INTERNAL_ERROR;
+ xmlStopParser(ctxt);
+ return;
+ }
+
+ if ((*ctxt->input->cur == 0) &&
+ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+ if ((ctxt->instate != XML_PARSER_COMMENT))
xmlPopInput(ctxt);
- } else {
- const unsigned char *cur;
- unsigned char c;
+ return;
+ }
- /*
- * 2.11 End-of-Line Handling
- * the literal two-character sequence "#xD#xA" or a standalone
- * literal #xD, an XML processor must pass to the application
- * the single character #xA.
- */
- if (*(ctxt->input->cur) == '\n') {
- ctxt->input->line++; ctxt->input->col = 1;
- } else
- ctxt->input->col++;
+ if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
+ const unsigned char *cur;
+ unsigned char c;
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- cur = ctxt->input->cur;
+ /*
+ * 2.11 End-of-Line Handling
+ * the literal two-character sequence "#xD#xA" or a standalone
+ * literal #xD, an XML processor must pass to the application
+ * the single character #xA.
+ */
+ if (*(ctxt->input->cur) == '\n') {
+ ctxt->input->line++; ctxt->input->col = 1;
+ } else
+ ctxt->input->col++;
- c = *cur;
- if (c & 0x80) {
- if (c == 0xC0)
- goto encoding_error;
- if (cur[1] == 0) {
+ /*
+ * We are supposed to handle UTF8, check it's valid
+ * From rfc2044: encoding of the Unicode values on UTF-8:
+ *
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
+ * 0000 0000-0000 007F 0xxxxxxx
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ *
+ * Check for the 0x110000 limit too
+ */
+ cur = ctxt->input->cur;
+
+ c = *cur;
+ if (c & 0x80) {
+ if (c == 0xC0)
+ goto encoding_error;
+ if (cur[1] == 0) {
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
+ if ((cur[1] & 0xc0) != 0x80)
+ goto encoding_error;
+ if ((c & 0xe0) == 0xe0) {
+ unsigned int val;
+
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur;
}
- if ((cur[1] & 0xc0) != 0x80)
+ if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
- unsigned int val;
-
- if (cur[2] == 0) {
+ if ((c & 0xf0) == 0xf0) {
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur;
}
- if ((cur[2] & 0xc0) != 0x80)
+ if (((c & 0xf8) != 0xf0) ||
+ ((cur[3] & 0xc0) != 0x80))
goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0) {
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- cur = ctxt->input->cur;
- }
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- ctxt->input->cur += 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- ctxt->input->cur += 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- if (((val > 0xd7ff) && (val < 0xe000)) ||
- ((val > 0xfffd) && (val < 0x10000)) ||
- (val >= 0x110000)) {
- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n",
- val);
- }
- } else
- /* 2-byte code */
- ctxt->input->cur += 2;
+ /* 4-byte code */
+ ctxt->input->cur += 4;
+ val = (cur[0] & 0x7) << 18;
+ val |= (cur[1] & 0x3f) << 12;
+ val |= (cur[2] & 0x3f) << 6;
+ val |= cur[3] & 0x3f;
+ } else {
+ /* 3-byte code */
+ ctxt->input->cur += 3;
+ val = (cur[0] & 0xf) << 12;
+ val |= (cur[1] & 0x3f) << 6;
+ val |= cur[2] & 0x3f;
+ }
+ if (((val > 0xd7ff) && (val < 0xe000)) ||
+ ((val > 0xfffd) && (val < 0x10000)) ||
+ (val >= 0x110000)) {
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x%X out of allowed range\n",
+ val);
+ }
} else
- /* 1-byte code */
- ctxt->input->cur++;
+ /* 2-byte code */
+ ctxt->input->cur += 2;
+ } else
+ /* 1-byte code */
+ ctxt->input->cur++;
- ctxt->nbChars++;
- if (*ctxt->input->cur == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- }
+ ctxt->nbChars++;
+ if (*ctxt->input->cur == 0)
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
} else {
/*
* Assume it's a fixed length encoding (1) with
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
new file mode 100644
index 00000000..4f21f628
--- /dev/null
+++ b/result/HTML/758606.html
@@ -0,0 +1,2 @@
+<!DOCTYPE >
+
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
new file mode 100644
index 00000000..060433a8
--- /dev/null
+++ b/result/HTML/758606.html.err
@@ -0,0 +1,16 @@
+./test/HTML/758606.html:1: HTML parser error : Comment not terminated
+<!--
+<!-- <!doctype
+ ^
+./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
+<!-- <!doctype
+ ^
+./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
+<!-- <!doctype
+ ^
+./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
+
+^
+./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
+
+^
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax
new file mode 100644
index 00000000..d44a5cf4
--- /dev/null
+++ b/result/HTML/758606.html.sax
@@ -0,0 +1,10 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.error: Comment not terminated
+<!--
+SAX.error: Invalid char in CDATA 0xC
+SAX.error: Misplaced DOCTYPE declaration
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
+SAX.error: DOCTYPE improperly terminated
+SAX.internalSubset((null), , )
+SAX.endDocument()
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
new file mode 100644
index 00000000..273816a0
--- /dev/null
+++ b/result/HTML/758606_2.html
@@ -0,0 +1,2 @@
+<!DOCTYPE >
+<html><body><p>&#145;</p></body></html>
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
new file mode 100644
index 00000000..4be039f4
--- /dev/null
+++ b/result/HTML/758606_2.html.err
@@ -0,0 +1,16 @@
+./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated
+<!--
+<!-- ‘<!dOctYPE
+ ^
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
+<!-- ‘<!dOctYPE
+ ^
+./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
+‘<!dOctYPE
+ ^
+./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
+
+^
+./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
+
+^
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax
new file mode 100644
index 00000000..80ff3d77
--- /dev/null
+++ b/result/HTML/758606_2.html.sax
@@ -0,0 +1,17 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.error: Comment not terminated
+<!--
+SAX.error: Invalid char in CDATA 0xC
+SAX.startElement(html)
+SAX.startElement(body)
+SAX.startElement(p)
+SAX.characters(&#145;, 2)
+SAX.error: Misplaced DOCTYPE declaration
+SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
+SAX.error: DOCTYPE improperly terminated
+SAX.internalSubset((null), , )
+SAX.endElement(p)
+SAX.endElement(body)
+SAX.endElement(html)
+SAX.endDocument()
diff --git a/test/HTML/758606.html b/test/HTML/758606.html
new file mode 100644
index 00000000..01a013c5
--- /dev/null
+++ b/test/HTML/758606.html
@@ -0,0 +1 @@
+<!-- <!doctype
diff --git a/test/HTML/758606_2.html b/test/HTML/758606_2.html
new file mode 100644
index 00000000..daa185b8
--- /dev/null
+++ b/test/HTML/758606_2.html
@@ -0,0 +1 @@
+<!-- ‘<!dOctYPE