summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: Christoph M. Becker <cmbecker69@gmx.de>, 2020-10-23 11:06:30 +0200
committer: Christoph M. Becker <cmbecker69@gmx.de>, 2020-10-26 13:08:05 +0100
commit: 6d2bc7253018baa57487f622e706b8962c16d148 (patch)
tree: a90e90e3628f0b020b50fce0caec36148fcb0abe /ext
parent: 824cbc27819f3f6dd6de24e182dbb7de6273b2ab (diff)
download: php-git-6d2bc7253018baa57487f622e706b8962c16d148.tar.gz
Fix #80268: loadHTML() truncates at NUL bytes
libxml2 has no particular issues parsing HTML strings with NUL bytes; these just cause truncation of the current text content, but parsing continues generally. Since `::loadHTMLFile()` already supports NUL bytes, `::loadHTML()` should as well. Note that this is different from XML, which does not allow any NUL bytes. Closes GH-6368.
Diffstat (limited to 'ext')
-rw-r--r-- ext/dom/document.c | 1
-rw-r--r-- ext/dom/tests/bug80268.phpt | 24
2 files changed, 24 insertions, 1 deletions
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 22bb90d5d8..0e15e7a110 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -2024,7 +2024,6 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}
ctxt = htmlCreateFileParserCtxt(source, NULL);
} else {
- source_len = xmlStrlen((xmlChar *) source);
if (ZEND_SIZE_T_INT_OVFL(source_len)) {
php_error_docref(NULL, E_WARNING, "Input string is too long");
RETURN_FALSE;
diff --git a/ext/dom/tests/bug80268.phpt b/ext/dom/tests/bug80268.phpt
new file mode 100644
index 0000000000..0fe50b85e8
--- /dev/null
+++ b/ext/dom/tests/bug80268.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Bug #80268 (loadHTML() truncates at NUL bytes)
+--SKIPIF--
+<?php require_once('skipif.inc'); ?>
+--FILE--
+<?php
+$doc = new DOMDocument;
+$doc->loadHTML("<p>foo\0bar</p>");
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+
+file_put_contents(__DIR__ . '/80268.html', "<p>foo\0bar</p>");
+$doc = new DOMDocument;
+$doc->loadHTMLFile(__DIR__ . '/80268.html');
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+?>
+--CLEAN--
+<?php
+unlink(__DIR__ . '/80268.html');
+?>
+--EXPECT--
+bool(true)
+bool(true)