summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph M. Becker <cmbecker69@gmx.de> 2021-03-08 15:16:55 +0100
committer Christoph M. Becker <cmbecker69@gmx.de> 2021-03-08 15:16:55 +0100
commit 9f826e8ce9a7b51a5b8be48339c9febd749cd946 (patch)
tree 26a87c58e35571710c9a0a21680a75c6b9329c6d
parent 8d585040353a4f153368fadf76b50291bea35442 (diff)
parent 7931956805beba80188f3c0638c285f8fb75dfe1 (diff)
download php-git-9f826e8ce9a7b51a5b8be48339c9febd749cd946.tar.gz
Merge branch 'PHP-8.0'
* PHP-8.0: Fix #51903: simplexml_load_file() doesn't use HTTP headers
-rw-r--r-- ext/libxml/libxml.c 48
-rw-r--r-- ext/libxml/tests/bug51903.phpt 38
2 files changed, 86 insertions, 0 deletions
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 088af30713..0b4d3568f5 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -361,6 +361,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
return(NULL);
}
+	/* Check if there's been an external transport protocol with an encoding information */
+	/*
+	 * Only runs when the caller did not request a specific encoding. Scans the
+	 * string entries of the stream's wrapper data for the first "Content-Type:"
+	 * header and, if it carries a charset parameter that libxml2 recognises,
+	 * uses that encoding for the input buffer allocated below.
+	 */
+	if (enc == XML_CHAR_ENCODING_NONE) {
+		php_stream *s = (php_stream *) context;
+
+		if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
+			zval *header;
+
+			ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
+				const char buf[] = "Content-Type:";
+				if (Z_TYPE_P(header) == IS_STRING &&
+						!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
+					/*
+					 * php_stristr() is handed writable buffers (NOTE(review): it is
+					 * expected to case-fold them in place - confirm against its
+					 * declaration). The needle lives on the stack so it needs no
+					 * cleanup; the haystack is a private copy of the header that is
+					 * also NUL-terminated in place further down.
+					 */
+					char needle[] = "charset=";
+					char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
+					char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof(needle)-1);
+
+					if (encoding) {
+						char *end;
+
+						/* step over "charset=" and an optional opening quote */
+						encoding += sizeof(needle)-1;
+						if (*encoding == '"') {
+							encoding++;
+						}
+						/* the value runs up to the next parameter separator or the end of the header */
+						end = strchr(encoding, ';');
+						if (end == NULL) {
+							end = encoding + strlen(encoding);
+						}
+						end--; /* end == encoding-1 isn't a buffer underrun: "charset=" precedes it in haystack */
+						/* drop trailing blanks and an optional closing quote */
+						while (*end == ' ' || *end == '\t') {
+							end--;
+						}
+						if (*end == '"') {
+							end--;
+						}
+						if (encoding >= end) {
+							/*
+							 * No usable charset value in this header: move on to the
+							 * next one, but release the copy first (it used to leak
+							 * on this path).
+							 */
+							efree(haystack);
+							continue;
+						}
+						*(end+1) = '\0';
+						enc = xmlParseCharEncoding(encoding);
+						/* anything at or below NONE is treated as "no encoding known" */
+						if (enc <= XML_CHAR_ENCODING_NONE) {
+							enc = XML_CHAR_ENCODING_NONE;
+						}
+					}
+					efree(haystack);
+					break; /* found content-type */
+				}
+			} ZEND_HASH_FOREACH_END();
+		}
+	}
+
/* Allocate the Input buffer front-end. */
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {
diff --git a/ext/libxml/tests/bug51903.phpt b/ext/libxml/tests/bug51903.phpt
new file mode 100644
index 0000000000..ebbca2068c
--- /dev/null
+++ b/ext/libxml/tests/bug51903.phpt
@@ -0,0 +1,38 @@
+--TEST--
+Bug #51903 (simplexml_load_file() doesn't use HTTP headers)
+--SKIPIF--
+<?php
+if (!extension_loaded('simplexml')) die('skip simplexml extension not available');
+if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
+http_server_skipif();
+?>
+--FILE--
+<?php
+require "./ext/standard/tests/http/server.inc";
+
+// Canned HTTP responses. Every body holds the raw bytes E4 F6 FC and has no
+// encoding declaration in its XML prolog; the charset is announced only in
+// the Content-Type header, in three different spellings:
+//   1. charset as the last parameter
+//   2. charset followed by another parameter
+//   3. quoted charset followed by blanks and another parameter
+$responses = [
+    "data://text/plain,HTTP/1.1 200 OK\r\n"
+    . "Content-Type: text/xml; charset=ISO-8859-1\r\n\r\n"
+    . "<?xml version=\"1.0\"?>\n"
+    . "<root>\xE4\xF6\xFC</root>\n",
+    "data://text/plain,HTTP/1.1 200 OK\r\n"
+    . "Content-Type: text/xml; charset=ISO-8859-1; foo=bar\r\n\r\n"
+    . "<?xml version=\"1.0\"?>\n"
+    . "<root>\xE4\xF6\xFC</root>\n",
+    "data://text/plain,HTTP/1.1 200 OK\r\n"
+    . "Content-Type: text/xml; charset=\"ISO-8859-1\" ; foo=bar\r\n\r\n"
+    . "<?xml version=\"1.0\"?>\n"
+    . "<root>\xE4\xF6\xFC</root>\n",
+];
+
+$server = http_server($responses);
+$pid = $server['pid'];
+$uri = $server['uri'];
+
+// Issue one request per canned response (presumably the helper server hands
+// them out in order - see server.inc) and print the root element's text.
+foreach ($responses as $ignored) {
+    $sxe = simplexml_load_file($uri);
+    echo "$sxe\n";
+}
+
+http_server_kill($pid);
+?>
+--EXPECT--
+äöü
+äöü
+äöü