From f901bec494ae921f36e1066e4380b92888757f0f Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 3 Mar 2021 19:23:39 +0100 Subject: Fix #51903: simplexml_load_file() doesn't use HTTP headers The `encoding` attribute of the XML declaration is optional; it is good practice to use external encoding information where available if it is missing. Thus, we check for `charset` info of `Content-Type` headers, and see whether the encoding is supported. We cater to trailing parameters and quoted-strings, but not to escaped backslashes and quotes in quoted-strings, since no known character encoding contains these anyway. Co-authored-by: Michael Wallner Closes GH-6747. --- ext/libxml/libxml.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'ext/libxml/libxml.c') diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index c024e16670..e21d6fdbbe 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -409,6 +409,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc) return(NULL); } + /* Check if there's been an external transport protocol with an encoding information */ + if (enc == XML_CHAR_ENCODING_NONE) { + php_stream *s = (php_stream *) context; + + if (Z_TYPE(s->wrapperdata) == IS_ARRAY) { + zval *header; + + ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) { + const char buf[] = "Content-Type:"; + if (Z_TYPE_P(header) == IS_STRING && + !zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) { + char *needle = estrdup("charset="); + char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header)); + char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1); + + if (encoding) { + char *end; + + encoding += sizeof("charset=")-1; + if (*encoding == '"') { + encoding++; + } + end = strchr(encoding, ';'); + if (end == NULL) { + end = encoding + strlen(encoding); + } + end--; /* end == encoding-1 isn't a buffer underrun */ + while (*end == ' ' || *end == '\t') { + end--; + } + if (*end == '"') { + end--; + } + if (encoding >= end) continue; + *(end+1) = '\0'; + enc = xmlParseCharEncoding(encoding); + if (enc <= XML_CHAR_ENCODING_NONE) { + enc = XML_CHAR_ENCODING_NONE; + } + } + efree(haystack); + efree(needle); + break; /* found content-type */ + } + } ZEND_HASH_FOREACH_END(); + } + } + /* Allocate the Input buffer front-end. */ ret = xmlAllocParserInputBuffer(enc); if (ret != NULL) { -- cgit v1.2.1