summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2019-02-11 12:10:40 +0100
committer: Stanislav Malyshev <stas@php.net> 2019-03-27 23:19:46 -0700
commit: 40fe50daf6069f73d0be43d8fcd7d5953053c10d (patch)
tree: 28f1c99fdf52a0acd94f9396b3d5a6cd9cbdd0b1
parent: 58c25bf679125a2da354db58ddc6b0cf6d10ee00 (diff)
download: php-git-40fe50daf6069f73d0be43d8fcd7d5953053c10d.tar.gz
Validate pattern against mbregex encoding
Oniguruma does not consistently perform this validation itself (at least on older versions), so make sure we check pattern encoding validity on the PHP side.
-rw-r--r-- ext/mbstring/php_mbregex.c 10
-rw-r--r-- ext/mbstring/tests/bug72994.phpt 4
-rw-r--r-- ext/mbstring/tests/bug77370.phpt 8
-rw-r--r-- ext/mbstring/tests/bug77371.phpt 5
-rw-r--r-- ext/mbstring/tests/bug77381.phpt 11
-rw-r--r-- ext/mbstring/tests/mb_ereg_replace_variation1.phpt 8
6 files changed, 31 insertions, 15 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c
index 5b40661b09..ab4fe38df2 100644
--- a/ext/mbstring/php_mbregex.c
+++ b/ext/mbstring/php_mbregex.c
@@ -451,13 +451,18 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
OnigErrorInfo err_info;
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
+ if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
+ php_error_docref(NULL, E_WARNING,
+ "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
+ return NULL;
+ }
+
rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
onig_error_code_to_str(err_str, err_code, &err_info);
php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
- retval = NULL;
- goto out;
+ return NULL;
}
if (rc == MBREX(search_re)) {
/* reuse the new rc? see bug #72399 */
@@ -467,7 +472,6 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
} else {
retval = rc;
}
-out:
return retval;
}
/* }}} */
diff --git a/ext/mbstring/tests/bug72994.phpt b/ext/mbstring/tests/bug72994.phpt
index 1d37bae7be..d001fac679 100644
--- a/ext/mbstring/tests/bug72994.phpt
+++ b/ext/mbstring/tests/bug72994.phpt
@@ -13,5 +13,7 @@ var_dump($var1);
===DONE===
--EXPECTF--
Notice: Undefined variable: var in %s on line %d
-string(0) ""
+
+Warning: mbereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
===DONE===
diff --git a/ext/mbstring/tests/bug77370.phpt b/ext/mbstring/tests/bug77370.phpt
index c4d25582fe..d16fcd724d 100644
--- a/ext/mbstring/tests/bug77370.phpt
+++ b/ext/mbstring/tests/bug77370.phpt
@@ -6,8 +6,6 @@ Bug #77370 (Buffer overflow on mb regex functions - fetch_token)
<?php
var_dump(mb_split(" \xfd",""));
?>
---EXPECT--
-array(1) {
- [0]=>
- string(0) ""
-}
+--EXPECTF--
+Warning: mb_split(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
diff --git a/ext/mbstring/tests/bug77371.phpt b/ext/mbstring/tests/bug77371.phpt
index 33e5fc115c..25f5ac9aca 100644
--- a/ext/mbstring/tests/bug77371.phpt
+++ b/ext/mbstring/tests/bug77371.phpt
@@ -6,5 +6,6 @@ Bug #77371 (heap buffer overflow in mb regex functions - compile_string_node)
<?php
var_dump(mb_ereg("()0\xfc00000\xfc00000\xfc00000\xfc",""));
?>
---EXPECT--
-bool(false)
\ No newline at end of file
+--EXPECTF--
+Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
diff --git a/ext/mbstring/tests/bug77381.phpt b/ext/mbstring/tests/bug77381.phpt
index cb83759fc0..9768cd34de 100644
--- a/ext/mbstring/tests/bug77381.phpt
+++ b/ext/mbstring/tests/bug77381.phpt
@@ -9,8 +9,15 @@ var_dump(mb_ereg("(?i)000000000000000000000\xf0",""));
var_dump(mb_ereg("0000\\"."\xf5","0"));
var_dump(mb_ereg("(?i)FFF00000000000000000\xfd",""));
?>
---EXPECT--
-int(1)
+--EXPECTF--
+Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
bool(false)
+
+Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
bool(false)
+
+Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
+
+Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
bool(false)
diff --git a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt
index 059d2c9dd8..626e9a20e6 100644
--- a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt
+++ b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt
@@ -109,13 +109,17 @@ string(10) "string_val"
string(10) "string_val"
-- Iteration 4 --
-string(10) "string_val"
+
+Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
-- Iteration 5 --
string(10) "string_val"
-- Iteration 6 --
-string(10) "string_val"
+
+Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
+bool(false)
-- Iteration 7 --
string(10) "string_val"