From 2e594265b8a71306c9c832f2e8f4a0b0feea9408 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 11 Feb 2019 12:10:40 +0100 Subject: Validate pattern against mbregex encoding Oniguruma does not consistently perform this validation itself (at least on older versions), so make sure we check pattern encoding validity on the PHP side. --- ext/mbstring/php_mbregex.c | 10 +++++++--- ext/mbstring/tests/bug72994.phpt | 2 +- ext/mbstring/tests/bug77370.phpt | 2 +- ext/mbstring/tests/bug77371.phpt | 2 +- ext/mbstring/tests/bug77381.phpt | 8 ++++---- ext/mbstring/tests/mb_ereg_replace_variation1.phpt | 4 ++-- 6 files changed, 16 insertions(+), 12 deletions(-) (limited to 'ext/mbstring') diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 148e96dcff..dbe48b2542 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -445,13 +445,18 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t p OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) { + php_error_docref(NULL, E_WARNING, + "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc)); + return NULL; + } + rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen); if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) { if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { onig_error_code_to_str(err_str, err_code, &err_info); php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str); - retval = NULL; - goto out; + return NULL; } if (rc == MBREX(search_re)) { /* reuse the new rc? see bug #72399 */ @@ -461,7 +466,6 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t p } else { retval = rc; } -out: return retval; } /* }}} */ diff --git a/ext/mbstring/tests/bug72994.phpt b/ext/mbstring/tests/bug72994.phpt index 5cf5b0ecb0..c9d2c9acf2 100644 --- a/ext/mbstring/tests/bug72994.phpt +++ b/ext/mbstring/tests/bug72994.phpt @@ -16,6 +16,6 @@ Notice: Undefined variable: var in %s on line %d Deprecated: mb_ereg_replace(): Non-string patterns will be interpreted as strings in the future. Use an explicit chr() call to preserve the current behavior in %s on line %d -Warning: mb_ereg_replace(): mbregex compile err: invalid code point value in %sbug72994.php on line %d +Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) ===DONE=== diff --git a/ext/mbstring/tests/bug77370.phpt b/ext/mbstring/tests/bug77370.phpt index 73f186bc90..d16fcd724d 100644 --- a/ext/mbstring/tests/bug77370.phpt +++ b/ext/mbstring/tests/bug77370.phpt @@ -7,5 +7,5 @@ Bug #77370 (Buffer overflow on mb regex functions - fetch_token) var_dump(mb_split(" \xfd","")); ?> --EXPECTF-- -Warning: mb_split(): mbregex compile err: invalid code point value in %sbug77370.php on line %d +Warning: mb_split(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) diff --git a/ext/mbstring/tests/bug77371.phpt b/ext/mbstring/tests/bug77371.phpt index 2ab04c04f6..25f5ac9aca 100644 --- a/ext/mbstring/tests/bug77371.phpt +++ b/ext/mbstring/tests/bug77371.phpt @@ -7,5 +7,5 @@ Bug #77371 (heap buffer overflow in mb regex functions - compile_string_node) var_dump(mb_ereg("()0\xfc00000\xfc00000\xfc00000\xfc","")); ?> --EXPECTF-- -Warning: mb_ereg(): mbregex compile err: invalid code point value in %sbug77371.php on line %d +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) diff --git a/ext/mbstring/tests/bug77381.phpt b/ext/mbstring/tests/bug77381.phpt index 3d6dd76a4a..9768cd34de 100644 --- a/ext/mbstring/tests/bug77381.phpt +++ b/ext/mbstring/tests/bug77381.phpt @@ -10,14 +10,14 @@ var_dump(mb_ereg("0000\\"."\xf5","0")); var_dump(mb_ereg("(?i)FFF00000000000000000\xfd","")); ?> --EXPECTF-- -Warning: mb_ereg(): mbregex compile err: invalid code point value in %sbug77381.php on line %d +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) -Warning: mb_ereg(): mbregex compile err: invalid code point value in %sbug77381.php on line %d +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) -Warning: mb_ereg(): mbregex compile err: invalid code point value in %sbug77381.php on line %d +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) -Warning: mb_ereg(): mbregex compile err: invalid code point value in %sbug77381.php on line %d +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) diff --git a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt index e1c6bffa67..4c6e177b3e 100644 --- a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt +++ b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt @@ -118,7 +118,7 @@ string(10) "string_val" Deprecated: mb_ereg_replace(): Non-string patterns will be interpreted as strings in the future. Use an explicit chr() call to preserve the current behavior in %s on line %d -Warning: mb_ereg_replace(): mbregex compile err: invalid code point value in %s on line %d +Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) -- Iteration 5 -- @@ -130,7 +130,7 @@ string(10) "string_val" Deprecated: mb_ereg_replace(): Non-string patterns will be interpreted as strings in the future. Use an explicit chr() call to preserve the current behavior in %s on line %d -Warning: mb_ereg_replace(): mbregex compile err: invalid code point value in %s on line %d +Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) -- Iteration 7 -- -- cgit v1.2.1