diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2018-05-25 11:33:13 +0200 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-05-25 11:33:13 +0200 |
commit | 9d63f4dec1d180f2a9533d8b7b6b5c445917aee2 (patch) | |
tree | 1a6e21bd4b3f0d2e3a252eb24b6abaf14cb24b08 /ext | |
parent | 3382424903e911fd7e3e193f1e5e141aab9ca666 (diff) | |
download | php-git-9d63f4dec1d180f2a9533d8b7b6b5c445917aee2.tar.gz |
Fixed bug #76319 (mb_strtolower with invalid UTF-8 causes segmentation fault)

While at it, also make sure that mbstring case conversion takes
into account the specified substitution character and substitution
mode.
Diffstat (limited to 'ext')
-rw-r--r-- | ext/mbstring/mbstring.c | 18 | ||||
-rw-r--r-- | ext/mbstring/php_unicode.c | 15 | ||||
-rw-r--r-- | ext/mbstring/php_unicode.h | 4 | ||||
-rw-r--r-- | ext/mbstring/tests/bug76319.phpt | 9 |
4 files changed, 38 insertions, 8 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 5f11a510aa..171430f778 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3252,6 +3252,14 @@ PHP_FUNCTION(mb_convert_encoding) } /* }}} */ +static char *mbstring_convert_case( + int case_mode, const char *str, size_t str_len, size_t *ret_len, + const mbfl_encoding *enc) { + return php_unicode_convert_case( + case_mode, str, str_len, ret_len, enc, + MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); +} + /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding]) Returns a case-folded version of sourcestring */ PHP_FUNCTION(mb_convert_case) @@ -3280,7 +3288,7 @@ PHP_FUNCTION(mb_convert_case) return; } - newstr = php_unicode_convert_case(case_mode, str, str_len, &ret_len, enc); + newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -3312,7 +3320,7 @@ PHP_FUNCTION(mb_strtoupper) RETURN_FALSE; } - newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); + newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -3346,7 +3354,7 @@ PHP_FUNCTION(mb_strtolower) RETURN_FALSE; } - newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc); + newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc); if (newstr) { // TODO: avoid reallocation ??? @@ -5172,7 +5180,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol * offsets otherwise. 
*/ size_t len = 0; - haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc); + haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc); haystack.len = len; if (!haystack.val) { @@ -5183,7 +5191,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol break; } - needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc); + needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc); needle.len = len; if (!needle.val) { diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 0cffec652e..ac452b6a20 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -312,6 +312,14 @@ static int convert_case_filter(int c, void *void_data) struct convert_case_data *data = (struct convert_case_data *) void_data; unsigned out[3]; unsigned len, i; + + /* Handle invalid characters early, as we assign special meaning to + * codepoints above 0xffffff. 
*/ + if (UNEXPECTED(c > 0xffffff)) { + (*data->next_filter->filter_function)(c, data->next_filter); + return 0; + } + switch (data->case_mode) { case PHP_UNICODE_CASE_UPPER_SIMPLE: out[0] = php_unicode_toupper_simple(c, data->no_encoding); @@ -376,7 +384,7 @@ static int convert_case_filter(int c, void *void_data) MBSTRING_API char *php_unicode_convert_case( int case_mode, const char *srcstr, size_t srclen, size_t *ret_len, - const mbfl_encoding *src_encoding) + const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar) { struct convert_case_data data; mbfl_convert_filter *from_wchar, *to_wchar; @@ -403,6 +411,11 @@ MBSTRING_API char *php_unicode_convert_case( return NULL; } + to_wchar->illegal_mode = illegal_mode; + to_wchar->illegal_substchar = illegal_substchar; + from_wchar->illegal_mode = illegal_mode; + from_wchar->illegal_substchar = illegal_substchar; + data.next_filter = from_wchar; data.no_encoding = src_encoding->no_encoding; data.case_mode = case_mode; diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h index 8868176fa1..68dff61da4 100644 --- a/ext/mbstring/php_unicode.h +++ b/ext/mbstring/php_unicode.h @@ -87,8 +87,8 @@ MBSTRING_API int php_unicode_is_prop(unsigned long code, ...); MBSTRING_API int php_unicode_is_prop1(unsigned long code, int prop); MBSTRING_API char *php_unicode_convert_case( - int case_mode, const char *srcstr, size_t srclen, size_t *retlen, - const mbfl_encoding *src_encoding); + int case_mode, const char *srcstr, size_t srclen, size_t *ret_len, + const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar); #define PHP_UNICODE_CASE_UPPER 0 #define PHP_UNICODE_CASE_LOWER 1 diff --git a/ext/mbstring/tests/bug76319.phpt b/ext/mbstring/tests/bug76319.phpt new file mode 100644 index 0000000000..8b706020e8 --- /dev/null +++ b/ext/mbstring/tests/bug76319.phpt @@ -0,0 +1,9 @@ +--TEST-- +Bug #76319: mb_strtolower with invalid UTF-8 causes segmentation fault +--FILE-- +<?php 
+mb_substitute_character(0xFFFD); +var_dump(mb_strtolower("a\xA1", 'UTF-8')); +?> +--EXPECT-- +string(4) "a�" |