summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2018-05-25 11:33:13 +0200
committer: Nikita Popov <nikita.ppv@gmail.com> 2018-05-25 11:33:13 +0200
commit: 9d63f4dec1d180f2a9533d8b7b6b5c445917aee2 (patch)
tree: 1a6e21bd4b3f0d2e3a252eb24b6abaf14cb24b08 /ext
parent: 3382424903e911fd7e3e193f1e5e141aab9ca666 (diff)
download: php-git-9d63f4dec1d180f2a9533d8b7b6b5c445917aee2.tar.gz
Fixed bug #76319
While at it, also make sure that mbstring case conversion takes into account the specified substitution character and substitution mode.
Diffstat (limited to 'ext')
-rw-r--r-- ext/mbstring/mbstring.c 18
-rw-r--r-- ext/mbstring/php_unicode.c 15
-rw-r--r-- ext/mbstring/php_unicode.h 4
-rw-r--r-- ext/mbstring/tests/bug76319.phpt 9
4 files changed, 38 insertions, 8 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 5f11a510aa..171430f778 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -3252,6 +3252,14 @@ PHP_FUNCTION(mb_convert_encoding)
}
/* }}} */
+static char *mbstring_convert_case(
+ int case_mode, const char *str, size_t str_len, size_t *ret_len,
+ const mbfl_encoding *enc) {
+ return php_unicode_convert_case(
+ case_mode, str, str_len, ret_len, enc,
+ MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
+}
+
/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
@@ -3280,7 +3288,7 @@ PHP_FUNCTION(mb_convert_case)
return;
}
- newstr = php_unicode_convert_case(case_mode, str, str_len, &ret_len, enc);
+ newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -3312,7 +3320,7 @@ PHP_FUNCTION(mb_strtoupper)
RETURN_FALSE;
}
- newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
+ newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -3346,7 +3354,7 @@ PHP_FUNCTION(mb_strtolower)
RETURN_FALSE;
}
- newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
+ newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -5172,7 +5180,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
* offsets otherwise. */
size_t len = 0;
- haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
+ haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
haystack.len = len;
if (!haystack.val) {
@@ -5183,7 +5191,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
break;
}
- needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
+ needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
needle.len = len;
if (!needle.val) {
diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c
index 0cffec652e..ac452b6a20 100644
--- a/ext/mbstring/php_unicode.c
+++ b/ext/mbstring/php_unicode.c
@@ -312,6 +312,14 @@ static int convert_case_filter(int c, void *void_data)
struct convert_case_data *data = (struct convert_case_data *) void_data;
unsigned out[3];
unsigned len, i;
+
+ /* Handle invalid characters early, as we assign special meaning to
+ * codepoints above 0xffffff. */
+ if (UNEXPECTED(c > 0xffffff)) {
+ (*data->next_filter->filter_function)(c, data->next_filter);
+ return 0;
+ }
+
switch (data->case_mode) {
case PHP_UNICODE_CASE_UPPER_SIMPLE:
out[0] = php_unicode_toupper_simple(c, data->no_encoding);
@@ -376,7 +384,7 @@ static int convert_case_filter(int c, void *void_data)
MBSTRING_API char *php_unicode_convert_case(
int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
- const mbfl_encoding *src_encoding)
+ const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar)
{
struct convert_case_data data;
mbfl_convert_filter *from_wchar, *to_wchar;
@@ -403,6 +411,11 @@ MBSTRING_API char *php_unicode_convert_case(
return NULL;
}
+ to_wchar->illegal_mode = illegal_mode;
+ to_wchar->illegal_substchar = illegal_substchar;
+ from_wchar->illegal_mode = illegal_mode;
+ from_wchar->illegal_substchar = illegal_substchar;
+
data.next_filter = from_wchar;
data.no_encoding = src_encoding->no_encoding;
data.case_mode = case_mode;
diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h
index 8868176fa1..68dff61da4 100644
--- a/ext/mbstring/php_unicode.h
+++ b/ext/mbstring/php_unicode.h
@@ -87,8 +87,8 @@ MBSTRING_API int php_unicode_is_prop(unsigned long code, ...);
MBSTRING_API int php_unicode_is_prop1(unsigned long code, int prop);
MBSTRING_API char *php_unicode_convert_case(
- int case_mode, const char *srcstr, size_t srclen, size_t *retlen,
- const mbfl_encoding *src_encoding);
+ int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
+ const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar);
#define PHP_UNICODE_CASE_UPPER 0
#define PHP_UNICODE_CASE_LOWER 1
diff --git a/ext/mbstring/tests/bug76319.phpt b/ext/mbstring/tests/bug76319.phpt
new file mode 100644
index 0000000000..8b706020e8
--- /dev/null
+++ b/ext/mbstring/tests/bug76319.phpt
@@ -0,0 +1,9 @@
+--TEST--
+Bug #76319: mb_strtolower with invalid UTF-8 causes segmentation fault
+--FILE--
+<?php
+mb_substitute_character(0xFFFD);
+var_dump(mb_strtolower("a\xA1", 'UTF-8'));
+?>
+--EXPECT--
+string(4) "a�"