summaryrefslogtreecommitdiff
path: root/ext/mbstring/tests
diff options
context:
space:
mode:
authorRui Hirokawa <hirokawa@php.net>2013-07-31 08:46:54 +0900
committerRui Hirokawa <hirokawa@php.net>2013-07-31 08:46:54 +0900
commit0a974f14d13832838dcc7bae88b3271b7d035f46 (patch)
tree8e567328faa4ad0821794da78121cd2a2d4ecf3d /ext/mbstring/tests
parent1d7b6970f20a059c501e68927c9fb874bdb226bc (diff)
downloadphp-git-0a974f14d13832838dcc7bae88b3271b7d035f46.tar.gz
MFH: fixed #65045: mb_convert_encoding breaks well-formed character.
Diffstat (limited to 'ext/mbstring/tests')
-rw-r--r--ext/mbstring/tests/bug65045.phpt29
-rw-r--r--ext/mbstring/tests/illformed_utf_sequences.phpt18
2 files changed, 38 insertions, 9 deletions
diff --git a/ext/mbstring/tests/bug65045.phpt b/ext/mbstring/tests/bug65045.phpt
new file mode 100644
index 0000000000..03a090ded2
--- /dev/null
+++ b/ext/mbstring/tests/bug65045.phpt
@@ -0,0 +1,29 @@
+--TEST--
+Bug #65045: mb_convert_encoding breaks well-formed character
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+
+//declare(encoding = 'UTF-8');
+mb_internal_encoding('UTF-8');
+
+$str = "\xF0\xA4\xAD". "\xF0\xA4\xAD\xA2"."\xF0\xA4\xAD\xA2";
+$expected = "\xEF\xBF\xBD"."\xF0\xA4\xAD\xA2"."\xF0\xA4\xAD\xA2";
+
+$str2 = "\xF0\xA4\xAD\xA2"."\xF0\xA4\xAD\xA2"."\xF0\xA4\xAD";
+$expected2 = "\xF0\xA4\xAD\xA2"."\xF0\xA4\xAD\xA2"."\xEF\xBF\xBD";
+
+mb_substitute_character(0xFFFD);
+var_dump(
+ $expected === htmlspecialchars_decode(htmlspecialchars($str, ENT_SUBSTITUTE, 'UTF-8')),
+ $expected2 === htmlspecialchars_decode(htmlspecialchars($str2, ENT_SUBSTITUTE, 'UTF-8')),
+ $expected === mb_convert_encoding($str, 'UTF-8', 'UTF-8'),
+ $expected2 === mb_convert_encoding($str2, 'UTF-8', 'UTF-8')
+);
+
+--EXPECT--
+bool(true)
+bool(true)
+bool(true)
+bool(true) \ No newline at end of file
diff --git a/ext/mbstring/tests/illformed_utf_sequences.phpt b/ext/mbstring/tests/illformed_utf_sequences.phpt
index b5b9d94db8..378b956d68 100644
--- a/ext/mbstring/tests/illformed_utf_sequences.phpt
+++ b/ext/mbstring/tests/illformed_utf_sequences.phpt
@@ -25,28 +25,28 @@ var_dump(chk_enc("\x31\x32\x33", 0));
var_dump(chk_enc("\x41\x42\x43", 0));
var_dump(chk_enc("\xc0\xb1\xc0\xb2\xc0\xb3", 6));
var_dump(chk_enc("\xc1\x81\xc1\x82\xc1\x83", 6));
-var_dump(chk_enc("\xe0\x80\xb1\xe0\x80\xb2\xe0\x80\xb3", 6));
-var_dump(chk_enc("\xe0\x81\x81\xe0\x81\x82\xe0\x81\x83", 6));
-var_dump(chk_enc("\xf0\x80\x80\xb1\xf0\x80\x80\xb2\xf0\x80\x80\xb3", 9));
-var_dump(chk_enc("\xf0\x80\x81\x81\xf0\x80\x81\x82\xf0\x81\x83", 8));
+var_dump(chk_enc("\xe0\x80\xb1\xe0\x80\xb2\xe0\x80\xb3", 9));
+var_dump(chk_enc("\xe0\x81\x81\xe0\x81\x82\xe0\x81\x83", 9));
+var_dump(chk_enc("\xf0\x80\x80\xb1\xf0\x80\x80\xb2\xf0\x80\x80\xb3", 12));
+var_dump(chk_enc("\xf0\x80\x81\x81\xf0\x80\x81\x82\xf0\x81\x83", 11));
var_dump(chk_enc("\xf8\x80\x80\x80\xb1\xf8\x80\x80\x80\xb2\xf8\x80\x80\x80\xb3", 15));
var_dump(chk_enc("\xf8\x80\x80\x81\x81\xf8\x80\x80\x81\x82\xf8\x80\x80\x81\x83", 15));
var_dump(chk_enc("\xfc\x80\x80\x80\x80\xb1\xfc\x80\x80\x80\x80\xb2\xfc\x80\x80\x80\x80\xb3", 18));
var_dump(chk_enc("\xfc\x80\x80\x80\x81\x81\xfc\x80\x80\x80\x81\x82\xfc\x80\x80\x80\x81\x83", 18));
var_dump(chk_enc("\xc2\xa2\xc2\xa3\xc2\xa5", 0));
-var_dump(chk_enc("\xe0\x82\xa2\xe0\x82\xa3\xe0\x82\xa5", 6));
-var_dump(chk_enc("\xf0\x80\x82\xa2\xf0\x80\x82\xa3\xf0\x80\x82\xa5", 9));
+var_dump(chk_enc("\xe0\x82\xa2\xe0\x82\xa3\xe0\x82\xa5", 9));
+var_dump(chk_enc("\xf0\x80\x82\xa2\xf0\x80\x82\xa3\xf0\x80\x82\xa5", 12));
var_dump(chk_enc("\xf8\x80\x80\x82\xa2\xf8\x80\x80\x82\xa3\xf8\x80\x80\x82\xa5", 15));
var_dump(chk_enc("\xfc\x80\x80\x80\x82\xa2\xfc\x80\x80\x80\x82\xa3\xfc\x80\x80\x80\x82\xa5", 18));
var_dump(chk_enc("\xc1\xbf", 2));
var_dump(chk_enc("\xc2\x80", 0));
var_dump(chk_enc("\xdf\xbf", 0));
-var_dump(chk_enc("\xe0\x9f\xff", 2));
+var_dump(chk_enc("\xe0\x9f\xff", 3));
var_dump(chk_enc("\xe0\xa0\x80", 2));
var_dump(chk_enc("\xef\xbf\xbf", 0));
-var_dump(chk_enc("\xf0\x8f\xbf\xbf", 3));
+var_dump(chk_enc("\xf0\x8f\xbf\xbf", 4));
var_dump(chk_enc("\xf0\x90\x80\x80", 0));
var_dump(chk_enc("\xf7\xbf\xbf\xbf", 4));
var_dump(chk_enc("\xf8\x87\xbf\xbf\xbf", 5));
@@ -61,7 +61,7 @@ echo "UTF-8 and surrogates area\n";
$out = '';
$cnt = 0;
for ($i = 0xd7ff; $i <= 0xe000; ++$i) {
- $s = chk_enc(pack('C3', 0xe0 | ($i >> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), 2);
+ $s = chk_enc(pack('C3', 0xe0 | ($i >> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), 3);
if ($s === false) {
$cnt++;
} else {