summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nicholas Clark <nick@ccl4.org> 2009-10-18 21:55:52 +0100
committer Nicholas Clark <nick@ccl4.org> 2009-10-18 22:10:36 +0100
commit 01ea242be7d23d3bfac7a37c0cdfaec0a8eb7e33 (patch)
tree e6fd4877d2901d78e1548663670f966a414c07e4
parent e0ea5e2d50a479e160d39f481e02abd7c0c9cf91 (diff)
download perl-01ea242be7d23d3bfac7a37c0cdfaec0a8eb7e33.tar.gz
utf16_to_utf8() should croak if the buffer ends without the second surrogate.
-rw-r--r-- ext/XS-APItest/t/utf16_to_utf8.t 7
-rw-r--r-- utf8.c 12
2 files changed, 15 insertions, 4 deletions
diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t
index 83add20a27..3f6f798a32 100644
--- a/ext/XS-APItest/t/utf16_to_utf8.t
+++ b/ext/XS-APItest/t/utf16_to_utf8.t
@@ -54,3 +54,10 @@ like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/,
'Odd byte length panics');
is($got, undef, 'hence eval returns undef');
is($in, "NA", 'and input unchanged');
+
+$in = "\xD8\0\xDC\0";
+$got = eval {utf16_to_utf8($in, 2)};
+like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks');
+(ok(!defined $got, 'hence eval returns undef')) or
+ diag(join ', ', map {ord $_} split //, $got);
+
diff --git a/utf8.c b/utf8.c
index 455078d582..4a728aa09a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -986,11 +986,15 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
continue;
}
if (uv >= 0xd800 && uv < 0xdbff) { /* surrogates */
- UV low = (p[0] << 8) + p[1];
- p += 2;
- if (low < 0xdc00 || low >= 0xdfff)
+ if (p >= pend) {
Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
- uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+ } else {
+ UV low = (p[0] << 8) + p[1];
+ p += 2;
+ if (low < 0xdc00 || low >= 0xdfff)
+ Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+ uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+ }
}
if (uv < 0x10000) {
*d++ = (U8)(( uv >> 12) | 0xe0);