summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-03-19 16:31:18 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-03-19 18:23:44 -0600
commit 977c1d31fff4d41aa42e40c904fe08b509e3a34e (patch)
tree 6dc75d828245a74564c25954ec7096938adc2572
parent 4b88fb76efce8c436e63b907c9842345d4fa77c7 (diff)
download perl-977c1d31fff4d41aa42e40c904fe08b509e3a34e.tar.gz
Deprecate utf8_to_uvchr() and utf8_to_uvuni()
These functions can read beyond the end of their input strings if presented with malformed UTF-8 input. Perl core code has been converted to use other functions instead of these.
-rw-r--r-- embed.fnc 4
-rw-r--r-- pod/perldelta.pod 32
-rw-r--r-- proto.h 2
-rw-r--r-- utf8.c 12
4 files changed, 36 insertions, 14 deletions
diff --git a/embed.fnc b/embed.fnc
index d5e25fa40e..f9d214d266 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1448,8 +1448,8 @@ Apd |int |bytes_cmp_utf8 |NN const U8 *b|STRLEN blen|NN const U8 *u \
|STRLEN ulen
ApMd |U8* |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len
-Apd |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
-Apd |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
+ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
+ApdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
ApdM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
ApdM |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
Apd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 8b04237747..b1c96c8caf 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -28,7 +28,12 @@ write C<< no feature ':all' >>.
=head1 Security
-There have been no security related fixed between 5.15.8 and 5.15.9.
+=head2 Malformed UTF-8 input could cause attempts to read beyond the end of the buffer
+
+Two new XS-accessible functions, C<utf8_to_uvchr_buf()> and
+C<utf8_to_uvuni_buf()>, are now available to prevent this, and the Perl
+core has been converted to use them.
+See L</Internal Changes>.
=head1 Incompatible Changes
@@ -44,6 +49,11 @@ It has been documented that the current plans include requiring a
literal C<< "{" >> to be escaped: 5.18 will emit deprecation warnings,
and it will be required in 5.20.
+=head2 XS functions C<utf8_to_uvchr()> and C<utf8_to_uvuni()>
+
+Use C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> instead.
+See L</Internal Changes>.
+
=head1 Performance Enhancements
=over 4
@@ -104,11 +114,7 @@ The code has been refactored to reduce duplication.
=item *
-Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
-been added. These are the same as C<utf8_to_uvchr> and
-C<utf8_to_uvuni>, but take an extra parameter that is used to guard
-against reading beyond the end of the input string.
-See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+XXX
=back
@@ -180,8 +186,18 @@ There have been no changes to Perl's support of various platforms between
=head1 Internal Changes
-There has been no change that affects the interface available to C<< XS >>
-between 5.15.8 and 5.15.9.
+=over 4
+
+=item *
+
+Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
+been added. These are the same as C<utf8_to_uvchr> and
+C<utf8_to_uvuni> (which are now deprecated), but take an extra parameter
+that is used to guard against reading beyond the end of the input
+string.
+See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+
+=back
=head1 Selected Bug Fixes
diff --git a/proto.h b/proto.h
index 5bc242447d..d8978c6c9f 100644
--- a/proto.h
+++ b/proto.h
@@ -4564,6 +4564,7 @@ PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len)
assert(s); assert(len)
PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+ __attribute__deprecated__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \
assert(s)
@@ -4575,6 +4576,7 @@ PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLE
assert(s); assert(send)
PERL_CALLCONV UV Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+ __attribute__deprecated__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI \
assert(s)
diff --git a/utf8.c b/utf8.c
index 85bf2f00c8..1d646a88d3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -835,13 +835,15 @@ Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
/*
=for apidoc utf8_to_uvchr
+DEPRECATED!
+
Returns the native code point of the first character in the string C<s>
which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
length, in bytes, of that character.
Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvchr_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead.
If C<s> points to one of the detected malformations, zero is
returned and C<retlen> is set, if possible, to -1.
@@ -901,13 +903,15 @@ Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
/*
=for apidoc utf8_to_uvuni
+DEPRECATED!
+
Returns the Unicode code point of the first character in the string C<s>
which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
length, in bytes, of that character.
Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvuni_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated. Use L</utf8_to_uvuni_buf> instead.
If C<s> points to one of the detected malformations, zero is
returned and C<retlen> is set, if possible, to -1.