Deprecate utf8_to_uvchr() and utf8_to_uvuni()

These functions can read beyond the end of their input strings if presented with malformed UTF-8 input. Perl core code has been converted to use other functions instead of these.
author: Karl Williamson <public@khwilliamson.com> 2012-03-19 16:31:18 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-03-19 18:23:44 -0600
commit: 977c1d31fff4d41aa42e40c904fe08b509e3a34e (patch)
tree: 6dc75d828245a74564c25954ec7096938adc2572
parent: 4b88fb76efce8c436e63b907c9842345d4fa77c7 (diff)
download: perl-977c1d31fff4d41aa42e40c904fe08b509e3a34e.tar.gz
4 files changed, 36 insertions, 14 deletions
diff --git a/embed.fnc b/embed.fnc
index d5e25fa40e..f9d214d266 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1448,8 +1448,8 @@ Apd	|int	|bytes_cmp_utf8	|NN const U8 *b|STRLEN blen|NN const U8 *u \
 				|STRLEN ulen
 ApMd	|U8*	|bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
 ApMd	|U8*	|bytes_to_utf8	|NN const U8 *s|NN STRLEN *len
-Apd	|UV	|utf8_to_uvchr	|NN const U8 *s|NULLOK STRLEN *retlen
-Apd	|UV	|utf8_to_uvuni	|NN const U8 *s|NULLOK STRLEN *retlen
+ApdD	|UV	|utf8_to_uvchr	|NN const U8 *s|NULLOK STRLEN *retlen
+ApdD	|UV	|utf8_to_uvuni	|NN const U8 *s|NULLOK STRLEN *retlen
 ApdM	|UV	|valid_utf8_to_uvchr	|NN const U8 *s|NULLOK STRLEN *retlen
 ApdM	|UV	|valid_utf8_to_uvuni	|NN const U8 *s|NULLOK STRLEN *retlen
 Apd	|UV	|utf8_to_uvchr_buf	|NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 8b04237747..b1c96c8caf 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -28,7 +28,12 @@ write C<< no feature ':all' >>.
 
 =head1 Security
 
-There have been no security related fixed between 5.15.8 and 5.15.9.
+=head2 Malformed UTF-8 input could cause attempts to read beyond the end of the buffer
+
+Two new XS-accessible functions, C<utf8_to_uvchr_buf()> and
+C<utf8_to_uvuni_buf()>, are now available to prevent this, and the Perl
+core has been converted to use them.
+See L</Internal Changes>.
 
 =head1 Incompatible Changes
 
@@ -44,6 +49,11 @@ It has been documented that the current plans include requiring a
 literal C<< "{" >> to be escaped: 5.18 will emit deprecation warnings,
 and it will be required in 5.20.
 
+=head2 XS functions C<utf8_to_uvchr()> and C<utf8_to_uvuni()>
+
+Use C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> instead.
+See L</Internal Changes>.
+
 =head1 Performance Enhancements
 
 =over 4
@@ -104,11 +114,7 @@ The code has been refactored to reduce duplication.
 
 =item *
 
-Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
-been added.  These are the same as C<utf8_to_uvchr> and
-C<utf8_to_uvuni>, but take an extra parameter that is used to guard
-against reading beyond the end of the input string.
-See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+XXX
 
 =back
 
@@ -180,8 +186,18 @@ There have been no changes to Perl's support of various platforms between
 
 =head1 Internal Changes
 
-There has been no change that affects the interface available to C<< XS >>
-between 5.15.8 and 5.15.9.
+=over 4
+
+=item *
+
+Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
+been added.  These are the same as C<utf8_to_uvchr()> and
+C<utf8_to_uvuni()> (which are now deprecated), but take an extra parameter
+that is used to guard against reading beyond the end of the input
+string.
+See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+
+=back
 
 =head1 Selected Bug Fixes
 
diff --git a/proto.h b/proto.h
index 5bc242447d..d8978c6c9f 100644
--- a/proto.h
+++ b/proto.h
@@ -4564,6 +4564,7 @@ PERL_CALLCONV U8*	Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len)
 	assert(s); assert(len)
 
 PERL_CALLCONV UV	Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__deprecated__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR	\
 	assert(s)
@@ -4575,6 +4576,7 @@ PERL_CALLCONV UV	Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLE
 	assert(s); assert(send)
 
 PERL_CALLCONV UV	Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__deprecated__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_UTF8_TO_UVUNI	\
 	assert(s)
diff --git a/utf8.c b/utf8.c
index 85bf2f00c8..1d646a88d3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -835,13 +835,15 @@ Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
 /*
 =for apidoc utf8_to_uvchr
 
+DEPRECATED!
+
 Returns the native code point of the first character in the string C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
 Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvchr_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated.  Use L</utf8_to_uvchr_buf> instead.
 
 If C<s> points to one of the detected malformations, zero is
 returned and C<retlen> is set, if possible, to -1.
@@ -901,13 +903,15 @@ Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
 /*
 =for apidoc utf8_to_uvuni
 
+DEPRECATED!
+
 Returns the Unicode code point of the first character in the string C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
 Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvuni_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated.  Use L</utf8_to_uvuni_buf> instead.
 
 If C<s> points to one of the detected malformations, zero is
 returned and C<retlen> is set, if possible, to -1.
author	Karl Williamson <public@khwilliamson.com>	2012-03-19 16:31:18 -0600
committer	Karl Williamson <public@khwilliamson.com>	2012-03-19 18:23:44 -0600
commit	977c1d31fff4d41aa42e40c904fe08b509e3a34e (patch)
tree	6dc75d828245a74564c25954ec7096938adc2572
parent	4b88fb76efce8c436e63b907c9842345d4fa77c7 (diff)
download	perl-977c1d31fff4d41aa42e40c904fe08b509e3a34e.tar.gz