diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-03-19 16:31:18 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-03-19 18:23:44 -0600 |
commit | 977c1d31fff4d41aa42e40c904fe08b509e3a34e (patch) | |
tree | 6dc75d828245a74564c25954ec7096938adc2572 | |
parent | 4b88fb76efce8c436e63b907c9842345d4fa77c7 (diff) | |
download | perl-977c1d31fff4d41aa42e40c904fe08b509e3a34e.tar.gz |
Deprecate utf8_to_uvchr() and utf8_to_uvuni()

These functions can read beyond the end of their input strings if
presented with malformed UTF-8 input. Perl core code has been converted
to use utf8_to_uvchr_buf() and utf8_to_uvuni_buf() instead.
-rw-r--r-- | embed.fnc | 4 |
-rw-r--r-- | pod/perldelta.pod | 32 |
-rw-r--r-- | proto.h | 2 |
-rw-r--r-- | utf8.c | 12 |

4 files changed, 36 insertions, 14 deletions
diff --git a/embed.fnc b/embed.fnc
--- a/embed.fnc
+++ b/embed.fnc
@@ -1448,8 +1448,8 @@ Apd |int |bytes_cmp_utf8 |NN const U8 *b|STRLEN blen|NN const U8 *u \
  |STRLEN ulen
 ApMd |U8* |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
 ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len
-Apd |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
-Apd |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
+ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
+ApdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
 ApdM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
 ApdM |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
 Apd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 8b04237747..b1c96c8caf 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -28,7 +28,12 @@ write C<< no feature ':all' >>.
 
 =head1 Security
 
-There have been no security related fixed between 5.15.8 and 5.15.9.
+=head2 Malformed UTF-8 input could cause attempts to read beyond the end of the buffer
+
+Two new XS-accessible functions, C<utf8_to_uvchr_buf()> and
+C<utf8_to_uvuni_buf()> are now available to prevent this, and the Perl
+core has been converted to use them.
+See L</Internal Changes>.
 
 =head1 Incompatible Changes
 
@@ -44,6 +49,11 @@ It has been documented that the current plans include requiring a literal
 C<< "{" >> to be escaped: 5.18 will emit deprecation warnings, and it
 will be required in 5.20.
 
+=head2 XS functions C<utf8_to_uvchr()> and C<utf8_to_uvuni()>
+
+Use C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> instead.
+See L</Internal Changes>.
+
 =head1 Performance Enhancements
 
 =over 4
@@ -104,11 +114,7 @@ The code has been refactored to reduce duplication.
 
 =item *
 
-Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
-been added. These are the same as C<utf8_to_uvchr> and
-C<utf8_to_uvuni>, but take an extra parameter that is used to guard
-against reading beyond the end of the input string.
-See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+XXX
 
 =back
 
@@ -180,8 +186,18 @@ There have been no changes to Perl's support of various platforms between
 
 =head1 Internal Changes
 
-There has been no change that affects the interface available to C<< XS >>
-between 5.15.8 and 5.15.9.
+=over 4
+
+=item *
+
+Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
+been added. These are the same as C<utf8_to_uvchr> and
+C<utf8_to_uvuni> (which are now deprecated), but take an extra parameter
+that is used to guard against reading beyond the end of the input
+string.
+See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
+
+=back
 
 =head1 Selected Bug Fixes
 
diff --git a/proto.h b/proto.h
--- a/proto.h
+++ b/proto.h
@@ -4564,6 +4564,7 @@ PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len)
 	assert(s); assert(len)
 
 PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__deprecated__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \
 	assert(s)
@@ -4575,6 +4576,7 @@ PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLE
 	assert(s); assert(send)
 
 PERL_CALLCONV UV Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__deprecated__
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_UTF8_TO_UVUNI \
 	assert(s)
diff --git a/utf8.c b/utf8.c
--- a/utf8.c
+++ b/utf8.c
@@ -835,13 +835,15 @@ Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
 /*
 =for apidoc utf8_to_uvchr
 
+DEPRECATED!
+
 Returns the native code point of the first character in the string C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
 Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvchr_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead.
 
 If C<s> points to one of the detected malformations, zero is returned and
 C<retlen> is set, if possible, to -1.
@@ -901,13 +903,15 @@ Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
 /*
 =for apidoc utf8_to_uvuni
 
+DEPRECATED!
+
 Returns the Unicode code point of the first character in the string C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
 Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer.
-Use L</utf8_to_uvuni_buf> instead.
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated. Use L</utf8_to_uvuni_buf> instead.
 
 If C<s> points to one of the detected malformations, zero is returned and
 C<retlen> is set, if possible, to -1.