Deprecate is_utf8_char()

This function assumes that the buffer holds at least as many bytes as the first byte of the alleged UTF-8 encoded string indicates. That assumption may not hold (for example, with malformed input), in which case the function can read beyond the end of the buffer. is_utf8_char_buf() should be used instead.
author: Karl Williamson <public@khwilliamson.com> 2012-02-11 14:20:56 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-02-11 14:35:46 -0700
commit: 768483871f7d05689a92ec84d2182a1b6e3c0516 (patch)
tree: 61f7ee908b7ff0c498a272236a3d42160038df4e
parent: 492a624f4a0c250e011c6b74a3403bfc885ec961 (diff)
download: perl-768483871f7d05689a92ec84d2182a1b6e3c0516.tar.gz
3 files changed, 9 insertions, 4 deletions
diff --git a/embed.fnc b/embed.fnc
index 892a7190ac..34aa251d4e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -626,7 +626,7 @@ ApPR	|bool	|is_uni_print_lc|UV c
 ApPR	|bool	|is_uni_punct_lc|UV c
 ApPR	|bool	|is_uni_xdigit_lc|UV c
 Anpd	|bool	|is_ascii_string|NN const U8 *s|STRLEN len
-Anpd	|STRLEN	|is_utf8_char	|NN const U8 *s
+AnpdD	|STRLEN	|is_utf8_char	|NN const U8 *s
 Anpd	|STRLEN	|is_utf8_char_buf|NN const U8 *buf|NN const U8 *buf_end
 Anpd	|bool	|is_utf8_string	|NN const U8 *s|STRLEN len
 Anpdmb	|bool	|is_utf8_string_loc|NN const U8 *s|STRLEN len|NULLOK const U8 **p
diff --git a/proto.h b/proto.h
index dde1a43815..84bfbf4982 100644
--- a/proto.h
+++ b/proto.h
@@ -1819,6 +1819,7 @@ PERL_CALLCONV bool	Perl_is_utf8_ascii(pTHX_ const U8 *p)
 	assert(p)
 
 PERL_CALLCONV STRLEN	Perl_is_utf8_char(const U8 *s)
+			__attribute__deprecated__
 			__attribute__nonnull__(1);
 #define PERL_ARGS_ASSERT_IS_UTF8_CHAR	\
 	assert(s)
diff --git a/utf8.c b/utf8.c
index 2e0429e476..5c1f7c0338 100644
--- a/utf8.c
+++ b/utf8.c
@@ -355,21 +355,25 @@ Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
 /*
 =for apidoc is_utf8_char
 
+DEPRECATED!
+
 Tests if some arbitrary number of bytes begins in a valid UTF-8
 character.  Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
 character is a valid UTF-8 character.  The actual number of bytes in the UTF-8
 character will be returned if it is valid, otherwise 0.
 
-WARNING: use only if you *know* that C<s> has at least either UTF8_MAXBYTES or
-UTF8SKIP(s) bytes.
+This function is deprecated due to the possibility that malformed input could
+cause reading beyond the end of the input buffer.  Use C<is_utf8_char_buf>
+instead.
 
 =cut */
+
 STRLEN
 Perl_is_utf8_char(const U8 *s)
 {
     PERL_ARGS_ASSERT_IS_UTF8_CHAR;
 
-    /* Assumes we have enough space */
+    /* Assumes we have enough space, which is why this is deprecated */
     return is_utf8_char_buf(s, s + UTF8SKIP(s));
 }
author	Karl Williamson <public@khwilliamson.com>	2012-02-11 14:20:56 -0700
committer	Karl Williamson <public@khwilliamson.com>	2012-02-11 14:35:46 -0700
commit	768483871f7d05689a92ec84d2182a1b6e3c0516 (patch)
tree	61f7ee908b7ff0c498a272236a3d42160038df4e
parent	492a624f4a0c250e011c6b74a3403bfc885ec961 (diff)
download	perl-768483871f7d05689a92ec84d2182a1b6e3c0516.tar.gz