utf8.c: Add valid_utf8_to_uvuni() and valid_utf8_to_uvchr()

These functions are like utf8_to_uvuni() and utf8_to_uvchr(), but their name implies that the input UTF-8 has been validated. They are not currently documented, as it's best for XS writers to call the functions that do validation.
author: Karl Williamson <public@khwilliamson.com> 2012-03-19 15:13:19 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-03-19 18:23:44 -0600
commit: 27d6c58a7e12243bef66c58b38e7d1415d9ca07e (patch)
tree: 666b93928ce7261a446a8438e1338099e86f0acf
parent: ec5f19d09949aac9034bb62ade44ffba8d4d2bb1 (diff)
download: perl-27d6c58a7e12243bef66c58b38e7d1415d9ca07e.tar.gz
4 files changed, 40 insertions, 0 deletions
diff --git a/embed.fnc b/embed.fnc
index 5a496906c7..d5e25fa40e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1450,6 +1450,8 @@ ApMd	|U8*	|bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
 ApMd	|U8*	|bytes_to_utf8	|NN const U8 *s|NN STRLEN *len
 Apd	|UV	|utf8_to_uvchr	|NN const U8 *s|NULLOK STRLEN *retlen
 Apd	|UV	|utf8_to_uvuni	|NN const U8 *s|NULLOK STRLEN *retlen
+ApdM	|UV	|valid_utf8_to_uvchr	|NN const U8 *s|NULLOK STRLEN *retlen
+ApdM	|UV	|valid_utf8_to_uvuni	|NN const U8 *s|NULLOK STRLEN *retlen
 Apd	|UV	|utf8_to_uvchr_buf	|NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 Apd	|UV	|utf8_to_uvuni_buf	|NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 pM	|bool	|check_utf8_print	|NN const U8 *s|const STRLEN len
diff --git a/embed.h b/embed.h
index 8a390471fd..31e024c4bf 100644
--- a/embed.h
+++ b/embed.h
@@ -678,6 +678,8 @@
 #define utf8n_to_uvuni(a,b,c,d)	Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
 #define uvchr_to_utf8_flags(a,b,c)	Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
 #define uvuni_to_utf8_flags(a,b,c)	Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#define valid_utf8_to_uvchr(a,b)	Perl_valid_utf8_to_uvchr(aTHX_ a,b)
+#define valid_utf8_to_uvuni(a,b)	Perl_valid_utf8_to_uvuni(aTHX_ a,b)
 #define vcmp(a,b)		Perl_vcmp(aTHX_ a,b)
 #define vcroak(a,b)		Perl_vcroak(aTHX_ a,b)
 #define vdeb(a,b)		Perl_vdeb(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 9c9185592a..5bc242447d 100644
--- a/proto.h
+++ b/proto.h
@@ -4605,6 +4605,16 @@ PERL_CALLCONV U8*	Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS	\
 	assert(d)
 
+PERL_CALLCONV UV	Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR	\
+	assert(s)
+
+PERL_CALLCONV UV	Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+			__attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI	\
+	assert(s)
+
 PERL_CALLCONV int	Perl_vcmp(pTHX_ SV *lhv, SV *rhv)
 			__attribute__nonnull__(pTHX_1)
 			__attribute__nonnull__(pTHX_2);
diff --git a/utf8.c b/utf8.c
index 1faa96d9fb..c9bc63a001 100644
--- a/utf8.c
+++ b/utf8.c
@@ -819,6 +819,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 			  ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
+/* Like L</utf8_to_uvchr_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>.  Currently, some
+ * malformations are checked for, but this checking likely will be removed in
+ * the future */
+
+UV
+Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+
+    return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
 /*
 =for apidoc utf8_to_uvchr
 
@@ -869,6 +882,19 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 			       ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
+/* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>.  Currently, some
+ * malformations are checked for, but this checking likely will be removed in
+ * the future */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+
+    return utf8_to_uvuni_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
 /*
 =for apidoc utf8_to_uvuni
author	Karl Williamson <public@khwilliamson.com>	2012-03-19 15:13:19 -0600
committer	Karl Williamson <public@khwilliamson.com>	2012-03-19 18:23:44 -0600
commit	27d6c58a7e12243bef66c58b38e7d1415d9ca07e (patch)
tree	666b93928ce7261a446a8438e1338099e86f0acf
parent	ec5f19d09949aac9034bb62ade44ffba8d4d2bb1 (diff)
download	perl-27d6c58a7e12243bef66c58b38e7d1415d9ca07e.tar.gz