summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-03-19 15:13:19 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-03-19 18:23:44 -0600
commit 27d6c58a7e12243bef66c58b38e7d1415d9ca07e (patch)
tree 666b93928ce7261a446a8438e1338099e86f0acf
parent ec5f19d09949aac9034bb62ade44ffba8d4d2bb1 (diff)
download perl-27d6c58a7e12243bef66c58b38e7d1415d9ca07e.tar.gz
utf8.c: Add valid_utf8_to_uvuni() and valid_utf8_to_uvchr()
These functions are like utf8_to_uvuni() and utf8_to_uvchr(), but their names imply that the input UTF-8 has been validated. They are not currently documented, as it's best for XS writers to call the functions that do validation.
-rw-r--r-- embed.fnc 2
-rw-r--r-- embed.h 2
-rw-r--r-- proto.h 10
-rw-r--r-- utf8.c 26
4 files changed, 40 insertions, 0 deletions
diff --git a/embed.fnc b/embed.fnc
index 5a496906c7..d5e25fa40e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1450,6 +1450,8 @@ ApMd |U8* |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len
Apd |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
Apd |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
Apd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
Apd |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
pM |bool |check_utf8_print |NN const U8 *s|const STRLEN len
diff --git a/embed.h b/embed.h
index 8a390471fd..31e024c4bf 100644
--- a/embed.h
+++ b/embed.h
@@ -678,6 +678,8 @@
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
#define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#define valid_utf8_to_uvchr(a,b) Perl_valid_utf8_to_uvchr(aTHX_ a,b)
+#define valid_utf8_to_uvuni(a,b) Perl_valid_utf8_to_uvuni(aTHX_ a,b)
#define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
#define vcroak(a,b) Perl_vcroak(aTHX_ a,b)
#define vdeb(a,b) Perl_vdeb(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 9c9185592a..5bc242447d 100644
--- a/proto.h
+++ b/proto.h
@@ -4605,6 +4605,16 @@ PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
#define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS \
assert(d)
+PERL_CALLCONV UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR \
+ assert(s)
+
+PERL_CALLCONV UV Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI \
+ assert(s)
+
PERL_CALLCONV int Perl_vcmp(pTHX_ SV *lhv, SV *rhv)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
diff --git a/utf8.c b/utf8.c
index 1faa96d9fb..c9bc63a001 100644
--- a/utf8.c
+++ b/utf8.c
@@ -819,6 +819,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
+/* Like L</utf8_to_uvchr_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>. Currently this just
+ * calls utf8_to_uvchr_buf() with C<s> + UTF8_MAXBYTES as the end, so some
+ * malformations are checked for, but this checking likely will be removed */
+
+UV
+Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+
+ return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
/*
=for apidoc utf8_to_uvchr
@@ -869,6 +882,19 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
+/* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>. Currently this just
+ * calls utf8_to_uvuni_buf() with C<s> + UTF8_MAXBYTES as the end, so some
+ * malformations are checked for, but this checking likely will be removed */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+
+ return utf8_to_uvuni_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
/*
=for apidoc utf8_to_uvuni