From 33756530b5c7b031069d47839f8132f4574d2f50 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 26 Nov 2017 17:06:44 -0700 Subject: Use is_utf8_invariant_string() more Now that this function was changed to do word-at-a-time searching in commit e17544a60909ed9555c0dad7cd24afc40eb736e7, we can more quickly find the first variant byte in a string, if any. Given that a lot of usage of Perl is on ASCII data, it makes sense to try this first before any byte-at-a-time processing. Since Perl can be used on things that are mostly non-ASCII, we give up at the first variant byte, and process the rest of the string byte-by-byte. Otherwise we could have a pipeline of finding the next variant quickly, but this would only be faster if variants were rare, which I don't feel we can be confident about, after finding at least one. --- embed.fnc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'embed.fnc') diff --git a/embed.fnc b/embed.fnc index 6f10fa8c78..d174290aae 100644 --- a/embed.fnc +++ b/embed.fnc @@ -783,7 +783,7 @@ AnidR |bool |is_utf8_invariant_string_loc|NN const U8* const s \ |STRLEN len \ |NULLOK const U8 ** ep AmnpdRP |bool |is_ascii_string|NN const U8* const s|const STRLEN len -AmnpdRP |bool |is_invariant_string|NN const U8* const s|const STRLEN len +AmnpdRP |bool |is_invariant_string|NN const U8* const s|STRLEN len #if defined(PERL_CORE) || defined (PERL_EXT) EXnidR |bool |is_utf8_non_invariant_string|NN const U8* const s \ |STRLEN len @@ -796,14 +796,14 @@ AnidR |bool |is_utf8_string_flags \ AnmdpR |bool |is_strict_utf8_string|NN const U8 *s|STRLEN len AnmdpR |bool |is_c9strict_utf8_string|NN const U8 *s|STRLEN len Anpdmb |bool |is_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Andm |bool |is_utf8_string_loc_flags \ |NN const U8 *s|STRLEN len|NN const U8 **ep \ |const U32 flags Andm |bool |is_strict_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN 
const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Andm |bool |is_c9strict_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Anipd |bool |is_utf8_string_loclen \ |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el @@ -811,18 +811,18 @@ Anid |bool |is_utf8_string_loclen_flags \ |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el|const U32 flags Anid |bool |is_strict_utf8_string_loclen \ - |NN const U8 *s|const STRLEN len|NULLOK const U8 **ep \ + |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el Anid |bool |is_c9strict_utf8_string_loclen \ - |NN const U8 *s|const STRLEN len|NULLOK const U8 **ep \ + |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el Amnd |bool |is_utf8_fixed_width_buf_flags \ - |NN const U8 * const s|const STRLEN len|const U32 flags + |NN const U8 * const s|STRLEN len|const U32 flags Amnd |bool |is_utf8_fixed_width_buf_loc_flags \ - |NN const U8 * const s|const STRLEN len \ + |NN const U8 * const s|STRLEN len \ |NULLOK const U8 **ep|const U32 flags Anid |bool |is_utf8_fixed_width_buf_loclen_flags \ - |NN const U8 * const s|const STRLEN len \ + |NN const U8 * const s|STRLEN len \ |NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags AmndP |bool |is_utf8_valid_partial_char \ |NN const U8 * const s|NN const U8 * const e -- cgit v1.2.1