diff options
author | Karl Williamson <khw@cpan.org> | 2017-11-26 17:06:44 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2017-11-27 14:48:45 -0700 |
commit | 33756530b5c7b031069d47839f8132f4574d2f50 (patch) | |
tree | 3a30b3b8f31aacdbd2a9c6cb03ffac91c315e835 /embed.fnc | |
parent | b2e7ed74dcabdba63e3e8e2ff1980e1cd109b869 (diff) | |
download | perl-33756530b5c7b031069d47839f8132f4574d2f50.tar.gz |
Use is_utf8_invariant_string() more
Now that this function was changed to do word-at-a-time searching in
commit e17544a60909ed9555c0dad7cd24afc40eb736e7, we can more quickly
find the first variant byte in a string, if any. Given that a lot of
usage of Perl is on ASCII data, it makes sense to try this first before
any byte-at-a-time processing.
Since Perl can be used on things that are mostly non-ASCII, we give up
at the first such one, and process the rest of the string byte-by-byte.
Otherwise we could have a pipeline of finding the next variant quickly,
but this would only be faster if variants were rare, which I don't feel
we can be confident about, after finding at least one.
Diffstat (limited to 'embed.fnc')
-rw-r--r-- | embed.fnc | 18 |
1 files changed, 9 insertions, 9 deletions
@@ -783,7 +783,7 @@ AnidR |bool |is_utf8_invariant_string_loc|NN const U8* const s \ |STRLEN len \ |NULLOK const U8 ** ep AmnpdRP |bool |is_ascii_string|NN const U8* const s|const STRLEN len -AmnpdRP |bool |is_invariant_string|NN const U8* const s|const STRLEN len +AmnpdRP |bool |is_invariant_string|NN const U8* const s|STRLEN len #if defined(PERL_CORE) || defined (PERL_EXT) EXnidR |bool |is_utf8_non_invariant_string|NN const U8* const s \ |STRLEN len @@ -796,14 +796,14 @@ AnidR |bool |is_utf8_string_flags \ AnmdpR |bool |is_strict_utf8_string|NN const U8 *s|STRLEN len AnmdpR |bool |is_c9strict_utf8_string|NN const U8 *s|STRLEN len Anpdmb |bool |is_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Andm |bool |is_utf8_string_loc_flags \ |NN const U8 *s|STRLEN len|NN const U8 **ep \ |const U32 flags Andm |bool |is_strict_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Andm |bool |is_c9strict_utf8_string_loc \ - |NN const U8 *s|const STRLEN len|NN const U8 **ep + |NN const U8 *s|STRLEN len|NN const U8 **ep Anipd |bool |is_utf8_string_loclen \ |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el @@ -811,18 +811,18 @@ Anid |bool |is_utf8_string_loclen_flags \ |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el|const U32 flags Anid |bool |is_strict_utf8_string_loclen \ - |NN const U8 *s|const STRLEN len|NULLOK const U8 **ep \ + |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el Anid |bool |is_c9strict_utf8_string_loclen \ - |NN const U8 *s|const STRLEN len|NULLOK const U8 **ep \ + |NN const U8 *s|STRLEN len|NULLOK const U8 **ep \ |NULLOK STRLEN *el Amnd |bool |is_utf8_fixed_width_buf_flags \ - |NN const U8 * const s|const STRLEN len|const U32 flags + |NN const U8 * const s|STRLEN len|const U32 flags Amnd |bool |is_utf8_fixed_width_buf_loc_flags \ - |NN const U8 * const s|const STRLEN len \ 
+ |NN const U8 * const s|STRLEN len \ |NULLOK const U8 **ep|const U32 flags Anid |bool |is_utf8_fixed_width_buf_loclen_flags \ - |NN const U8 * const s|const STRLEN len \ + |NN const U8 * const s|STRLEN len \ |NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags AmndP |bool |is_utf8_valid_partial_char \ |NN const U8 * const s|NN const U8 * const e |