From 33756530b5c7b031069d47839f8132f4574d2f50 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sun, 26 Nov 2017 17:06:44 -0700
Subject: Use is_utf8_invariant_string() more

Now that this function has been changed to do word-at-a-time searching in
commit e17544a60909ed9555c0dad7cd24afc40eb736e7, we can more quickly
find the first variant byte in a string, if any.  Given that a lot of
usage of Perl is on ASCII data, it makes sense to try this first before
any byte-at-a-time processing.

Since Perl can also be used on data that is mostly non-ASCII, we give up
the fast search at the first variant byte found, and process the rest of
the string byte-by-byte.  Otherwise we could have a pipeline of finding
the next variant quickly, but this would only be faster if variants were
rare, which I don't feel we can be confident about, after finding at
least one.
---
 embed.fnc | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'embed.fnc')

diff --git a/embed.fnc b/embed.fnc
index 6f10fa8c78..d174290aae 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -783,7 +783,7 @@ AnidR	|bool	|is_utf8_invariant_string_loc|NN const U8* const s	    \
 		|STRLEN len						    \
 		|NULLOK const U8 ** ep
 AmnpdRP	|bool	|is_ascii_string|NN const U8* const s|const STRLEN len
-AmnpdRP	|bool	|is_invariant_string|NN const U8* const s|const STRLEN len
+AmnpdRP	|bool	|is_invariant_string|NN const U8* const s|STRLEN len
 #if defined(PERL_CORE) || defined (PERL_EXT)
 EXnidR	|bool	|is_utf8_non_invariant_string|NN const U8* const s	    \
 		|STRLEN len
@@ -796,14 +796,14 @@ AnidR	|bool	|is_utf8_string_flags					    \
 AnmdpR	|bool	|is_strict_utf8_string|NN const U8 *s|STRLEN len
 AnmdpR	|bool	|is_c9strict_utf8_string|NN const U8 *s|STRLEN len
 Anpdmb	|bool	|is_utf8_string_loc					    \
-		|NN const U8 *s|const STRLEN len|NN const U8 **ep
+		|NN const U8 *s|STRLEN len|NN const U8 **ep
 Andm	|bool	|is_utf8_string_loc_flags				    \
 		|NN const U8 *s|STRLEN len|NN const U8 **ep		    \
 		|const U32 flags
 Andm	|bool	|is_strict_utf8_string_loc				    \
-		|NN const U8 *s|const STRLEN len|NN const U8 **ep
+		|NN const U8 *s|STRLEN len|NN const U8 **ep
 Andm	|bool	|is_c9strict_utf8_string_loc				    \
-		|NN const U8 *s|const STRLEN len|NN const U8 **ep
+		|NN const U8 *s|STRLEN len|NN const U8 **ep
 Anipd	|bool	|is_utf8_string_loclen					    \
 		|NN const U8 *s|STRLEN len|NULLOK const U8 **ep		    \
 		|NULLOK STRLEN *el
@@ -811,18 +811,18 @@ Anid	|bool	|is_utf8_string_loclen_flags				    \
 		|NN const U8 *s|STRLEN len|NULLOK const U8 **ep		    \
 		|NULLOK STRLEN *el|const U32 flags
 Anid	|bool	|is_strict_utf8_string_loclen				    \
-		|NN const U8 *s|const STRLEN len|NULLOK const U8 **ep	    \
+		|NN const U8 *s|STRLEN len|NULLOK const U8 **ep	    \
 		|NULLOK STRLEN *el
 Anid	|bool	|is_c9strict_utf8_string_loclen				    \
-		|NN const U8 *s|const STRLEN len|NULLOK const U8 **ep	    \
+		|NN const U8 *s|STRLEN len|NULLOK const U8 **ep	    \
 		|NULLOK STRLEN *el
 Amnd	|bool	|is_utf8_fixed_width_buf_flags				    \
-		|NN const U8 * const s|const STRLEN len|const U32 flags
+		|NN const U8 * const s|STRLEN len|const U32 flags
 Amnd	|bool	|is_utf8_fixed_width_buf_loc_flags			    \
-		|NN const U8 * const s|const STRLEN len			    \
+		|NN const U8 * const s|STRLEN len			    \
 		|NULLOK const U8 **ep|const U32 flags
 Anid	|bool	|is_utf8_fixed_width_buf_loclen_flags			    \
-		|NN const U8 * const s|const STRLEN len			    \
+		|NN const U8 * const s|STRLEN len			    \
 		|NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags
 AmndP	|bool	|is_utf8_valid_partial_char				    \
 		|NN const U8 * const s|NN const U8 * const e
-- 
cgit v1.2.1