summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-10-13 19:56:45 -0600
committerKarl Williamson <public@khwilliamson.com>2012-03-22 22:14:24 -0600
commit1118b943c2eac15b58b91de9ffbe015c923e4348 (patch)
tree2be2931528bea44ee5eda6c55659e8c046c9c911
parent053ee270675e7fb9805977318b3bbdc9ce4c4ccd (diff)
downloadperl-1118b943c2eac15b58b91de9ffbe015c923e4348.tar.gz
regexec.c: Fix "\x{FB01}\x{FB00}" =~ /ff/i
Only the first character of the string was being checked when scanning for the beginning position of the pattern match. This was so wrong, it looks like it has to be a regression. I experimented a little and did not find any. I believe (but am not certain) that a multi-char fold has to be involved. The handling of these was so broken before 5.14 that there very well may not be a regression.
-rw-r--r--pod/perldelta.pod7
-rw-r--r--regexec.c3
-rw-r--r--t/re/re_tests6
3 files changed, 15 insertions, 1 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 1f1d4bd2bc..304f70a82f 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -315,6 +315,13 @@ pattern is in UTF-8, the target string is not, and a Latin-1 character
precedes a character in the string that should match the pattern. [perl
#101710]
+=item *
+
+In case-insensitive regular expression pattern matching, no longer on
+UTF-8 encoded strings does the scan for the start of match only look at
+the first possible position. This caused matches such as
+C<"f\x{FB00}" =~ /ff/i> to fail.
+
=back
=head1 Known Problems
diff --git a/regexec.c b/regexec.c
index 2354be1f9f..021ab8e65e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1507,7 +1507,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
? utf8_length((U8 *) pat_string, (U8 *) pat_end)
: ln;
- e = HOP3c(strend, -((I32)lnc), s);
+ /* Set the end position to the final character available */
+ e = HOP3c(strend, -1, s);
if (!reginfo && e < s) {
e = s; /* Due to minlen logic of intuit() */
diff --git a/t/re/re_tests b/t/re/re_tests
index 35a72203cd..ae124522f8 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1522,4 +1522,10 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
# See [perl #89750]. This makes sure that the simple fold gets generated
# in that case, to DF.
/[^\x{1E9E}]/i \x{DF} n - -
+
+/ff/i \x{FB00}\x{FB01} y $& \x{FB00}
+/ff/i \x{FB01}\x{FB00} y $& \x{FB00}
+/fi/i \x{FB01}\x{FB00} y $& \x{FB01}
+/fi/i \x{FB00}\x{FB01} y $& \x{FB01}
+
# vim: softtabstop=0 noexpandtab