summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Mitchell <davem@iabyn.com> 2015-03-17 15:43:10 +0000
committer David Mitchell <davem@iabyn.com> 2015-03-17 16:19:52 +0000
commit 88203927dfd53d8b5cfca0639c2ed67e4bbe39d2 (patch)
tree c49d4ecaee5e3ef5bf4ce62b0214fac17a09a7b3
parent 675e93ee6690903702e1998eb285f88dccc3a8ae (diff)
download perl-88203927dfd53d8b5cfca0639c2ed67e4bbe39d2.tar.gz
avoid infinite loop in re_intuit_start()
On STCLASS failure, we sometimes go back and retry an anchored search if it's still within the range of the previously successful floating search. The 'in range' criterion is formally that

    rx_origin + float_offset_min chars < previous float match position

i.e. when we match the float string, the start of the rx must be somewhere within float_offset_max..float_offset_min chars before that. If we haven't yet reached rx_origin + float_offset_min, then there are still some candidate starting positions for the rx that don't violate the float constraint.

However, we do the above calculation in bytes rather than chars for efficiency (if float_offset_min is large, we could otherwise end up doing O(N^2) scans of the string). This is conservative and harmless, because at worst we end up doing an unnecessary (but safe) fixed string scan.

However, it turns out that the 'other' code block that calls fbm_instr() didn't check for the start of the scan range being greater than the end; in this case, for '$' type matches, it would still match. So skip calling fbm_instr() if start > end.
-rw-r--r-- regexec.c 40
-rw-r--r-- t/re/re_tests 4
2 files changed, 30 insertions, 14 deletions
diff --git a/regexec.c b/regexec.c
index e61877ad05..6aa0da1614 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1051,18 +1051,28 @@ Perl_re_intuit_start(pTHX_
char *from = s;
char *to = last + SvCUR(must) - (SvTAIL(must)!=0);
- s = fbm_instr(
- (unsigned char*)from,
- (unsigned char*)to,
- must,
- multiline ? FBMrf_MULTILINE : 0
- );
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
- " doing 'other' fbm scan, [%"IVdf"..%"IVdf"] gave %"IVdf"\n",
- (IV)(from - strbeg),
- (IV)(to - strbeg),
- (IV)(s ? s - strbeg : -1)
- ));
+ if (from > to) {
+ s = NULL;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ " skipping 'other' fbm scan: %"IVdf" > %"IVdf"\n",
+ (IV)(from - strbeg),
+ (IV)(to - strbeg)
+ ));
+ }
+ else {
+ s = fbm_instr(
+ (unsigned char*)from,
+ (unsigned char*)to,
+ must,
+ multiline ? FBMrf_MULTILINE : 0
+ );
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ " doing 'other' fbm scan, [%"IVdf"..%"IVdf"] gave %"IVdf"\n",
+ (IV)(from - strbeg),
+ (IV)(to - strbeg),
+ (IV)(s ? s - strbeg : -1)
+ ));
+ }
}
DEBUG_EXECUTE_r({
@@ -1305,8 +1315,10 @@ Perl_re_intuit_start(pTHX_
* The condition above is in bytes rather than
* chars for efficiency. It's conservative, in
* that it errs on the side of doing 'goto
- * do_other_substr', where a more accurate
- * char-based calculation will be done */
+ * do_other_substr'. In this case, at worst,
+ * an extra anchored search may get done, but in
+ * practice the extra fbm_instr() is likely to
+ * get skipped anyway. */
DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
" about to retry anchored at offset %ld (rx_origin now %"IVdf")...\n",
(long)(other_last - strbeg),
diff --git a/t/re/re_tests b/t/re/re_tests
index 62ebc4a4c2..89c0dc19e1 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1918,5 +1918,9 @@ A+(*PRUNE)BC(?{}) AAABC y $& AAABC
/[a-z]/i \N{KELVIN SIGN} y $& \N{KELVIN SIGN}
/[A-Z]/i \N{LATIN SMALL LETTER LONG S} y $& \N{LATIN SMALL LETTER LONG S}
+# RT #123840: these used to hang in re_intuit_start
+/w\zxy?\z/i \x{100}a\x{80}a n - -
+/w\z\R\z/i \x{100}a\x{80}a n - -
+
# Keep these lines at the end of the file
# vim: softtabstop=0 noexpandtab