diff options
author | David Mitchell <davem@iabyn.com> | 2011-12-19 11:33:07 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2011-12-19 15:06:08 +0000 |
commit | f3539662ce10aed6b074d859a24b8199fb038a70 (patch) | |
tree | edc5612f1dc027e5f50be56bba6f40591b9ecc81 | |
parent | 24b4ab6d00f8e71b56946d5d52a0d885bc4a7df9 (diff) | |
download | perl-f3539662ce10aed6b074d859a24b8199fb038a70.tar.gz |
force recompiling of regex where closures matter
There are some cases where on the second run of a run-time regex, the
text of the pattern hasn't changed, but we should still recompile to
ensure that closure behaviour is correct.
These cases are:
1) run-time code:
my $code = '(??{$x})';
for my $x (1..3) {
$x =~ /$code/; # recompile to see fresh value of $x
}
2) embedded regexes with code:
for my $x (1..3) {
my $r = qr/(??{$x})/;
"A$x" =~ /A$r/; # recompile to see new $r
}
With this fix, all the TODO tests in re/pat_re_eval.t now pass. (Note that
a couple of those TODO tests were actually broken and are fixed in this
commit)
-rw-r--r-- | regcomp.c | 40 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 51 | ||||
-rw-r--r-- | t/re/reg_eval_scope.t | 4 |
3 files changed, 55 insertions, 40 deletions
@@ -4609,7 +4609,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, bool used_setjump = FALSE; regex_charset initial_charset = get_regex_charset(orig_rx_flags); bool code_is_utf8 = 0; - + bool recompile = 0; U8 jump_ret = 0; dJMPENV; scan_data_t data; @@ -4730,6 +4730,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri); if (ri->num_code_blocks) { int i; + /* the presence of an embedded qr// with code means + * we should always recompile: the text of the + * qr// may not have changed, but it may be a + * different closure than last time */ + recompile = 1; Renew(pRExC_state->code_blocks, pRExC_state->num_code_blocks + ri->num_code_blocks, struct reg_code_block); @@ -4936,17 +4941,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* return old regex if pattern hasn't changed */ if ( old_re + && !recompile && !!RX_UTF8(old_re) == !!RExC_utf8 && RX_PRECOMP(old_re) && RX_PRELEN(old_re) == plen && memEQ(RX_PRECOMP(old_re), exp, plen)) { - ReREFCNT_inc(old_re); - if (used_setjump) { - JMPENV_POP; + /* see if there are any run-time code blocks */ + int n = 0; + STRLEN s; + bool runtime = 0; + for (s = 0; s < plen; s++) { + if (n < pRExC_state->num_code_blocks + && s == pRExC_state->code_blocks[n].start) + { + s = pRExC_state->code_blocks[n].end; + n++; + continue; + } + if (exp[s] == '(' && exp[s+1] == '?' && + (exp[s+2] == '{' || (exp[s+2] == '?' && exp[s+3] == '{'))) + { + runtime = 1; + break; + } + } + /* with runtime code, always recompile */ + if (!runtime) { + ReREFCNT_inc(old_re); + if (used_setjump) { + JMPENV_POP; + } + Safefree(pRExC_state->code_blocks); + return old_re; } - Safefree(pRExC_state->code_blocks); - return old_re; } #ifdef TRIE_STUDY_OPT diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index 83f52a870a..b35f8084d1 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -359,15 +359,6 @@ sub run_tests { # i.e. when do (?{}) blocks get (re)compiled, and what instances # of lexical vars do they close over? - # XXX remove this when TODOs are fixed - # like ok, but 1st arg indicates TODO - sub tok($$$) { - my $todo = shift; - local $::TODO = 're_eval lexical madness' if $todo; - ok($_[0], $_[1]); - } - - # if the pattern string gets utf8 upgraded while concatenating, # make sure a literal code block is still detected (by still # compiling in the absence of use re 'eval') @@ -398,25 +389,25 @@ sub run_tests { my $code1 = 'B(??{$x})'; my $code1u = $c80 . "\x{100}" . '(??{$x})'; - tok($bc, "AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA"); - tok($bc, "A$c80\x{100}$x" =~ /^A$code1u$/, + ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA"); + ok("A$c80\x{100}$x" =~ /^A$code1u$/, "[$x] unvarying runtime code AU"); - tok($bc, "$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/, + ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/, "[$x] unvarying runtime code UA"); - tok($bc, "$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/, + ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/, "[$x] unvarying runtime code UU"); # mixed literal and run-time code blocks my $code2 = 'B(??{$x})'; my $code2u = $c80 . "\x{100}" . '(??{$x})'; - tok($bc, "A$x-B$x" =~ /^A(??{$x})-$code2$/, + ok("A$x-B$x" =~ /^A(??{$x})-$code2$/, "[$x] literal+runtime AA"); - tok($bc, "A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/, + ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/, "[$x] literal+runtime AU"); - tok($bc, "$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/, + ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/, "[$x] literal+runtime UA"); - tok($bc, "$c80\x{101}$x-$c80\x{100}$x" + ok("$c80\x{101}$x-$c80\x{100}$x" =~ /^$c80\x{101}(??{$x})-$code2u$/, "[$x] literal+runtime UU"); @@ -443,7 +434,7 @@ sub run_tests { use re 'eval'; $cr4 //= qr/C(??{$x})$/; my $code3 = 'A(??{$x})'; - tok(1, "A$x-BCa" =~ /^A$code3-B$cr4/, + ok("A$x-BCa" =~ /^$code3-B$cr4/, "[$x] literal qr once embedded text + run code"); no re 'eval'; @@ -455,12 +446,12 @@ sub run_tests { # literal qr code, embedded with text my $r2 = qr/B(??{$x})$/; - tok($bc, "AB$x" =~ /^A$r2/, "[$x] literal qr embedded text"); + ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text"); # literal qr code, embedded with text + lit code my $r3 = qr/C(??{$x})$/; - tok($bc, "A$x-BC$x" =~ /^A(??{$x})-B$r3/, + ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/, "[$x] literal qr embedded text + lit code"); # literal qr code, embedded with text + run code @@ -468,16 +459,16 @@ sub run_tests { no re 'eval'; my $r4 = qr/C(??{$x})$/; my $code4 = '(??{$x})'; - tok($bc, "A$x-BC$x" =~ /^A$code4-B$r4/, + ok("A$x-BC$x" =~ /^A$code4-B$r4/, "[$x] literal qr embedded text + run code"); use re 'eval'; # nested qr in different scopes my $code5 = '(??{$x})'; - my $r5 = qr/C(??{$x})$/; + my $r5 = qr/C(??{$x})/; use re 'eval'; - my $r6 = qr/$code5-C(??{$x})$/; + my $r6 = qr/$code5-C(??{$x})/; no re 'eval'; my @rr5; @@ -487,12 +478,12 @@ sub run_tests { my $rr5 = qr/^A(??{"$x$y"})-$r5/; push @rr5, $rr5; - tok($bc, "A$x$y-C$x" =~ $rr5, + ok("A$x$y-C$x" =~ $rr5, "[$x-$y] literal qr + r5"); my $rr6 = qr/^A(??{"$x$y"})-$r6/; push @rr6, $rr6; - tok($bc, "A$x$y-$x-C$x" =~ $rr6, + ok("A$x$y-$x-C$x" =~ $rr6, "[$x-$y] literal qr + r6"); } @@ -500,16 +491,16 @@ sub run_tests { my $y = 'Y'; my $yy = (qw(d e f))[$i]; my $rr5 = $rr5[$i]; - tok($bc, "A$x$yy-C$x" =~ $rr5, - "[$x-$yy] literal qr + r5, outside"); - tok(1, "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/, + ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside"); + ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/, "[$x-$yy] literal qr + r5 + lit, outside"); + my $rr6 = $rr6[$i]; push @rr6, $rr6; - tok($bc, "A$x$yy-$x-C$x" =~ $rr6, + ok("A$x$yy-$x-C$x" =~ $rr6, "[$x-$yy] literal qr + r6, outside"); - tok(1, "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/, + ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/, "[$x-$yy] literal qr + r6 +lit, outside"); } } diff --git a/t/re/reg_eval_scope.t b/t/re/reg_eval_scope.t index 51d41ec988..f9a62948d9 100644 --- a/t/re/reg_eval_scope.t +++ b/t/re/reg_eval_scope.t @@ -84,8 +84,6 @@ fresh_perl_is <<'CODE', '178279371047857967101745', {}, CODE 'multiple (?{})s in "foo" =~ /$string/x'; -on; - fresh_perl_is <<'CODE', '123123', {}, for my $x(1..3) { push @regexps, qr/(?{ print $x })a/; @@ -95,8 +93,6 @@ fresh_perl_is <<'CODE', '123123', {}, CODE 'qr/(?{})/ is a closure'; -off; - "a" =~ do { package foo; qr/(?{ $::pack = __PACKAGE__ })a/ }; is $pack, 'foo', 'qr// inherits package'; "a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ }; |