summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-12-19 11:33:07 +0000
committerDavid Mitchell <davem@iabyn.com>2011-12-19 15:06:08 +0000
commitf3539662ce10aed6b074d859a24b8199fb038a70 (patch)
treeedc5612f1dc027e5f50be56bba6f40591b9ecc81
parent24b4ab6d00f8e71b56946d5d52a0d885bc4a7df9 (diff)
downloadperl-f3539662ce10aed6b074d859a24b8199fb038a70.tar.gz
force recompiling of regex where closures matter
There are some cases where on the second run of a run-time regex, the text of the pattern hasn't changed, but we should still recompile to ensure that closure behaviour is correct. These cases are: 1) run-time code: my $code = '(??{$x})'; for my $x (1..3) { $x =~ /$code/; # recompile to see fresh value of $x } 2) embedded regexes with code: for my $x (1..3) { my $r = qr/(??{$x})/; "A$x" =~ /A$r/; # recompile to see new $r } With this fix, all the TODO tests in re/pat_re_eval.t now pass. (Note that a couple of those TODO tests were actually broken and are fixed in this commit)
-rw-r--r--regcomp.c40
-rw-r--r--t/re/pat_re_eval.t51
-rw-r--r--t/re/reg_eval_scope.t4
3 files changed, 55 insertions, 40 deletions
diff --git a/regcomp.c b/regcomp.c
index 3717662677..95776a1986 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4609,7 +4609,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
bool used_setjump = FALSE;
regex_charset initial_charset = get_regex_charset(orig_rx_flags);
bool code_is_utf8 = 0;
-
+ bool recompile = 0;
U8 jump_ret = 0;
dJMPENV;
scan_data_t data;
@@ -4730,6 +4730,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
if (ri->num_code_blocks) {
int i;
+ /* the presence of an embedded qr// with code means
+ * we should always recompile: the text of the
+ * qr// may not have changed, but it may be a
+ * different closure than last time */
+ recompile = 1;
Renew(pRExC_state->code_blocks,
pRExC_state->num_code_blocks + ri->num_code_blocks,
struct reg_code_block);
@@ -4936,17 +4941,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* return old regex if pattern hasn't changed */
if ( old_re
+ && !recompile
&& !!RX_UTF8(old_re) == !!RExC_utf8
&& RX_PRECOMP(old_re)
&& RX_PRELEN(old_re) == plen
&& memEQ(RX_PRECOMP(old_re), exp, plen))
{
- ReREFCNT_inc(old_re);
- if (used_setjump) {
- JMPENV_POP;
+ /* see if there are any run-time code blocks */
+ int n = 0;
+ STRLEN s;
+ bool runtime = 0;
+ for (s = 0; s < plen; s++) {
+ if (n < pRExC_state->num_code_blocks
+ && s == pRExC_state->code_blocks[n].start)
+ {
+ s = pRExC_state->code_blocks[n].end;
+ n++;
+ continue;
+ }
+ if (exp[s] == '(' && exp[s+1] == '?' &&
+ (exp[s+2] == '{' || (exp[s+2] == '?' && exp[s+3] == '{')))
+ {
+ runtime = 1;
+ break;
+ }
+ }
+ /* with runtime code, always recompile */
+ if (!runtime) {
+ ReREFCNT_inc(old_re);
+ if (used_setjump) {
+ JMPENV_POP;
+ }
+ Safefree(pRExC_state->code_blocks);
+ return old_re;
}
- Safefree(pRExC_state->code_blocks);
- return old_re;
}
#ifdef TRIE_STUDY_OPT
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index 83f52a870a..b35f8084d1 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -359,15 +359,6 @@ sub run_tests {
# i.e. when do (?{}) blocks get (re)compiled, and what instances
# of lexical vars do they close over?
- # XXX remove this when TODOs are fixed
- # like ok, but 1st arg indicates TODO
- sub tok($$$) {
- my $todo = shift;
- local $::TODO = 're_eval lexical madness' if $todo;
- ok($_[0], $_[1]);
- }
-
-
# if the pattern string gets utf8 upgraded while concatenating,
# make sure a literal code block is still detected (by still
# compiling in the absence of use re 'eval')
@@ -398,25 +389,25 @@ sub run_tests {
my $code1 = 'B(??{$x})';
my $code1u = $c80 . "\x{100}" . '(??{$x})';
- tok($bc, "AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
- tok($bc, "A$c80\x{100}$x" =~ /^A$code1u$/,
+ ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
+ ok("A$c80\x{100}$x" =~ /^A$code1u$/,
"[$x] unvarying runtime code AU");
- tok($bc, "$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
+ ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
"[$x] unvarying runtime code UA");
- tok($bc, "$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
+ ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
"[$x] unvarying runtime code UU");
# mixed literal and run-time code blocks
my $code2 = 'B(??{$x})';
my $code2u = $c80 . "\x{100}" . '(??{$x})';
- tok($bc, "A$x-B$x" =~ /^A(??{$x})-$code2$/,
+ ok("A$x-B$x" =~ /^A(??{$x})-$code2$/,
"[$x] literal+runtime AA");
- tok($bc, "A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
+ ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
"[$x] literal+runtime AU");
- tok($bc, "$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
+ ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
"[$x] literal+runtime UA");
- tok($bc, "$c80\x{101}$x-$c80\x{100}$x"
+ ok("$c80\x{101}$x-$c80\x{100}$x"
=~ /^$c80\x{101}(??{$x})-$code2u$/,
"[$x] literal+runtime UU");
@@ -443,7 +434,7 @@ sub run_tests {
use re 'eval';
$cr4 //= qr/C(??{$x})$/;
my $code3 = 'A(??{$x})';
- tok(1, "A$x-BCa" =~ /^A$code3-B$cr4/,
+ ok("A$x-BCa" =~ /^$code3-B$cr4/,
"[$x] literal qr once embedded text + run code");
no re 'eval';
@@ -455,12 +446,12 @@ sub run_tests {
# literal qr code, embedded with text
my $r2 = qr/B(??{$x})$/;
- tok($bc, "AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
+ ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
# literal qr code, embedded with text + lit code
my $r3 = qr/C(??{$x})$/;
- tok($bc, "A$x-BC$x" =~ /^A(??{$x})-B$r3/,
+ ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/,
"[$x] literal qr embedded text + lit code");
# literal qr code, embedded with text + run code
@@ -468,16 +459,16 @@ sub run_tests {
no re 'eval';
my $r4 = qr/C(??{$x})$/;
my $code4 = '(??{$x})';
- tok($bc, "A$x-BC$x" =~ /^A$code4-B$r4/,
+ ok("A$x-BC$x" =~ /^A$code4-B$r4/,
"[$x] literal qr embedded text + run code");
use re 'eval';
# nested qr in different scopes
my $code5 = '(??{$x})';
- my $r5 = qr/C(??{$x})$/;
+ my $r5 = qr/C(??{$x})/;
use re 'eval';
- my $r6 = qr/$code5-C(??{$x})$/;
+ my $r6 = qr/$code5-C(??{$x})/;
no re 'eval';
my @rr5;
@@ -487,12 +478,12 @@ sub run_tests {
my $rr5 = qr/^A(??{"$x$y"})-$r5/;
push @rr5, $rr5;
- tok($bc, "A$x$y-C$x" =~ $rr5,
+ ok("A$x$y-C$x" =~ $rr5,
"[$x-$y] literal qr + r5");
my $rr6 = qr/^A(??{"$x$y"})-$r6/;
push @rr6, $rr6;
- tok($bc, "A$x$y-$x-C$x" =~ $rr6,
+ ok("A$x$y-$x-C$x" =~ $rr6,
"[$x-$y] literal qr + r6");
}
@@ -500,16 +491,16 @@ sub run_tests {
my $y = 'Y';
my $yy = (qw(d e f))[$i];
my $rr5 = $rr5[$i];
- tok($bc, "A$x$yy-C$x" =~ $rr5,
- "[$x-$yy] literal qr + r5, outside");
- tok(1, "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/,
+ ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside");
+ ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/,
"[$x-$yy] literal qr + r5 + lit, outside");
+
my $rr6 = $rr6[$i];
push @rr6, $rr6;
- tok($bc, "A$x$yy-$x-C$x" =~ $rr6,
+ ok("A$x$yy-$x-C$x" =~ $rr6,
"[$x-$yy] literal qr + r6, outside");
- tok(1, "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
+ ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
"[$x-$yy] literal qr + r6 +lit, outside");
}
}
diff --git a/t/re/reg_eval_scope.t b/t/re/reg_eval_scope.t
index 51d41ec988..f9a62948d9 100644
--- a/t/re/reg_eval_scope.t
+++ b/t/re/reg_eval_scope.t
@@ -84,8 +84,6 @@ fresh_perl_is <<'CODE', '178279371047857967101745', {},
CODE
'multiple (?{})s in "foo" =~ /$string/x';
-on;
-
fresh_perl_is <<'CODE', '123123', {},
for my $x(1..3) {
push @regexps, qr/(?{ print $x })a/;
@@ -95,8 +93,6 @@ fresh_perl_is <<'CODE', '123123', {},
CODE
'qr/(?{})/ is a closure';
-off;
-
"a" =~ do { package foo; qr/(?{ $::pack = __PACKAGE__ })a/ };
is $pack, 'foo', 'qr// inherits package';
"a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ };