diff options
-rw-r--r-- | ext/re/re.pm | 16 | ||||
-rw-r--r-- | ext/re/t/regop.pl | 5 | ||||
-rw-r--r-- | ext/re/t/regop.t | 55 | ||||
-rw-r--r-- | regcomp.c | 19 | ||||
-rw-r--r-- | regcomp.h | 3 |
5 files changed, 89 insertions, 9 deletions
diff --git a/ext/re/re.pm b/ext/re/re.pm index ea7e3d021a..c2d6eed2d1 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -4,7 +4,7 @@ package re; use strict; use warnings; -our $VERSION = "0.26"; +our $VERSION = "0.27"; our @ISA = qw(Exporter); our @EXPORT_OK = ('regmust', qw(is_regexp regexp_pattern @@ -57,6 +57,7 @@ my %flags = ( TRIEC => 0x000004, DUMP => 0x000008, FLAGS => 0x000010, + TEST => 0x000020, EXECUTE => 0x00FF00, INTUIT => 0x000100, @@ -396,6 +397,14 @@ Detailed info about trie compilation. Dump the final program out after it is compiled and optimised. +=item FLAGS + +Dump the flags associated with the program + +=item TEST + +Print output intended for testing the internals of the compile process + =back =item Execute related options @@ -448,6 +457,10 @@ Enable debugging of the recursion stack in the engine. Enabling or disabling this option automatically does the same for debugging states as well. This output from this can be quite large. +=item GPOS + +Enable debugging of the \G modifier. + =item OPTIMISEM Enable enhanced optimisation debugging and start-point optimisations. @@ -473,6 +486,7 @@ debug options. Almost definitely only useful to people hacking on the offsets part of the debug engine. + =back =item Other useful flags diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl index 961af390c3..86976ee0da 100644 --- a/ext/re/t/regop.pl +++ b/ext/re/t/regop.pl @@ -1,4 +1,4 @@ -use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC); +use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC TEST); my @tests=( XY => 'X(A|[B]Q||C|D)Y' , foobar => '[f][o][o][b][a][r]', @@ -7,7 +7,8 @@ my @tests=( 'D:\\dev/perl/ver/28321_/perl.exe'=> '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i', 'q'=>'[q]', - "path_sep:\t8490" => '^(\\S{1,9}):\\s*(\\d+)$' + "path_sep:\t8490" => '^(\\S{1,9}):\\s*(\\d+)$', + '' => '(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE)(?<baz>(?&bar)baz))(?(DEFINE)(?<bop>(?&baz)bop))', ); while (@tests) { my ($str,$pat)=splice @tests,0,2; diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t index 8ed2029c0a..6397d4e5c3 100644 --- a/ext/re/t/regop.t +++ b/ext/re/t/regop.t @@ -55,7 +55,7 @@ foreach my $testout ( @tests ) { # that the tests for this result set are finished. # If you add a test make sure you update $NUM_SECTS # the commented output is just for legacy/debugging purposes -BEGIN{ $NUM_SECTS= 7 } +BEGIN{ $NUM_SECTS= 8 } __END__ #Compiling REx "X(A|[B]Q||C|D)Y" @@ -282,3 +282,56 @@ Freeing REx: "[q]" floating ":" at 1..9 (checking floating) stclass ANYOF[\x{00}-\x{06}\a\b\x{0E}-\x{1F}\x{21}-\x{FF}][{utf8}0100-167F 1681-1FFF 200B-2027 202A-202E 2030-205E 2060-2FFF 3001-INFINITY] anchored(SBOL) minlen 3 %MATCHED% synthetic stclass +--- +#Compiling REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... +#Got 532 bytes for offset annotations. +study_chunk_recursed_count: 5 +#Final program: +# 1: DEFINEP (3) +# 3: IFTHEN (14) +# 5: OPEN1 'foo' (7) +# 7: EXACT <foo> (9) +# 9: CLOSE1 'foo' (14) +# 11: LONGJMP (13) +# 13: TAIL (14) +# 14: DEFINEP (16) +# 16: IFTHEN (30) +# 18: OPEN2 'bar' (20) +# 20: GOSUB1[-15] (23) +# 23: EXACT <bar> (25) +# 25: CLOSE2 'bar' (30) +# 27: LONGJMP (29) +# 29: TAIL (30) +# 30: DEFINEP (32) +# 32: IFTHEN (46) +# 34: OPEN3 'baz' (36) +# 36: GOSUB2[-18] (39) +# 39: EXACT <baz> (41) +# 41: CLOSE3 'baz' (46) +# 43: LONGJMP (45) +# 45: TAIL (46) +# 46: DEFINEP (48) +# 48: IFTHEN (62) +# 50: OPEN4 'bop' (52) +# 52: GOSUB3[-18] (55) +# 55: EXACT <bop> (57) +# 57: CLOSE4 'bop' (62) +# 59: LONGJMP (61) +# 61: TAIL (62) +# 62: END (0) +minlen 0 +#Offsets: [66] +# 1:3[0] 3:10[0] 5:17[1] 7:18[3] 9:21[1] 11:21[0] 13:22[0] 14:25[0] 16:32[0] 18:39[1] 20:41[3] 23:47[3] 25:50[1] 27:50[0] 29:51[0] 30:54[0] 32:61[0] 34:68[1] 36:70[3] 39:76[3] 41:79[1] 43:79[0] 45:80[0] 46:83[0] 48:90[0] 50:97[1] 52:99[3] 55:105[3] 57:108[1] 59:108[0] 61:109[0] 62:110[0] +#Matching REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... against "" +# 0 <> <> | 1:DEFINEP(3) +# 0 <> <> | 3:IFTHEN(14) +# 0 <> <> | 14:DEFINEP(16) +# 0 <> <> | 16:IFTHEN(30) +# 0 <> <> | 30:DEFINEP(32) +# 0 <> <> | 32:IFTHEN(46) +# 0 <> <> | 46:DEFINEP(48) +# 0 <> <> | 48:IFTHEN(62) +# 0 <> <> | 62:END(0) +#Match successful! +%MATCHED% +#Freeing REx: "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... @@ -171,9 +171,11 @@ struct RExC_state_t { const char *lastparse; I32 lastnum; AV *paren_name_list; /* idx -> name */ + U32 study_chunk_recursed_count; #define RExC_lastparse (pRExC_state->lastparse) #define RExC_lastnum (pRExC_state->lastnum) #define RExC_paren_name_list (pRExC_state->paren_name_list) +#define RExC_study_chunk_recursed_count (pRExC_state->study_chunk_recursed_count) #endif }; @@ -3637,6 +3639,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, fake_study_recurse: + DEBUG_r( + RExC_study_chunk_recursed_count++; + ); while ( scan && OP(scan) != END && scan < last ){ UV min_subtract = 0; /* How mmany chars to subtract from the minimum node length to get a real minimum (because @@ -3646,8 +3651,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, DEBUG_OPTIMISE_MORE_r( { PerlIO_printf(Perl_debug_log, - "%*sstudy_chunk stopparen=%ld depth=%lu recursed_depth=%lu ", + "%*sstudy_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu ", ((int) depth*2), "", (long)stopparen, + (unsigned long)RExC_study_chunk_recursed_count, (unsigned long)depth, (unsigned long)recursed_depth); if (recursed_depth) { U32 i; @@ -4179,9 +4185,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * However if we are not in SCF_DO_SUBSTR mode then there is * no point in doing this, and it can cause a serious slowdown. * See RT #122283. - * Note the !is_inf and !is_inf_internal flags may be - * superfluous for this decision, however I am including the - * logic anyway as I am pretty sure it wont cause any harm. * Note also that this was a workaround for the core problem * which was that during compilation logic the excessive * recursion resulted in slowly consuming all the memory on @@ -6798,6 +6801,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, reStudy: r->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0; + DEBUG_r( + RExC_study_chunk_recursed_count= 0; + ); Zero(r->substrs, 1, struct reg_substr_data); if (RExC_study_chunk_recursed) Zero(RExC_study_chunk_recursed, @@ -7267,7 +7273,10 @@ reStudy: } Newxz(r->offs, RExC_npar, regexp_paren_pair); /* assume we don't need to swap parens around before we match */ - + DEBUG_TEST_r({ + PerlIO_printf(Perl_debug_log,"study_chunk_recursed_count: %lu\n", + RExC_study_chunk_recursed_count); + }); DEBUG_DUMP_r({ DEBUG_RExC_seen(); PerlIO_printf(Perl_debug_log,"Final program:\n"); @@ -856,6 +856,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_COMPILE_TRIE 0x000004 #define RE_DEBUG_COMPILE_DUMP 0x000008 #define RE_DEBUG_COMPILE_FLAGS 0x000010 +#define RE_DEBUG_COMPILE_TEST 0x000020 /* Execute */ #define RE_DEBUG_EXECUTE_MASK 0x00FF00 @@ -891,6 +892,8 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x ) #define DEBUG_FLAGS_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x ) +#define DEBUG_TEST_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_COMPILE_TEST) x ) /* Execute */ #define DEBUG_EXECUTE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x ) |