diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | doc/pcreapi.3 | 2 | ||||
-rw-r--r-- | pcre.h.in | 9 | ||||
-rw-r--r-- | pcre_byte_order.c | 19 | ||||
-rw-r--r-- | pcre_compile.c | 100 | ||||
-rw-r--r-- | pcre_exec.c | 35 | ||||
-rw-r--r-- | pcre_fullinfo.c | 12 | ||||
-rw-r--r-- | pcre_internal.h | 153 | ||||
-rw-r--r-- | pcretest.c | 45 | ||||
-rw-r--r-- | testdata/saved16 | bin | 70 -> 86 bytes | |||
-rw-r--r-- | testdata/saved16BE-1 | bin | 402 -> 410 bytes | |||
-rw-r--r-- | testdata/saved16BE-2 | bin | 336 -> 344 bytes | |||
-rw-r--r-- | testdata/saved16LE-1 | bin | 402 -> 410 bytes | |||
-rw-r--r-- | testdata/saved16LE-2 | bin | 336 -> 344 bytes | |||
-rw-r--r-- | testdata/saved32 | bin | 100 -> 108 bytes | |||
-rw-r--r-- | testdata/saved32BE-1 | bin | 544 -> 552 bytes | |||
-rw-r--r-- | testdata/saved32BE-2 | bin | 448 -> 456 bytes | |||
-rw-r--r-- | testdata/saved32LE-1 | bin | 544 -> 552 bytes | |||
-rw-r--r-- | testdata/saved32LE-2 | bin | 448 -> 456 bytes | |||
-rw-r--r-- | testdata/saved8 | bin | 61 -> 77 bytes | |||
-rw-r--r-- | testdata/testinput2 | 35 | ||||
-rw-r--r-- | testdata/testinput21 | 12 | ||||
-rw-r--r-- | testdata/testinput22 | 7 | ||||
-rw-r--r-- | testdata/testoutput18-16 | 4 | ||||
-rw-r--r-- | testdata/testoutput18-32 | 6 | ||||
-rw-r--r-- | testdata/testoutput2 | 149 | ||||
-rw-r--r-- | testdata/testoutput21-16 | 12 | ||||
-rw-r--r-- | testdata/testoutput21-32 | 12 | ||||
-rw-r--r-- | testdata/testoutput22-16 | 7 | ||||
-rw-r--r-- | testdata/testoutput22-32 | 7 | ||||
-rw-r--r-- | testdata/testoutput5 | 2 |
31 files changed, 464 insertions, 169 deletions
@@ -137,6 +137,11 @@ Version 8.33 xx-xxxx-201x 36. In the interpreter, maximizing pattern repetitions for characters and character types now use tail recursion, which reduces stack usage. + +37. The value of the max lookbehind was not correctly preserved if a compiled + and saved regex was reloaded on a host of different endianness. + +38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION). Version 8.32 30-November-2012 diff --git a/doc/pcreapi.3 b/doc/pcreapi.3 index 42364ee..94912a5 100644 --- a/doc/pcreapi.3 +++ b/doc/pcreapi.3 @@ -962,7 +962,7 @@ have fallen out of use. To avoid confusion, they have not been re-used. name/number or by a plain number 58 a numbered reference must not be zero 59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) - 60 (*VERB) not recognized + 60 (*VERB) not recognized or malformed 61 number is too big 62 subpattern name expected 63 digit expected after (?+ @@ -206,6 +206,7 @@ with J. */ #define PCRE_ERROR_DFA_BADRESTART (-30) #define PCRE_ERROR_JIT_BADOPTION (-31) #define PCRE_ERROR_BADLENGTH (-32) +#define PCRE_ERROR_UNSET (-33) /* Specific error codes for UTF-8 validity checks */ @@ -270,10 +271,12 @@ with J. */ #define PCRE_INFO_JIT 16 #define PCRE_INFO_JITSIZE 17 #define PCRE_INFO_MAXLOOKBEHIND 18 -#define PCRE_INFO_FIRSTCHARACTER 19 -#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 +#define PCRE_INFO_FIRSTCHARACTER 19 +#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 #define PCRE_INFO_REQUIREDCHAR 21 -#define PCRE_INFO_REQUIREDCHARFLAGS 22 +#define PCRE_INFO_REQUIREDCHARFLAGS 22 +#define PCRE_INFO_MATCHLIMIT 23 +#define PCRE_INFO_RECURSIONLIMIT 24 /* Request types for pcre_config(). Do not re-arrange, in order to remain compatible. */ diff --git a/pcre_byte_order.c b/pcre_byte_order.c index 472eb38..01cbca3 100644 --- a/pcre_byte_order.c +++ b/pcre_byte_order.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -126,14 +126,15 @@ if (re->magic_number == MAGIC_NUMBER) } if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; -if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; +if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; re->magic_number = MAGIC_NUMBER; re->size = swap_uint32(re->size); re->options = swap_uint32(re->options); -re->flags = swap_uint16(re->flags); -re->top_bracket = swap_uint16(re->top_bracket); -re->top_backref = swap_uint16(re->top_backref); +re->flags = swap_uint32(re->flags); +re->limit_match = swap_uint32(re->limit_match); +re->limit_recursion = swap_uint32(re->limit_recursion); + #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 re->first_char = swap_uint16(re->first_char); re->req_char = swap_uint16(re->req_char); @@ -141,15 +142,15 @@ re->req_char = swap_uint16(re->req_char); re->first_char = swap_uint32(re->first_char); re->req_char = swap_uint32(re->req_char); #endif + +re->max_lookbehind = swap_uint16(re->max_lookbehind); +re->top_bracket = swap_uint16(re->top_bracket); +re->top_backref = swap_uint16(re->top_backref); re->name_table_offset = swap_uint16(re->name_table_offset); re->name_entry_size = swap_uint16(re->name_entry_size); re->name_count = swap_uint16(re->name_count); re->ref_count = swap_uint16(re->ref_count); re->tables = tables; -#ifdef COMPILE_PCRE32 -re->dummy1 = swap_uint16(re->dummy1); -re->dummy2 = swap_uint16(re->dummy2); -#endif if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) { diff --git a/pcre_compile.c b/pcre_compile.c index c6fb875..8d5a592 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -487,7 +487,7 @@ static const char error_texts[] = "a numbered reference must not be zero\0" "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" /* 60 */ - "(*VERB) not recognized\0" + "(*VERB) not recognized or malformed\0" "number is too big\0" "subpattern name expected\0" "digit expected after (?+\0" @@ -798,7 +798,7 @@ Otherwise further processing may be required. */ #ifndef EBCDIC /* ASCII/UTF-8 coding */ /* Not alphanumeric */ else if (c < CHAR_0 || c > CHAR_z) {} -else if ((i = escapes[c - CHAR_0]) != 0) +else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } #else /* EBCDIC coding */ @@ -1410,11 +1410,11 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) { /* Handle specials such as (*SKIP) or (*UTF8) etc. */ - if (ptr[1] == CHAR_ASTERISK) + if (ptr[1] == CHAR_ASTERISK) { ptr += 2; while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - } + } /* Handle a normal, unnamed capturing parenthesis. */ @@ -3091,7 +3091,7 @@ value is a character, a negative value is an escape value. */ if (*ptr == CHAR_BACKSLASH) { int temperrorcode = 0; - escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, + escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE); if (temperrorcode != 0) return FALSE; ptr++; /* Point after the escape sequence */ @@ -4275,7 +4275,7 @@ for (;; ptr++) if (c == CHAR_BACKSLASH) { - escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, + escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE); if (*errorcodeptr != 0) goto FAILED; if (escape == 0) c = ec; @@ -5725,7 +5725,7 @@ for (;; ptr++) /* ------------------------------------------------------------ */ case CHAR_LEFT_PARENTHESIS: bravalue = OP_COND; /* Conditional group */ - tempptr = ptr; + tempptr = ptr; /* A condition can be an assertion, a number (referring to a numbered group), a name (referring to a named group), or 'R', referring to @@ -5739,26 +5739,26 @@ for (;; ptr++) by digits), and (b) a number could be a name that consists of digits. In both cases, we look for a name first; if not found, we try the other cases. - - For compatibility with auto-callouts, we allow a callout to be - specified before a condition that is an assertion. First, check for the - syntax of a callout; if found, adjust the temporary pointer that is + + For compatibility with auto-callouts, we allow a callout to be + specified before a condition that is an assertion. First, check for the + syntax of a callout; if found, adjust the temporary pointer that is used to check for an assertion condition. That's all that is needed! */ - + if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) { for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; if (ptr[i] == CHAR_RIGHT_PARENTHESIS) - tempptr += i + 1; - } + tempptr += i + 1; + } /* For conditions that are assertions, check the syntax, and then exit the switch. This will take control down to where bracketed groups, including assertions, are processed. */ - if (tempptr[1] == CHAR_QUESTION_MARK && + if (tempptr[1] == CHAR_QUESTION_MARK && (tempptr[2] == CHAR_EQUALS_SIGN || - tempptr[2] == CHAR_EXCLAMATION_MARK || + tempptr[2] == CHAR_EXCLAMATION_MARK || tempptr[2] == CHAR_LESS_THAN_SIGN)) break; @@ -6901,7 +6901,7 @@ for (;; ptr++) else { - if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && + if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && cd->max_lookbehind == 0) cd->max_lookbehind = 1; #ifdef SUPPORT_UCP @@ -7766,8 +7766,10 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr, { REAL_PCRE *re; int length = 1; /* For final END opcode */ -pcre_uint32 firstchar, reqchar; pcre_int32 firstcharflags, reqcharflags; +pcre_uint32 firstchar, reqchar; +pcre_uint32 limit_match = PCRE_UINT32_MAX; +pcre_uint32 limit_recursion = PCRE_UINT32_MAX; int newline; int errorcode = 0; int skipatstart = 0; @@ -7831,19 +7833,16 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) errorcode = ERR17; goto PCRE_EARLY_ERROR_RETURN; } - -/* If PCRE_NEVER_UTF is set, remember it. As this option steals a bit that is -also used for execution options, flatten it just in case. */ -if ((options & PCRE_NEVER_UTF) != 0) - { - never_utf = TRUE; - options &= ~PCRE_NEVER_UTF; - } +/* If PCRE_NEVER_UTF is set, remember it. */ + +if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE; /* Check for global one-time settings at the start of the pattern, and remember the offset for later. */ +cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */ + while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && ptr[skipatstart+1] == CHAR_ASTERISK) { @@ -7874,6 +7873,44 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0) + { + pcre_uint32 c = 0; + int p = skipatstart + 14; + while (isdigit(ptr[p])) + { + if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */ + c = c*10 + ptr[p++] - CHAR_0; + } + if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; + if (c < limit_match) + { + limit_match = c; + cd->external_flags |= PCRE_MLSET; + } + skipatstart = p; + continue; + } + + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0) + { + pcre_uint32 c = 0; + int p = skipatstart + 18; + while (isdigit(ptr[p])) + { + if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */ + c = c*10 + ptr[p++] - CHAR_0; + } + if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; + if (c < limit_recursion) + { + limit_recursion = c; + cd->external_flags |= PCRE_RLSET; + } + skipatstart = p; + continue; + } + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) @@ -7896,14 +7933,14 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; else break; } - + /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ utf = (options & PCRE_UTF8) != 0; if (utf && never_utf) { errorcode = ERR78; goto PCRE_EARLY_ERROR_RETURN2; - } + } /* Can't support UTF unless PCRE has been compiled to include the code. The return of an error code from PRIV(valid_utf)() is a new feature, introduced in @@ -8026,7 +8063,6 @@ cd->req_varyopt = 0; cd->assert_depth = 0; cd->max_lookbehind = 0; cd->external_options = options; -cd->external_flags = 0; cd->open_caps = NULL; /* Now do the pre-compile. On error, errorcode will be set non-zero, so we @@ -8076,6 +8112,8 @@ re->magic_number = MAGIC_NUMBER; re->size = (int)size; re->options = cd->external_options; re->flags = cd->external_flags; +re->limit_match = limit_match; +re->limit_recursion = limit_recursion; re->first_char = 0; re->req_char = 0; re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); @@ -8085,7 +8123,9 @@ re->ref_count = 0; re->tables = (tables == PRIV(default_tables))? NULL : tables; re->nullpad = NULL; #ifdef COMPILE_PCRE32 -re->dummy1 = re->dummy2 = 0; +re->dummy = 0; +#else +re->dummy1 = re->dummy2 = re->dummy3 = 0; #endif /* The starting points of the name/number translation table and of the code are diff --git a/pcre_exec.c b/pcre_exec.c index 221ecf3..cc15ca3 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6511,6 +6511,30 @@ if (extra_data != NULL && extra_data->executable_jit != NULL && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0) { + /* A facility for setting the match limit in the regex was added; this puts + a value in the compiled block. (Similarly for recursion limit, but the JIT + does not make use of that.) Because the regex is not passed to jit_exec, we + fudge up an alternative extra block, because we must not modify the extra + block that the user has passed. */ + +#if defined COMPILE_PCRE8 + pcre_extra extra_data_copy; +#elif defined COMPILE_PCRE16 + pcre16_extra extra_data_copy; +#elif defined COMPILE_PCRE32 + pcre32_extra extra_data_copy; +#endif + + if ((re->flags & PCRE_MLSET) != 0 && + ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0 || + re->limit_match < extra_data->match_limit)) + { + extra_data_copy = *extra_data; + extra_data_copy.match_limit = re->limit_match; + extra_data_copy.flags |= PCRE_EXTRA_MATCH_LIMIT; + extra_data = &extra_data_copy; + } + rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length, start_offset, options, offsets, offsetcount); @@ -6540,6 +6564,8 @@ md->callout_data = NULL; tables = re->tables; +/* The two limit values override the defaults, whatever their value. */ + if (extra_data != NULL) { register unsigned int flags = extra_data->flags; @@ -6554,6 +6580,15 @@ if (extra_data != NULL) if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; } +/* Limits in the regex override only if they are smaller. */ + +if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit) + md->match_limit = re->limit_match; + +if ((re->flags & PCRE_RLSET) != 0 && + re->limit_recursion < md->match_limit_recursion) + md->match_limit_recursion = re->limit_recursion; + /* If the exec call supplied NULL for tables, use the inbuilt ones. This is a feature that makes it possible to save compiled regex and re-use them in other programs later. */ diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c index 02c9df4..7fad1ee 100644 --- a/pcre_fullinfo.c +++ b/pcre_fullinfo.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -221,6 +221,16 @@ switch (what) case PCRE_INFO_MAXLOOKBEHIND: *((int *)where) = re->max_lookbehind; break; + + case PCRE_INFO_MATCHLIMIT: + if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET; + *((unsigned long int *)where) = re->limit_match; + break; + + case PCRE_INFO_RECURSIONLIMIT: + if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET; + *((unsigned long int *)where) = re->limit_recursion; + break; default: return PCRE_ERROR_BADOPTION; } diff --git a/pcre_internal.h b/pcre_internal.h index 6306eb1..8fce7b0 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -194,23 +194,31 @@ preprocessor time in standard C environments. */ typedef unsigned char pcre_uint8; #if USHRT_MAX == 65535 - typedef unsigned short pcre_uint16; - typedef short pcre_int16; +typedef unsigned short pcre_uint16; +typedef short pcre_int16; +#define PCRE_UINT16_MAX USHRT_MAX +#define PCRE_INT16_MAX SHRT_MAX #elif UINT_MAX == 65535 - typedef unsigned int pcre_uint16; - typedef int pcre_int16; +typedef unsigned int pcre_uint16; +typedef int pcre_int16; +#define PCRE_UINT16_MAX UINT_MAX +#define PCRE_INT16_MAX INT_MAX #else -# error Cannot determine a type for 16-bit unsigned integers +#error Cannot determine a type for 16-bit integers #endif -#if UINT_MAX == 4294967295 - typedef unsigned int pcre_uint32; - typedef int pcre_int32; -#elif ULONG_MAX == 4294967295 - typedef unsigned long int pcre_uint32; - typedef long int pcre_int32; +#if UINT_MAX == 4294967295U +typedef unsigned int pcre_uint32; +typedef int pcre_int32; +#define PCRE_UINT32_MAX UINT_MAX +#define PCRE_INT32_MAX INT_MAX +#elif ULONG_MAX == 4294967295UL +typedef unsigned long int pcre_uint32; +typedef long int pcre_int32; +#define PCRE_UINT32_MAX ULONG_MAX +#define PCRE_INT32_MAX LONG_MAX #else -# error Cannot determine a type for 32-bit unsigned integers +#error Cannot determine a type for 32-bit integers #endif /* When checking for integer overflow in pcre_compile(), we need to handle @@ -1121,23 +1129,26 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ /* Private flags containing information about the compiled regex. They used to -live at the top end of the options word, but that got almost full, so now they -are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as -the restrictions on partial matching have been lifted. It remains for backwards +live at the top end of the options word, but that got almost full, so they were +moved to a 16-bit flags word - which got almost full, so now they are in a +32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the +restrictions on partial matching have been lifted. It remains for backwards compatibility. */ -#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */ -#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */ -#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */ -#define PCRE_FIRSTSET 0x0010 /* first_char is set */ -#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ -#define PCRE_REQCHSET 0x0040 /* req_byte is set */ -#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ -#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ -#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ -#define PCRE_JCHANGED 0x0400 /* j option used in regex */ -#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ -#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ +#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */ +#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */ +#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */ +#define PCRE_FIRSTSET 0x00000010 /* first_char is set */ +#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */ +#define PCRE_REQCHSET 0x00000040 /* req_byte is set */ +#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */ +#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */ +#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */ +#define PCRE_JCHANGED 0x00000400 /* j option used in regex */ +#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ +#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */ +#define PCRE_MLSET 0x00002000 /* match limit set by regex */ +#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */ #if defined COMPILE_PCRE8 #define PCRE_MODE PCRE_MODE8 @@ -1534,6 +1545,8 @@ a positive value. */ #define STRING_UTF_RIGHTPAR "UTF)" #define STRING_UCP_RIGHTPAR "UCP)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" +#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" +#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #else /* SUPPORT_UTF */ @@ -1795,6 +1808,8 @@ only. */ #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS +#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #endif /* SUPPORT_UTF */ @@ -2281,48 +2296,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, code vector run on as long as necessary after the end. We store an explicit offset to the name table so that if a regex is compiled on one host, saved, and then run on another where the size of pointers is different, all might still -be well. For the case of compiled-on-4 and run-on-8, we include an extra -pointer that is always NULL. For future-proofing, a few dummy fields were -originally included - even though you can never get this planning right - but -there is only one left now. - -NOTE NOTE NOTE: -Because people can now save and re-use compiled patterns, any additions to this -structure should be made at the end, and something earlier (e.g. a new -flag in the options or one of the dummy fields) should indicate that the new -fields are present. Currently PCRE always sets the dummy fields to zero. -NOTE NOTE NOTE +be well. + +The size of the structure must be a multiple of 8 bytes. For the case of +compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so +that there are an even number of pointers which therefore are a multiple of 8 +bytes. + +It is necessary to fork the struct for the 32 bit library, since it needs to +use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the +typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit +variants. + +*** WARNING *** +When new fields are added to these structures, remember to adjust the code in +pcre_byte_order.c that is concerned with swapping the byte order of the fields +when a compiled regex is reloaded on a host with different endianness. +*** WARNING *** +There is also similar byte-flipping code in pcretest.c, which is used for +testing the byte-flipping features. It must also be kept in step. +*** WARNING *** */ -#if defined COMPILE_PCRE8 -#define REAL_PCRE real_pcre -#elif defined COMPILE_PCRE16 -#define REAL_PCRE real_pcre16 -#elif defined COMPILE_PCRE32 -#define REAL_PCRE real_pcre32 -#endif - -/* It is necessary to fork the struct for 32 bit, since it needs to use - * pcre_uchar for first_char and req_char. Can't put an ifdef inside the - * typedef since pcretest needs access to the struct of the 8-, 16- - * and 32-bit variants. */ - typedef struct real_pcre8_or_16 { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ + pcre_uint32 flags; /* Private flags */ + pcre_uint32 limit_match; /* Limit set from regex */ + pcre_uint32 limit_recursion; /* Limit set from regex */ + pcre_uint16 first_char; /* Starting character */ + pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ pcre_uint16 top_bracket; /* Highest numbered group */ pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint16 first_char; /* Starting character */ - pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ + pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */ + pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */ + pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - const pcre_uint8 *nullpad; /* NULL padding */ + void *nullpad; /* NULL padding */ } real_pcre8_or_16; typedef struct real_pcre8_or_16 real_pcre; @@ -2332,22 +2348,31 @@ typedef struct real_pcre32 { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ + pcre_uint32 flags; /* Private flags */ + pcre_uint32 limit_match; /* Limit set from regex */ + pcre_uint32 limit_recursion; /* Limit set from regex */ + pcre_uint32 first_char; /* Starting character */ + pcre_uint32 req_char; /* This character must be seen */ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ pcre_uint16 top_bracket; /* Highest numbered group */ pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint32 first_char; /* Starting character */ - pcre_uint32 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ - pcre_uint16 dummy1; /* for later expansion */ - pcre_uint16 dummy2; /* for later expansion */ + pcre_uint16 dummy; /* To ensure size is a multiple of 8 */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - void *nullpad; /* for later expansion */ + void *nullpad; /* NULL padding */ } real_pcre32; +#if defined COMPILE_PCRE8 +#define REAL_PCRE real_pcre +#elif defined COMPILE_PCRE16 +#define REAL_PCRE real_pcre16 +#elif defined COMPILE_PCRE32 +#define REAL_PCRE real_pcre32 +#endif + /* Assert that the size of REAL_PCRE is divisible by 8 */ typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1]; @@ -2399,14 +2424,14 @@ typedef struct compile_data { int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ int workspace_size; /* Size of workspace */ - unsigned int bracount; /* Count of capturing parens as we compile */ + unsigned int bracount; /* Count of capturing parens as we compile */ int final_bracount; /* Saved value after first pass */ int max_lookbehind; /* Maximum lookbehind (characters) */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int assert_depth; /* Depth of nested assertions */ - int external_options; /* External (initial) options */ - int external_flags; /* External flag bits to be set */ + pcre_uint32 external_options; /* External (initial) options */ + pcre_uint32 external_flags; /* External flag bits to be set */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ @@ -2395,7 +2395,7 @@ else rc = PCRE_ERROR_BADMODE; #endif -if (rc < 0) +if (rc < 0 && rc != PCRE_ERROR_UNSET) { fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option); @@ -2471,14 +2471,18 @@ BOOL utf16_char = FALSE; re->magic_number = REVERSED_MAGIC_NUMBER; re->size = swap_uint32(re->size); re->options = swap_uint32(re->options); -re->flags = swap_uint16(re->flags); -re->top_bracket = swap_uint16(re->top_bracket); -re->top_backref = swap_uint16(re->top_backref); +re->flags = swap_uint32(re->flags); +re->limit_match = swap_uint32(re->limit_match); +re->limit_recursion = swap_uint32(re->limit_recursion); re->first_char = swap_uint16(re->first_char); re->req_char = swap_uint16(re->req_char); +re->max_lookbehind = swap_uint16(re->max_lookbehind); +re->top_bracket = swap_uint16(re->top_bracket); +re->top_backref = swap_uint16(re->top_backref); re->name_table_offset = swap_uint16(re->name_table_offset); re->name_entry_size = swap_uint16(re->name_entry_size); re->name_count = swap_uint16(re->name_count); +re->ref_count = swap_uint16(re->ref_count); if (extra != NULL) { @@ -2648,14 +2652,18 @@ int length = re->name_count * re->name_entry_size; re->magic_number = REVERSED_MAGIC_NUMBER; re->size = swap_uint32(re->size); re->options = swap_uint32(re->options); -re->flags = swap_uint16(re->flags); -re->top_bracket = swap_uint16(re->top_bracket); -re->top_backref = swap_uint16(re->top_backref); +re->flags = swap_uint32(re->flags); +re->limit_match = swap_uint32(re->limit_match); +re->limit_recursion = swap_uint32(re->limit_recursion); re->first_char = swap_uint32(re->first_char); re->req_char = swap_uint32(re->req_char); +re->max_lookbehind = swap_uint16(re->max_lookbehind); +re->top_bracket = swap_uint16(re->top_bracket); +re->top_backref = swap_uint16(re->top_backref); re->name_table_offset = swap_uint16(re->name_table_offset); re->name_entry_size = swap_uint16(re->name_entry_size); re->name_count = swap_uint16(re->name_count); +re->ref_count = swap_uint16(re->ref_count); if (extra != NULL) { @@ -3525,11 +3533,11 @@ while (!done) PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL); if (rc == PCRE_ERROR_BADMODE) { - pcre_uint16 flags_in_host_byte_order; + pcre_uint32 flags_in_host_byte_order; if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER) flags_in_host_byte_order = REAL_PCRE_FLAGS(re); else - flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re)); + flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re)); /* Simulate the result of the function call below. */ fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", @@ -4010,6 +4018,7 @@ while (!done) { unsigned long int all_options; pcre_uint32 first_char, need_char; + pcre_uint32 match_limit, recursion_limit; int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged, hascrorlf, maxlookbehind; int nameentrysize, namecount; @@ -4037,9 +4046,19 @@ while (!done) (int)size, (int)regex_gotten_store); fprintf(outfile, "Capturing subpattern count = %d\n", count); + if (backrefmax > 0) fprintf(outfile, "Max back reference = %d\n", backrefmax); + if (maxlookbehind > 0) + fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); + + if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0) + fprintf(outfile, "Match limit = %u\n", match_limit); + + if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0) + fprintf(outfile, "Recursion limit = %u\n", recursion_limit); + if (namecount > 0) { fprintf(outfile, "Named capturing subpatterns:\n"); @@ -4073,7 +4092,7 @@ while (!done) if (do_flip) all_options = swap_uint32(all_options); if (get_options == 0) fprintf(outfile, "No options\n"); - else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", @@ -4090,7 +4109,8 @@ while (!done) ((get_options & PCRE_UCP) != 0)? " ucp" : "", ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "", ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", - ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : ""); + ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "", + ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : ""); if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); @@ -4164,9 +4184,6 @@ while (!done) } } - if (maxlookbehind > 0) - fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); - /* Don't output study size; at present it is in any case a fixed value, but it varies, depending on the computer architecture, and so messes up the test suite. (And with the /F option, it might be diff --git a/testdata/saved16 b/testdata/saved16 Binary files differindex ff5b11d..583c434 100644 --- a/testdata/saved16 +++ b/testdata/saved16 diff --git a/testdata/saved16BE-1 b/testdata/saved16BE-1 Binary files differindex 297f2f2..e2e807d 100644 --- a/testdata/saved16BE-1 +++ b/testdata/saved16BE-1 diff --git a/testdata/saved16BE-2 b/testdata/saved16BE-2 Binary files differindex dade400..cc2718a 100644 --- a/testdata/saved16BE-2 +++ b/testdata/saved16BE-2 diff --git a/testdata/saved16LE-1 b/testdata/saved16LE-1 Binary files differindex deb4491..b037d49 100644 --- a/testdata/saved16LE-1 +++ b/testdata/saved16LE-1 diff --git a/testdata/saved16LE-2 b/testdata/saved16LE-2 Binary files differindex c8be985..d7034f7 100644 --- a/testdata/saved16LE-2 +++ b/testdata/saved16LE-2 diff --git a/testdata/saved32 b/testdata/saved32 Binary files differindex 255235d..5b6fe34 100644 --- a/testdata/saved32 +++ b/testdata/saved32 diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1 Binary files differindex 42af7b4..ebe62ca 100644 --- a/testdata/saved32BE-1 +++ b/testdata/saved32BE-1 diff --git a/testdata/saved32BE-2 b/testdata/saved32BE-2 Binary files differindex 68a896d..8168343 100644 --- a/testdata/saved32BE-2 +++ b/testdata/saved32BE-2 diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1 Binary files differindex a4044fd..e008f3a 100644 --- a/testdata/saved32LE-1 +++ b/testdata/saved32LE-1 diff --git a/testdata/saved32LE-2 b/testdata/saved32LE-2 Binary files differindex 8b35ffa..cf3bd73 100644 --- a/testdata/saved32LE-2 +++ b/testdata/saved32LE-2 diff --git a/testdata/saved8 b/testdata/saved8 Binary files differindex 9b63b1d..37d733e 100644 --- a/testdata/saved8 +++ b/testdata/saved8 diff --git a/testdata/testinput2 b/testdata/testinput2 index 972ecf3..6c82fb5 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3802,4 +3802,39 @@ backtracking verbs. --/ /-------------------------/ +/(*LIMIT_MATCH=12bc)abc/ + +/(*LIMIT_MATCH=4294967290)abc/ + +/(*LIMIT_RECURSION=4294967280)abc/I + +/(a+)*zz/ + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\q3000 + +/(a+)*zz/S- + aaaaaaaaaaaaaz\Q10 + +/(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\q60000 + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I + aaaaaaaaaaaaaz + +/(*LIMIT_MATCH=60000)(a+)*zz/I + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\q3000 + +/(*LIMIT_RECURSION=10)(a+)*zz/IS- + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\Q1000 + +/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS- + aaaaaaaaaaaaaz + +/(*LIMIT_RECURSION=1000)(a+)*zz/IS- + aaaaaaaaaaaaaz + aaaaaaaaaaaaaz\Q10 + /-- End of testinput2 --/ diff --git a/testdata/testinput21 b/testdata/testinput21 index 0f201ad..b285d63 100644 --- a/testdata/testinput21 +++ b/testdata/testinput21 @@ -1,9 +1,15 @@ -/-- Tests for reloading pre-compile patterns. The first one gives an error -right away. The others require the linke size to be 2. */ +/-- Tests for reloading pre-compiled patterns. The first one gives an error +right away, and can be any old pattern compiled in 8-bit mode ("abc" is +typical). The others require the link size to be 2. */x <!testsaved8 -/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/ +%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ + In 16-bit mode with options: S>testdata/saved16LE-1 + FS>testdata/saved16BE-1 + In 32-bit mode with options: S>testdata/saved32LE-1 + FS>testdata/saved32BE-1 +--%x <!testsaved16LE-1 diff --git a/testdata/testinput22 b/testdata/testinput22 index 46a1365..58239f1 100644 --- a/testdata/testinput22 +++ b/testdata/testinput22 @@ -1,6 +1,11 @@ /-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */ -/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8 +%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) + In 16-bit mode with options: S8>testdata/saved16LE-1 + FS8>testdata/saved16BE-1 + In 32-bit mode with options: S8>testdata/saved32LE-1 + FS8testdata/saved32BE-1 +--%8x <!testsaved16LE-2 diff --git a/testdata/testoutput18-16 b/testdata/testoutput18-16 index e91d841..3414a66 100644 --- a/testdata/testoutput18-16 +++ b/testdata/testoutput18-16 @@ -646,7 +646,7 @@ Need char = \x{de34} 0: \x{11234} /(*UTF-32)\x{11234}/ -Failed: (*VERB) not recognized at offset 5 +Failed: (*VERB) not recognized or malformed at offset 5 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 @@ -656,7 +656,7 @@ First char = 'a' Need char = 'b' /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I -Failed: (*VERB) not recognized at offset 12 +Failed: (*VERB) not recognized or malformed at offset 12 /\h/SI8 Capturing subpattern count = 0 diff --git a/testdata/testoutput18-32 b/testdata/testoutput18-32 index 1dba7bc..e917e3b 100644 --- a/testdata/testoutput18-32 +++ b/testdata/testoutput18-32 @@ -632,7 +632,7 @@ Error -10 (bad UTF-32 string) offset=0 reason=3 Error -10 (bad UTF-32 string) offset=0 reason=1 /(*UTF16)\x{11234}/ -Failed: (*VERB) not recognized at offset 5 +Failed: (*VERB) not recognized or malformed at offset 5 /(*UTF)\x{11234}/I Capturing subpattern count = 0 @@ -643,10 +643,10 @@ No need char 0: \x{11234} /(*UTF-32)\x{11234}/ -Failed: (*VERB) not recognized at offset 5 +Failed: (*VERB) not recognized or malformed at offset 5 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I -Failed: (*VERB) not recognized at offset 12 +Failed: (*VERB) not recognized or malformed at offset 12 /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index de64502..fd958c2 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -448,10 +448,10 @@ Need char = '=' /(?<!bar|cattle)foo/I Capturing subpattern count = 0 +Max lookbehind = 6 No options First char = 'f' Need char = 'o' -Max lookbehind = 6 foo 0: foo catfoo @@ -631,10 +631,10 @@ No match /\Aabc/Im Capturing subpattern count = 0 +Max lookbehind = 1 Options: anchored multiline No first char No need char -Max lookbehind = 1 /^abc/Im Capturing subpattern count = 0 @@ -657,19 +657,19 @@ No need char /(?<=foo)[ab]/IS Capturing subpattern count = 0 +Max lookbehind = 3 No options No first char No need char -Max lookbehind = 3 Subject length lower bound = 1 Starting byte set: a b /(?<!foo)(alpha|omega)/IS Capturing subpattern count = 1 +Max lookbehind = 3 No options No first char Need char = 'a' -Max lookbehind = 3 Subject length lower bound = 5 Starting byte set: a o @@ -683,11 +683,11 @@ Starting byte set: a b /(?<=foo\n)^bar/Im Capturing subpattern count = 0 +Max lookbehind = 4 Contains explicit CR or LF match Options: multiline No first char Need char = 'r' -Max lookbehind = 4 foo\nbarbar 0: bar ***Failers @@ -701,11 +701,11 @@ No match /^(?<=foo\n)bar/Im Capturing subpattern count = 0 +Max lookbehind = 4 Contains explicit CR or LF match Options: multiline First char at start or follows newline Need char = 'r' -Max lookbehind = 4 foo\nbarbar 0: bar ***Failers @@ -744,10 +744,10 @@ Failed: lookbehind assertion is not fixed length at offset 13 /(?<=bullock|donkey)-cart/I Capturing subpattern count = 0 +Max lookbehind = 7 No options First char = '-' Need char = 't' -Max lookbehind = 7 the bullock-cart 0: -cart a donkey-cart race @@ -761,17 +761,17 @@ No match /(?<=ab(?i)x|y|z)/I Capturing subpattern count = 0 +Max lookbehind = 3 No options No first char No need char -Max lookbehind = 3 /(?>.*)(?<=(abcd)|(xyz))/I Capturing subpattern count = 2 +Max lookbehind = 4 No options No first char No need char -Max lookbehind = 4 alphabetabcd 0: alphabetabcd 1: abcd @@ -782,10 +782,10 @@ Max lookbehind = 4 /(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I Capturing subpattern count = 0 +Max lookbehind = 4 No options First char = 'Z' Need char = 'Z' -Max lookbehind = 4 abxyZZ 0: ZZ abXyZZ @@ -811,10 +811,10 @@ No match /(?<!(foo)a)bar/I Capturing subpattern count = 1 +Max lookbehind = 4 No options First char = 'b' Need char = 'r' -Max lookbehind = 4 bar 0: bar foobbar @@ -1197,10 +1197,10 @@ No need char /\Biss\B/I+ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 'i' Need char = 's' -Max lookbehind = 1 Mississippi 0: iss 0+ issippi @@ -1218,20 +1218,20 @@ Need char = 's' /\Biss\B/IG+ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 'i' Need char = 's' -Max lookbehind = 1 Mississippi 0: iss 0+ issippi /\Biss\B/Ig+ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 'i' Need char = 's' -Max lookbehind = 1 Mississippi 0: iss 0+ issippi @@ -1244,10 +1244,10 @@ No match /(?<=[Ms])iss/Ig+ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 'i' Need char = 's' -Max lookbehind = 1 Mississippi 0: iss 0+ issippi @@ -1256,10 +1256,10 @@ Max lookbehind = 1 /(?<=[Ms])iss/IG+ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 'i' Need char = 's' -Max lookbehind = 1 Mississippi 0: iss 0+ issippi @@ -1437,10 +1437,10 @@ Need char = 'r' /...(?<=abc)/I Capturing subpattern count = 0 +Max lookbehind = 3 No options No first char No need char -Max lookbehind = 3 /abc(?!pqr)/I Capturing subpattern count = 0 @@ -3218,10 +3218,10 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = '8' Need char = 'X' -Max lookbehind = 1 |\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|IDZ ------------------------------------------------------------------ @@ -3232,10 +3232,10 @@ Max lookbehind = 1 End ------------------------------------------------------------------ Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = '$' Need char = 'X' -Max lookbehind = 1 /(.*)\d+\1/I Capturing subpattern count = 1 @@ -3748,10 +3748,10 @@ Callout 2: last capture = -1 /(?<=(abc)(?C))xyz/I Capturing subpattern count = 1 +Max lookbehind = 3 No options First char = 'x' Need char = 'z' -Max lookbehind = 3 abcxyz\C+ Callout 0: last capture = 1 0: <unset> @@ -5396,19 +5396,19 @@ Need char = '3' /\b.*/I Capturing subpattern count = 0 +Max lookbehind = 1 No options No first char No need char -Max lookbehind = 1 ab cd\>1 0: cd /\b.*/Is Capturing subpattern count = 0 +Max lookbehind = 1 Options: dotall No first char No need char -Max lookbehind = 1 ab cd\>1 0: cd @@ -8788,7 +8788,7 @@ No match 1: \x0a /a(*CR)b/ -Failed: (*VERB) not recognized at offset 5 +Failed: (*VERB) not recognized or malformed at offset 5 /(*CR)a.b/ a\nb @@ -11627,19 +11627,19 @@ No match /\btype\b\W*?\btext\b\W*?\bjavascript\b/IS Capturing subpattern count = 0 +Max lookbehind = 1 No options First char = 't' Need char = 't' -Max lookbehind = 1 Subject length lower bound = 18 No set of starting bytes /\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/IS Capturing subpattern count = 0 +Max lookbehind = 1 No options No first char No need char -Max lookbehind = 1 Subject length lower bound = 8 Starting byte set: < o t u @@ -12236,17 +12236,17 @@ settings of the anchored and startline bits. --/ /(?>.*?a)(?<=ba)/I Capturing subpattern count = 0 +Max lookbehind = 2 No options No first char Need char = 'a' -Max lookbehind = 2 /(?:.*?a)(?<=ba)/I Capturing subpattern count = 0 +Max lookbehind = 2 No options First char at start or follows newline Need char = 'a' -Max lookbehind = 2 /.*?a(*PRUNE)b/I Capturing subpattern count = 0 @@ -12292,17 +12292,17 @@ No need char /(?>.*?)(?<=(abcd)|(wxyz))/I Capturing subpattern count = 2 +Max lookbehind = 4 No options No first char No need char -Max lookbehind = 4 /(?>.*)(?<=(abcd)|(wxyz))/I Capturing subpattern count = 2 +Max lookbehind = 4 No options No first char No need char -Max lookbehind = 4 "(?>.*)foo"I Capturing subpattern count = 0 @@ -12574,4 +12574,95 @@ No match /-------------------------/ +/(*LIMIT_MATCH=12bc)abc/ +Failed: (*VERB) not recognized or malformed at offset 7 + +/(*LIMIT_MATCH=4294967290)abc/ +Failed: (*VERB) not recognized or malformed at offset 7 + +/(*LIMIT_RECURSION=4294967280)abc/I +Capturing subpattern count = 0 +Recursion limit = 4294967280 +No options +First char = 'a' +Need char = 'c' + +/(a+)*zz/ + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\q3000 +Error -8 (match limit exceeded) + +/(a+)*zz/S- + aaaaaaaaaaaaaz\Q10 +Error -21 (recursion limit exceeded) + +/(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +No options +No first char +Need char = 'z' + aaaaaaaaaaaaaz +Error -8 (match limit exceeded) + aaaaaaaaaaaaaz\q60000 +Error -8 (match limit exceeded) + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 3000 +No options +No first char +Need char = 'z' + aaaaaaaaaaaaaz +Error -8 (match limit exceeded) + +/(*LIMIT_MATCH=60000)(a+)*zz/I +Capturing subpattern count = 1 +Match limit = 60000 +No options +No first char +Need char = 'z' + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\q3000 +Error -8 (match limit exceeded) + +/(*LIMIT_RECURSION=10)(a+)*zz/IS- +Capturing subpattern count = 1 +Recursion limit = 10 +No options +No first char +Need char = 'z' +Subject length lower bound = 2 +Starting byte set: a z + aaaaaaaaaaaaaz +Error -21 (recursion limit exceeded) + aaaaaaaaaaaaaz\Q1000 +Error -21 (recursion limit exceeded) + +/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS- +Capturing subpattern count = 1 +Recursion limit = 10 +No options +No first char +Need char = 'z' +Subject length lower bound = 2 +Starting byte set: a z + aaaaaaaaaaaaaz +Error -21 (recursion limit exceeded) + +/(*LIMIT_RECURSION=1000)(a+)*zz/IS- +Capturing subpattern count = 1 +Recursion limit = 1000 +No options +No first char +Need char = 'z' +Subject length lower bound = 2 +Starting byte set: a z + aaaaaaaaaaaaaz +No match + aaaaaaaaaaaaaz\Q10 +Error -21 (recursion limit exceeded) + /-- End of testinput2 --/ diff --git a/testdata/testoutput21-16 b/testdata/testoutput21-16 index 0510798..e831888 100644 --- a/testdata/testoutput21-16 +++ b/testdata/testoutput21-16 @@ -1,5 +1,6 @@ -/-- Tests for reloading pre-compile patterns. The first one gives an error -right away. The others require the linke size to be 2. */ +/-- Tests for reloading pre-compiled patterns. The first one gives an error +right away, and can be any old pattern compiled in 8-bit mode ("abc" is +typical). The others require the link size to be 2. */x <!testsaved8 Compiled pattern loaded from testsaved8 @@ -7,7 +8,12 @@ No study data Error -28 from pcre16_fullinfo(0) Running in 16-bit mode but pattern was compiled in 8-bit mode -/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/ +%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ + In 16-bit mode with options: S>testdata/saved16LE-1 + FS>testdata/saved16BE-1 + In 32-bit mode with options: S>testdata/saved32LE-1 + FS>testdata/saved32BE-1 +--%x <!testsaved16LE-1 Compiled pattern loaded from testsaved16LE-1 diff --git a/testdata/testoutput21-32 b/testdata/testoutput21-32 index 47242f0..c6e8f6c 100644 --- a/testdata/testoutput21-32 +++ b/testdata/testoutput21-32 @@ -1,5 +1,6 @@ -/-- Tests for reloading pre-compile patterns. The first one gives an error -right away. The others require the linke size to be 2. */ +/-- Tests for reloading pre-compiled patterns. The first one gives an error +right away, and can be any old pattern compiled in 8-bit mode ("abc" is +typical). The others require the link size to be 2. */x <!testsaved8 Compiled pattern loaded from testsaved8 @@ -7,7 +8,12 @@ No study data Error -28 from pcre32_fullinfo(0) Running in 32-bit mode but pattern was compiled in 8-bit mode -/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/ +%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ + In 16-bit mode with options: S>testdata/saved16LE-1 + FS>testdata/saved16BE-1 + In 32-bit mode with options: S>testdata/saved32LE-1 + FS>testdata/saved32BE-1 +--%x <!testsaved16LE-1 Compiled pattern loaded from testsaved16LE-1 diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16 index 20ef6b8..b2c673d 100644 --- a/testdata/testoutput22-16 +++ b/testdata/testoutput22-16 @@ -1,6 +1,11 @@ /-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */ -/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8 +%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) + In 16-bit mode with options: S8>testdata/saved16LE-1 + FS8>testdata/saved16BE-1 + In 32-bit mode with options: S8>testdata/saved32LE-1 + FS8testdata/saved32BE-1 +--%8x <!testsaved16LE-2 Compiled pattern loaded from testsaved16LE-2 diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32 index 7fa69b0..5a1d0da 100644 --- a/testdata/testoutput22-32 +++ b/testdata/testoutput22-32 @@ -1,6 +1,11 @@ /-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */ -/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8 +%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) + In 16-bit mode with options: S8>testdata/saved16LE-1 + FS8>testdata/saved16BE-1 + In 32-bit mode with options: S8>testdata/saved32LE-1 + FS8testdata/saved32BE-1 +--%8x <!testsaved16LE-2 Compiled pattern loaded from testsaved16LE-2 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 0e84054..d583119 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -1818,10 +1818,10 @@ Partial match: \x{0d}\x{0d} /(?<=\x{1234}\x{1234})\bxy/I8 Capturing subpattern count = 0 +Max lookbehind = 2 Options: utf First char = 'x' Need char = 'y' -Max lookbehind = 2 /(?<!^)ETA/8 ETA |