summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-04-24 12:07:09 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-04-24 12:07:09 +0000
commit 4d6103a376b6adb2b15ee14fb5b9a245dfbd05f6 (patch)
tree 5627a1d00d0e0175b013339315ac96610f22f2e5
parent e234df3658f7779cd093c7179e7ebd008bbc8bb8 (diff)
download pcre-4d6103a376b6adb2b15ee14fb5b9a245dfbd05f6.tar.gz
Code (but not yet documentation) for *LIMIT_MATCH and *LIMIT_RECURSION.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1313 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- doc/pcreapi.3 2
-rw-r--r-- pcre.h.in 9
-rw-r--r-- pcre_byte_order.c 19
-rw-r--r-- pcre_compile.c 100
-rw-r--r-- pcre_exec.c 35
-rw-r--r-- pcre_fullinfo.c 12
-rw-r--r-- pcre_internal.h 153
-rw-r--r-- pcretest.c 45
-rw-r--r-- testdata/saved16 bin 70 -> 86 bytes
-rw-r--r-- testdata/saved16BE-1 bin 402 -> 410 bytes
-rw-r--r-- testdata/saved16BE-2 bin 336 -> 344 bytes
-rw-r--r-- testdata/saved16LE-1 bin 402 -> 410 bytes
-rw-r--r-- testdata/saved16LE-2 bin 336 -> 344 bytes
-rw-r--r-- testdata/saved32 bin 100 -> 108 bytes
-rw-r--r-- testdata/saved32BE-1 bin 544 -> 552 bytes
-rw-r--r-- testdata/saved32BE-2 bin 448 -> 456 bytes
-rw-r--r-- testdata/saved32LE-1 bin 544 -> 552 bytes
-rw-r--r-- testdata/saved32LE-2 bin 448 -> 456 bytes
-rw-r--r-- testdata/saved8 bin 61 -> 77 bytes
-rw-r--r-- testdata/testinput2 35
-rw-r--r-- testdata/testinput21 12
-rw-r--r-- testdata/testinput22 7
-rw-r--r-- testdata/testoutput18-16 4
-rw-r--r-- testdata/testoutput18-32 6
-rw-r--r-- testdata/testoutput2 149
-rw-r--r-- testdata/testoutput21-16 12
-rw-r--r-- testdata/testoutput21-32 12
-rw-r--r-- testdata/testoutput22-16 7
-rw-r--r-- testdata/testoutput22-32 7
-rw-r--r-- testdata/testoutput5 2
31 files changed, 464 insertions, 169 deletions
diff --git a/ChangeLog b/ChangeLog
index a65b5d1..c277e03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -137,6 +137,11 @@ Version 8.33 xx-xxxx-201x
36. In the interpreter, maximizing pattern repetitions for characters and
character types now use tail recursion, which reduces stack usage.
+
+37. The value of the max lookbehind was not correctly preserved if a compiled
+ and saved regex was reloaded on a host of different endianness.
+
+38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION).
Version 8.32 30-November-2012
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 42364ee..94912a5 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -962,7 +962,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
name/number or by a plain number
58 a numbered reference must not be zero
59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
- 60 (*VERB) not recognized
+ 60 (*VERB) not recognized or malformed
61 number is too big
62 subpattern name expected
63 digit expected after (?+
diff --git a/pcre.h.in b/pcre.h.in
index f86f045..d676633 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -206,6 +206,7 @@ with J. */
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
+#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
@@ -270,10 +271,12 @@ with J. */
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
-#define PCRE_INFO_FIRSTCHARACTER 19
-#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
+#define PCRE_INFO_FIRSTCHARACTER 19
+#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
-#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_MATCHLIMIT 23
+#define PCRE_INFO_RECURSIONLIMIT 24
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
diff --git a/pcre_byte_order.c b/pcre_byte_order.c
index 472eb38..01cbca3 100644
--- a/pcre_byte_order.c
+++ b/pcre_byte_order.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -126,14 +126,15 @@ if (re->magic_number == MAGIC_NUMBER)
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
-if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
-re->top_bracket = swap_uint16(re->top_bracket);
-re->top_backref = swap_uint16(re->top_backref);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
+
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
@@ -141,15 +142,15 @@ re->req_char = swap_uint16(re->req_char);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
#endif
+
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
-#ifdef COMPILE_PCRE32
-re->dummy1 = swap_uint16(re->dummy1);
-re->dummy2 = swap_uint16(re->dummy2);
-#endif
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
diff --git a/pcre_compile.c b/pcre_compile.c
index c6fb875..8d5a592 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -487,7 +487,7 @@ static const char error_texts[] =
"a numbered reference must not be zero\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
- "(*VERB) not recognized\0"
+ "(*VERB) not recognized or malformed\0"
"number is too big\0"
"subpattern name expected\0"
"digit expected after (?+\0"
@@ -798,7 +798,7 @@ Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0)
+else if ((i = escapes[c - CHAR_0]) != 0)
{ if (i > 0) c = (pcre_uint32)i; else escape = -i; }
#else /* EBCDIC coding */
@@ -1410,11 +1410,11 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
{
/* Handle specials such as (*SKIP) or (*UTF8) etc. */
- if (ptr[1] == CHAR_ASTERISK)
+ if (ptr[1] == CHAR_ASTERISK)
{
ptr += 2;
while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- }
+ }
/* Handle a normal, unnamed capturing parenthesis. */
@@ -3091,7 +3091,7 @@ value is a character, a negative value is an escape value. */
if (*ptr == CHAR_BACKSLASH)
{
int temperrorcode = 0;
- escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
+ escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
FALSE);
if (temperrorcode != 0) return FALSE;
ptr++; /* Point after the escape sequence */
@@ -4275,7 +4275,7 @@ for (;; ptr++)
if (c == CHAR_BACKSLASH)
{
- escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+ escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
TRUE);
if (*errorcodeptr != 0) goto FAILED;
if (escape == 0) c = ec;
@@ -5725,7 +5725,7 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
- tempptr = ptr;
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
group), a name (referring to a named group), or 'R', referring to
@@ -5739,26 +5739,26 @@ for (;; ptr++)
by digits), and (b) a number could be a name that consists of digits.
In both cases, we look for a name first; if not found, we try the other
cases.
-
- For compatibility with auto-callouts, we allow a callout to be
- specified before a condition that is an assertion. First, check for the
- syntax of a callout; if found, adjust the temporary pointer that is
+
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
used to check for an assertion condition. That's all that is needed! */
-
+
if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
{
for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
- tempptr += i + 1;
- }
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (tempptr[1] == CHAR_QUESTION_MARK &&
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
(tempptr[2] == CHAR_EQUALS_SIGN ||
- tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
@@ -6901,7 +6901,7 @@ for (;; ptr++)
else
{
- if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
@@ -7766,8 +7766,10 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
{
REAL_PCRE *re;
int length = 1; /* For final END opcode */
-pcre_uint32 firstchar, reqchar;
pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 firstchar, reqchar;
+pcre_uint32 limit_match = PCRE_UINT32_MAX;
+pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
int newline;
int errorcode = 0;
int skipatstart = 0;
@@ -7831,19 +7833,16 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
errorcode = ERR17;
goto PCRE_EARLY_ERROR_RETURN;
}
-
-/* If PCRE_NEVER_UTF is set, remember it. As this option steals a bit that is
-also used for execution options, flatten it just in case. */
-if ((options & PCRE_NEVER_UTF) != 0)
- {
- never_utf = TRUE;
- options &= ~PCRE_NEVER_UTF;
- }
+/* If PCRE_NEVER_UTF is set, remember it. */
+
+if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
/* Check for global one-time settings at the start of the pattern, and remember
the offset for later. */
+cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */
+
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
{
@@ -7874,6 +7873,44 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
{ skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 14;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_match)
+ {
+ limit_match = c;
+ cd->external_flags |= PCRE_MLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 18;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_recursion)
+ {
+ limit_recursion = c;
+ cd->external_flags |= PCRE_RLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
@@ -7896,14 +7933,14 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
else break;
}
-
+
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
if (utf && never_utf)
{
errorcode = ERR78;
goto PCRE_EARLY_ERROR_RETURN2;
- }
+ }
/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
@@ -8026,7 +8063,6 @@ cd->req_varyopt = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
-cd->external_flags = 0;
cd->open_caps = NULL;
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
@@ -8076,6 +8112,8 @@ re->magic_number = MAGIC_NUMBER;
re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
+re->limit_match = limit_match;
+re->limit_recursion = limit_recursion;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@@ -8085,7 +8123,9 @@ re->ref_count = 0;
re->tables = (tables == PRIV(default_tables))? NULL : tables;
re->nullpad = NULL;
#ifdef COMPILE_PCRE32
-re->dummy1 = re->dummy2 = 0;
+re->dummy = 0;
+#else
+re->dummy1 = re->dummy2 = re->dummy3 = 0;
#endif
/* The starting points of the name/number translation table and of the code are
diff --git a/pcre_exec.c b/pcre_exec.c
index 221ecf3..cc15ca3 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6511,6 +6511,30 @@ if (extra_data != NULL
&& extra_data->executable_jit != NULL
&& (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
{
+ /* A match limit can be set within the regex itself; the value is stored in
+ the compiled block. (Similarly for the recursion limit, but the JIT does not
+ make use of that.) The regex is not passed to jit_exec, so the limit must
+ travel in the extra block. The user's extra block must not be modified, so
+ a local copy is made and altered instead. */
+
+#if defined COMPILE_PCRE8
+ pcre_extra extra_data_copy;
+#elif defined COMPILE_PCRE16
+ pcre16_extra extra_data_copy;
+#elif defined COMPILE_PCRE32
+ pcre32_extra extra_data_copy;
+#endif
+
+ if ((re->flags & PCRE_MLSET) != 0 &&
+ ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0 ||
+ re->limit_match < extra_data->match_limit))
+ {
+ extra_data_copy = *extra_data;
+ extra_data_copy.match_limit = re->limit_match;
+ extra_data_copy.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ extra_data = &extra_data_copy;
+ }
+
rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
start_offset, options, offsets, offsetcount);
@@ -6540,6 +6564,8 @@ md->callout_data = NULL;
tables = re->tables;
+/* Limits in the extra data override the defaults, whatever their value. */
+
if (extra_data != NULL)
{
register unsigned int flags = extra_data->flags;
@@ -6554,6 +6580,15 @@ if (extra_data != NULL)
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
}
+/* Limits in the regex override only if they are smaller. */
+
+if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
+ md->match_limit = re->limit_match;
+
+if ((re->flags & PCRE_RLSET) != 0 &&
+ re->limit_recursion < md->match_limit_recursion)
+ md->match_limit_recursion = re->limit_recursion;
+
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 02c9df4..7fad1ee 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -221,6 +221,16 @@ switch (what)
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
+
+ case PCRE_INFO_MATCHLIMIT:
+ if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
+ *((unsigned long int *)where) = re->limit_match;
+ break;
+
+ case PCRE_INFO_RECURSIONLIMIT:
+ if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
+ *((unsigned long int *)where) = re->limit_recursion;
+ break;
default: return PCRE_ERROR_BADOPTION;
}
diff --git a/pcre_internal.h b/pcre_internal.h
index 6306eb1..8fce7b0 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -194,23 +194,31 @@ preprocessor time in standard C environments. */
typedef unsigned char pcre_uint8;
#if USHRT_MAX == 65535
- typedef unsigned short pcre_uint16;
- typedef short pcre_int16;
+typedef unsigned short pcre_uint16;
+typedef short pcre_int16;
+#define PCRE_UINT16_MAX USHRT_MAX
+#define PCRE_INT16_MAX SHRT_MAX
#elif UINT_MAX == 65535
- typedef unsigned int pcre_uint16;
- typedef int pcre_int16;
+typedef unsigned int pcre_uint16;
+typedef int pcre_int16;
+#define PCRE_UINT16_MAX UINT_MAX
+#define PCRE_INT16_MAX INT_MAX
#else
-# error Cannot determine a type for 16-bit unsigned integers
+#error Cannot determine a type for 16-bit integers
#endif
-#if UINT_MAX == 4294967295
- typedef unsigned int pcre_uint32;
- typedef int pcre_int32;
-#elif ULONG_MAX == 4294967295
- typedef unsigned long int pcre_uint32;
- typedef long int pcre_int32;
+#if UINT_MAX == 4294967295U
+typedef unsigned int pcre_uint32;
+typedef int pcre_int32;
+#define PCRE_UINT32_MAX UINT_MAX
+#define PCRE_INT32_MAX INT_MAX
+#elif ULONG_MAX == 4294967295UL
+typedef unsigned long int pcre_uint32;
+typedef long int pcre_int32;
+#define PCRE_UINT32_MAX ULONG_MAX
+#define PCRE_INT32_MAX LONG_MAX
#else
-# error Cannot determine a type for 32-bit unsigned integers
+#error Cannot determine a type for 32-bit integers
#endif
/* When checking for integer overflow in pcre_compile(), we need to handle
@@ -1121,23 +1129,26 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
/* Private flags containing information about the compiled regex. They used to
-live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards
+live at the top end of the options word, but that got almost full, so they were
+moved to a 16-bit flags word - which got almost full, so now they are in a
+32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
+restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
-#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
-#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
-#define PCRE_FIRSTSET 0x0010 /* first_char is set */
-#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
-#define PCRE_REQCHSET 0x0040 /* req_byte is set */
-#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
-#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
-#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
-#define PCRE_JCHANGED 0x0400 /* j option used in regex */
-#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
+#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
+#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
+#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
+#define PCRE_FIRSTSET 0x00000010 /* first_char is set */
+#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */
+#define PCRE_REQCHSET 0x00000040 /* req_byte is set */
+#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */
+#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */
+#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */
+#define PCRE_JCHANGED 0x00000400 /* j option used in regex */
+#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
+#define PCRE_MLSET 0x00002000 /* match limit set by regex */
+#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
@@ -1534,6 +1545,8 @@ a positive value. */
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
+#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
+#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@@ -1795,6 +1808,8 @@ only. */
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@@ -2281,48 +2296,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
code vector run on as long as necessary after the end. We store an explicit
offset to the name table so that if a regex is compiled on one host, saved, and
then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL. For future-proofing, a few dummy fields were
-originally included - even though you can never get this planning right - but
-there is only one left now.
-
-NOTE NOTE NOTE:
-Because people can now save and re-use compiled patterns, any additions to this
-structure should be made at the end, and something earlier (e.g. a new
-flag in the options or one of the dummy fields) should indicate that the new
-fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE
+be well.
+
+The size of the structure must be a multiple of 8 bytes. For the case of
+compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
+that there is an even number of pointers, whose combined size is therefore a
+multiple of 8 bytes.
+
+It is necessary to fork the struct for the 32-bit library, since it needs to
+use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
+typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
+variants.
+
+*** WARNING ***
+When new fields are added to these structures, remember to adjust the code in
+pcre_byte_order.c that is concerned with swapping the byte order of the fields
+when a compiled regex is reloaded on a host with different endianness.
+*** WARNING ***
+There is also similar byte-flipping code in pcretest.c, which is used for
+testing the byte-flipping features. It must also be kept in step.
+*** WARNING ***
*/
-#if defined COMPILE_PCRE8
-#define REAL_PCRE real_pcre
-#elif defined COMPILE_PCRE16
-#define REAL_PCRE real_pcre16
-#elif defined COMPILE_PCRE32
-#define REAL_PCRE real_pcre32
-#endif
-
-/* It is necessary to fork the struct for 32 bit, since it needs to use
- * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
- * typedef since pcretest needs access to the struct of the 8-, 16-
- * and 32-bit variants. */
-
typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint16 first_char; /* Starting character */
+ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint16 first_char; /* Starting character */
- pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- const pcre_uint8 *nullpad; /* NULL padding */
+ void *nullpad; /* NULL padding */
} real_pcre8_or_16;
typedef struct real_pcre8_or_16 real_pcre;
@@ -2332,22 +2348,31 @@ typedef struct real_pcre32 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint32 first_char; /* Starting character */
+ pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint32 first_char; /* Starting character */
- pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
- pcre_uint16 dummy1; /* for later expansion */
- pcre_uint16 dummy2; /* for later expansion */
+ pcre_uint16 dummy; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- void *nullpad; /* for later expansion */
+ void *nullpad; /* NULL padding */
} real_pcre32;
+#if defined COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#elif defined COMPILE_PCRE16
+#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
+#endif
+
/* Assert that the size of REAL_PCRE is divisible by 8 */
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
@@ -2399,14 +2424,14 @@ typedef struct compile_data {
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int workspace_size; /* Size of workspace */
- unsigned int bracount; /* Count of capturing parens as we compile */
+ unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
int assert_depth; /* Depth of nested assertions */
- int external_options; /* External (initial) options */
- int external_flags; /* External flag bits to be set */
+ pcre_uint32 external_options; /* External (initial) options */
+ pcre_uint32 external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
diff --git a/pcretest.c b/pcretest.c
index 8ebd471..f464167 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -2395,7 +2395,7 @@ else
rc = PCRE_ERROR_BADMODE;
#endif
-if (rc < 0)
+if (rc < 0 && rc != PCRE_ERROR_UNSET)
{
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
@@ -2471,14 +2471,18 @@ BOOL utf16_char = FALSE;
re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
-re->top_bracket = swap_uint16(re->top_bracket);
-re->top_backref = swap_uint16(re->top_backref);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
+re->ref_count = swap_uint16(re->ref_count);
if (extra != NULL)
{
@@ -2648,14 +2652,18 @@ int length = re->name_count * re->name_entry_size;
re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
-re->top_bracket = swap_uint16(re->top_bracket);
-re->top_backref = swap_uint16(re->top_backref);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
+re->ref_count = swap_uint16(re->ref_count);
if (extra != NULL)
{
@@ -3525,11 +3533,11 @@ while (!done)
PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
if (rc == PCRE_ERROR_BADMODE)
{
- pcre_uint16 flags_in_host_byte_order;
+ pcre_uint32 flags_in_host_byte_order;
if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
else
- flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
+ flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
/* Simulate the result of the function call below. */
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
@@ -4010,6 +4018,7 @@ while (!done)
{
unsigned long int all_options;
pcre_uint32 first_char, need_char;
+ pcre_uint32 match_limit, recursion_limit;
int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
hascrorlf, maxlookbehind;
int nameentrysize, namecount;
@@ -4037,9 +4046,19 @@ while (!done)
(int)size, (int)regex_gotten_store);
fprintf(outfile, "Capturing subpattern count = %d\n", count);
+
if (backrefmax > 0)
fprintf(outfile, "Max back reference = %d\n", backrefmax);
+ if (maxlookbehind > 0)
+ fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
+ if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
+ fprintf(outfile, "Match limit = %u\n", match_limit);
+
+ if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
+ fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
+
if (namecount > 0)
{
fprintf(outfile, "Named capturing subpatterns:\n");
@@ -4073,7 +4092,7 @@ while (!done)
if (do_flip) all_options = swap_uint32(all_options);
if (get_options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
@@ -4090,7 +4109,8 @@ while (!done)
((get_options & PCRE_UCP) != 0)? " ucp" : "",
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
- ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
+ ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
+ ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
@@ -4164,9 +4184,6 @@ while (!done)
}
}
- if (maxlookbehind > 0)
- fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
-
/* Don't output study size; at present it is in any case a fixed
value, but it varies, depending on the computer architecture, and
so messes up the test suite. (And with the /F option, it might be
diff --git a/testdata/saved16 b/testdata/saved16
index ff5b11d..583c434 100644
--- a/testdata/saved16
+++ b/testdata/saved16
Binary files differ
diff --git a/testdata/saved16BE-1 b/testdata/saved16BE-1
index 297f2f2..e2e807d 100644
--- a/testdata/saved16BE-1
+++ b/testdata/saved16BE-1
Binary files differ
diff --git a/testdata/saved16BE-2 b/testdata/saved16BE-2
index dade400..cc2718a 100644
--- a/testdata/saved16BE-2
+++ b/testdata/saved16BE-2
Binary files differ
diff --git a/testdata/saved16LE-1 b/testdata/saved16LE-1
index deb4491..b037d49 100644
--- a/testdata/saved16LE-1
+++ b/testdata/saved16LE-1
Binary files differ
diff --git a/testdata/saved16LE-2 b/testdata/saved16LE-2
index c8be985..d7034f7 100644
--- a/testdata/saved16LE-2
+++ b/testdata/saved16LE-2
Binary files differ
diff --git a/testdata/saved32 b/testdata/saved32
index 255235d..5b6fe34 100644
--- a/testdata/saved32
+++ b/testdata/saved32
Binary files differ
diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1
index 42af7b4..ebe62ca 100644
--- a/testdata/saved32BE-1
+++ b/testdata/saved32BE-1
Binary files differ
diff --git a/testdata/saved32BE-2 b/testdata/saved32BE-2
index 68a896d..8168343 100644
--- a/testdata/saved32BE-2
+++ b/testdata/saved32BE-2
Binary files differ
diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1
index a4044fd..e008f3a 100644
--- a/testdata/saved32LE-1
+++ b/testdata/saved32LE-1
Binary files differ
diff --git a/testdata/saved32LE-2 b/testdata/saved32LE-2
index 8b35ffa..cf3bd73 100644
--- a/testdata/saved32LE-2
+++ b/testdata/saved32LE-2
Binary files differ
diff --git a/testdata/saved8 b/testdata/saved8
index 9b63b1d..37d733e 100644
--- a/testdata/saved8
+++ b/testdata/saved8
Binary files differ
diff --git a/testdata/testinput2 b/testdata/testinput2
index 972ecf3..6c82fb5 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3802,4 +3802,39 @@ backtracking verbs. --/
/-------------------------/
+/(*LIMIT_MATCH=12bc)abc/
+
+/(*LIMIT_MATCH=4294967290)abc/
+
+/(*LIMIT_RECURSION=4294967280)abc/I
+
+/(a+)*zz/
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q3000
+
+/(a+)*zz/S-
+ aaaaaaaaaaaaaz\Q10
+
+/(*LIMIT_MATCH=3000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q60000
+
+/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+
+/(*LIMIT_MATCH=60000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q3000
+
+/(*LIMIT_RECURSION=10)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\Q1000
+
+/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+
+/(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\Q10
+
/-- End of testinput2 --/
diff --git a/testdata/testinput21 b/testdata/testinput21
index 0f201ad..b285d63 100644
--- a/testdata/testinput21
+++ b/testdata/testinput21
@@ -1,9 +1,15 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
diff --git a/testdata/testinput22 b/testdata/testinput22
index 46a1365..58239f1 100644
--- a/testdata/testinput22
+++ b/testdata/testinput22
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
diff --git a/testdata/testoutput18-16 b/testdata/testoutput18-16
index e91d841..3414a66 100644
--- a/testdata/testoutput18-16
+++ b/testdata/testoutput18-16
@@ -646,7 +646,7 @@ Need char = \x{de34}
0: \x{11234}
/(*UTF-32)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
Capturing subpattern count = 0
@@ -656,7 +656,7 @@ First char = 'a'
Need char = 'b'
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
-Failed: (*VERB) not recognized at offset 12
+Failed: (*VERB) not recognized or malformed at offset 12
/\h/SI8
Capturing subpattern count = 0
diff --git a/testdata/testoutput18-32 b/testdata/testoutput18-32
index 1dba7bc..e917e3b 100644
--- a/testdata/testoutput18-32
+++ b/testdata/testoutput18-32
@@ -632,7 +632,7 @@ Error -10 (bad UTF-32 string) offset=0 reason=3
Error -10 (bad UTF-32 string) offset=0 reason=1
/(*UTF16)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*UTF)\x{11234}/I
Capturing subpattern count = 0
@@ -643,10 +643,10 @@ No need char
0: \x{11234}
/(*UTF-32)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
-Failed: (*VERB) not recognized at offset 12
+Failed: (*VERB) not recognized or malformed at offset 12
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
Capturing subpattern count = 0
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index de64502..fd958c2 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -448,10 +448,10 @@ Need char = '='
/(?<!bar|cattle)foo/I
Capturing subpattern count = 0
+Max lookbehind = 6
No options
First char = 'f'
Need char = 'o'
-Max lookbehind = 6
foo
0: foo
catfoo
@@ -631,10 +631,10 @@ No match
/\Aabc/Im
Capturing subpattern count = 0
+Max lookbehind = 1
Options: anchored multiline
No first char
No need char
-Max lookbehind = 1
/^abc/Im
Capturing subpattern count = 0
@@ -657,19 +657,19 @@ No need char
/(?<=foo)[ab]/IS
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
Subject length lower bound = 1
Starting byte set: a b
/(?<!foo)(alpha|omega)/IS
Capturing subpattern count = 1
+Max lookbehind = 3
No options
No first char
Need char = 'a'
-Max lookbehind = 3
Subject length lower bound = 5
Starting byte set: a o
@@ -683,11 +683,11 @@ Starting byte set: a b
/(?<=foo\n)^bar/Im
Capturing subpattern count = 0
+Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
No first char
Need char = 'r'
-Max lookbehind = 4
foo\nbarbar
0: bar
***Failers
@@ -701,11 +701,11 @@ No match
/^(?<=foo\n)bar/Im
Capturing subpattern count = 0
+Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
First char at start or follows newline
Need char = 'r'
-Max lookbehind = 4
foo\nbarbar
0: bar
***Failers
@@ -744,10 +744,10 @@ Failed: lookbehind assertion is not fixed length at offset 13
/(?<=bullock|donkey)-cart/I
Capturing subpattern count = 0
+Max lookbehind = 7
No options
First char = '-'
Need char = 't'
-Max lookbehind = 7
the bullock-cart
0: -cart
a donkey-cart race
@@ -761,17 +761,17 @@ No match
/(?<=ab(?i)x|y|z)/I
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
/(?>.*)(?<=(abcd)|(xyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
alphabetabcd
0: alphabetabcd
1: abcd
@@ -782,10 +782,10 @@ Max lookbehind = 4
/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I
Capturing subpattern count = 0
+Max lookbehind = 4
No options
First char = 'Z'
Need char = 'Z'
-Max lookbehind = 4
abxyZZ
0: ZZ
abXyZZ
@@ -811,10 +811,10 @@ No match
/(?<!(foo)a)bar/I
Capturing subpattern count = 1
+Max lookbehind = 4
No options
First char = 'b'
Need char = 'r'
-Max lookbehind = 4
bar
0: bar
foobbar
@@ -1197,10 +1197,10 @@ No need char
/\Biss\B/I+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1218,20 +1218,20 @@ Need char = 's'
/\Biss\B/IG+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
/\Biss\B/Ig+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1244,10 +1244,10 @@ No match
/(?<=[Ms])iss/Ig+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1256,10 +1256,10 @@ Max lookbehind = 1
/(?<=[Ms])iss/IG+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1437,10 +1437,10 @@ Need char = 'r'
/...(?<=abc)/I
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
/abc(?!pqr)/I
Capturing subpattern count = 0
@@ -3218,10 +3218,10 @@ No match
End
------------------------------------------------------------------
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = '8'
Need char = 'X'
-Max lookbehind = 1
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|IDZ
------------------------------------------------------------------
@@ -3232,10 +3232,10 @@ Max lookbehind = 1
End
------------------------------------------------------------------
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = '$'
Need char = 'X'
-Max lookbehind = 1
/(.*)\d+\1/I
Capturing subpattern count = 1
@@ -3748,10 +3748,10 @@ Callout 2: last capture = -1
/(?<=(abc)(?C))xyz/I
Capturing subpattern count = 1
+Max lookbehind = 3
No options
First char = 'x'
Need char = 'z'
-Max lookbehind = 3
abcxyz\C+
Callout 0: last capture = 1
0: <unset>
@@ -5396,19 +5396,19 @@ Need char = '3'
/\b.*/I
Capturing subpattern count = 0
+Max lookbehind = 1
No options
No first char
No need char
-Max lookbehind = 1
ab cd\>1
0: cd
/\b.*/Is
Capturing subpattern count = 0
+Max lookbehind = 1
Options: dotall
No first char
No need char
-Max lookbehind = 1
ab cd\>1
0: cd
@@ -8788,7 +8788,7 @@ No match
1: \x0a
/a(*CR)b/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CR)a.b/
a\nb
@@ -11627,19 +11627,19 @@ No match
/\btype\b\W*?\btext\b\W*?\bjavascript\b/IS
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 't'
Need char = 't'
-Max lookbehind = 1
Subject length lower bound = 18
No set of starting bytes
/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/IS
Capturing subpattern count = 0
+Max lookbehind = 1
No options
No first char
No need char
-Max lookbehind = 1
Subject length lower bound = 8
Starting byte set: < o t u
@@ -12236,17 +12236,17 @@ settings of the anchored and startline bits. --/
/(?>.*?a)(?<=ba)/I
Capturing subpattern count = 0
+Max lookbehind = 2
No options
No first char
Need char = 'a'
-Max lookbehind = 2
/(?:.*?a)(?<=ba)/I
Capturing subpattern count = 0
+Max lookbehind = 2
No options
First char at start or follows newline
Need char = 'a'
-Max lookbehind = 2
/.*?a(*PRUNE)b/I
Capturing subpattern count = 0
@@ -12292,17 +12292,17 @@ No need char
/(?>.*?)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
/(?>.*)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
"(?>.*)foo"I
Capturing subpattern count = 0
@@ -12574,4 +12574,95 @@ No match
/-------------------------/
+/(*LIMIT_MATCH=12bc)abc/
+Failed: (*VERB) not recognized or malformed at offset 7
+
+/(*LIMIT_MATCH=4294967290)abc/
+Failed: (*VERB) not recognized or malformed at offset 7
+
+/(*LIMIT_RECURSION=4294967280)abc/I
+Capturing subpattern count = 0
+Recursion limit = 4294967280
+No options
+First char = 'a'
+Need char = 'c'
+
+/(a+)*zz/
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\q3000
+Error -8 (match limit exceeded)
+
+/(a+)*zz/S-
+ aaaaaaaaaaaaaz\Q10
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_MATCH=3000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 3000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+Error -8 (match limit exceeded)
+ aaaaaaaaaaaaaz\q60000
+Error -8 (match limit exceeded)
+
+/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 3000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+Error -8 (match limit exceeded)
+
+/(*LIMIT_MATCH=60000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 60000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\q3000
+Error -8 (match limit exceeded)
+
+/(*LIMIT_RECURSION=10)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 10
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+Error -21 (recursion limit exceeded)
+ aaaaaaaaaaaaaz\Q1000
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 10
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 1000
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\Q10
+Error -21 (recursion limit exceeded)
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput21-16 b/testdata/testoutput21-16
index 0510798..e831888 100644
--- a/testdata/testoutput21-16
+++ b/testdata/testoutput21-16
@@ -1,5 +1,6 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
Compiled pattern loaded from testsaved8
@@ -7,7 +8,12 @@ No study data
Error -28 from pcre16_fullinfo(0)
Running in 16-bit mode but pattern was compiled in 8-bit mode
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
Compiled pattern loaded from testsaved16LE-1
diff --git a/testdata/testoutput21-32 b/testdata/testoutput21-32
index 47242f0..c6e8f6c 100644
--- a/testdata/testoutput21-32
+++ b/testdata/testoutput21-32
@@ -1,5 +1,6 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
Compiled pattern loaded from testsaved8
@@ -7,7 +8,12 @@ No study data
Error -28 from pcre32_fullinfo(0)
Running in 32-bit mode but pattern was compiled in 8-bit mode
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
Compiled pattern loaded from testsaved16LE-1
diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
index 20ef6b8..b2c673d 100644
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
Compiled pattern loaded from testsaved16LE-2
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
index 7fa69b0..5a1d0da 100644
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
Compiled pattern loaded from testsaved16LE-2
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 0e84054..d583119 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1818,10 +1818,10 @@ Partial match: \x{0d}\x{0d}
/(?<=\x{1234}\x{1234})\bxy/I8
Capturing subpattern count = 0
+Max lookbehind = 2
Options: utf
First char = 'x'
Need char = 'y'
-Max lookbehind = 2
/(?<!^)ETA/8
ETA