summaryrefslogtreecommitdiff
path: root/ext/pcre/pcre2lib/pcre2_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcre2lib/pcre2_internal.h')
-rw-r--r--ext/pcre/pcre2lib/pcre2_internal.h315
1 files changed, 187 insertions, 128 deletions
diff --git a/ext/pcre/pcre2lib/pcre2_internal.h b/ext/pcre/pcre2lib/pcre2_internal.h
index 8750f2f174..d8fad1e93b 100644
--- a/ext/pcre/pcre2lib/pcre2_internal.h
+++ b/ext/pcre/pcre2lib/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -76,6 +76,17 @@ typedef int BOOL;
#include <valgrind/memcheck.h>
#endif
+/* -ftrivial-auto-var-init support supports initializing all local variables
+to avoid some classes of bug, but this can cause an unacceptable slowdown
+for large on-stack arrays in hot functions. This macro lets us annotate
+such arrays. */
+
+#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
+#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define PCRE2_KEEP_UNINITIALIZED
+#endif
+
/* Older versions of MSVC lack snprintf(). This define allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
@@ -148,16 +159,7 @@ pcre2_match() because of the way it backtracks. */
/* When checking for integer overflow in pcre2_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
-arithmetic. Handle this by defining a macro for the appropriate type. If
-stdint.h is available, include it; it may define INT64_MAX. Systems that do not
-have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
-by "configure". */
-
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
+arithmetic. Handle this by defining a macro for the appropriate type. */
#if defined INT64_MAX || defined int64_t
#define INT64_OR_DOUBLE int64_t
@@ -526,6 +528,7 @@ bytes in a code unit in that mode. */
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
#define PCRE2_HASBKC 0x00400000 /* contains \C */
+#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@@ -535,18 +538,23 @@ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
+/* Values for the flags field in a match data block. */
+
+#define PCRE2_MD_COPIED_SUBJECT 0x01u
+
/* Magic number to provide a small check against being handed junk. */
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
/* The maximum remaining length of subject we are prepared to search for a
-req_unit match. In 8-bit mode, memchr() is used and is much faster than the
-search loop that has to be used in 16-bit and 32-bit modes. */
+req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
+much faster than the search loop that has to be used in 16-bit and 32-bit
+modes. */
#if PCRE2_CODE_UNIT_WIDTH == 8
-#define REQ_CU_MAX 2000
+#define REQ_CU_MAX 5000
#else
-#define REQ_CU_MAX 1000
+#define REQ_CU_MAX 2000
#endif
/* Offsets for the bitmap tables in the cbits set of tables. Each table
@@ -569,11 +577,11 @@ these tables. */
without checking pcre2_jit_compile.c, which has an assertion to ensure that
ctype_word has the value 16. */
-#define ctype_space 0x01
-#define ctype_letter 0x02
-#define ctype_digit 0x04
-#define ctype_xdigit 0x08 /* not actually used any more */
-#define ctype_word 0x10 /* alphanumeric or '_' */
+#define ctype_space 0x01
+#define ctype_letter 0x02
+#define ctype_lcletter 0x04
+#define ctype_digit 0x08
+#define ctype_word 0x10 /* alphanumeric or '_' */
/* Offsets of the various tables from the base tables pointer, and
total length of the tables. */
@@ -582,7 +590,7 @@ total length of the tables. */
#define fcc_offset 256 /* Flip case */
#define cbits_offset 512 /* Character classes */
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
-#define tables_length (ctypes_offset + 256)
+#define TABLES_LENGTH (ctypes_offset + 256)
/* -------------------- Character and string names ------------------------ */
@@ -874,34 +882,52 @@ a positive value. */
#define STR_RIGHT_CURLY_BRACKET "}"
#define STR_TILDE "~"
-#define STRING_ACCEPT0 "ACCEPT\0"
-#define STRING_COMMIT0 "COMMIT\0"
-#define STRING_F0 "F\0"
-#define STRING_FAIL0 "FAIL\0"
-#define STRING_MARK0 "MARK\0"
-#define STRING_PRUNE0 "PRUNE\0"
-#define STRING_SKIP0 "SKIP\0"
-#define STRING_THEN "THEN"
-
-#define STRING_alpha0 "alpha\0"
-#define STRING_lower0 "lower\0"
-#define STRING_upper0 "upper\0"
-#define STRING_alnum0 "alnum\0"
-#define STRING_ascii0 "ascii\0"
-#define STRING_blank0 "blank\0"
-#define STRING_cntrl0 "cntrl\0"
-#define STRING_digit0 "digit\0"
-#define STRING_graph0 "graph\0"
-#define STRING_print0 "print\0"
-#define STRING_punct0 "punct\0"
-#define STRING_space0 "space\0"
-#define STRING_word0 "word\0"
-#define STRING_xdigit "xdigit"
-
-#define STRING_DEFINE "DEFINE"
-#define STRING_VERSION "VERSION"
-#define STRING_WEIRD_STARTWORD "[:<:]]"
-#define STRING_WEIRD_ENDWORD "[:>:]]"
+#define STRING_ACCEPT0 "ACCEPT\0"
+#define STRING_COMMIT0 "COMMIT\0"
+#define STRING_F0 "F\0"
+#define STRING_FAIL0 "FAIL\0"
+#define STRING_MARK0 "MARK\0"
+#define STRING_PRUNE0 "PRUNE\0"
+#define STRING_SKIP0 "SKIP\0"
+#define STRING_THEN "THEN"
+
+#define STRING_atomic0 "atomic\0"
+#define STRING_pla0 "pla\0"
+#define STRING_plb0 "plb\0"
+#define STRING_napla0 "napla\0"
+#define STRING_naplb0 "naplb\0"
+#define STRING_nla0 "nla\0"
+#define STRING_nlb0 "nlb\0"
+#define STRING_sr0 "sr\0"
+#define STRING_asr0 "asr\0"
+#define STRING_positive_lookahead0 "positive_lookahead\0"
+#define STRING_positive_lookbehind0 "positive_lookbehind\0"
+#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
+#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
+#define STRING_negative_lookahead0 "negative_lookahead\0"
+#define STRING_negative_lookbehind0 "negative_lookbehind\0"
+#define STRING_script_run0 "script_run\0"
+#define STRING_atomic_script_run "atomic_script_run"
+
+#define STRING_alpha0 "alpha\0"
+#define STRING_lower0 "lower\0"
+#define STRING_upper0 "upper\0"
+#define STRING_alnum0 "alnum\0"
+#define STRING_ascii0 "ascii\0"
+#define STRING_blank0 "blank\0"
+#define STRING_cntrl0 "cntrl\0"
+#define STRING_digit0 "digit\0"
+#define STRING_graph0 "graph\0"
+#define STRING_print0 "print\0"
+#define STRING_punct0 "punct\0"
+#define STRING_space0 "space\0"
+#define STRING_word0 "word\0"
+#define STRING_xdigit "xdigit"
+
+#define STRING_DEFINE "DEFINE"
+#define STRING_VERSION "VERSION"
+#define STRING_WEIRD_STARTWORD "[:<:]]"
+#define STRING_WEIRD_ENDWORD "[:>:]]"
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
@@ -1150,34 +1176,52 @@ only. */
#define STR_RIGHT_CURLY_BRACKET "\175"
#define STR_TILDE "\176"
-#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
-#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
-#define STRING_F0 STR_F "\0"
-#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
-#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
-#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
-#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
-#define STRING_THEN STR_T STR_H STR_E STR_N
-
-#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
-#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
-#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
-#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
-#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
-#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
-#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
-#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
-#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
-#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
-#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
-#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
-#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
-#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
-
-#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
-#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
-#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
-#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
+#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
+#define STRING_F0 STR_F "\0"
+#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
+#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
+#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
+#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
+#define STRING_THEN STR_T STR_H STR_E STR_N
+
+#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
+#define STRING_pla0 STR_p STR_l STR_a "\0"
+#define STRING_plb0 STR_p STR_l STR_b "\0"
+#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
+#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
+#define STRING_nla0 STR_n STR_l STR_a "\0"
+#define STRING_nlb0 STR_n STR_l STR_b "\0"
+#define STRING_sr0 STR_s STR_r "\0"
+#define STRING_asr0 STR_a STR_s STR_r "\0"
+#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
+#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
+
+#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
+#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
+#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
+#define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
+#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
+#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
+#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
+#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
+#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
+#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
+#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_word0 STR_w STR_o STR_r STR_d "\0"
+#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
+
+#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
+#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N
+#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
@@ -1278,7 +1322,7 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
order to the list of escapes immediately above. Furthermore, values up to
OP_DOLLM must not be changed without adjusting the table called autoposstab in
-pcre2_auto_possess.c
+pcre2_auto_possess.c.
Whenever this list is updated, the two macro definitions that follow must be
updated to match. The possessification table called "opcode_possessify" in
@@ -1476,79 +1520,81 @@ enum {
OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */
OP_KETRPOS, /* 124 Possessive unlimited repeat. */
- /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
- asserts must remain in order. */
+ /* The assertions must come before BRA, CBRA, ONCE, and COND. */
OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */
OP_ASSERT, /* 126 Positive lookahead */
OP_ASSERT_NOT, /* 127 Negative lookahead */
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
-
- /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
- assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
- that isn't an assertion. The POS versions must immediately follow the non-POS
- versions in each case. */
-
- OP_ONCE, /* 130 Atomic group, contains captures */
- OP_BRA, /* 131 Start of non-capturing bracket */
- OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 133 Start of capturing bracket */
- OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 135 Conditional group */
+ OP_ASSERT_NA, /* 130 Positive non-atomic lookahead */
+ OP_ASSERTBACK_NA, /* 131 Positive non-atomic lookbehind */
+
+ /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
+ immediately after the assertions, with ONCE first, as there's a test for >=
+ ONCE for a subpattern that isn't an assertion. The POS versions must
+ immediately follow the non-POS versions in each case. */
+
+ OP_ONCE, /* 132 Atomic group, contains captures */
+ OP_SCRIPT_RUN, /* 133 Non-capture, but check characters' scripts */
+ OP_BRA, /* 134 Start of non-capturing bracket */
+ OP_BRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 136 Start of capturing bracket */
+ OP_CBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 138 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 138 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 140 Conditional group, check empty */
+ OP_SBRA, /* 139 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 149 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 141 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 142 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 143 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 141 Used to hold a capture number as condition */
- OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
- OP_RREF, /* 143 Used to hold a recursion number as condition */
- OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
- OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
- OP_TRUE, /* 146 Always true (used by VERSION) */
+ OP_CREF, /* 144 Used to hold a capture number as condition */
+ OP_DNCREF, /* 145 Used to point to duplicate names as a condition */
+ OP_RREF, /* 146 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 147 Used to point to duplicate names as a condition */
+ OP_FALSE, /* 148 Always false (used by DEFINE and VERSION) */
+ OP_TRUE, /* 149 Always true (used by VERSION) */
- OP_BRAZERO, /* 147 These two must remain together and in this */
- OP_BRAMINZERO, /* 148 order. */
- OP_BRAPOSZERO, /* 149 */
+ OP_BRAZERO, /* 150 These two must remain together and in this */
+ OP_BRAMINZERO, /* 151 order. */
+ OP_BRAPOSZERO, /* 152 */
/* These are backtracking control verbs */
- OP_MARK, /* 150 always has an argument */
- OP_PRUNE, /* 151 */
- OP_PRUNE_ARG, /* 152 same, but with argument */
- OP_SKIP, /* 153 */
- OP_SKIP_ARG, /* 154 same, but with argument */
- OP_THEN, /* 155 */
- OP_THEN_ARG, /* 156 same, but with argument */
- OP_COMMIT, /* 157 */
- OP_COMMIT_ARG, /* 158 same, but with argument */
+ OP_MARK, /* 153 always has an argument */
+ OP_PRUNE, /* 154 */
+ OP_PRUNE_ARG, /* 155 same, but with argument */
+ OP_SKIP, /* 156 */
+ OP_SKIP_ARG, /* 157 same, but with argument */
+ OP_THEN, /* 158 */
+ OP_THEN_ARG, /* 159 same, but with argument */
+ OP_COMMIT, /* 160 */
+ OP_COMMIT_ARG, /* 161 same, but with argument */
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
without the need for a special opcode. */
- OP_FAIL, /* 159 */
- OP_ACCEPT, /* 160 */
- OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
- OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 162 */
+ OP_ACCEPT, /* 163 */
+ OP_ASSERT_ACCEPT, /* 164 Used inside assertions */
+ OP_CLOSE, /* 165 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 163 */
+ OP_SKIPZERO, /* 166 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 164 */
+ OP_DEFINE, /* 167 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1561,7 +1607,7 @@ enum {
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
definitions that follow must also be updated to match. There are also tables
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
-pcre2_dfa_exec.c that must be updated. */
+pcre2_dfa_match.c that must be updated. */
/* This macro defines textual names for all the opcodes. These are used only
@@ -1594,8 +1640,11 @@ some cases doesn't actually use these names at all). */
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
- "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
+ "Reverse", "Assert", "Assert not", \
+ "Assert back", "Assert back not", \
+ "Non-atomic assert", "Non-atomic assert back", \
"Once", \
+ "Script run", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@@ -1678,7 +1727,10 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert not */ \
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
+ 1+LINK_SIZE, /* NA Assert */ \
+ 1+LINK_SIZE, /* NA Assert behind */ \
1+LINK_SIZE, /* ONCE */ \
+ 1+LINK_SIZE, /* SCRIPT_RUN */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
@@ -1718,13 +1770,11 @@ typedef struct pcre2_memctl {
/* Structure for building a chain of open capturing subpatterns during
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
-encountered. This is also used to identify subpatterns that contain recursive
-back references to themselves, so that they can be made atomic. */
+encountered. */
typedef struct open_capitem {
struct open_capitem *next; /* Chain link */
uint16_t number; /* Capture number */
- uint16_t flag; /* Set TRUE if recursive back ref */
uint16_t assert_depth; /* Assertion depth when opened */
} open_capitem;
@@ -1747,6 +1797,8 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */
+ int16_t scriptx; /* script extension value */
+ int16_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record;
/* UCD access macros */
@@ -1769,6 +1821,7 @@ typedef struct {
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
+#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
/* Header for serialized pcre2 codes. */
@@ -1826,6 +1879,8 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
+#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
+#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_)
#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_)
#define _pcre2_ucd_stage2 PCRE2_SUFFIX(_pcre2_ucd_stage2_)
@@ -1847,6 +1902,8 @@ extern const uint8_t PRIV(default_tables)[];
extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
+extern const uint32_t PRIV(ucd_digit_sets)[];
+extern const uint8_t PRIV(ucd_script_sets)[];
extern const ucd_record PRIV(ucd_records)[];
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
@@ -1894,6 +1951,7 @@ is available. */
#define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_)
#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_)
#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_)
+#define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_)
#define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_)
#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_)
#define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_)
@@ -1905,10 +1963,10 @@ is available. */
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
-extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
+extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
const compile_block *);
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
- int *, uint32_t, BOOL, compile_block *);
+ int *, uint32_t, uint32_t, BOOL, compile_block *);
extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
BOOL, int *);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
@@ -1920,6 +1978,7 @@ extern size_t _pcre2_jit_get_size(void *);
const char * _pcre2_jit_get_target(void);
extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
+extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);