summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_internal.h')
-rw-r--r--ext/pcre/pcrelib/pcre_internal.h116
1 files changed, 76 insertions, 40 deletions
diff --git a/ext/pcre/pcrelib/pcre_internal.h b/ext/pcre/pcrelib/pcre_internal.h
index e168f3909b..de0961435b 100644
--- a/ext/pcre/pcrelib/pcre_internal.h
+++ b/ext/pcre/pcrelib/pcre_internal.h
@@ -535,7 +535,9 @@ Standard C system should have one. */
/* Private flags containing information about the compiled regex. They used to
live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. */
+are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
+the restrictions on partial matching have been lifted. It remains for backwards
+compatibility. */
#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */
#define PCRE_FIRSTSET 0x0002 /* first_byte is set */
@@ -547,6 +549,7 @@ are in a 16-bit flags word. */
/* Options for the "extra" block produced by pcre_study(). */
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
+#define PCRE_STUDY_MINLEN 0x02 /* a minimum length field exists */
/* Masks for identifying the public options that are permitted at compile
time, run time, or study time, respectively. */
@@ -562,14 +565,15 @@ time, run time, or study time, respectively. */
PCRE_JAVASCRIPT_COMPAT)
#define PUBLIC_EXEC_OPTIONS \
- (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
- PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
- PCRE_NO_START_OPTIMIZE)
+ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
+ PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
+ PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
#define PUBLIC_DFA_EXEC_OPTIONS \
- (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
- PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
- PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
+ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
+ PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
+ PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
+ PCRE_NO_START_OPTIMIZE)
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
@@ -1206,8 +1210,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
OP_EOD must correspond in order to the list of escapes immediately above.
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
enum {
OP_END, /* 0 End of pattern */
@@ -1343,30 +1347,39 @@ enum {
OP_SCBRA, /* 98 Start of capturing bracket, check empty */
OP_SCOND, /* 99 Conditional group, check empty */
+ /* The next two pairs must (respectively) be kept together. */
+
OP_CREF, /* 100 Used to hold a capture number as condition */
- OP_RREF, /* 101 Used to hold a recursion number as condition */
- OP_DEF, /* 102 The DEFINE condition */
+ OP_NCREF, /* 101 Same, but generaged by a name reference*/
+ OP_RREF, /* 102 Used to hold a recursion number as condition */
+ OP_NRREF, /* 103 Same, but generaged by a name reference*/
+ OP_DEF, /* 104 The DEFINE condition */
- OP_BRAZERO, /* 103 These two must remain together and in this */
- OP_BRAMINZERO, /* 104 order. */
+ OP_BRAZERO, /* 105 These two must remain together and in this */
+ OP_BRAMINZERO, /* 106 order. */
/* These are backtracking control verbs */
- OP_PRUNE, /* 105 */
- OP_SKIP, /* 106 */
- OP_THEN, /* 107 */
- OP_COMMIT, /* 108 */
+ OP_PRUNE, /* 107 */
+ OP_SKIP, /* 108 */
+ OP_THEN, /* 109 */
+ OP_COMMIT, /* 110 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 109 */
- OP_ACCEPT, /* 110 */
+ OP_FAIL, /* 111 */
+ OP_ACCEPT, /* 112 */
+ OP_CLOSE, /* 113 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO /* 111 */
+ OP_SKIPZERO /* 114 */
};
+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */
+
/* This macro defines textual names for all the opcodes. These are used only
for debugging. The macro is referenced only in pcre_printint.c. */
@@ -1388,9 +1401,10 @@ for debugging. The macro is referenced only in pcre_printint.c. */
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
"AssertB", "AssertB not", "Reverse", \
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
- "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
+ "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
+ "Brazero", "Braminzero", \
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \
- "Skip zero"
+ "Close", "Skip zero"
/* This macro defines the length of fixed length operations in the compiled
@@ -1450,15 +1464,16 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* SBRA */ \
3+LINK_SIZE, /* SCBRA */ \
1+LINK_SIZE, /* SCOND */ \
- 3, /* CREF */ \
- 3, /* RREF */ \
+ 3, 3, /* CREF, NCREF */ \
+ 3, 3, /* RREF, NRREF */ \
1, /* DEF */ \
1, 1, /* BRAZERO, BRAMINZERO */ \
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
- 1, 1, 1 /* FAIL, ACCEPT, SKIPZERO */
+ 1, 1, 3, 1 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
-/* A magic value for OP_RREF to indicate the "any recursion" condition. */
+/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
+condition. */
#define RREF_ANY 0xffff
@@ -1471,7 +1486,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
- ERR60, ERR61, ERR62, ERR63, ERR64 };
+ ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
@@ -1487,7 +1502,7 @@ Because people can now save and re-use compiled patterns, any additions to this
structure should be made at the end, and something earlier (e.g. a new
flag in the options or one of the dummy fields) should indicate that the new
fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE:
+NOTE NOTE NOTE
*/
typedef struct real_pcre {
@@ -1514,10 +1529,20 @@ remark (see NOTE above) about extending this structure applies. */
typedef struct pcre_study_data {
pcre_uint32 size; /* Total that was malloced */
- pcre_uint32 options;
- uschar start_bits[32];
+ pcre_uint32 flags; /* Private flags */
+ uschar start_bits[32]; /* Starting char bits */
+ pcre_uint32 minlength; /* Minimum subject length */
} pcre_study_data;
+/* Structure for building a chain of open capturing subpatterns during
+compiling, so that instructions to close them can be compiled when (*ACCEPT) is
+encountered. */
+
+typedef struct open_capitem {
+ struct open_capitem *next; /* Chain link */
+ pcre_uint16 number; /* Capture number */
+} open_capitem;
+
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */
@@ -1530,6 +1555,7 @@ typedef struct compile_data {
const uschar *start_code; /* The start of the compiled code */
const uschar *start_pattern; /* The start of the pattern */
const uschar *end_pattern; /* The end of the pattern */
+ open_capitem *open_caps; /* Chain of open capture items */
uschar *hwm; /* High watermark of workspace */
uschar *name_table; /* The name/number table */
int names_found; /* Number of entries so far */
@@ -1542,6 +1568,7 @@ typedef struct compile_data {
int external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
+ BOOL check_lookbehind; /* Lookbehinds need later checking */
int nltype; /* Newline type */
int nllen; /* Newline string length */
uschar nl[4]; /* Newline string when fixed length */
@@ -1565,6 +1592,7 @@ typedef struct recursion_info {
USPTR save_start; /* Old value of mstart */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
+ int save_offset_top; /* Current value of offset_top */
} recursion_info;
/* Structure for building a chain of data for holding the values of the subject
@@ -1589,6 +1617,9 @@ typedef struct match_data {
int offset_max; /* The maximum usable for return data */
int nltype; /* Newline type */
int nllen; /* Newline string length */
+ int name_count; /* Number of names in name table */
+ int name_entry_size; /* Size of entry in names table */
+ uschar *name_table; /* Table of names */
uschar nl[4]; /* Newline string when fixed */
const uschar *lcc; /* Points to lower casing table */
const uschar *ctypes; /* Points to table of type maps */
@@ -1599,7 +1630,7 @@ typedef struct match_data {
BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */
BOOL endonly; /* Dollar not before final \n */
BOOL notempty; /* Empty string match not wanted */
- BOOL partial; /* PARTIAL flag */
+ BOOL notempty_atstart; /* Empty string match at start not wanted */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
const uschar *start_code; /* For use when recursing */
@@ -1607,6 +1638,8 @@ typedef struct match_data {
USPTR end_subject; /* End of the subject string */
USPTR start_match_ptr; /* Start of matched string */
USPTR end_match_ptr; /* Subject position at end match */
+ USPTR start_used_ptr; /* Earliest consulted character */
+ int partial; /* PARTIAL options */
int end_offset_top; /* Highwater mark at end of match */
int capture_last; /* Most recent capture number */
int start_offset; /* The start offset value */
@@ -1623,7 +1656,9 @@ typedef struct dfa_match_data {
const uschar *start_code; /* Start of the compiled pattern */
const uschar *start_subject; /* Start of the subject string */
const uschar *end_subject; /* End of subject string */
+ const uschar *start_used_ptr; /* Earliest consulted character */
const uschar *tables; /* Character tables */
+ int start_offset; /* The start offset value */
int moptions; /* Match options */
int poptions; /* Pattern options */
int nltype; /* Newline type */
@@ -1702,15 +1737,16 @@ extern const uschar _pcre_OP_lengths[];
one of the exported public functions. They have to be "external" in the C
sense, but are not part of the PCRE public API. */
-extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
- int *, BOOL);
-extern int _pcre_ord2utf8(int, uschar *);
-extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
- const pcre_study_data *, pcre_study_data *);
-extern int _pcre_valid_utf8(const uschar *, int);
-extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
- int *, BOOL);
-extern BOOL _pcre_xclass(int, const uschar *);
+extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
+extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
+ int *, BOOL);
+extern int _pcre_ord2utf8(int, uschar *);
+extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
+ const pcre_study_data *, pcre_study_data *);
+extern int _pcre_valid_utf8(const uschar *, int);
+extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
+ int *, BOOL);
+extern BOOL _pcre_xclass(int, const uschar *);
/* Unicode character database (UCD) */