diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcre_internal.h')
-rw-r--r-- | ext/pcre/pcrelib/pcre_internal.h | 116 |
1 files changed, 76 insertions, 40 deletions
diff --git a/ext/pcre/pcrelib/pcre_internal.h b/ext/pcre/pcrelib/pcre_internal.h index e168f3909b..de0961435b 100644 --- a/ext/pcre/pcrelib/pcre_internal.h +++ b/ext/pcre/pcrelib/pcre_internal.h @@ -535,7 +535,9 @@ Standard C system should have one. */ /* Private flags containing information about the compiled regex. They used to live at the top end of the options word, but that got almost full, so now they -are in a 16-bit flags word. */ +are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as +the restrictions on partial matching have been lifted. It remains for backwards +compatibility. */ #define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */ #define PCRE_FIRSTSET 0x0002 /* first_byte is set */ @@ -547,6 +549,7 @@ are in a 16-bit flags word. */ /* Options for the "extra" block produced by pcre_study(). */ #define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */ +#define PCRE_STUDY_MINLEN 0x02 /* a minimum length field exists */ /* Masks for identifying the public options that are permitted at compile time, run time, or study time, respectively. */ @@ -562,14 +565,15 @@ time, run time, or study time, respectively. */ PCRE_JAVASCRIPT_COMPAT) #define PUBLIC_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ - PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ - PCRE_NO_START_OPTIMIZE) + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ + PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \ + PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE) #define PUBLIC_DFA_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ - PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \ - PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE) + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ + PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \ + PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ + PCRE_NO_START_OPTIMIZE) #define PUBLIC_STUDY_OPTIONS 0 /* None defined */ @@ -1206,8 +1210,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, OP_EOD must correspond in order to the list of escapes immediately above. *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions -that follow must also be updated to match. There is also a table called -"coptable" in pcre_dfa_exec.c that must be updated. */ +that follow must also be updated to match. There are also tables called +"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ enum { OP_END, /* 0 End of pattern */ @@ -1343,30 +1347,39 @@ enum { OP_SCBRA, /* 98 Start of capturing bracket, check empty */ OP_SCOND, /* 99 Conditional group, check empty */ + /* The next two pairs must (respectively) be kept together. */ + OP_CREF, /* 100 Used to hold a capture number as condition */ - OP_RREF, /* 101 Used to hold a recursion number as condition */ - OP_DEF, /* 102 The DEFINE condition */ + OP_NCREF, /* 101 Same, but generaged by a name reference*/ + OP_RREF, /* 102 Used to hold a recursion number as condition */ + OP_NRREF, /* 103 Same, but generaged by a name reference*/ + OP_DEF, /* 104 The DEFINE condition */ - OP_BRAZERO, /* 103 These two must remain together and in this */ - OP_BRAMINZERO, /* 104 order. */ + OP_BRAZERO, /* 105 These two must remain together and in this */ + OP_BRAMINZERO, /* 106 order. */ /* These are backtracking control verbs */ - OP_PRUNE, /* 105 */ - OP_SKIP, /* 106 */ - OP_THEN, /* 107 */ - OP_COMMIT, /* 108 */ + OP_PRUNE, /* 107 */ + OP_SKIP, /* 108 */ + OP_THEN, /* 109 */ + OP_COMMIT, /* 110 */ /* These are forced failure and success verbs */ - OP_FAIL, /* 109 */ - OP_ACCEPT, /* 110 */ + OP_FAIL, /* 111 */ + OP_ACCEPT, /* 112 */ + OP_CLOSE, /* 113 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO /* 111 */ + OP_SKIPZERO /* 114 */ }; +/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro +definitions that follow must also be updated to match. There are also tables +called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */ + /* This macro defines textual names for all the opcodes. These are used only for debugging. The macro is referenced only in pcre_printint.c. */ @@ -1388,9 +1401,10 @@ for debugging. The macro is referenced only in pcre_printint.c. */ "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \ "AssertB", "AssertB not", "Reverse", \ "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \ - "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \ + "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \ + "Brazero", "Braminzero", \ "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \ - "Skip zero" + "Close", "Skip zero" /* This macro defines the length of fixed length operations in the compiled @@ -1450,15 +1464,16 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE, /* SBRA */ \ 3+LINK_SIZE, /* SCBRA */ \ 1+LINK_SIZE, /* SCOND */ \ - 3, /* CREF */ \ - 3, /* RREF */ \ + 3, 3, /* CREF, NCREF */ \ + 3, 3, /* RREF, NRREF */ \ 1, /* DEF */ \ 1, 1, /* BRAZERO, BRAMINZERO */ \ 1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \ - 1, 1, 1 /* FAIL, ACCEPT, SKIPZERO */ + 1, 1, 3, 1 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ -/* A magic value for OP_RREF to indicate the "any recursion" condition. */ +/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" +condition. */ #define RREF_ANY 0xffff @@ -1471,7 +1486,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, - ERR60, ERR61, ERR62, ERR63, ERR64 }; + ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. We store an explicit @@ -1487,7 +1502,7 @@ Because people can now save and re-use compiled patterns, any additions to this structure should be made at the end, and something earlier (e.g. a new flag in the options or one of the dummy fields) should indicate that the new fields are present. Currently PCRE always sets the dummy fields to zero. -NOTE NOTE NOTE: +NOTE NOTE NOTE */ typedef struct real_pcre { @@ -1514,10 +1529,20 @@ remark (see NOTE above) about extending this structure applies. */ typedef struct pcre_study_data { pcre_uint32 size; /* Total that was malloced */ - pcre_uint32 options; - uschar start_bits[32]; + pcre_uint32 flags; /* Private flags */ + uschar start_bits[32]; /* Starting char bits */ + pcre_uint32 minlength; /* Minimum subject length */ } pcre_study_data; +/* Structure for building a chain of open capturing subpatterns during +compiling, so that instructions to close them can be compiled when (*ACCEPT) is +encountered. */ + +typedef struct open_capitem { + struct open_capitem *next; /* Chain link */ + pcre_uint16 number; /* Capture number */ +} open_capitem; + /* Structure for passing "static" information around between the functions doing the compiling, so that they are thread-safe. */ @@ -1530,6 +1555,7 @@ typedef struct compile_data { const uschar *start_code; /* The start of the compiled code */ const uschar *start_pattern; /* The start of the pattern */ const uschar *end_pattern; /* The end of the pattern */ + open_capitem *open_caps; /* Chain of open capture items */ uschar *hwm; /* High watermark of workspace */ uschar *name_table; /* The name/number table */ int names_found; /* Number of entries so far */ @@ -1542,6 +1568,7 @@ typedef struct compile_data { int external_flags; /* External flag bits to be set */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ + BOOL check_lookbehind; /* Lookbehinds need later checking */ int nltype; /* Newline type */ int nllen; /* Newline string length */ uschar nl[4]; /* Newline string when fixed length */ @@ -1565,6 +1592,7 @@ typedef struct recursion_info { USPTR save_start; /* Old value of mstart */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */ + int save_offset_top; /* Current value of offset_top */ } recursion_info; /* Structure for building a chain of data for holding the values of the subject @@ -1589,6 +1617,9 @@ typedef struct match_data { int offset_max; /* The maximum usable for return data */ int nltype; /* Newline type */ int nllen; /* Newline string length */ + int name_count; /* Number of names in name table */ + int name_entry_size; /* Size of entry in names table */ + uschar *name_table; /* Table of names */ uschar nl[4]; /* Newline string when fixed */ const uschar *lcc; /* Points to lower casing table */ const uschar *ctypes; /* Points to table of type maps */ @@ -1599,7 +1630,7 @@ typedef struct match_data { BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ BOOL endonly; /* Dollar not before final \n */ BOOL notempty; /* Empty string match not wanted */ - BOOL partial; /* PARTIAL flag */ + BOOL notempty_atstart; /* Empty string match at start not wanted */ BOOL hitend; /* Hit the end of the subject at some point */ BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ const uschar *start_code; /* For use when recursing */ @@ -1607,6 +1638,8 @@ typedef struct match_data { USPTR end_subject; /* End of the subject string */ USPTR start_match_ptr; /* Start of matched string */ USPTR end_match_ptr; /* Subject position at end match */ + USPTR start_used_ptr; /* Earliest consulted character */ + int partial; /* PARTIAL options */ int end_offset_top; /* Highwater mark at end of match */ int capture_last; /* Most recent capture number */ int start_offset; /* The start offset value */ @@ -1623,7 +1656,9 @@ typedef struct dfa_match_data { const uschar *start_code; /* Start of the compiled pattern */ const uschar *start_subject; /* Start of the subject string */ const uschar *end_subject; /* End of subject string */ + const uschar *start_used_ptr; /* Earliest consulted character */ const uschar *tables; /* Character tables */ + int start_offset; /* The start offset value */ int moptions; /* Match options */ int poptions; /* Pattern options */ int nltype; /* Newline type */ @@ -1702,15 +1737,16 @@ extern const uschar _pcre_OP_lengths[]; one of the exported public functions. They have to be "external" in the C sense, but are not part of the PCRE public API. */ -extern BOOL _pcre_is_newline(const uschar *, int, const uschar *, - int *, BOOL); -extern int _pcre_ord2utf8(int, uschar *); -extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, - const pcre_study_data *, pcre_study_data *); -extern int _pcre_valid_utf8(const uschar *, int); -extern BOOL _pcre_was_newline(const uschar *, int, const uschar *, - int *, BOOL); -extern BOOL _pcre_xclass(int, const uschar *); +extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int); +extern BOOL _pcre_is_newline(const uschar *, int, const uschar *, + int *, BOOL); +extern int _pcre_ord2utf8(int, uschar *); +extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, + const pcre_study_data *, pcre_study_data *); +extern int _pcre_valid_utf8(const uschar *, int); +extern BOOL _pcre_was_newline(const uschar *, int, const uschar *, + int *, BOOL); +extern BOOL _pcre_xclass(int, const uschar *); /* Unicode character database (UCD) */ |