summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r--ext/pcre/pcrelib/pcre_exec.c465
1 files changed, 339 insertions, 126 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 65173e2ba2..ecb32977df 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2005 University of Cambridge
+ Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -54,7 +54,7 @@ are on the heap, not on the stack. */
typedef struct eptrblock {
struct eptrblock *epb_prev;
- const uschar *epb_saved_eptr;
+ USPTR epb_saved_eptr;
} eptrblock;
/* Flag bits for the match() function */
@@ -128,10 +128,10 @@ Returns: TRUE if matched
*/
static BOOL
-match_ref(int offset, register const uschar *eptr, int length, match_data *md,
+match_ref(int offset, register USPTR eptr, int length, match_data *md,
unsigned long int ims)
{
-const uschar *p = md->start_subject + md->offset_vector[offset];
+USPTR p = md->start_subject + md->offset_vector[offset];
#ifdef DEBUG
if (eptr >= md->end_subject)
@@ -169,32 +169,50 @@ return TRUE;
****************************************************************************
RECURSION IN THE match() FUNCTION
-The match() function is highly recursive. Some regular expressions can cause
-it to recurse thousands of times. I was writing for Unix, so I just let it
-call itself recursively. This uses the stack for saving everything that has
-to be saved for a recursive call. On Unix, the stack can be large, and this
-works fine.
+The match() function is highly recursive, though not every recursive call
+increases the recursive depth. Nevertheless, some regular expressions can cause
+it to recurse to a great depth. I was writing for Unix, so I just let it call
+itself recursively. This uses the stack for saving everything that has to be
+saved for a recursive call. On Unix, the stack can be large, and this works
+fine.
-It turns out that on non-Unix systems there are problems with programs that
-use a lot of stack. (This despite the fact that every last chip has oodles
-of memory these days, and techniques for extending the stack have been known
-for decades.) So....
+It turns out that on some non-Unix-like systems there are problems with
+programs that use a lot of stack. (This despite the fact that every last chip
+has oodles of memory these days, and techniques for extending the stack have
+been known for decades.) So....
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
calls by keeping local variables that need to be preserved in blocks of memory
-obtained from malloc instead instead of on the stack. Macros are used to
+obtained from malloc() instead instead of on the stack. Macros are used to
achieve this so that the actual code doesn't look very different to what it
always used to.
****************************************************************************
***************************************************************************/
-/* These versions of the macros use the stack, as normal */
+/* These versions of the macros use the stack, as normal. There are debugging
+versions and production versions. */
#ifndef NO_RECURSE
#define REGISTER register
-#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)
+#ifdef DEBUG
+#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
+ { \
+ printf("match() called in line %d\n", __LINE__); \
+ rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
+ printf("to line %d\n", __LINE__); \
+ }
+#define RRETURN(ra) \
+ { \
+ printf("match() returned %d from line %d ", ra, __LINE__); \
+ return ra; \
+ }
+#else
+#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
+ rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
+#endif
+
#else
@@ -215,6 +233,7 @@ match(), which never changes. */
newframe->Xims = re;\
newframe->Xeptrb = rf;\
newframe->Xflags = rg;\
+ newframe->Xrdepth = frame->Xrdepth + 1;\
newframe->Xprevframe = frame;\
frame = newframe;\
DPRINTF(("restarting from line %d\n", __LINE__));\
@@ -256,6 +275,7 @@ typedef struct heapframe {
long int Xims;
eptrblock *Xeptrb;
int Xflags;
+ int Xrdepth;
/* Function local variables */
@@ -278,11 +298,11 @@ typedef struct heapframe {
#ifdef SUPPORT_UCP
int Xprop_type;
+ int Xprop_value;
int Xprop_fail_result;
int Xprop_category;
int Xprop_chartype;
- int Xprop_othercase;
- int Xprop_test_against;
+ int Xprop_script;
int *Xprop_test_variable;
#endif
@@ -343,17 +363,18 @@ Arguments:
flags can contain
match_condassert - this is an assertion condition
match_isgroup - this is the start of a bracketed group
+ rdepth the recursion depth
Returns: MATCH_MATCH if matched ) these values are >= 0
MATCH_NOMATCH if failed to match )
a negative PCRE_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by recursion limit)
+ (e.g. stopped by repeated call or recursion limit)
*/
static int
-match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,
+match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
- int flags)
+ int flags, int rdepth)
{
/* These variables do not need to be preserved over recursion in this function,
so they can be ordinary variables in all cases. Mark them with "register"
@@ -381,6 +402,7 @@ frame->Xoffset_top = offset_top;
frame->Xims = ims;
frame->Xeptrb = eptrb;
frame->Xflags = flags;
+frame->Xrdepth = rdepth;
/* This is where control jumps back to to effect "recursion" */
@@ -394,6 +416,7 @@ HEAP_RECURSE:
#define ims frame->Xims
#define eptrb frame->Xeptrb
#define flags frame->Xflags
+#define rdepth frame->Xrdepth
/* Ditto for the local variables */
@@ -418,11 +441,11 @@ HEAP_RECURSE:
#ifdef SUPPORT_UCP
#define prop_type frame->Xprop_type
+#define prop_value frame->Xprop_value
#define prop_fail_result frame->Xprop_fail_result
#define prop_category frame->Xprop_category
#define prop_chartype frame->Xprop_chartype
-#define prop_othercase frame->Xprop_othercase
-#define prop_test_against frame->Xprop_test_against
+#define prop_script frame->Xprop_script
#define prop_test_variable frame->Xprop_test_variable
#endif
@@ -452,20 +475,20 @@ i, and fc and c, can be the same variables. */
#define fc c
-#ifdef SUPPORT_UTF8 /* Many of these variables are used ony */
-const uschar *charptr; /* small blocks of the code. My normal */
-#endif /* style of coding would have declared */
-const uschar *callpat; /* them within each of those blocks. */
-const uschar *data; /* However, in order to accommodate the */
-const uschar *next; /* version of this code that uses an */
-const uschar *pp; /* external "stack" implemented on the */
-const uschar *prev; /* heap, it is easier to declare them */
-const uschar *saved_eptr; /* all here, so the declarations can */
- /* be cut out in a block. The only */
-recursion_info new_recursive; /* declarations within blocks below are */
- /* for variables that do not have to */
-BOOL cur_is_word; /* be preserved over a recursive call */
-BOOL condition; /* to RMATCH(). */
+#ifdef SUPPORT_UTF8 /* Many of these variables are used only */
+const uschar *charptr; /* in small blocks of the code. My normal */
+#endif /* style of coding would have declared */
+const uschar *callpat; /* them within each of those blocks. */
+const uschar *data; /* However, in order to accommodate the */
+const uschar *next; /* version of this code that uses an */
+USPTR pp; /* external "stack" implemented on the */
+const uschar *prev; /* heap, it is easier to declare them all */
+USPTR saved_eptr; /* here, so the declarations can be cut */
+ /* out in a block. The only declarations */
+recursion_info new_recursive; /* within blocks below are for variables */
+ /* that do not have to be preserved over */
+BOOL cur_is_word; /* a recursive call to RMATCH(). */
+BOOL condition;
BOOL minimize;
BOOL prev_is_word;
@@ -473,11 +496,11 @@ unsigned long int original_ims;
#ifdef SUPPORT_UCP
int prop_type;
+int prop_value;
int prop_fail_result;
int prop_category;
int prop_chartype;
-int prop_othercase;
-int prop_test_against;
+int prop_script;
int *prop_test_variable;
#endif
@@ -499,19 +522,24 @@ eptrblock newptrb;
variables. */
#ifdef SUPPORT_UCP
+prop_value = 0;
prop_fail_result = 0;
-prop_test_against = 0;
prop_test_variable = NULL;
#endif
-/* OK, now we can get on with the real code of the function. Recursion is
-specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,
-these just turn into a recursive call to match() and a "return", respectively.
-However, RMATCH isn't like a function call because it's quite a complicated
-macro. It has to be used in one particular way. This shouldn't, however, impact
-performance when true recursion is being used. */
+/* OK, now we can get on with the real code of the function. Recursive calls
+are specified by the macro RMATCH and RRETURN is used to return. When
+NO_RECURSE is *not* defined, these just turn into a recursive call to match()
+and a "return", respectively (possibly with some debugging if DEBUG is
+defined). However, RMATCH isn't like a function call because it's quite a
+complicated macro. It has to be used in one particular way. This shouldn't,
+however, impact performance when true recursion is being used. */
+
+/* First check that we haven't called match() too many times, or that we
+haven't exceeded the recursive call limit. */
if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
+if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
original_ims = ims; /* Save for resetting on ')' */
utf8 = md->utf8; /* Local copy of the flag */
@@ -681,7 +709,7 @@ for (;;)
if (md->recursive != NULL && md->recursive->group_num == 0)
{
recursion_info *rec = md->recursive;
- DPRINTF(("Hit the end in a (?0) recursion\n"));
+ DPRINTF(("End of pattern in a (?0) recursion\n"));
md->recursive = rec->prevrec;
memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
@@ -800,7 +828,7 @@ for (;;)
cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;
- cb.subject = (const char *)md->start_subject;
+ cb.subject = (PCRE_SPTR)md->start_subject;
cb.subject_length = md->end_subject - md->start_subject;
cb.start_match = md->start_match - md->start_subject;
cb.current_position = eptr - md->start_subject;
@@ -882,12 +910,17 @@ for (;;)
eptrb, match_isgroup);
if (rrc == MATCH_MATCH)
{
+ DPRINTF(("Recursion matched\n"));
md->recursive = new_recursive.prevrec;
if (new_recursive.offset_save != stacksave)
(pcre_free)(new_recursive.offset_save);
RRETURN(MATCH_MATCH);
}
- else if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ else if (rrc != MATCH_NOMATCH)
+ {
+ DPRINTF(("Recursion gave error %d\n", rrc));
+ RRETURN(rrc);
+ }
md->recursive = &new_recursive;
memcpy(md->offset_vector, new_recursive.offset_save,
@@ -1352,23 +1385,43 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
- int chartype, rqdtype;
- int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
+ int chartype, script;
+ int category = _pcre_ucp_findprop(c, &chartype, &script);
- rqdtype = *(++ecode);
- ecode++;
-
- if (rqdtype >= 128)
+ switch(ecode[1])
{
- if ((rqdtype - 128 != category) == (op == OP_PROP))
+ case PT_ANY:
+ if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_LAMP:
+ if ((chartype == ucp_Lu ||
+ chartype == ucp_Ll ||
+ chartype == ucp_Lt) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
- }
- else
- {
- if ((rqdtype != chartype) == (op == OP_PROP))
+ break;
+
+ case PT_GC:
+ if ((ecode[2] != category) == (op == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_PC:
+ if ((ecode[2] != chartype) == (op == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_SC:
+ if ((ecode[2] != script) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ break;
}
+
+ ecode += 3;
}
break;
@@ -1379,9 +1432,8 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
- int chartype;
- int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
+ int chartype, script;
+ int category = _pcre_ucp_findprop(c, &chartype, &script);
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -1390,7 +1442,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- category = ucp_findchar(c, &chartype, &othercase);
+ category = _pcre_ucp_findprop(c, &chartype, &script);
if (category != ucp_M) break;
eptr += len;
}
@@ -1683,8 +1735,8 @@ for (;;)
while (eptr >= pp)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
- eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ eptr--;
}
}
@@ -1841,16 +1893,12 @@ for (;;)
ecode += length;
/* If we have Unicode property support, we can use it to test the other
- case of the character, if there is one. The result of ucp_findchar() is
- < 0 if the char isn't found, and othercase is returned as zero if there
- isn't one. */
+ case of the character, if there is one. */
if (fc != dc)
{
#ifdef SUPPORT_UCP
- int chartype;
- int othercase;
- if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)
+ if (dc != _pcre_ucp_othercase(fc))
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -1918,10 +1966,9 @@ for (;;)
#ifdef SUPPORT_UCP
int othercase;
- int chartype;
if ((ims & PCRE_CASELESS) != 0 &&
- ucp_findchar(fc, &chartype, &othercase) >= 0 &&
- othercase > 0)
+ (othercase = _pcre_ucp_othercase(fc)) >= 0 &&
+ othercase >= 0)
oclength = _pcre_ord2utf8(othercase, occhars);
#endif /* SUPPORT_UCP */
@@ -2408,16 +2455,7 @@ for (;;)
{
prop_fail_result = ctype == OP_NOTPROP;
prop_type = *ecode++;
- if (prop_type >= 128)
- {
- prop_test_against = prop_type - 128;
- prop_test_variable = &prop_category;
- }
- else
- {
- prop_test_against = prop_type;
- prop_test_variable = &prop_chartype;
- }
+ prop_value = *ecode++;
}
else prop_type = -1;
#endif
@@ -2434,14 +2472,68 @@ for (;;)
if (min > 0)
{
#ifdef SUPPORT_UCP
- if (prop_type > 0)
+ if (prop_type >= 0)
{
- for (i = 1; i <= min; i++)
+ switch(prop_type)
{
- GETCHARINC(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
- if ((*prop_test_variable == prop_test_against) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
+ case PT_ANY:
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ }
+ break;
+
+ case PT_LAMP:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == ucp_Lu ||
+ prop_chartype == ucp_Ll ||
+ prop_chartype == ucp_Lt) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_GC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_category == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_PC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_SC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_script == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ break;
}
}
@@ -2453,7 +2545,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2462,7 +2554,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2624,17 +2716,78 @@ for (;;)
if (minimize)
{
#ifdef SUPPORT_UCP
- if (prop_type > 0)
+ if (prop_type >= 0)
{
- for (fi = min;; fi++)
+ switch(prop_type)
{
- RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINC(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
- if ((*prop_test_variable == prop_test_against) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
+ case PT_ANY:
+ for (fi = min;; fi++)
+ {
+ RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_LAMP:
+ for (fi = min;; fi++)
+ {
+ RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == ucp_Lu ||
+ prop_chartype == ucp_Ll ||
+ prop_chartype == ucp_Lt) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_GC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_category == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_PC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_SC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_script == prop_value) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ break;
}
}
@@ -2649,7 +2802,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2658,7 +2811,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2783,17 +2936,74 @@ for (;;)
pp = eptr; /* Remember where we started */
#ifdef SUPPORT_UCP
- if (prop_type > 0)
+ if (prop_type >= 0)
{
- for (i = min; i < max; i++)
+ switch(prop_type)
{
- int len = 1;
- if (eptr >= md->end_subject) break;
- GETCHARLEN(c, eptr, len);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
- if ((*prop_test_variable == prop_test_against) == prop_fail_result)
- break;
- eptr+= len;
+ case PT_ANY:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject) break;
+ GETCHARLEN(c, eptr, len);
+ if (prop_fail_result) break;
+ eptr+= len;
+ }
+ break;
+
+ case PT_LAMP:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject) break;
+ GETCHARLEN(c, eptr, len);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == ucp_Lu ||
+ prop_chartype == ucp_Ll ||
+ prop_chartype == ucp_Lt) == prop_fail_result)
+ break;
+ eptr+= len;
+ }
+ break;
+
+ case PT_GC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject) break;
+ GETCHARLEN(c, eptr, len);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_category == prop_value) == prop_fail_result)
+ break;
+ eptr+= len;
+ }
+ break;
+
+ case PT_PC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject) break;
+ GETCHARLEN(c, eptr, len);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_chartype == prop_value) == prop_fail_result)
+ break;
+ eptr+= len;
+ }
+ break;
+
+ case PT_SC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject) break;
+ GETCHARLEN(c, eptr, len);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ if ((prop_script == prop_value) == prop_fail_result)
+ break;
+ eptr+= len;
+ }
+ break;
}
/* eptr is now past the end of the maximum run */
@@ -2816,7 +3026,7 @@ for (;;)
{
if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category == ucp_M) break;
while (eptr < md->end_subject)
{
@@ -2825,7 +3035,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -2846,7 +3056,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
- prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);
+ prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
if (prop_category != ucp_M) break;
eptr--;
}
@@ -3200,9 +3410,9 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
-EXPORT int
+PCRE_DATA_SCOPE int
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
- const char *subject, int length, int start_offset, int options, int *offsets,
+ PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
{
int rc, resetcount, ocount;
@@ -3219,9 +3429,9 @@ BOOL req_byte_caseless = FALSE;
match_data match_block;
const uschar *tables;
const uschar *start_bits = NULL;
-const uschar *start_match = (const uschar *)subject + start_offset;
-const uschar *end_subject;
-const uschar *req_byte_ptr = start_match - 1;
+USPTR start_match = (USPTR)subject + start_offset;
+USPTR end_subject;
+USPTR req_byte_ptr = start_match - 1;
pcre_study_data internal_study;
const pcre_study_data *study;
@@ -3242,6 +3452,7 @@ the default values. */
study = NULL;
match_block.match_limit = MATCH_LIMIT;
+match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;
match_block.callout_data = NULL;
/* The table pointer is always in native byte order. */
@@ -3255,6 +3466,8 @@ if (extra_data != NULL)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
match_block.match_limit = extra_data->match_limit;
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
+ match_block.match_limit_recursion = extra_data->match_limit_recursion;
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
match_block.callout_data = extra_data->callout_data;
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
@@ -3289,7 +3502,7 @@ firstline = (re->options & PCRE_FIRSTLINE) != 0;
match_block.start_code = (const uschar *)external_re + re->name_table_offset +
re->name_count * re->name_entry_size;
-match_block.start_subject = (const uschar *)subject;
+match_block.start_subject = (USPTR)subject;
match_block.start_offset = start_offset;
match_block.end_subject = match_block.start_subject + length;
end_subject = match_block.end_subject;
@@ -3415,7 +3628,7 @@ the loop runs just once. */
do
{
- const uschar *save_end_subject = end_subject;
+ USPTR save_end_subject = end_subject;
/* Reset the maximum number of extractions we might see. */
@@ -3434,7 +3647,7 @@ do
if (firstline)
{
- const uschar *t = start_match;
+ USPTR t = start_match;
while (t < save_end_subject && *t != '\n') t++;
end_subject = t;
}
@@ -3504,7 +3717,7 @@ do
end_subject - start_match < REQ_BYTE_MAX &&
!match_block.partial)
{
- register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);
+ register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
@@ -3550,7 +3763,7 @@ do
match_block.match_call_count = 0;
rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
- match_isgroup);
+ match_isgroup, 0);
/* When the result is no match, if the subject's first character was a
newline and the PCRE_FIRSTLINE option is set, break (which will return