summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r--ext/pcre/pcrelib/pcre_exec.c102
1 files changed, 42 insertions, 60 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 7aa3f85e5e..82b79af8c5 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2015 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1038,7 +1038,7 @@ for (;;)
the result of a recursive call to match() whatever happened so it was
possible to reduce stack usage by turning this into a tail recursion,
except in the case of a possibly empty group. However, now that there is
- the possibility of (*THEN) occurring in the final alternative, this
+ the possiblity of (*THEN) occurring in the final alternative, this
optimization is no longer always possible.
We can optimize if we know there are no (*THEN)s in the pattern; at present
@@ -1165,11 +1165,16 @@ for (;;)
if (rrc == MATCH_KETRPOS)
{
offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
save_capture_last = md->capture_last;
matched_once = TRUE;
mstart = md->start_match_ptr; /* In case \K changed it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
+ {
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
+ }
+ eptr = md->end_match_ptr;
continue;
}
@@ -1239,10 +1244,15 @@ for (;;)
if (rrc == MATCH_KETRPOS)
{
offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
matched_once = TRUE;
mstart = md->start_match_ptr; /* In case \K reset it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
+ {
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
+ }
+ eptr = md->end_match_ptr;
continue;
}
@@ -1977,6 +1987,19 @@ for (;;)
}
}
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+ at a time from the outer level, thus saving stack. This must precede the
+ empty string test - in this case that test is done at the outer level. */
+
+ if (*ecode == OP_KETRPOS)
+ {
+ md->start_match_ptr = mstart; /* In case \K reset it */
+ md->end_match_ptr = eptr;
+ md->end_offset_top = offset_top;
+ RRETURN(MATCH_KETRPOS);
+ }
+
/* For an ordinary non-repeating ket, just continue at this level. This
also happens for a repeating ket if no characters were matched in the
group. This is the forcible breaking of infinite loops as implemented in
@@ -1999,18 +2022,6 @@ for (;;)
break;
}
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
- at a time from the outer level, thus saving stack. */
-
- if (*ecode == OP_KETRPOS)
- {
- md->start_match_ptr = mstart; /* In case \K reset it */
- md->end_match_ptr = eptr;
- md->end_offset_top = offset_top;
- RRETURN(MATCH_KETRPOS);
- }
-
/* The normal repeating kets try the rest of the pattern or restart from
the preceding bracket, in the appropriate order. In the second case, we can
use tail recursion to avoid using another stack frame, unless we have an
@@ -5679,54 +5690,25 @@ for (;;)
switch(ctype)
{
case OP_ANY:
- if (max < INT_MAX)
+ for (i = min; i < max; i++)
{
- for (i = min; i < max; i++)
+ if (eptr >= md->end_subject)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ SCHECK_PARTIAL();
+ break;
}
- }
-
- /* Handle unlimited UTF-8 repeat */
-
- else
- {
- for (i = min; i < max; i++)
+ if (IS_NEWLINE(eptr)) break;
+ if (md->partial != 0 && /* Take care with CRLF partial */
+ eptr + 1 >= md->end_subject &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ md->hitend = TRUE;
+ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
+ eptr++;
+ ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
break;
@@ -6517,7 +6499,7 @@ tables = re->tables;
if (extra_data != NULL)
{
- register unsigned int flags = extra_data->flags;
+ unsigned long int flags = extra_data->flags;
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)