summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r--ext/pcre/pcrelib/pcre_exec.c196
1 files changed, 87 insertions, 109 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index e4da5227a2..0d32655866 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -134,7 +134,7 @@ pcre_uint32 c;
BOOL utf = md->utf;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
while (length-- > 0)
- if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
+ if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
}
#endif
@@ -237,8 +237,8 @@ if (caseless)
{
pcre_uint32 cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
- cc = RAWUCHARTEST(eptr);
- cp = RAWUCHARTEST(p);
+ cc = UCHAR21TEST(eptr);
+ cp = UCHAR21TEST(p);
if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
p++;
eptr++;
@@ -254,7 +254,7 @@ else
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
+ if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
}
}
@@ -1167,11 +1167,16 @@ for (;;)
if (rrc == MATCH_KETRPOS)
{
offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
save_capture_last = md->capture_last;
matched_once = TRUE;
mstart = md->start_match_ptr; /* In case \K changed it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
+ {
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
+ }
+ eptr = md->end_match_ptr;
continue;
}
@@ -1241,10 +1246,15 @@ for (;;)
if (rrc == MATCH_KETRPOS)
{
offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
matched_once = TRUE;
mstart = md->start_match_ptr; /* In case \K reset it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
+ {
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
+ }
+ eptr = md->end_match_ptr;
continue;
}
@@ -1979,6 +1989,19 @@ for (;;)
}
}
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+ at a time from the outer level, thus saving stack. This must precede the
+ empty string test - in this case that test is done at the outer level. */
+
+ if (*ecode == OP_KETRPOS)
+ {
+ md->start_match_ptr = mstart; /* In case \K reset it */
+ md->end_match_ptr = eptr;
+ md->end_offset_top = offset_top;
+ RRETURN(MATCH_KETRPOS);
+ }
+
/* For an ordinary non-repeating ket, just continue at this level. This
also happens for a repeating ket if no characters were matched in the
group. This is the forcible breaking of infinite loops as implemented in
@@ -2001,18 +2024,6 @@ for (;;)
break;
}
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
- at a time from the outer level, thus saving stack. */
-
- if (*ecode == OP_KETRPOS)
- {
- md->start_match_ptr = mstart; /* In case \K reset it */
- md->end_match_ptr = eptr;
- md->end_offset_top = offset_top;
- RRETURN(MATCH_KETRPOS);
- }
-
/* The normal repeating kets try the rest of the pattern or restart from
the preceding bracket, in the appropriate order. In the second case, we can
use tail recursion to avoid using another stack frame, unless we have an
@@ -2103,7 +2114,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2147,7 +2158,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2290,7 +2301,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+ UCHAR21TEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2444,7 +2455,7 @@ for (;;)
{
SCHECK_PARTIAL();
}
- else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
+ else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -2691,16 +2702,22 @@ for (;;)
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
ecode += 1 + 2*IMM2_SIZE;
+ /* Setting the default length first and initializing 'offset' avoids
+ compiler warnings in the REF_REPEAT code. */
+
+ length = (md->jscript_compat)? 0 : -1;
+ offset = 0;
+
while (count-- > 0)
{
offset = GET2(slot, 0) << 1;
- if (offset < offset_top && md->offset_vector[offset] >= 0) break;
+ if (offset < offset_top && md->offset_vector[offset] >= 0)
+ {
+ length = md->offset_vector[offset+1] - md->offset_vector[offset];
+ break;
+ }
slot += md->name_entry_size;
}
- if (count < 0)
- length = (md->jscript_compat)? 0 : -1;
- else
- length = md->offset_vector[offset+1] - md->offset_vector[offset];
}
goto REF_REPEAT;
@@ -3212,7 +3229,7 @@ for (;;)
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
+ while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
}
else
#endif
@@ -3252,7 +3269,7 @@ for (;;)
if (fc < 128)
{
- pcre_uint32 cc = RAWUCHAR(eptr);
+ pcre_uint32 cc = UCHAR21(eptr);
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
@@ -3521,7 +3538,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHARTEST(eptr);
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
@@ -3539,7 +3556,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHARTEST(eptr);
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
@@ -3556,7 +3573,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- cc = RAWUCHARTEST(eptr);
+ cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) break;
eptr++;
}
@@ -3583,7 +3600,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
+ if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
if (min == max) continue;
@@ -3600,7 +3617,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
+ if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3614,7 +3631,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- if (fc != RAWUCHARTEST(eptr)) break;
+ if (fc != UCHAR21TEST(eptr)) break;
eptr++;
}
if (possessive) continue; /* No backtracking */
@@ -4369,7 +4386,7 @@ for (;;)
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- RAWUCHAR(eptr) == NLBLOCK->nl[0])
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4411,7 +4428,7 @@ for (;;)
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -4521,7 +4538,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHAR(eptr);
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4538,7 +4555,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHAR(eptr);
+ cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4555,7 +4572,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHAR(eptr);
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4572,7 +4589,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHAR(eptr);
+ cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -4589,7 +4606,7 @@ for (;;)
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = RAWUCHAR(eptr);
+ cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
@@ -5150,7 +5167,7 @@ for (;;)
{
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -5675,54 +5692,25 @@ for (;;)
switch(ctype)
{
case OP_ANY:
- if (max < INT_MAX)
+ for (i = min; i < max; i++)
{
- for (i = min; i < max; i++)
+ if (eptr >= md->end_subject)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- RAWUCHAR(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ SCHECK_PARTIAL();
+ break;
}
- }
-
- /* Handle unlimited UTF-8 repeat */
-
- else
- {
- for (i = min; i < max; i++)
+ if (IS_NEWLINE(eptr)) break;
+ if (md->partial != 0 && /* Take care with CRLF partial */
+ eptr + 1 >= md->end_subject &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- RAWUCHAR(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ md->hitend = TRUE;
+ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
+ eptr++;
+ ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
break;
@@ -5772,7 +5760,7 @@ for (;;)
if (c == CHAR_CR)
{
if (++eptr >= md->end_subject) break;
- if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
+ if (UCHAR21(eptr) == CHAR_LF) eptr++;
}
else
{
@@ -5935,8 +5923,8 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
BACKCHAR(eptr);
- if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
- RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
+ if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
+ UCHAR21(eptr - 1) == CHAR_CR) eptr--;
}
}
else
@@ -6513,7 +6501,7 @@ tables = re->tables;
if (extra_data != NULL)
{
- register unsigned int flags = extra_data->flags;
+ unsigned long int flags = extra_data->flags;
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
@@ -6783,10 +6771,10 @@ for(;;)
if (first_char != first_char2)
while (start_match < end_subject &&
- (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
+ (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
start_match++;
else
- while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
+ while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
start_match++;
}
@@ -6818,7 +6806,7 @@ for(;;)
if (start_match[-1] == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
start_match < end_subject &&
- RAWUCHARTEST(start_match) == CHAR_NL)
+ UCHAR21TEST(start_match) == CHAR_NL)
start_match++;
}
}
@@ -6829,22 +6817,12 @@ for(;;)
{
while (start_match < end_subject)
{
- register pcre_uint32 c = RAWUCHARTEST(start_match);
+ register pcre_uint32 c = UCHAR21TEST(start_match);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1 << (c&7))) == 0)
- {
- start_match++;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- /* In non 8-bit mode, the iteration will stop for
- characters > 255 at the beginning or not stop at all. */
- if (utf)
- ACROSSCHAR(start_match < end_subject, *start_match,
- start_match++);
-#endif
- }
- else break;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+ start_match++;
}
}
} /* Starting optimizations */
@@ -6897,7 +6875,7 @@ for(;;)
{
while (p < end_subject)
{
- register pcre_uint32 pp = RAWUCHARINCTEST(p);
+ register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -6905,7 +6883,7 @@ for(;;)
{
while (p < end_subject)
{
- if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
+ if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}