summary | refs | log | tree | commit | diff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r-- ext/pcre/pcrelib/pcre_exec.c 87
1 file changed, 70 insertions, 17 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 1c570a03bd..6fc2126c8d 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
possible. There are also some static supporting functions. */
-#include <config.h>
+#include "config.h"
#define NLBLOCK md /* Block containing newline information */
#define PSSTART start_subject /* Field containing processed string start */
@@ -1524,12 +1524,16 @@ for (;;)
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
+
case 0x000a:
+ break;
+
case 0x000b:
case 0x000c:
case 0x0085:
case 0x2028:
case 0x2029:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
ecode++;
@@ -2952,12 +2956,16 @@ for (;;)
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
+
case 0x000a:
+ break;
+
case 0x000b:
case 0x000c:
case 0x0085:
case 0x2028:
case 0x2029:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
}
@@ -3170,9 +3178,12 @@ for (;;)
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
case 0x000a:
+ break;
+
case 0x000b:
case 0x000c:
case 0x0085:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
}
@@ -3424,11 +3435,14 @@ for (;;)
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
case 0x000a:
+ break;
+
case 0x000b:
case 0x000c:
case 0x0085:
case 0x2028:
case 0x2029:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
break;
@@ -3580,10 +3594,14 @@ for (;;)
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
+
case 0x000a:
+ break;
+
case 0x000b:
case 0x000c:
case 0x0085:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
break;
}
break;
@@ -3881,8 +3899,10 @@ for (;;)
}
else
{
- if (c != 0x000a && c != 0x000b && c != 0x000c &&
- c != 0x0085 && c != 0x2028 && c != 0x2029)
+ if (c != 0x000a &&
+ (md->bsr_anycrlf ||
+ (c != 0x000b && c != 0x000c &&
+ c != 0x0085 && c != 0x2028 && c != 0x2029)))
break;
eptr += len;
}
@@ -4072,7 +4092,9 @@ for (;;)
}
else
{
- if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
+ if (c != 0x000a &&
+ (md->bsr_anycrlf ||
+ (c != 0x000b && c != 0x000c && c != 0x0085)))
break;
eptr++;
}
@@ -4222,12 +4244,17 @@ HEAP_RETURN:
switch (frame->Xwhere)
{
LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
- LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
- LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
- LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
- LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
- LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)
- LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)
+ LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
+ LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
+ LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
+ LBL(53) LBL(54)
+#ifdef SUPPORT_UTF8
+ LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
+ LBL(32) LBL(34) LBL(42) LBL(46)
+#ifdef SUPPORT_UCP
+ LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
+#endif /* SUPPORT_UCP */
+#endif /* SUPPORT_UTF8 */
default:
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
return PCRE_ERROR_INTERNAL;
@@ -4406,7 +4433,7 @@ if (re->magic_number != MAGIC_NUMBER)
/* Set up other data */
anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
-startline = (re->options & PCRE_STARTLINE) != 0;
+startline = (re->flags & PCRE_STARTLINE) != 0;
firstline = (re->options & PCRE_FIRSTLINE) != 0;
/* The code starts after the real_pcre block and the capture name table. */
@@ -4433,11 +4460,37 @@ md->recursive = NULL; /* No recursion at top level */
md->lcc = tables + lcc_offset;
md->ctypes = tables + ctypes_offset;
+/* Handle different \R options. */
+
+switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
+ {
+ case 0:
+ if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
+ md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
+ else
+#ifdef BSR_ANYCRLF
+ md->bsr_anycrlf = TRUE;
+#else
+ md->bsr_anycrlf = FALSE;
+#endif
+ break;
+
+ case PCRE_BSR_ANYCRLF:
+ md->bsr_anycrlf = TRUE;
+ break;
+
+ case PCRE_BSR_UNICODE:
+ md->bsr_anycrlf = FALSE;
+ break;
+
+ default: return PCRE_ERROR_BADNEWLINE;
+ }
+
/* Handle different types of newline. The three bits give eight cases. If
nothing is set at run time, whatever was used at compile time applies. */
-switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
- PCRE_NEWLINE_BITS)
+switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
+ (pcre_uint32)options) & PCRE_NEWLINE_BITS)
{
case 0: newline = NEWLINE; break; /* Compile-time default */
case PCRE_NEWLINE_CR: newline = '\r'; break;
@@ -4476,7 +4529,7 @@ else
/* Partial matching is supported only for a restricted set of regexes at the
moment. */
-if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
+if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
return PCRE_ERROR_BADPARTIAL;
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
@@ -4553,7 +4606,7 @@ studied, there may be a bitmap of possible first characters. */
if (!anchored)
{
- if ((re->options & PCRE_FIRSTSET) != 0)
+ if ((re->flags & PCRE_FIRSTSET) != 0)
{
first_byte = re->first_byte & 255;
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
@@ -4568,7 +4621,7 @@ if (!anchored)
/* For anchored or unanchored matches, there may be a "last known required
character" set. */
-if ((re->options & PCRE_REQCHSET) != 0)
+if ((re->flags & PCRE_REQCHSET) != 0)
{
req_byte = re->req_byte & 255;
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
@@ -4790,7 +4843,7 @@ for(;;)
if (start_match[-1] == '\r' &&
start_match < end_subject &&
*start_match == '\n' &&
- (re->options & PCRE_HASCRORLF) == 0 &&
+ (re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||
md->nllen == 2))