summary | refs | log | tree | commit | diff
path: root/ext/pcre/pcrelib/pcre_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r-- ext/pcre/pcrelib/pcre_compile.c 58
1 file changed, 33 insertions, 25 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index 34721c8863..dd94473573 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2006 University of Cambridge
+ Copyright (c) 1997-2007 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -82,7 +82,7 @@ are simple data values; negative values are for special things like \d and so
on. Zero means further processing is needed (for things like \x), or the escape
is invalid. */
-#if !EBCDIC /* This is the "normal" table for ASCII systems */
+#ifndef EBCDIC /* This is the "normal" table for ASCII systems */
static const short int escapes[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
@@ -96,7 +96,7 @@ static const short int escapes[] = {
0, 0, -ESC_z /* x - z */
};
-#else /* This is the "abnormal" table for EBCDIC systems */
+#else /* This is the "abnormal" table for EBCDIC systems */
static const short int escapes[] = {
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
@@ -262,7 +262,7 @@ For convenience, we use the same bit definitions as in chartables:
Then we can use ctype_digit and ctype_xdigit in the code. */
-#if !EBCDIC /* This is the "normal" case, for ASCII systems */
+#ifndef EBCDIC /* This is the "normal" case, for ASCII systems */
static const unsigned char digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
@@ -298,7 +298,7 @@ static const unsigned char digitab[] =
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-#else /* This is the "abnormal" case, for EBCDIC systems */
+#else /* This is the "abnormal" case, for EBCDIC systems */
static const unsigned char digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
@@ -312,7 +312,7 @@ static const unsigned char digitab[] =
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
@@ -346,7 +346,7 @@ static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
- 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */
+ 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
@@ -421,11 +421,11 @@ if (c == 0) *errorcodeptr = ERR1;
a table. A non-zero result is something that can be returned immediately.
Otherwise further processing may be required. */
-#if !EBCDIC /* ASCII coding */
+#ifndef EBCDIC /* ASCII coding */
else if (c < '0' || c > 'z') {} /* Not alphameric */
else if ((i = escapes[c - '0']) != 0) c = i;
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
else if ((i = escapes[c - 0x48]) != 0) c = i;
#endif
@@ -562,10 +562,10 @@ else
if (c == 0 && cc == '0') continue; /* Leading zeroes */
count++;
-#if !EBCDIC /* ASCII coding */
+#ifndef EBCDIC /* ASCII coding */
if (cc >= 'a') cc -= 32; /* Convert to upper case */
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
#endif
@@ -589,10 +589,10 @@ else
{
int cc; /* Some compilers don't like ++ */
cc = *(++ptr); /* in initializers */
-#if !EBCDIC /* ASCII coding */
+#ifndef EBCDIC /* ASCII coding */
if (cc >= 'a') cc -= 32; /* Convert to upper case */
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
if (cc <= 'z') cc += 64; /* Convert to upper case */
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
#endif
@@ -611,10 +611,10 @@ else
return 0;
}
-#if !EBCDIC /* ASCII coding */
+#ifndef EBCDIC /* ASCII coding */
if (c >= 'a' && c <= 'z') c -= 32;
c ^= 0x40;
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
if (c >= 'a' && c <= 'z') c += 64;
c ^= 0xC0;
#endif
@@ -1246,6 +1246,7 @@ for (;;)
else
{
code += _pcre_OP_lengths[c];
+#ifdef SUPPORT_UTF8
if (utf8) switch(c)
{
case OP_CHAR:
@@ -1266,6 +1267,7 @@ for (;;)
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
+#endif
}
}
}
@@ -1309,6 +1311,7 @@ for (;;)
else
{
code += _pcre_OP_lengths[c];
+#ifdef SUPPORT_UTF8
if (utf8) switch(c)
{
case OP_CHAR:
@@ -1329,6 +1332,7 @@ for (;;)
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
+#endif
}
}
}
@@ -5039,7 +5043,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
-PCRE_DATA_SCOPE pcre *
+PCRE_EXP_DEFN pcre *
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@@ -5047,7 +5051,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
-PCRE_DATA_SCOPE pcre *
+PCRE_EXP_DEFN pcre *
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{
@@ -5096,7 +5100,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0;
if (erroroffset == NULL)
{
errorcode = ERR16;
- goto PCRE_EARLY_ERROR_RETURN;
+ goto PCRE_EARLY_ERROR_RETURN2;
}
*erroroffset = 0;
@@ -5109,7 +5113,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
{
errorcode = ERR44;
- goto PCRE_UTF8_ERROR_RETURN;
+ goto PCRE_EARLY_ERROR_RETURN2;
}
#else
if ((options & PCRE_UTF8) != 0)
@@ -5134,7 +5138,8 @@ cd->cbits = tables + cbits_offset;
cd->ctypes = tables + ctypes_offset;
/* Handle different types of newline. The three bits give seven cases. The
-current code allows for fixed one- or two-byte sequences, plus "any". */
+current code allows for fixed one- or two-byte sequences, plus "any" and
+"anycrlf". */
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
{
@@ -5144,10 +5149,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
case PCRE_NEWLINE_ANY: newline = -1; break;
+ case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
}
-if (newline < 0)
+if (newline == -2)
+ {
+ cd->nltype = NLTYPE_ANYCRLF;
+ }
+else if (newline < 0)
{
cd->nltype = NLTYPE_ANY;
}
@@ -5321,9 +5331,7 @@ if (errorcode != 0)
(pcre_free)(re);
PCRE_EARLY_ERROR_RETURN:
*erroroffset = ptr - (const uschar *)pattern;
-#ifdef SUPPORT_UTF8
- PCRE_UTF8_ERROR_RETURN:
-#endif
+ PCRE_EARLY_ERROR_RETURN2:
*errorptr = error_texts[errorcode];
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
return NULL;
@@ -5413,7 +5421,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
else printf("Req char = \\x%02x%s\n", ch, caseless);
}
-pcre_printint(re, stdout);
+pcre_printint(re, stdout, TRUE);
/* This check is done here in the debugging case so that the code that
was compiled can be seen. */