diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r-- | ext/pcre/pcrelib/pcre_compile.c | 748 |
1 files changed, 526 insertions, 222 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c index 54e23ea39e..53027e603d 100644 --- a/ext/pcre/pcrelib/pcre_compile.c +++ b/ext/pcre/pcrelib/pcre_compile.c @@ -122,7 +122,7 @@ static const short int escapes[] = { -ESC_H, 0, 0, -ESC_K, 0, 0, - 0, 0, + -ESC_N, 0, -ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, @@ -169,7 +169,7 @@ static const short int escapes[] = { /* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', /* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, /* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, -/* D0 */ '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P, +/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, /* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, /* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, /* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, @@ -186,11 +186,14 @@ string is built from string macros so that it works in UTF-8 mode on EBCDIC platforms. */ typedef struct verbitem { - int len; - int op; + int len; /* Length of verb name */ + int op; /* Op when no arg, or -1 if arg mandatory */ + int op_arg; /* Op when arg present, or -1 if not allowed */ } verbitem; static const char verbnames[] = + "\0" /* Empty name is a shorthand for MARK */ + STRING_MARK0 STRING_ACCEPT0 STRING_COMMIT0 STRING_F0 @@ -200,13 +203,15 @@ static const char verbnames[] = STRING_THEN; static const verbitem verbs[] = { - { 6, OP_ACCEPT }, - { 6, OP_COMMIT }, - { 1, OP_FAIL }, - { 4, OP_FAIL }, - { 5, OP_PRUNE }, - { 4, OP_SKIP }, - { 4, OP_THEN } + { 0, -1, OP_MARK }, + { 4, -1, OP_MARK }, + { 6, OP_ACCEPT, -1 }, + { 6, OP_COMMIT, -1 }, + { 1, OP_FAIL, -1 }, + { 4, OP_FAIL, -1 }, + { 5, OP_PRUNE, OP_PRUNE_ARG }, + { 4, OP_SKIP, OP_SKIP_ARG }, + { 4, OP_THEN, OP_THEN_ARG } }; static const int verbcount = sizeof(verbs)/sizeof(verbitem); @@ -254,6 +259,53 @@ static const int posix_class_maps[] = { cbit_xdigit,-1, 0 /* xdigit */ }; +/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class +substitutes must be in the order of the names, defined above, and there are +both positive and negative cases. NULL means no substitute. */ + +#ifdef SUPPORT_UCP +static const uschar *substitutes[] = { + (uschar *)"\\P{Nd}", /* \D */ + (uschar *)"\\p{Nd}", /* \d */ + (uschar *)"\\P{Xsp}", /* \S */ /* NOTE: Xsp is Perl space */ + (uschar *)"\\p{Xsp}", /* \s */ + (uschar *)"\\P{Xwd}", /* \W */ + (uschar *)"\\p{Xwd}" /* \w */ +}; + +static const uschar *posix_substitutes[] = { + (uschar *)"\\p{L}", /* alpha */ + (uschar *)"\\p{Ll}", /* lower */ + (uschar *)"\\p{Lu}", /* upper */ + (uschar *)"\\p{Xan}", /* alnum */ + NULL, /* ascii */ + (uschar *)"\\h", /* blank */ + NULL, /* cntrl */ + (uschar *)"\\p{Nd}", /* digit */ + NULL, /* graph */ + NULL, /* print */ + NULL, /* punct */ + (uschar *)"\\p{Xps}", /* space */ /* NOTE: Xps is POSIX space */ + (uschar *)"\\p{Xwd}", /* word */ + NULL, /* xdigit */ + /* Negated cases */ + (uschar *)"\\P{L}", /* ^alpha */ + (uschar *)"\\P{Ll}", /* ^lower */ + (uschar *)"\\P{Lu}", /* ^upper */ + (uschar *)"\\P{Xan}", /* ^alnum */ + NULL, /* ^ascii */ + (uschar *)"\\H", /* ^blank */ + NULL, /* ^cntrl */ + (uschar *)"\\P{Nd}", /* ^digit */ + NULL, /* ^graph */ + NULL, /* ^print */ + NULL, /* ^punct */ + (uschar *)"\\P{Xps}", /* ^space */ /* NOTE: Xps is POSIX space */ + (uschar *)"\\P{Xwd}", /* ^word */ + NULL /* ^xdigit */ +}; +#define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *)) +#endif #define STRING(a) # a #define XSTRING(s) STRING(s) @@ -317,7 +369,7 @@ static const char error_texts[] = /* 35 */ "invalid condition (?(0)\0" "\\C not allowed in lookbehind assertion\0" - "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0" + "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" "number after (?C is > 255\0" "closing ) for (?C expected\0" /* 40 */ @@ -343,7 +395,7 @@ static const char error_texts[] = "inconsistent NEWLINE options\0" "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" "a numbered reference must not be zero\0" - "(*VERB) with an argument is not supported\0" + "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" /* 60 */ "(*VERB) not recognized\0" "number is too big\0" @@ -351,7 +403,10 @@ static const char error_texts[] = "digit expected after (?+\0" "] is an invalid data character in JavaScript compatibility mode\0" /* 65 */ - "different names for subpatterns of the same number are not allowed\0"; + "different names for subpatterns of the same number are not allowed\0" + "(*MARK) must have an argument\0" + "this version of PCRE is not compiled with PCRE_UCP support\0" + ; /* Table to identify digits and hex digits. This is used when compiling patterns. Note that the tables in chartables are dependent on the locale, and @@ -584,7 +639,6 @@ else case CHAR_l: case CHAR_L: - case CHAR_N: case CHAR_u: case CHAR_U: *errorcodeptr = ERR37; @@ -822,6 +876,19 @@ else } } +/* Perl supports \N{name} for character names, as well as plain \N for "not +newline". PCRE does not support \N{name}. */ + +if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET) + *errorcodeptr = ERR37; + +/* If PCRE_UCP is set, we change the values for \d etc. */ + +if ((options & PCRE_UCP) != 0 && c <= -ESC_D && c >= -ESC_w) + c -= (ESC_DU - ESC_D); + +/* Set the pointer to the final character before returning. */ + *ptrptr = ptr; return c; } @@ -1060,25 +1127,39 @@ dealing with. The very first call may not start with a parenthesis. */ if (ptr[0] == CHAR_LEFT_PARENTHESIS) { - if (ptr[1] == CHAR_QUESTION_MARK && - ptr[2] == CHAR_VERTICAL_LINE) + /* Handle specials such as (*SKIP) or (*UTF8) etc. */ + + if (ptr[1] == CHAR_ASTERISK) ptr += 2; + + /* Handle a normal, unnamed capturing parenthesis. */ + + else if (ptr[1] != CHAR_QUESTION_MARK) + { + *count += 1; + if (name == NULL && *count == lorn) return *count; + ptr++; + } + + /* All cases now have (? at the start. Remember when we are in a group + where the parenthesis numbers are duplicated. */ + + else if (ptr[2] == CHAR_VERTICAL_LINE) { ptr += 3; dup_parens = TRUE; } - /* Handle a normal, unnamed capturing parenthesis */ + /* Handle comments; all characters are allowed until a ket is reached. */ - else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) + else if (ptr[2] == CHAR_NUMBER_SIGN) { - *count += 1; - if (name == NULL && *count == lorn) return *count; - ptr++; + for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break; + goto FAIL_EXIT; } /* Handle a condition. If it is an assertion, just carry on so that it is processed as normal. If not, skip to the closing parenthesis of the - condition (there can't be any nested parens. */ + condition (there can't be any nested parens). */ else if (ptr[2] == CHAR_LEFT_PARENTHESIS) { @@ -1090,7 +1171,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) } } - /* We have either (? or (* and not a condition */ + /* Start with (? but not a condition. */ else { @@ -1212,8 +1293,7 @@ for (; *ptr != 0; ptr++) else if (*ptr == CHAR_RIGHT_PARENTHESIS) { if (dup_parens && *count < hwm_count) *count = hwm_count; - *ptrptr = ptr; - return -1; + goto FAIL_EXIT; } else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) @@ -1613,7 +1693,8 @@ for (;;) /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters. */ + two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we + must add in its length. */ else { @@ -1637,6 +1718,13 @@ for (;;) case OP_TYPEPOSUPTO: if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; break; + + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; } /* Add in the fixed length from the table */ @@ -1708,7 +1796,8 @@ for (;;) /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra - two bytes of parameters. */ + two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we + must add in its length. */ else { @@ -1732,6 +1821,13 @@ for (;;) case OP_TYPEEXACT: if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; break; + + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; } /* Add in the fixed length from the table */ @@ -2001,6 +2097,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE break; #endif + /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument + string. */ + + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; + /* None of the remaining opcodes are required to match a character. */ default: @@ -2221,8 +2327,8 @@ auto_callout(uschar *code, const uschar *ptr, compile_data *cd) { *code++ = OP_CALLOUT; *code++ = 255; -PUT(code, 0, ptr - cd->start_pattern); /* Pattern offset */ -PUT(code, LINK_SIZE, 0); /* Default length */ +PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */ +PUT(code, LINK_SIZE, 0); /* Default length */ return code + 2*LINK_SIZE; } @@ -2247,7 +2353,7 @@ Returns: nothing static void complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd) { -int length = ptr - cd->start_pattern - GET(previous_callout, 2); +int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2)); PUT(previous_callout, 2 + LINK_SIZE, length); } @@ -2297,6 +2403,69 @@ for (++c; c <= d; c++) return TRUE; } + + + +/************************************************* +* Check a character and a property * +*************************************************/ + +/* This function is called by check_auto_possessive() when a property item +is adjacent to a fixed character. + +Arguments: + c the character + ptype the property type + pdata the data for the type + negated TRUE if it's a negated property (\P or \p{^) + +Returns: TRUE if auto-possessifying is OK +*/ + +static BOOL +check_char_prop(int c, int ptype, int pdata, BOOL negated) +{ +const ucd_record *prop = GET_UCD(c); +switch(ptype) + { + case PT_LAMP: + return (prop->chartype == ucp_Lu || + prop->chartype == ucp_Ll || + prop->chartype == ucp_Lt) == negated; + + case PT_GC: + return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated; + + case PT_PC: + return (pdata == prop->chartype) == negated; + + case PT_SC: + return (pdata == prop->script) == negated; + + /* These are specials */ + + case PT_ALNUM: + return (_pcre_ucp_gentype[prop->chartype] == ucp_L || + _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated; + + case PT_SPACE: /* Perl space */ + return (_pcre_ucp_gentype[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) + == negated; + + case PT_PXSPACE: /* POSIX space */ + return (_pcre_ucp_gentype[prop->chartype] == ucp_Z || + c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || + c == CHAR_FF || c == CHAR_CR) + == negated; + + case PT_WORD: + return (_pcre_ucp_gentype[prop->chartype] == ucp_L || + _pcre_ucp_gentype[prop->chartype] == ucp_N || + c == CHAR_UNDERSCORE) == negated; + } +return FALSE; +} #endif /* SUPPORT_UCP */ @@ -2310,10 +2479,8 @@ whether the next thing could possibly match the repeated item. If not, it makes sense to automatically possessify the repeated item. Arguments: - op_code the repeated op code - this data for this item, depends on the opcode + previous pointer to the repeated opcode utf8 TRUE in UTF-8 mode - utf8_char used for utf8 character bytes, NULL if not relevant ptr next character in pattern options options bits cd contains pointers to tables etc. @@ -2322,10 +2489,11 @@ Returns: TRUE if possessifying is wanted */ static BOOL -check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char, - const uschar *ptr, int options, compile_data *cd) +check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr, + int options, compile_data *cd) { -int next; +int c, next; +int op_code = *previous++; /* Skip whitespace and comments in extended mode */ @@ -2386,23 +2554,18 @@ if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) return FALSE; -/* Now compare the next item with the previous opcode. If the previous is a -positive single character match, "item" either contains the character or, if -"item" is greater than 127 in utf8 mode, the character's bytes are in -utf8_char. */ - - -/* Handle cases when the next item is a character. */ +/* Now compare the next item with the previous opcode. First, handle cases when +the next item is a character. */ if (next >= 0) switch(op_code) { case OP_CHAR: #ifdef SUPPORT_UTF8 - if (utf8 && item > 127) { GETCHAR(item, utf8_char); } + GETCHARTEST(c, previous); #else - (void)(utf8_char); /* Keep compiler happy by referencing function argument */ + c = *previous; #endif - return item != next; + return c != next; /* For CHARNC (caseless character) we must check the other case. If we have Unicode property support, we can use it to test the other case of @@ -2410,9 +2573,11 @@ if (next >= 0) switch(op_code) case OP_CHARNC: #ifdef SUPPORT_UTF8 - if (utf8 && item > 127) { GETCHAR(item, utf8_char); } + GETCHARTEST(c, previous); +#else + c = *previous; #endif - if (item == next) return FALSE; + if (c == next) return FALSE; #ifdef SUPPORT_UTF8 if (utf8) { @@ -2423,16 +2588,16 @@ if (next >= 0) switch(op_code) #else othercase = NOTACHAR; #endif - return (unsigned int)item != othercase; + return (unsigned int)c != othercase; } else #endif /* SUPPORT_UTF8 */ - return (item != cd->fcc[next]); /* Non-UTF-8 mode */ + return (c != cd->fcc[next]); /* Non-UTF-8 mode */ - /* For OP_NOT, "item" must be a single-byte character. */ + /* For OP_NOT, its data is always a single-byte character. */ case OP_NOT: - if (item == next) return TRUE; + if ((c = *previous) == next) return TRUE; if ((options & PCRE_CASELESS) == 0) return FALSE; #ifdef SUPPORT_UTF8 if (utf8) @@ -2444,11 +2609,14 @@ if (next >= 0) switch(op_code) #else othercase = NOTACHAR; #endif - return (unsigned int)item == othercase; + return (unsigned int)c == othercase; } else #endif /* SUPPORT_UTF8 */ - return (item == cd->fcc[next]); /* Non-UTF-8 mode */ + return (c == cd->fcc[next]); /* Non-UTF-8 mode */ + + /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. + When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ case OP_DIGIT: return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; @@ -2491,11 +2659,12 @@ if (next >= 0) switch(op_code) case 0x202f: case 0x205f: case 0x3000: - return op_code != OP_HSPACE; + return op_code == OP_NOT_HSPACE; default: - return op_code == OP_HSPACE; + return op_code != OP_NOT_HSPACE; } + case OP_ANYNL: case OP_VSPACE: case OP_NOT_VSPACE: switch(next) @@ -2507,48 +2676,62 @@ if (next >= 0) switch(op_code) case 0x85: case 0x2028: case 0x2029: - return op_code != OP_VSPACE; + return op_code == OP_NOT_VSPACE; default: - return op_code == OP_VSPACE; + return op_code != OP_NOT_VSPACE; } +#ifdef SUPPORT_UCP + case OP_PROP: + return check_char_prop(next, previous[0], previous[1], FALSE); + + case OP_NOTPROP: + return check_char_prop(next, previous[0], previous[1], TRUE); +#endif + default: return FALSE; } -/* Handle the case when the next item is \d, \s, etc. */ +/* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP +is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are +generated only when PCRE_UCP is *not* set, that is, when only ASCII +characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are +replaced by OP_PROP codes when PCRE_UCP is set. */ switch(op_code) { case OP_CHAR: case OP_CHARNC: #ifdef SUPPORT_UTF8 - if (utf8 && item > 127) { GETCHAR(item, utf8_char); } + GETCHARTEST(c, previous); +#else + c = *previous; #endif switch(-next) { case ESC_d: - return item > 127 || (cd->ctypes[item] & ctype_digit) == 0; + return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; case ESC_D: - return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0; + return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; case ESC_s: - return item > 127 || (cd->ctypes[item] & ctype_space) == 0; + return c > 127 || (cd->ctypes[c] & ctype_space) == 0; case ESC_S: - return item <= 127 && (cd->ctypes[item] & ctype_space) != 0; + return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; case ESC_w: - return item > 127 || (cd->ctypes[item] & ctype_word) == 0; + return c > 127 || (cd->ctypes[c] & ctype_word) == 0; case ESC_W: - return item <= 127 && (cd->ctypes[item] & ctype_word) != 0; + return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; case ESC_h: case ESC_H: - switch(item) + switch(c) { case 0x09: case 0x20: @@ -2576,7 +2759,7 @@ switch(op_code) case ESC_v: case ESC_V: - switch(item) + switch(c) { case 0x0a: case 0x0b: @@ -2590,38 +2773,92 @@ switch(op_code) return -next == ESC_v; } + /* When PCRE_UCP is set, these values get generated for \d etc. Find + their substitutions and process them. The result will always be either + -ESC_p or -ESC_P. Then fall through to process those values. */ + +#ifdef SUPPORT_UCP + case ESC_du: + case ESC_DU: + case ESC_wu: + case ESC_WU: + case ESC_su: + case ESC_SU: + { + int temperrorcode = 0; + ptr = substitutes[-next - ESC_DU]; + next = check_escape(&ptr, &temperrorcode, 0, options, FALSE); + if (temperrorcode != 0) return FALSE; + ptr++; /* For compatibility */ + } + /* Fall through */ + + case ESC_p: + case ESC_P: + { + int ptype, pdata, errorcodeptr; + BOOL negated; + + ptr--; /* Make ptr point at the p or P */ + ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr); + if (ptype < 0) return FALSE; + ptr++; /* Point past the final curly ket */ + + /* If the property item is optional, we have to give up. (When generated + from \d etc by PCRE_UCP, this test will have been applied much earlier, + to the original \d etc. At this point, ptr will point to a zero byte. */ + + if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || + strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) + return FALSE; + + /* Do the property check. */ + + return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated); + } +#endif + default: return FALSE; } + /* In principle, support for Unicode properties should be integrated here as + well. It means re-organizing the above code so as to get hold of the property + values before switching on the op-code. However, I wonder how many patterns + combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set, + these op-codes are never generated.) */ + case OP_DIGIT: return next == -ESC_D || next == -ESC_s || next == -ESC_W || - next == -ESC_h || next == -ESC_v; + next == -ESC_h || next == -ESC_v || next == -ESC_R; case OP_NOT_DIGIT: return next == -ESC_d; case OP_WHITESPACE: - return next == -ESC_S || next == -ESC_d || next == -ESC_w; + return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R; case OP_NOT_WHITESPACE: return next == -ESC_s || next == -ESC_h || next == -ESC_v; case OP_HSPACE: - return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w; + return next == -ESC_S || next == -ESC_H || next == -ESC_d || + next == -ESC_w || next == -ESC_v || next == -ESC_R; case OP_NOT_HSPACE: return next == -ESC_h; /* Can't have \S in here because VT matches \S (Perl anomaly) */ + case OP_ANYNL: case OP_VSPACE: return next == -ESC_V || next == -ESC_d || next == -ESC_w; case OP_NOT_VSPACE: - return next == -ESC_v; + return next == -ESC_v || next == -ESC_R; case OP_WORDCHAR: - return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v; + return next == -ESC_W || next == -ESC_s || next == -ESC_h || + next == -ESC_v || next == -ESC_R; case OP_NOT_WORDCHAR: return next == -ESC_w || next == -ESC_d; @@ -2685,6 +2922,7 @@ BOOL inescq = FALSE; BOOL groupsetfirstbyte = FALSE; const uschar *ptr = *ptrptr; const uschar *tempptr; +const uschar *nestptr = NULL; uschar *previous = NULL; uschar *previous_callout = NULL; uschar *save_hwm = NULL; @@ -2755,6 +2993,16 @@ for (;; ptr++) c = *ptr; + /* If we are at the end of a nested substitution, revert to the outer level + string. Nesting only happens one level deep. */ + + if (c == 0 && nestptr != NULL) + { + ptr = nestptr; + nestptr = NULL; + c = *ptr; + } + /* If we are in the pre-compile phase, accumulate the length used for the previous cycle of this loop. */ @@ -2785,7 +3033,7 @@ for (;; ptr++) goto FAILED; } - *lengthptr += code - last_code; + *lengthptr += (int)(code - last_code); DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c)); /* If "previous" is set and it is not at the start of the work space, move @@ -2903,7 +3151,7 @@ for (;; ptr++) *errorcodeptr = ERR20; goto FAILED; } - *lengthptr += code - last_code; /* To include callout length */ + *lengthptr += (int)(code - last_code); /* To include callout length */ DPRINTF((">> end branch\n")); } return TRUE; @@ -3108,7 +3356,7 @@ for (;; ptr++) ptr++; } - posix_class = check_posix_name(ptr, tempptr - ptr); + posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); if (posix_class < 0) { *errorcodeptr = ERR30; @@ -3122,10 +3370,25 @@ for (;; ptr++) if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) posix_class = 0; - /* We build the bit map for the POSIX class in a chunk of local store - because we may be adding and subtracting from it, and we don't want to - subtract bits that may be in the main map already. At the end we or the - result into the bit map that is being built. */ + /* When PCRE_UCP is set, some of the POSIX classes are converted to + different escape sequences that use Unicode properties. */ + +#ifdef SUPPORT_UCP + if ((options & PCRE_UCP) != 0) + { + int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); + if (posix_substitutes[pc] != NULL) + { + nestptr = tempptr + 1; + ptr = posix_substitutes[pc] - 1; + continue; + } + } +#endif + /* In the non-UCP case, we build the bit map for the POSIX class in a + chunk of local store because we may be adding and subtracting from it, + and we don't want to subtract bits that may be in the main map already. + At the end we or the result into the bit map that is being built. */ posix_class *= 3; @@ -3169,19 +3432,18 @@ for (;; ptr++) /* Backslash may introduce a single character, or it may introduce one of the specials, which just set a flag. The sequence \b is a special - case. Inside a class (and only there) it is treated as backspace. - Elsewhere it marks a word boundary. Other escapes have preset maps ready - to 'or' into the one we are building. We assume they have more than one - character in them, so set class_charcount bigger than one. */ + case. Inside a class (and only there) it is treated as backspace. We + assume that other escapes have more than one character in them, so set + class_charcount bigger than one. Unrecognized escapes fall through and + are either treated as literal characters (by default), or are faulted if + PCRE_EXTRA is set. */ if (c == CHAR_BACKSLASH) { c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); if (*errorcodeptr != 0) goto FAILED; - if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ - else if (-c == ESC_X) c = CHAR_X; /* \X is literal X in a class */ - else if (-c == ESC_R) c = CHAR_R; /* \R is literal R in a class */ + if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ else if (-c == ESC_Q) /* Handle start of quoted string */ { if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) @@ -3198,10 +3460,20 @@ for (;; ptr++) register const uschar *cbits = cd->cbits; class_charcount += 2; /* Greater than 1 is what matters */ - /* Save time by not doing this in the pre-compile phase. */ - - if (lengthptr == NULL) switch (-c) + switch (-c) { +#ifdef SUPPORT_UCP + case ESC_du: /* These are the values given for \d etc */ + case ESC_DU: /* when PCRE_UCP is set. We replace the */ + case ESC_wu: /* escape sequence with an appropriate \p */ + case ESC_WU: /* or \P to test Unicode properties instead */ + case ESC_su: /* of the default ASCII testing. */ + case ESC_SU: + nestptr = ptr; + ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ + class_charcount -= 2; /* Undo! */ + continue; +#endif case ESC_d: for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; continue; @@ -3231,20 +3503,7 @@ for (;; ptr++) classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ continue; - default: /* Not recognized; fall through */ - break; /* Need "default" setting to stop compiler warning. */ - } - - /* In the pre-compile phase, just do the recognition. */ - - else if (c == -ESC_d || c == -ESC_D || c == -ESC_w || - c == -ESC_W || c == -ESC_s || c == -ESC_S) continue; - - /* We need to deal with \H, \h, \V, and \v in both phases because - they use extra memory. */ - - if (-c == ESC_h) - { + case ESC_h: SETBIT(classbits, 0x09); /* VT */ SETBIT(classbits, 0x20); /* SPACE */ SETBIT(classbits, 0xa0); /* NSBP */ @@ -3268,10 +3527,8 @@ for (;; ptr++) } #endif continue; - } - if (-c == ESC_H) - { + case ESC_H: for (c = 0; c < 32; c++) { int x = 0xff; @@ -3313,10 +3570,8 @@ for (;; ptr++) } #endif continue; - } - if (-c == ESC_v) - { + case ESC_v: SETBIT(classbits, 0x0a); /* LF */ SETBIT(classbits, 0x0b); /* VT */ SETBIT(classbits, 0x0c); /* FF */ @@ -3332,10 +3587,8 @@ for (;; ptr++) } #endif continue; - } - if (-c == ESC_V) - { + case ESC_V: for (c = 0; c < 32; c++) { int x = 0xff; @@ -3365,38 +3618,38 @@ for (;; ptr++) } #endif continue; - } - - /* We need to deal with \P and \p in both phases. */ #ifdef SUPPORT_UCP - if (-c == ESC_p || -c == ESC_P) - { - BOOL negated; - int pdata; - int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr); - if (ptype < 0) goto FAILED; - class_utf8 = TRUE; - *class_utf8data++ = ((-c == ESC_p) != negated)? - XCL_PROP : XCL_NOTPROP; - *class_utf8data++ = ptype; - *class_utf8data++ = pdata; - class_charcount -= 2; /* Not a < 256 character */ - continue; - } + case ESC_p: + case ESC_P: + { + BOOL negated; + int pdata; + int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr); + if (ptype < 0) goto FAILED; + class_utf8 = TRUE; + *class_utf8data++ = ((-c == ESC_p) != negated)? + XCL_PROP : XCL_NOTPROP; + *class_utf8data++ = ptype; + *class_utf8data++ = pdata; + class_charcount -= 2; /* Not a < 256 character */ + continue; + } #endif - /* Unrecognized escapes are faulted if PCRE is running in its - strict mode. By default, for compatibility with Perl, they are - treated as literals. */ + /* Unrecognized escapes are faulted if PCRE is running in its + strict mode. By default, for compatibility with Perl, they are + treated as literals. */ - if ((options & PCRE_EXTRA) != 0) - { - *errorcodeptr = ERR7; - goto FAILED; + default: + if ((options & PCRE_EXTRA) != 0) + { + *errorcodeptr = ERR7; + goto FAILED; + } + class_charcount -= 2; /* Undo the default count from above */ + c = *ptr; /* Get the final character and fall through */ + break; } - - class_charcount -= 2; /* Undo the default count from above */ - c = *ptr; /* Get the final character and fall through */ } /* Fall through if we have a single character (c >= 0). This may be @@ -3466,14 +3719,11 @@ for (;; ptr++) d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); if (*errorcodeptr != 0) goto FAILED; - /* \b is backspace; \X is literal X; \R is literal R; any other - special means the '-' was literal */ + /* \b is backspace; any other special means the '-' was literal */ if (d < 0) { - if (d == -ESC_b) d = CHAR_BS; - else if (d == -ESC_X) d = CHAR_X; - else if (d == -ESC_R) d = CHAR_R; else + if (d == -ESC_b) d = CHAR_BS; else { ptr = oldptr; goto LONE_SINGLE_CHARACTER; /* A few lines below */ @@ -3639,35 +3889,23 @@ for (;; ptr++) } } - /* Loop until ']' reached. This "while" is the end of the "do" above. */ + /* Loop until ']' reached. This "while" is the end of the "do" far above. + If we are at the end of an internal nested string, revert to the outer + string. */ + + while (((c = *(++ptr)) != 0 || + (nestptr != NULL && + (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) && + (c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); - while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); + /* Check for missing terminating ']' */ - if (c == 0) /* Missing terminating ']' */ + if (c == 0) { *errorcodeptr = ERR6; goto FAILED; } - -/* This code has been disabled because it would mean that \s counts as -an explicit \r or \n reference, and that's not really what is wanted. Now -we set the flag only if there is a literal "\r" or "\n" in the class. */ - -#if 0 - /* Remember whether \r or \n are in this class */ - - if (negate_class) - { - if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF; - } - else - { - if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF; - } -#endif - - /* If class_charcount is 1, we saw precisely one character whose value is less than 256. As long as there were no characters >= 128 and there was no use of \p or \P, in other words, no use of any XCLASS features, we can @@ -3731,13 +3969,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ /* If there are characters with values > 255, we have to compile an extended class, with its own opcode, unless there was a negated special - such as \S in the class, because in that case all characters > 255 are in - the class, so any that were explicitly given as well can be ignored. If - (when there are explicit characters > 255 that must be listed) there are no - characters < 256, we can omit the bitmap in the actual compiled code. */ + such as \S in the class, and PCRE_UCP is not set, because in that case all + characters > 255 are in the class, so any that were explicitly given as + well can be ignored. If (when there are explicit characters > 255 that must + be listed) there are no characters < 256, we can omit the bitmap in the + actual compiled code. */ #ifdef SUPPORT_UTF8 - if (class_utf8 && !should_flip_negation) + if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0)) { *class_utf8data++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; @@ -3763,10 +4002,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ } #endif - /* If there are no characters > 255, set the opcode to OP_CLASS or - OP_NCLASS, depending on whether the whole class was negated and whether - there were negative specials such as \S in the class. Then copy the 32-byte - map into the code vector, negating it if necessary. */ + /* If there are no characters > 255, or they are all to be included or + excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the + whole class was negated and whether there were negative specials such as \S + (non-UCP) in the class. Then copy the 32-byte map into the code vector, + negating it if necessary. */ *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; if (negate_class) @@ -3890,8 +4130,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (!possessive_quantifier && repeat_max < 0 && - check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1, - options, cd)) + check_auto_possessive(previous, utf8, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -3912,7 +4151,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ c = previous[1]; if (!possessive_quantifier && repeat_max < 0 && - check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd)) + check_auto_possessive(previous, utf8, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -3936,7 +4175,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (!possessive_quantifier && repeat_max < 0 && - check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd)) + check_auto_possessive(previous, utf8, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -4146,7 +4385,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { register int i; int ketoffset = 0; - int len = code - previous; + int len = (int)(code - previous); uschar *bralink = NULL; /* Repeating a DEFINE group is pointless */ @@ -4167,7 +4406,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { register uschar *ket = previous; do ket += GET(ket, 1); while (*ket != OP_KET); - ketoffset = code - ket; + ketoffset = (int)(code - ket); } /* The case of a zero minimum is special because of the need to stick @@ -4235,7 +4474,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ /* We chain together the bracket offset fields that have to be filled in later when the ends of the brackets are reached. */ - offset = (bralink == NULL)? 0 : previous - bralink; + offset = (bralink == NULL)? 0 : (int)(previous - bralink); bralink = previous; PUTINC(previous, 0, offset); } @@ -4344,7 +4583,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { int offset; *code++ = OP_BRA; - offset = (bralink == NULL)? 0 : code - bralink; + offset = (bralink == NULL)? 0 : (int)(code - bralink); bralink = code; PUTINC(code, 0, offset); } @@ -4365,7 +4604,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ while (bralink != NULL) { int oldlinkoffset; - int offset = code - bralink + 1; + int offset = (int)(code - bralink + 1); uschar *bra = code - offset; oldlinkoffset = GET(bra, 1); bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; @@ -4453,7 +4692,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ #endif } - len = code - tempcode; + len = (int)(code - tempcode); if (len > 0) switch (*tempcode) { case OP_STAR: *tempcode = OP_POSSTAR; break; @@ -4512,24 +4751,34 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ /* First deal with various "verbs" that can be introduced by '*'. */ - if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0) + if (*(++ptr) == CHAR_ASTERISK && + ((cd->ctypes[ptr[1]] & ctype_letter) != 0 || ptr[1] == ':')) { int i, namelen; + int arglen = 0; const char *vn = verbnames; - const uschar *name = ++ptr; + const uschar *name = ptr + 1; + const uschar *arg = NULL; previous = NULL; while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {}; + namelen = (int)(ptr - name); + if (*ptr == CHAR_COLON) { - *errorcodeptr = ERR59; /* Not supported */ - goto FAILED; + arg = ++ptr; + while ((cd->ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0 + || *ptr == '_') ptr++; + arglen = (int)(ptr - arg); } + if (*ptr != CHAR_RIGHT_PARENTHESIS) { *errorcodeptr = ERR60; goto FAILED; } - namelen = ptr - name; + + /* Scan the table of verb names */ + for (i = 0; i < verbcount; i++) { if (namelen == verbs[i].len && @@ -4547,13 +4796,41 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ PUT2INC(code, 0, oc->number); } } - *code++ = verbs[i].op; - break; + + /* Handle the cases with/without an argument */ + + if (arglen == 0) + { + if (verbs[i].op < 0) /* Argument is mandatory */ + { + *errorcodeptr = ERR66; + goto FAILED; + } + *code++ = verbs[i].op; + } + + else + { + if (verbs[i].op_arg < 0) /* Argument is forbidden */ + { + *errorcodeptr = ERR59; + goto FAILED; + } + *code++ = verbs[i].op_arg; + *code++ = arglen; + memcpy(code, arg, arglen); + code += arglen; + *code++ = 0; + } + + break; /* Found verb, exit loop */ } + vn += verbs[i].len + 1; } - if (i < verbcount) continue; - *errorcodeptr = ERR60; + + if (i < verbcount) continue; /* Successfully handled a verb */ + *errorcodeptr = ERR60; /* Verb not recognized */ goto FAILED; } @@ -4672,7 +4949,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ recno * 10 + *ptr - CHAR_0 : -1; ptr++; } - namelen = ptr - name; + namelen = (int)(ptr - name); if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != CHAR_RIGHT_PARENTHESIS) @@ -4868,8 +5145,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ goto FAILED; } *code++ = n; - PUT(code, 0, ptr - cd->start_pattern + 1); /* Pattern offset */ - PUT(code, LINK_SIZE, 0); /* Default length */ + PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */ + PUT(code, LINK_SIZE, 0); /* Default length */ code += 2 * LINK_SIZE; } previous = NULL; @@ -4902,7 +5179,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ name = ++ptr; while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = ptr - name; + namelen = (int)(ptr - name); /* In the pre-compile phase, just do a syntax check. */ @@ -5032,7 +5309,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ NAMED_REF_OR_RECURSE: name = ++ptr; while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; - namelen = ptr - name; + namelen = (int)(ptr - name); /* In the pre-compile phase, do a syntax check and set a dummy reference number. */ @@ -5201,7 +5478,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ of the group. */ called = cd->start_code + recno; - PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code); + PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code)); } /* If not a forward reference, and the subpattern is still open, @@ -5225,7 +5502,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ code += 1 + LINK_SIZE; *code = OP_RECURSE; - PUT(code, 1, called - cd->start_code); + PUT(code, 1, (int)(called - cd->start_code)); code += 1 + LINK_SIZE; *code = OP_KET; @@ -5336,8 +5613,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ } /* End of switch for character following (? */ } /* End of (? handling */ - /* Opening parenthesis not followed by '?'. If PCRE_NO_AUTO_CAPTURE is set, - all unadorned brackets become non-capturing and behave like (?:...) + /* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE + is set, all unadorned brackets become non-capturing and behave like (?:...) brackets. */ else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) @@ -5529,11 +5806,12 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ /* ===================================================================*/ /* Handle metasequences introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values. For the - back references, the values are ESC_REF plus the reference number. Only - back references and those types that consume a character may be repeated. - We can test for values between ESC_b and ESC_Z for the latter; this may - have to change if any new ones are ever created. */ + are arranged to be the negation of the corresponding OP_values in the + default case when PCRE_UCP is not set. For the back references, the values + are ESC_REF plus the reference number. Only back references and those types + that consume a character may be repeated. We can test for values between + ESC_b and ESC_Z for the latter; this may have to change if any new ones are + ever created. */ case CHAR_BACKSLASH: tempptr = ptr; @@ -5693,12 +5971,24 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ #endif /* For the rest (including \X when Unicode properties are supported), we - can obtain the OP value by negating the escape value. */ + can obtain the OP value by negating the escape value in the default + situation when PCRE_UCP is not set. When it *is* set, we substitute + Unicode property tests. */ else { - previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; - *code++ = -c; +#ifdef SUPPORT_UCP + if (-c >= ESC_DU && -c <= ESC_wu) + { + nestptr = ptr + 1; /* Where to resume */ + ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ + } + else +#endif + { + previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; + *code++ = -c; + } } continue; } @@ -6030,7 +6320,7 @@ for (;;) { if (lengthptr == NULL) { - int branch_length = code - last_branch; + int branch_length = (int)(code - last_branch); do { int prev_length = GET(last_branch, 1); @@ -6044,7 +6334,7 @@ for (;;) /* Fill in the ket */ *code = OP_KET; - PUT(code, 1, code - start_bracket); + PUT(code, 1, (int)(code - start_bracket)); code += 1 + LINK_SIZE; /* If it was a capturing subpattern, check to see if it contained any @@ -6059,9 +6349,9 @@ for (;;) code - start_bracket); *start_bracket = OP_ONCE; code += 1 + LINK_SIZE; - PUT(start_bracket, 1, code - start_bracket); + PUT(start_bracket, 1, (int)(code - start_bracket)); *code = OP_KET; - PUT(code, 1, code - start_bracket); + PUT(code, 1, (int)(code - start_bracket)); code += 1 + LINK_SIZE; length += 2 + 2*LINK_SIZE; } @@ -6116,7 +6406,7 @@ for (;;) else { *code = OP_ALT; - PUT(code, 1, code - last_branch); + PUT(code, 1, (int)(code - last_branch)); bc.current_branch = last_branch = code; code += 1 + LINK_SIZE; } @@ -6435,7 +6725,7 @@ int length = 1; /* For final END opcode */ int firstbyte, reqbyte, newline; int errorcode = 0; int skipatstart = 0; -BOOL utf8 = (options & PCRE_UTF8) != 0; +BOOL utf8; size_t size; uschar *code; const uschar *codestart; @@ -6505,6 +6795,8 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0) { skipatstart += 7; options |= PCRE_UTF8; continue; } + else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0) + { skipatstart += 6; options |= PCRE_UCP; continue; } if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } @@ -6529,6 +6821,8 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && else break; } +utf8 = (options & PCRE_UTF8) != 0; + /* Can't support UTF8 unless PCRE has been compiled to include the code. */ #ifdef SUPPORT_UTF8 @@ -6546,6 +6840,16 @@ if (utf8) } #endif +/* Can't support UCP unless PCRE has been compiled to include the code. */ + +#ifndef SUPPORT_UCP +if ((options & PCRE_UCP) != 0) + { + errorcode = ERR67; + goto PCRE_EARLY_ERROR_RETURN; + } +#endif + /* Check validity of \R options. */ switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) @@ -6674,7 +6978,7 @@ regex compiled on a system with 4-byte pointers is run on another with 8-byte pointers. */ re->magic_number = MAGIC_NUMBER; -re->size = size; +re->size = (int)size; re->options = cd->external_options; re->flags = cd->external_flags; re->dummy1 = 0; @@ -6745,7 +7049,7 @@ while (errorcode == 0 && cd->hwm > cworkspace) recno = GET(codestart, offset); groupptr = _pcre_find_bracket(codestart, utf8, recno); if (groupptr == NULL) errorcode = ERR53; - else PUT(((uschar *)codestart), offset, groupptr - codestart); + else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart)); } /* Give an error if there's back reference to a non-existent capturing @@ -6800,7 +7104,7 @@ if (errorcode != 0) { (pcre_free)(re); PCRE_EARLY_ERROR_RETURN: - *erroroffset = ptr - (const uschar *)pattern; + *erroroffset = (int)(ptr - (const uschar *)pattern); PCRE_EARLY_ERROR_RETURN2: *errorptr = find_error_text(errorcode); if (errorcodeptr != NULL) *errorcodeptr = errorcode; |