1 files changed, 319 insertions, 173 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index 77ec51cace..ef33cfc388 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 supporting internal functions that are not used by other modules. */
 
 
-#include <config.h>
+#include "config.h"
 
 #define NLBLOCK cd             /* Block containing newline information */
 #define PSSTART start_pattern  /* Field containing processed string start */
@@ -138,35 +138,47 @@ static const short int escapes[] = {
 #endif
 
 
-/* Table of special "verbs" like (*PRUNE) */
+/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
+searched linearly. Put all the names into a single string, in order to reduce
+the number of relocations when a shared library is dynamically linked. */
 
 typedef struct verbitem {
-  const char *name;
   int   len;
   int   op;
 } verbitem;
 
+static const char verbnames[] =
+  "ACCEPT\0"
+  "COMMIT\0"
+  "F\0"
+  "FAIL\0"
+  "PRUNE\0"
+  "SKIP\0"
+  "THEN";
+
 static verbitem verbs[] = {
-  { "ACCEPT", 6, OP_ACCEPT },
-  { "COMMIT", 6, OP_COMMIT },
-  { "F",      1, OP_FAIL },
-  { "FAIL",   4, OP_FAIL },
-  { "PRUNE",  5, OP_PRUNE },
-  { "SKIP",   4, OP_SKIP  },
-  { "THEN",   4, OP_THEN  }
+  { 6, OP_ACCEPT },   /* ACCEPT - order and lengths must match verbnames */
+  { 6, OP_COMMIT },   /* COMMIT */
+  { 1, OP_FAIL },     /* F */
+  { 4, OP_FAIL },     /* FAIL */
+  { 5, OP_PRUNE },    /* PRUNE */
+  { 4, OP_SKIP  },    /* SKIP */
+  { 4, OP_THEN  }     /* THEN */
 };
 
 static int verbcount = sizeof(verbs)/sizeof(verbitem);
 
 
-/* Tables of names of POSIX character classes and their lengths. The list is
-terminated by a zero length entry. The first three must be alpha, lower, upper,
-as this is assumed for handling case independence. */
+/* Tables of names of POSIX character classes and their lengths. The names are
+now all in a single string, to reduce the number of relocations when a shared
+library is dynamically loaded. The list of lengths is terminated by a zero
+length entry. The first three must be alpha, lower, upper, as this is assumed
+for handling case independence. */
 
-static const char *const posix_names[] = {
-  "alpha", "lower", "upper",
-  "alnum", "ascii", "blank", "cntrl", "digit", "graph",
-  "print", "punct", "space", "word",  "xdigit" };
+static const char posix_names[] =
+  "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"
+  "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"
+  "word\0"   "xdigit";
 
 static const uschar posix_name_lengths[] = {
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
@@ -205,84 +217,90 @@ static const int posix_class_maps[] = {
 /* The texts of compile-time error messages. These are "char *" because they
 are passed to the outside world. Do not ever re-use any error number, because
 they are documented. Always add a new error instead. Messages marked DEAD below
-are no longer used. */
-
-static const char *error_texts[] = {
-  "no error",
-  "\\ at end of pattern",
-  "\\c at end of pattern",
-  "unrecognized character follows \\",
-  "numbers out of order in {} quantifier",
+are no longer used. This used to be a table of strings, but in order to reduce
+the number of relocations needed when a shared library is loaded dynamically,
+it is now one long string. We cannot use a table of offsets, because the
+lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
+simply count through to the one we want - this isn't a performance issue
+because these strings are used only when there is a compilation error. */
+
+static const char error_texts[] =
+  "no error\0"
+  "\\ at end of pattern\0"
+  "\\c at end of pattern\0"
+  "unrecognized character follows \\\0"
+  "numbers out of order in {} quantifier\0"
   /* 5 */
-  "number too big in {} quantifier",
-  "missing terminating ] for character class",
-  "invalid escape sequence in character class",
-  "range out of order in character class",
-  "nothing to repeat",
+  "number too big in {} quantifier\0"
+  "missing terminating ] for character class\0"
+  "invalid escape sequence in character class\0"
+  "range out of order in character class\0"
+  "nothing to repeat\0"
   /* 10 */
-  "operand of unlimited repeat could match the empty string",  /** DEAD **/
-  "internal error: unexpected repeat",
-  "unrecognized character after (?",
-  "POSIX named classes are supported only within a class",
-  "missing )",
+  "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
+  "internal error: unexpected repeat\0"
+  "unrecognized character after (? or (?-\0"
+  "POSIX named classes are supported only within a class\0"
+  "missing )\0"
   /* 15 */
-  "reference to non-existent subpattern",
-  "erroffset passed as NULL",
-  "unknown option bit(s) set",
-  "missing ) after comment",
-  "parentheses nested too deeply",  /** DEAD **/
+  "reference to non-existent subpattern\0"
+  "erroffset passed as NULL\0"
+  "unknown option bit(s) set\0"
+  "missing ) after comment\0"
+  "parentheses nested too deeply\0"  /** DEAD **/
   /* 20 */
-  "regular expression is too large",
-  "failed to get memory",
-  "unmatched parentheses",
-  "internal error: code overflow",
-  "unrecognized character after (?<",
+  "regular expression is too large\0"
+  "failed to get memory\0"
+  "unmatched parentheses\0"
+  "internal error: code overflow\0"
+  "unrecognized character after (?<\0"
   /* 25 */
-  "lookbehind assertion is not fixed length",
-  "malformed number or name after (?(",
-  "conditional group contains more than two branches",
-  "assertion expected after (?(",
-  "(?R or (?[+-]digits must be followed by )",
+  "lookbehind assertion is not fixed length\0"
+  "malformed number or name after (?(\0"
+  "conditional group contains more than two branches\0"
+  "assertion expected after (?(\0"
+  "(?R or (?[+-]digits must be followed by )\0"
   /* 30 */
-  "unknown POSIX class name",
-  "POSIX collating elements are not supported",
-  "this version of PCRE is not compiled with PCRE_UTF8 support",
-  "spare error",  /** DEAD **/
-  "character value in \\x{...} sequence is too large",
+  "unknown POSIX class name\0"
+  "POSIX collating elements are not supported\0"
+  "this version of PCRE is not compiled with PCRE_UTF8 support\0"
+  "spare error\0"  /** DEAD **/
+  "character value in \\x{...} sequence is too large\0"
   /* 35 */
-  "invalid condition (?(0)",
-  "\\C not allowed in lookbehind assertion",
-  "PCRE does not support \\L, \\l, \\N, \\U, or \\u",
-  "number after (?C is > 255",
-  "closing ) for (?C expected",
+  "invalid condition (?(0)\0"
+  "\\C not allowed in lookbehind assertion\0"
+  "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
+  "number after (?C is > 255\0"
+  "closing ) for (?C expected\0"
   /* 40 */
-  "recursive call could loop indefinitely",
-  "unrecognized character after (?P",
-  "syntax error in subpattern name (missing terminator)",
-  "two named subpatterns have the same name",
-  "invalid UTF-8 string",
+  "recursive call could loop indefinitely\0"
+  "unrecognized character after (?P\0"
+  "syntax error in subpattern name (missing terminator)\0"
+  "two named subpatterns have the same name\0"
+  "invalid UTF-8 string\0"
   /* 45 */
-  "support for \\P, \\p, and \\X has not been compiled",
-  "malformed \\P or \\p sequence",
-  "unknown property name after \\P or \\p",
-  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",
-  "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",
+  "support for \\P, \\p, and \\X has not been compiled\0"
+  "malformed \\P or \\p sequence\0"
+  "unknown property name after \\P or \\p\0"
+  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
+  "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
   /* 50 */
-  "repeated subpattern is too long",    /** DEAD **/
-  "octal value is greater than \\377 (not in UTF-8 mode)",
-  "internal error: overran compiling workspace",
-  "internal error: previously-checked referenced subpattern not found",
-  "DEFINE group contains more than one branch",
+  "repeated subpattern is too long\0"    /** DEAD **/
+  "octal value is greater than \\377 (not in UTF-8 mode)\0"
+  "internal error: overran compiling workspace\0"
+  "internal error: previously-checked referenced subpattern not found\0"
+  "DEFINE group contains more than one branch\0"
   /* 55 */
-  "repeating a DEFINE group is not allowed",
-  "inconsistent NEWLINE options",
-  "\\g is not followed by a braced name or an optionally braced non-zero number",
-  "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",
-  "(*VERB) with an argument is not supported",
+  "repeating a DEFINE group is not allowed\0"
+  "inconsistent NEWLINE options\0"
+  "\\g is not followed by a braced name or an optionally braced non-zero number\0"
+  "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
+  "(*VERB) with an argument is not supported\0"
   /* 60 */
-  "(*VERB) not recognized",
-  "number is too big"
-};
+  "(*VERB) not recognized\0"
+  "number is too big\0"
+  "subpattern name expected\0"
+  "digit expected after (?+";
 
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -418,6 +436,28 @@ static BOOL
 
 
 /*************************************************
+*            Find an error text                  *
+*************************************************/
+
+/* The error texts are now all in one long string, to save on relocations. As
+some of the text is of unknown length, we can't use a table of offsets.
+Instead, just count through the strings. This is not a performance issue
+because it happens only when there has been a compilation error.
+
+Argument:   the error number
+Returns:    pointer to the error string ("" if the number is out of range)
+*/
+
+static const char *
+find_error_text(int n)
+{
+const char *s = error_texts, *end = error_texts + sizeof(error_texts) - 1;
+for (; n > 0; n--) while (s < end && *s++ != 0);  /* never pass final NUL */
+return s;
+}
+
+
+/*************************************************
 *            Handle escapes                      *
 *************************************************/
 
@@ -456,16 +496,16 @@ ptr--;                            /* Set pointer back to the last byte */
 
 if (c == 0) *errorcodeptr = ERR1;
 
-/* Non-alphamerics are literals. For digits or letters, do an initial lookup in
-a table. A non-zero result is something that can be returned immediately.
+/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
+in a table. A non-zero result is something that can be returned immediately.
 Otherwise further processing may be required. */
 
 #ifndef EBCDIC  /* ASCII coding */
-else if (c < '0' || c > 'z') {}                           /* Not alphameric */
+else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
 else if ((i = escapes[c - '0']) != 0) c = i;
 
 #else           /* EBCDIC coding */
-else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
+else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
 else if ((i = escapes[c - 0x48]) != 0)  c = i;
 #endif
 
@@ -682,10 +722,10 @@ else
     break;
 
     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
-    other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
-    for Perl compatibility, it is a literal. This code looks a bit odd, but
-    there used to be some cases other than the default, and there may be again
-    in future, so I haven't "optimized" it. */
+    other alphanumeric following \ is an error if PCRE_EXTRA was set;
+    otherwise, for Perl compatibility, it is a literal. This code looks a bit
+    odd, but there used to be some cases other than the default, and there may
+    be again in future, so I haven't "optimized" it. */
 
     default:
     if ((options & PCRE_EXTRA) != 0) switch(c)
@@ -774,7 +814,7 @@ top = _pcre_utt_size;
 while (bot < top)
   {
   i = (bot + top) >> 1;
-  c = strcmp(name, _pcre_utt[i].name);
+  c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
   if (c == 0)
     {
     *dptr = _pcre_utt[i].value;
@@ -1466,8 +1506,9 @@ for (;;)
 can match the empty string or not. It is called from could_be_empty()
 below and from compile_branch() when checking for an unlimited repeat of a
 group that can match nothing. Note that first_significant_code() skips over
-assertions. If we hit an unclosed bracket, we return "empty" - this means we've
-struck an inner bracket whose current branch will already have been scanned.
+backward and negative forward assertions when its final argument is TRUE. If we
+hit an unclosed bracket, we return "empty" - this means we've struck an inner
+bracket whose current branch will already have been scanned.
 
 Arguments:
   code        points to start of search
@@ -1489,6 +1530,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
 
   c = *code;
 
+  /* Skip over positive forward assertions; the other assertions are skipped
+  by first_significant_code() with a TRUE final argument. */
+
+  if (c == OP_ASSERT)
+    {
+    do code += GET(code, 1); while (*code == OP_ALT);
+    c = *code;
+    continue;
+    }
+
   /* Groups with zero repeats can of course be empty; skip them. */
 
   if (c == OP_BRAZERO || c == OP_BRAMINZERO)
@@ -1684,29 +1735,48 @@ return TRUE;
 *************************************************/
 
 /* This function is called when the sequence "[:" or "[." or "[=" is
-encountered in a character class. It checks whether this is followed by an
-optional ^ and then a sequence of letters, terminated by a matching ":]" or
-".]" or "=]".
+encountered in a character class. It checks whether this is followed by a
+sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
+reach an unescaped ']' without the special preceding character, return FALSE.
+
+Originally, this function only recognized a sequence of letters between the
+terminators, but it seems that Perl recognizes any sequence of characters,
+though of course unknown POSIX names are subsequently rejected. Perl gives an
+"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
+didn't consider this to be a POSIX class. Likewise for [:1234:].
+
+The problem in trying to be exactly like Perl is in the handling of escapes. We
+have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
+class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
+below handles the special case of \], but does not try to do any other escape
+processing. This makes it different from Perl for cases such as [:l\ower:]
+where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
+"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
+I think.
 
-Argument:
+Arguments:
   ptr      pointer to the initial [
   endptr   where to return the end pointer
-  cd       pointer to compile data
 
 Returns:   TRUE or FALSE
 */
 
 static BOOL
-check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
+check_posix_syntax(const uschar *ptr, const uschar **endptr)
 {
 int terminator;          /* Don't combine these lines; the Solaris cc */
 terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
-if (*(++ptr) == '^') ptr++;
-while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
-if (*ptr == terminator && ptr[1] == ']')
+for (++ptr; *ptr != 0; ptr++)          /* Scan until the zero byte is reached */
   {
-  *endptr = ptr;
-  return TRUE;
+  if (*ptr == '\\' && ptr[1] == ']') ptr++; else   /* Step over an escaped ] */
+    {
+    if (*ptr == ']') return FALSE;     /* Unescaped ] before any terminator */
+    if (*ptr == terminator && ptr[1] == ']')
+      {
+      *endptr = ptr;                   /* Hand back the terminator position */
+      return TRUE;
+      }
+    }
   }
 return FALSE;
 }
@@ -1731,11 +1801,13 @@ Returns:     a value representing the name, or -1 if unknown
 static int
 check_posix_name(const uschar *ptr, int len)
 {
+const char *pn = posix_names;
 register int yield = 0;
 while (posix_name_lengths[yield] != 0)
   {
   if (len == posix_name_lengths[yield] &&
-    strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
+    strncmp((const char *)ptr, pn, len) == 0) return yield;
+  pn += posix_name_lengths[yield] + 1;
   yield++;
   }
 return -1;
@@ -2341,6 +2413,7 @@ req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
 for (;; ptr++)
   {
   BOOL negate_class;
+  BOOL should_flip_negation;
   BOOL possessive_quantifier;
   BOOL is_quantifier;
   BOOL is_recurse;
@@ -2564,7 +2637,7 @@ for (;; ptr++)
     they are encountered at the top level, so we'll do that too. */
 
     if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
-        check_posix_syntax(ptr, &tempptr, cd))
+        check_posix_syntax(ptr, &tempptr))
       {
       *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
       goto FAILED;
@@ -2589,6 +2662,12 @@ for (;; ptr++)
       else break;
       }
 
+    /* If a class contains a negative special such as \S, we need to flip the
+    negation flag at the end, so that support for characters > 255 works
+    correctly (they are all included in the class). */
+
+    should_flip_negation = FALSE;
+
     /* Keep a count of chars with values < 256 so that we can optimize the case
     of just a single character (as long as it's < 256). However, For higher
     valued UTF-8 characters, we don't yet do any optimization. */
@@ -2644,7 +2723,7 @@ for (;; ptr++)
 
       if (c == '[' &&
           (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
-          check_posix_syntax(ptr, &tempptr, cd))
+          check_posix_syntax(ptr, &tempptr))
         {
         BOOL local_negate = FALSE;
         int posix_class, taboffset, tabopt;
@@ -2661,6 +2740,7 @@ for (;; ptr++)
         if (*ptr == '^')
           {
           local_negate = TRUE;
+          should_flip_negation = TRUE;  /* Note negative special */
           ptr++;
           }
 
@@ -2735,7 +2815,7 @@ for (;; ptr++)
         c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
         if (*errorcodeptr != 0) goto FAILED;
 
-        if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */
+        if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
         else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
         else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
         else if (-c == ESC_Q)            /* Handle start of quoted string */
@@ -2763,6 +2843,7 @@ for (;; ptr++)
             continue;
 
             case ESC_D:
+            should_flip_negation = TRUE;
             for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
             continue;
 
@@ -2771,6 +2852,7 @@ for (;; ptr++)
             continue;
 
             case ESC_W:
+            should_flip_negation = TRUE;
             for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
             continue;
 
@@ -2780,13 +2862,11 @@ for (;; ptr++)
             continue;
 
             case ESC_S:
+            should_flip_negation = TRUE;
             for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
             classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
             continue;
 
-            case ESC_E: /* Perl ignores an orphan \E */
-            continue;
-
             default:    /* Not recognized; fall through */
             break;      /* Need "default" setting to stop compiler warning. */
             }
@@ -2974,6 +3054,12 @@ for (;; ptr++)
 
       oldptr = ptr;
 
+      /* Remember \r or \n */
+
+      if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
+
+      /* Check for range */
+
       if (!inescq && ptr[1] == '-')
         {
         int d;
@@ -3015,7 +3101,7 @@ for (;; ptr++)
           d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
           if (*errorcodeptr != 0) goto FAILED;
 
-          /* \b is backslash; \X is literal X; \R is literal R; any other
+          /* \b is backspace; \X is literal X; \R is literal R; any other
           special means the '-' was literal */
 
           if (d < 0)
@@ -3041,6 +3127,10 @@ for (;; ptr++)
 
         if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */
 
+        /* Remember \r or \n */
+
+        if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
+
         /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
         matching, we have to use an XCLASS with extra data items. Caseless
         matching for characters > 127 is available only if UCP support is
@@ -3194,16 +3284,24 @@ for (;; ptr++)
       goto FAILED;
       }
 
+
+/* This code has been disabled because it would mean that \s counts as
+an explicit \r or \n reference, and that's not really what is wanted. Now
+we set the flag only if there is a literal "\r" or "\n" in the class. */
+
+#if 0
     /* Remember whether \r or \n are in this class */
 
     if (negate_class)
       {
-      if ((classbits[1] & 0x24) != 0x24) cd->external_options |= PCRE_HASCRORLF;
+      if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
       }
     else
       {
-      if ((classbits[1] & 0x24) != 0) cd->external_options |= PCRE_HASCRORLF;
+      if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
       }
+#endif
+
 
     /* If class_charcount is 1, we saw precisely one character whose value is
     less than 256. As long as there were no characters >= 128 and there was no
@@ -3267,11 +3365,14 @@ for (;; ptr++)
     zeroreqbyte = reqbyte;
 
     /* If there are characters with values > 255, we have to compile an
-    extended class, with its own opcode. If there are no characters < 256,
-    we can omit the bitmap in the actual compiled code. */
+    extended class, with its own opcode, unless there was a negated special
+    such as \S in the class, because in that case all characters > 255 are in
+    the class, so any that were explicitly given as well can be ignored. If
+    (when there are explicit characters > 255 that must be listed) there are no
+    characters < 256, we can omit the bitmap in the actual compiled code. */
 
 #ifdef SUPPORT_UTF8
-    if (class_utf8)
+    if (class_utf8 && !should_flip_negation)
       {
       *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
       *code++ = OP_XCLASS;
@@ -3297,20 +3398,19 @@ for (;; ptr++)
       }
 #endif
 
-    /* If there are no characters > 255, negate the 32-byte map if necessary,
-    and copy it into the code vector. If this is the first thing in the branch,
-    there can be no first char setting, whatever the repeat count. Any reqbyte
-    setting must remain unchanged after any kind of repeat. */
+    /* If there are no characters > 255, set the opcode to OP_CLASS or
+    OP_NCLASS, depending on whether the whole class was negated and whether
+    there were negative specials such as \S in the class. Then copy the 32-byte
+    map into the code vector, negating it if necessary. */
 
+    *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
     if (negate_class)
       {
-      *code++ = OP_NCLASS;
       if (lengthptr == NULL)    /* Save time in the pre-compile phase */
         for (c = 0; c < 32; c++) code[c] = ~classbits[c];
       }
     else
       {
-      *code++ = OP_CLASS;
       memcpy(code, classbits, 32);
       }
     code += 32;
@@ -3496,7 +3596,7 @@ for (;; ptr++)
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      if (repeat_max != 1) cd->nopartial = TRUE;
+      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
 
       /* Combine the op_type with the repeat_type */
 
@@ -3646,7 +3746,7 @@ for (;; ptr++)
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      if (repeat_max != 1) cd->nopartial = TRUE;
+      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
 
       if (repeat_min == 0 && repeat_max == -1)
         *code++ = OP_CRSTAR + repeat_type;
@@ -3946,7 +4046,9 @@ for (;; ptr++)
       int len;
       if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
           *tempcode == OP_NOTEXACT)
-        tempcode += _pcre_OP_lengths[*tempcode];
+        tempcode += _pcre_OP_lengths[*tempcode] +
+          ((*tempcode == OP_TYPEEXACT &&
+             (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
       len = code - tempcode;
       if (len > 0) switch (*tempcode)
         {
@@ -4004,6 +4106,7 @@ for (;; ptr++)
     if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
       {
       int i, namelen;
+      const char *vn = verbnames;
       const uschar *name = ++ptr;
       previous = NULL;
       while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
@@ -4021,12 +4124,13 @@ for (;; ptr++)
       for (i = 0; i < verbcount; i++)
         {
         if (namelen == verbs[i].len &&
-            strncmp((char *)name, verbs[i].name, namelen) == 0)
+            strncmp((char *)name, vn, namelen) == 0)
           {
           *code = verbs[i].op;
           if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
           break;
           }
+        vn += verbs[i].len + 1;
         }
       if (i < verbcount) continue;
       *errorcodeptr = ERR60;
@@ -4171,16 +4275,13 @@ for (;; ptr++)
             *errorcodeptr = ERR58;
             goto FAILED;
             }
-          if (refsign == '-')
+          recno = (refsign == '-')?
+            cd->bracount - recno + 1 : recno +cd->bracount;
+          if (recno <= 0 || recno > cd->final_bracount)
             {
-            recno = cd->bracount - recno + 1;
-            if (recno <= 0)
-              {
-              *errorcodeptr = ERR15;
-              goto FAILED;
-              }
+            *errorcodeptr = ERR15;
+            goto FAILED;
             }
-          else recno += cd->bracount;
           PUT2(code, 2+LINK_SIZE, recno);
           break;
           }
@@ -4252,9 +4353,10 @@ for (;; ptr++)
           skipbytes = 1;
           }
 
-        /* Check for the "name" actually being a subpattern number. */
+        /* Check for the "name" actually being a subpattern number. We are
+        in the second pass here, so final_bracount is set. */
 
-        else if (recno > 0)
+        else if (recno > 0 && recno <= cd->final_bracount)
           {
           PUT2(code, 2+LINK_SIZE, recno);
           }
@@ -4448,7 +4550,9 @@ for (;; ptr++)
 
         /* We come here from the Python syntax above that handles both
         references (?P=name) and recursion (?P>name), as well as falling
-        through from the Perl recursion syntax (?&name). */
+        through from the Perl recursion syntax (?&name). We also come here from
+        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
+        .NET syntax. */
 
         NAMED_REF_OR_RECURSE:
         name = ++ptr;
@@ -4460,6 +4564,11 @@ for (;; ptr++)
 
         if (lengthptr != NULL)
           {
+          if (namelen == 0)
+            {
+            *errorcodeptr = ERR62;
+            goto FAILED;
+            }
           if (*ptr != terminator)
             {
             *errorcodeptr = ERR42;
@@ -4473,14 +4582,19 @@ for (;; ptr++)
           recno = 0;
           }
 
-        /* In the real compile, seek the name in the table */
+        /* In the real compile, seek the name in the table. We check the name
+        first, and then check that we have reached the end of the name in the
+        table. That way, if the name is longer than any in the table,
+        the comparison will fail without reading beyond the table entry. */
 
         else
           {
           slot = cd->name_table;
           for (i = 0; i < cd->names_found; i++)
             {
-            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
+            if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
+                slot[2+namelen] == 0)
+              break;
             slot += cd->name_entry_size;
             }
 
@@ -4517,7 +4631,15 @@ for (;; ptr++)
           {
           const uschar *called;
 
-          if ((refsign = *ptr) == '+') ptr++;
+          if ((refsign = *ptr) == '+')
+            {
+            ptr++;
+            if ((digitab[*ptr] & ctype_digit) == 0)
+              {
+              *errorcodeptr = ERR63;
+              goto FAILED;
+              }
+            }
           else if (refsign == '-')
             {
             if ((digitab[ptr[1]] & ctype_digit) == 0)
@@ -4643,7 +4765,7 @@ for (;; ptr++)
 
             case 'J':    /* Record that it changed in the external options */
             *optset |= PCRE_DUPNAMES;
-            cd->external_options |= PCRE_JCHANGED;
+            cd->external_flags |= PCRE_JCHANGED;
             break;
 
             case 'i': *optset |= PCRE_CASELESS; break;
@@ -5063,7 +5185,7 @@ for (;; ptr++)
     /* Remember if \r or \n were seen */
 
     if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
-      cd->external_options |= PCRE_HASCRORLF;
+      cd->external_flags |= PCRE_HASCRORLF;
 
     /* Set the first and required bytes appropriately. If no previous first
     byte, set it from this character, but revert to none on a zero repeat.
@@ -5743,24 +5865,46 @@ cd->fcc = tables + fcc_offset;
 cd->cbits = tables + cbits_offset;
 cd->ctypes = tables + ctypes_offset;
 
-/* Check for newline settings at the start of the pattern, and remember the
-offset for later. */
+/* Check for global one-time settings at the start of the pattern, and remember
+the offset for later. */
 
-if (ptr[0] == '(' && ptr[1] == '*')
+while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
   {
   int newnl = 0;
-  if (strncmp((char *)(ptr+2), "CR)", 3) == 0)
-    { skipatstart = 5; newnl = PCRE_NEWLINE_CR; }
-  else if (strncmp((char *)(ptr+2), "LF)", 3)  == 0)
-    { skipatstart = 5; newnl = PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+2), "CRLF)", 5)  == 0)
-    { skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0)
-    { skipatstart = 6; newnl = PCRE_NEWLINE_ANY; }
-  else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8)  == 0)
-    { skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; }
-  if (skipatstart > 0)
+  int newbsr = 0;
+
+  if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)
+    { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
+  else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)
+    { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
+  else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)
+    { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
+  else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)
+    { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
+  else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)
+    { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
+
+  else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)
+    { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
+  else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)
+    { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
+
+  if (newnl != 0)
     options = (options & ~PCRE_NEWLINE_BITS) | newnl;
+  else if (newbsr != 0)
+    options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
+  else break;
+  }
+
+/* Check validity of \R options. */
+
+switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
+  {
+  case 0:
+  case PCRE_BSR_ANYCRLF:
+  case PCRE_BSR_UNICODE:
+  break;
+  default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
   }
 
 /* Handle different types of newline. The three bits give seven cases. The
@@ -5822,7 +5966,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is
 no longer needed, so hopefully this workspace will never overflow, though there
 is a test for its doing so. */
 
-cd->bracount = 0;
+cd->bracount = cd->final_bracount = 0;
 cd->names_found = 0;
 cd->name_entry_size = 0;
 cd->name_table = NULL;
@@ -5832,8 +5976,8 @@ cd->hwm = cworkspace;
 cd->start_pattern = (const uschar *)pattern;
 cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
 cd->req_varyopt = 0;
-cd->nopartial = FALSE;
 cd->external_options = options;
+cd->external_flags = 0;
 
 /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
 don't need to look at the result of the function here. The initial options have
@@ -5872,14 +6016,16 @@ if (re == NULL)
   goto PCRE_EARLY_ERROR_RETURN;
   }
 
-/* Put in the magic number, and save the sizes, initial options, and character
-table pointer. NULL is used for the default character tables. The nullpad field
-is at the end; it's there to help in the case when a regex compiled on a system
-with 4-byte pointers is run on another with 8-byte pointers. */
+/* Put in the magic number, and save the sizes, initial options, internal
+flags, and character table pointer. NULL is used for the default character
+tables. The nullpad field is at the end; it's there to help in the case when a
+regex compiled on a system with 4-byte pointers is run on another with 8-byte
+pointers. */
 
 re->magic_number = MAGIC_NUMBER;
 re->size = size;
 re->options = cd->external_options;
+re->flags = cd->external_flags;
 re->dummy1 = 0;
 re->first_byte = 0;
 re->req_byte = 0;
@@ -5897,6 +6043,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm
 field; this time it's used for remembering forward references to subpatterns.
 */
 
+cd->final_bracount = cd->bracount;  /* Save for checking forward references */
 cd->bracount = 0;
 cd->names_found = 0;
 cd->name_table = (uschar *)re + re->name_table_offset;
@@ -5904,7 +6051,6 @@ codestart = cd->name_table + re->name_entry_size * re->name_count;
 cd->start_code = codestart;
 cd->hwm = cworkspace;
 cd->req_varyopt = 0;
-cd->nopartial = FALSE;
 cd->had_accept = FALSE;
 
 /* Set up a starting, non-extracting bracket, then compile the expression. On
@@ -5918,8 +6064,8 @@ code = (uschar *)codestart;
   &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
 re->top_bracket = cd->bracount;
 re->top_backref = cd->top_backref;
+re->flags = cd->external_flags;
 
-if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
 if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */
 
 /* If not reached end of pattern on success, there's an excess bracket. */
@@ -5962,7 +6108,7 @@ if (errorcode != 0)
   PCRE_EARLY_ERROR_RETURN:
   *erroroffset = ptr - (const uschar *)pattern;
   PCRE_EARLY_ERROR_RETURN2:
-  *errorptr = error_texts[errorcode];
+  *errorptr = find_error_text(errorcode);
   if (errorcodeptr != NULL) *errorcodeptr = errorcode;
   return NULL;
   }
@@ -5991,10 +6137,10 @@ if ((re->options & PCRE_ANCHORED) == 0)
       int ch = firstbyte & 255;
       re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
          cd->fcc[ch] == ch)? ch : firstbyte;
-      re->options |= PCRE_FIRSTSET;
+      re->flags |= PCRE_FIRSTSET;
       }
     else if (is_startline(codestart, 0, cd->backref_map))
-      re->options |= PCRE_STARTLINE;
+      re->flags |= PCRE_STARTLINE;
     }
   }
 
@@ -6008,7 +6154,7 @@ if (reqbyte >= 0 &&
   int ch = reqbyte & 255;
   re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
     cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
-  re->options |= PCRE_REQCHSET;
+  re->flags |= PCRE_REQCHSET;
   }
 
 /* Print out the compiled data if debugging is enabled. This is never the
@@ -6021,7 +6167,7 @@ printf("Length = %d top_bracket = %d top_backref = %d\n",
 
 printf("Options=%08x\n", re->options);
 
-if ((re->options & PCRE_FIRSTSET) != 0)
+if ((re->flags & PCRE_FIRSTSET) != 0)
   {
   int ch = re->first_byte & 255;
   const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
@@ -6030,7 +6176,7 @@ if ((re->options & PCRE_FIRSTSET) != 0)
     else printf("First char = \\x%02x%s\n", ch, caseless);
   }
 
-if ((re->options & PCRE_REQCHSET) != 0)
+if ((re->flags & PCRE_REQCHSET) != 0)
   {
   int ch = re->req_byte & 255;
   const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
@@ -6047,7 +6193,7 @@ was compiled can be seen. */
 if (code - codestart > length)
   {
   (pcre_free)(re);
-  *errorptr = error_texts[ERR23];
+  *errorptr = find_error_text(ERR23);
   *erroroffset = ptr - (uschar *)pattern;
   if (errorcodeptr != NULL) *errorcodeptr = ERR23;
   return NULL;