1 files changed, 526 insertions, 222 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index 54e23ea39e..53027e603d 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -122,7 +122,7 @@ static const short int escapes[] = {
      -ESC_H,                  0,
      0,                       -ESC_K,
      0,                       0,
-     0,                       0,
+     -ESC_N,                  0,
      -ESC_P,                  -ESC_Q,
      -ESC_R,                  -ESC_S,
      0,                       0,
@@ -169,7 +169,7 @@ static const short int escapes[] = {
 /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
 /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
 /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
-/*  D0 */   '}',     0, -ESC_K,       0,      0,     0,      0, -ESC_P,
+/*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P,
 /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
 /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
 /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
@@ -186,11 +186,14 @@ string is built from string macros so that it works in UTF-8 mode on EBCDIC
 platforms. */
 
 typedef struct verbitem {
-  int   len;
-  int   op;
+  int   len;                 /* Length of verb name */
+  int   op;                  /* Op when no arg, or -1 if arg mandatory */
+  int   op_arg;              /* Op when arg present, or -1 if not allowed */
 } verbitem;
 
 static const char verbnames[] =
+  "\0"                       /* Empty name is a shorthand for MARK */
+  STRING_MARK0
   STRING_ACCEPT0
   STRING_COMMIT0
   STRING_F0
@@ -200,13 +203,15 @@ static const char verbnames[] =
   STRING_THEN;
 
 static const verbitem verbs[] = {
-  { 6, OP_ACCEPT },
-  { 6, OP_COMMIT },
-  { 1, OP_FAIL },
-  { 4, OP_FAIL },
-  { 5, OP_PRUNE },
-  { 4, OP_SKIP  },
-  { 4, OP_THEN  }
+  { 0, -1,        OP_MARK },
+  { 4, -1,        OP_MARK },
+  { 6, OP_ACCEPT, -1 },
+  { 6, OP_COMMIT, -1 },
+  { 1, OP_FAIL,   -1 },
+  { 4, OP_FAIL,   -1 },
+  { 5, OP_PRUNE,  OP_PRUNE_ARG },
+  { 4, OP_SKIP,   OP_SKIP_ARG  },
+  { 4, OP_THEN,   OP_THEN_ARG  }
 };
 
 static const int verbcount = sizeof(verbs)/sizeof(verbitem);
@@ -254,6 +259,53 @@ static const int posix_class_maps[] = {
   cbit_xdigit,-1,          0              /* xdigit */
 };
 
+/* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class
+substitutes must be in the order of the names, defined above, and there are
+both positive and negative cases. NULL means no substitute. */
+
+#ifdef SUPPORT_UCP
+static const uschar *substitutes[] = {
+  (uschar *)"\\P{Nd}",    /* \D */
+  (uschar *)"\\p{Nd}",    /* \d */
+  (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */
+  (uschar *)"\\p{Xsp}",   /* \s */
+  (uschar *)"\\P{Xwd}",   /* \W */
+  (uschar *)"\\p{Xwd}"    /* \w */
+};
+
+static const uschar *posix_substitutes[] = {
+  (uschar *)"\\p{L}",     /* alpha */
+  (uschar *)"\\p{Ll}",    /* lower */
+  (uschar *)"\\p{Lu}",    /* upper */
+  (uschar *)"\\p{Xan}",   /* alnum */
+  NULL,                   /* ascii */
+  (uschar *)"\\h",        /* blank */
+  NULL,                   /* cntrl */
+  (uschar *)"\\p{Nd}",    /* digit */
+  NULL,                   /* graph */
+  NULL,                   /* print */
+  NULL,                   /* punct */
+  (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */
+  (uschar *)"\\p{Xwd}",   /* word */
+  NULL,                   /* xdigit */
+  /* Negated cases */
+  (uschar *)"\\P{L}",     /* ^alpha */
+  (uschar *)"\\P{Ll}",    /* ^lower */
+  (uschar *)"\\P{Lu}",    /* ^upper */
+  (uschar *)"\\P{Xan}",   /* ^alnum */
+  NULL,                   /* ^ascii */
+  (uschar *)"\\H",        /* ^blank */
+  NULL,                   /* ^cntrl */
+  (uschar *)"\\P{Nd}",    /* ^digit */
+  NULL,                   /* ^graph */
+  NULL,                   /* ^print */
+  NULL,                   /* ^punct */
+  (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */
+  (uschar *)"\\P{Xwd}",   /* ^word */
+  NULL                    /* ^xdigit */
+};
+#define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))
+#endif
 
 #define STRING(a)  # a
 #define XSTRING(s) STRING(s)
@@ -317,7 +369,7 @@ static const char error_texts[] =
   /* 35 */
   "invalid condition (?(0)\0"
   "\\C not allowed in lookbehind assertion\0"
-  "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
+  "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
   "number after (?C is > 255\0"
   "closing ) for (?C expected\0"
   /* 40 */
@@ -343,7 +395,7 @@ static const char error_texts[] =
   "inconsistent NEWLINE options\0"
   "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
   "a numbered reference must not be zero\0"
-  "(*VERB) with an argument is not supported\0"
+  "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
   /* 60 */
   "(*VERB) not recognized\0"
   "number is too big\0"
@@ -351,7 +403,10 @@ static const char error_texts[] =
   "digit expected after (?+\0"
   "] is an invalid data character in JavaScript compatibility mode\0"
   /* 65 */
-  "different names for subpatterns of the same number are not allowed\0";
+  "different names for subpatterns of the same number are not allowed\0"
+  "(*MARK) must have an argument\0"
+  "this version of PCRE is not compiled with PCRE_UCP support\0"
+  ;
 
 /* Table to identify digits and hex digits. This is used when compiling
 patterns. Note that the tables in chartables are dependent on the locale, and
@@ -584,7 +639,6 @@ else
 
     case CHAR_l:
     case CHAR_L:
-    case CHAR_N:
     case CHAR_u:
     case CHAR_U:
     *errorcodeptr = ERR37;
@@ -822,6 +876,19 @@ else
     }
   }
 
+/* Perl supports \N{name} for character names, as well as plain \N for "not
+newline". PCRE does not support \N{name}. */
+
+if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
+  *errorcodeptr = ERR37;
+
+/* If PCRE_UCP is set, we change the values for \d etc. */
+
+if ((options & PCRE_UCP) != 0 && c <= -ESC_D && c >= -ESC_w)
+  c -= (ESC_DU - ESC_D);
+
+/* Set the pointer to the final character before returning. */
+
 *ptrptr = ptr;
 return c;
 }
@@ -1060,25 +1127,39 @@ dealing with. The very first call may not start with a parenthesis. */
 
 if (ptr[0] == CHAR_LEFT_PARENTHESIS)
   {
-  if (ptr[1] == CHAR_QUESTION_MARK &&
-      ptr[2] == CHAR_VERTICAL_LINE)
+  /* Handle specials such as (*SKIP) or (*UTF8) etc. */
+
+  if (ptr[1] == CHAR_ASTERISK) ptr += 2;
+
+  /* Handle a normal, unnamed capturing parenthesis. */
+
+  else if (ptr[1] != CHAR_QUESTION_MARK)
+    {
+    *count += 1;
+    if (name == NULL && *count == lorn) return *count;
+    ptr++;
+    }
+
+  /* All cases now have (? at the start. Remember when we are in a group
+  where the parenthesis numbers are duplicated. */
+
+  else if (ptr[2] == CHAR_VERTICAL_LINE)
     {
     ptr += 3;
     dup_parens = TRUE;
     }
 
-  /* Handle a normal, unnamed capturing parenthesis */
+  /* Handle comments; all characters are allowed until a ket is reached. */
 
-  else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
+  else if (ptr[2] == CHAR_NUMBER_SIGN)
     {
-    *count += 1;
-    if (name == NULL && *count == lorn) return *count;
-    ptr++;
+    for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
+    goto FAIL_EXIT;
     }
 
   /* Handle a condition. If it is an assertion, just carry on so that it
   is processed as normal. If not, skip to the closing parenthesis of the
-  condition (there can't be any nested parens. */
+  condition (there can't be any nested parens). */
 
   else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
     {
@@ -1090,7 +1171,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
       }
     }
 
-  /* We have either (? or (* and not a condition */
+  /* Start with (? but not a condition. */
 
   else
     {
@@ -1212,8 +1293,7 @@ for (; *ptr != 0; ptr++)
   else if (*ptr == CHAR_RIGHT_PARENTHESIS)
     {
     if (dup_parens && *count < hwm_count) *count = hwm_count;
-    *ptrptr = ptr;
-    return -1;
+    goto FAIL_EXIT;
     }
 
   else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
@@ -1613,7 +1693,8 @@ for (;;)
 
   /* Otherwise, we can get the item's length from the table, except that for
   repeated character types, we have to test for \p and \P, which have an extra
-  two bytes of parameters. */
+  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
+  must add in its length. */
 
   else
     {
@@ -1637,6 +1718,13 @@ for (;;)
       case OP_TYPEPOSUPTO:
       if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
       break;
+
+      case OP_MARK:
+      case OP_PRUNE_ARG:
+      case OP_SKIP_ARG:
+      case OP_THEN_ARG:
+      code += code[1];
+      break;
       }
 
     /* Add in the fixed length from the table */
@@ -1708,7 +1796,8 @@ for (;;)
 
   /* Otherwise, we can get the item's length from the table, except that for
   repeated character types, we have to test for \p and \P, which have an extra
-  two bytes of parameters. */
+  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
+  must add in its length. */
 
   else
     {
@@ -1732,6 +1821,13 @@ for (;;)
       case OP_TYPEEXACT:
       if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
       break;
+
+      case OP_MARK:
+      case OP_PRUNE_ARG:
+      case OP_SKIP_ARG:
+      case OP_THEN_ARG:
+      code += code[1];
+      break;
       }
 
     /* Add in the fixed length from the table */
@@ -2001,6 +2097,16 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
     break;
 #endif
 
+    /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument
+    string. */
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    code += code[1];
+    break;
+
     /* None of the remaining opcodes are required to match a character. */
 
     default:
@@ -2221,8 +2327,8 @@ auto_callout(uschar *code, const uschar *ptr, compile_data *cd)
 {
 *code++ = OP_CALLOUT;
 *code++ = 255;
-PUT(code, 0, ptr - cd->start_pattern);  /* Pattern offset */
-PUT(code, LINK_SIZE, 0);                /* Default length */
+PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
+PUT(code, LINK_SIZE, 0);                       /* Default length */
 return code + 2*LINK_SIZE;
 }
 
@@ -2247,7 +2353,7 @@ Returns:             nothing
 static void
 complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
 {
-int length = ptr - cd->start_pattern - GET(previous_callout, 2);
+int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
 PUT(previous_callout, 2 + LINK_SIZE, length);
 }
 
@@ -2297,6 +2403,69 @@ for (++c; c <= d; c++)
 
 return TRUE;
 }
+
+
+
+/*************************************************
+*        Check a character and a property        *
+*************************************************/
+
+/* This function is called by check_auto_possessive() when a property item
+is adjacent to a fixed character.
+
+Arguments:
+  c            the character
+  ptype        the property type
+  pdata        the data for the type
+  negated      TRUE if it's a negated property (\P or \p{^)
+
+Returns:       TRUE if auto-possessifying is OK
+*/
+
+static BOOL
+check_char_prop(int c, int ptype, int pdata, BOOL negated)
+{
+const ucd_record *prop = GET_UCD(c);
+switch(ptype)
+  {
+  case PT_LAMP:
+  return (prop->chartype == ucp_Lu ||
+          prop->chartype == ucp_Ll ||
+          prop->chartype == ucp_Lt) == negated;
+
+  case PT_GC:
+  return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;
+
+  case PT_PC:
+  return (pdata == prop->chartype) == negated;
+
+  case PT_SC:
+  return (pdata == prop->script) == negated;
+
+  /* These are specials */
+
+  case PT_ALNUM:
+  return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
+          _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;
+
+  case PT_SPACE:    /* Perl space */
+  return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+          c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+          == negated;
+
+  case PT_PXSPACE:  /* POSIX space */
+  return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+          c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+          c == CHAR_FF || c == CHAR_CR)
+          == negated;
+
+  case PT_WORD:
+  return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
+          _pcre_ucp_gentype[prop->chartype] == ucp_N ||
+          c == CHAR_UNDERSCORE) == negated;
+  }
+return FALSE;
+}
 #endif  /* SUPPORT_UCP */
 
 
@@ -2310,10 +2479,8 @@ whether the next thing could possibly match the repeated item. If not, it makes
 sense to automatically possessify the repeated item.
 
 Arguments:
-  op_code       the repeated op code
-  this          data for this item, depends on the opcode
+  previous      pointer to the repeated opcode
   utf8          TRUE in UTF-8 mode
-  utf8_char     used for utf8 character bytes, NULL if not relevant
   ptr           next character in pattern
   options       options bits
   cd            contains pointers to tables etc.
@@ -2322,10 +2489,11 @@ Returns:        TRUE if possessifying is wanted
 */
 
 static BOOL
-check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,
-  const uschar *ptr, int options, compile_data *cd)
+check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,
+  int options, compile_data *cd)
 {
-int next;
+int c, next;
+int op_code = *previous++;
 
 /* Skip whitespace and comments in extended mode */
 
@@ -2386,23 +2554,18 @@ if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
   strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
     return FALSE;
 
-/* Now compare the next item with the previous opcode. If the previous is a
-positive single character match, "item" either contains the character or, if
-"item" is greater than 127 in utf8 mode, the character's bytes are in
-utf8_char. */
-
-
-/* Handle cases when the next item is a character. */
+/* Now compare the next item with the previous opcode. First, handle cases when
+the next item is a character. */
 
 if (next >= 0) switch(op_code)
   {
   case OP_CHAR:
 #ifdef SUPPORT_UTF8
-  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+  GETCHARTEST(c, previous);
 #else
-  (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
+  c = *previous;
 #endif
-  return item != next;
+  return c != next;
 
   /* For CHARNC (caseless character) we must check the other case. If we have
   Unicode property support, we can use it to test the other case of
@@ -2410,9 +2573,11 @@ if (next >= 0) switch(op_code)
 
   case OP_CHARNC:
 #ifdef SUPPORT_UTF8
-  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+  GETCHARTEST(c, previous);
+#else
+  c = *previous;
 #endif
-  if (item == next) return FALSE;
+  if (c == next) return FALSE;
 #ifdef SUPPORT_UTF8
   if (utf8)
     {
@@ -2423,16 +2588,16 @@ if (next >= 0) switch(op_code)
 #else
     othercase = NOTACHAR;
 #endif
-    return (unsigned int)item != othercase;
+    return (unsigned int)c != othercase;
     }
   else
 #endif  /* SUPPORT_UTF8 */
-  return (item != cd->fcc[next]);  /* Non-UTF-8 mode */
+  return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
 
-  /* For OP_NOT, "item" must be a single-byte character. */
+  /* For OP_NOT, its data is always a single-byte character. */
 
   case OP_NOT:
-  if (item == next) return TRUE;
+  if ((c = *previous) == next) return TRUE;
   if ((options & PCRE_CASELESS) == 0) return FALSE;
 #ifdef SUPPORT_UTF8
   if (utf8)
@@ -2444,11 +2609,14 @@ if (next >= 0) switch(op_code)
 #else
     othercase = NOTACHAR;
 #endif
-    return (unsigned int)item == othercase;
+    return (unsigned int)c == othercase;
     }
   else
 #endif  /* SUPPORT_UTF8 */
-  return (item == cd->fcc[next]);  /* Non-UTF-8 mode */
+  return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
+
+  /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
+  When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
 
   case OP_DIGIT:
   return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
@@ -2491,11 +2659,12 @@ if (next >= 0) switch(op_code)
     case 0x202f:
     case 0x205f:
     case 0x3000:
-    return op_code != OP_HSPACE;
+    return op_code == OP_NOT_HSPACE;
     default:
-    return op_code == OP_HSPACE;
+    return op_code != OP_NOT_HSPACE;
     }
 
+  case OP_ANYNL:
   case OP_VSPACE:
   case OP_NOT_VSPACE:
   switch(next)
@@ -2507,48 +2676,62 @@ if (next >= 0) switch(op_code)
     case 0x85:
     case 0x2028:
     case 0x2029:
-    return op_code != OP_VSPACE;
+    return op_code == OP_NOT_VSPACE;
     default:
-    return op_code == OP_VSPACE;
+    return op_code != OP_NOT_VSPACE;
     }
 
+#ifdef SUPPORT_UCP
+  case OP_PROP:
+  return check_char_prop(next, previous[0], previous[1], FALSE);
+
+  case OP_NOTPROP:
+  return check_char_prop(next, previous[0], previous[1], TRUE);
+#endif
+
   default:
   return FALSE;
   }
 
 
-/* Handle the case when the next item is \d, \s, etc. */
+/* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
+is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
+generated only when PCRE_UCP is *not* set, that is, when only ASCII
+characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are
+replaced by OP_PROP codes when PCRE_UCP is set. */
 
 switch(op_code)
   {
   case OP_CHAR:
   case OP_CHARNC:
 #ifdef SUPPORT_UTF8
-  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+  GETCHARTEST(c, previous);
+#else
+  c = *previous;
 #endif
   switch(-next)
     {
     case ESC_d:
-    return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;
+    return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;
 
     case ESC_D:
-    return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;
+    return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;
 
     case ESC_s:
-    return item > 127 || (cd->ctypes[item] & ctype_space) == 0;
+    return c > 127 || (cd->ctypes[c] & ctype_space) == 0;
 
     case ESC_S:
-    return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;
+    return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;
 
     case ESC_w:
-    return item > 127 || (cd->ctypes[item] & ctype_word) == 0;
+    return c > 127 || (cd->ctypes[c] & ctype_word) == 0;
 
     case ESC_W:
-    return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
+    return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;
 
     case ESC_h:
     case ESC_H:
-    switch(item)
+    switch(c)
       {
       case 0x09:
       case 0x20:
@@ -2576,7 +2759,7 @@ switch(op_code)
 
     case ESC_v:
     case ESC_V:
-    switch(item)
+    switch(c)
       {
       case 0x0a:
       case 0x0b:
@@ -2590,38 +2773,92 @@ switch(op_code)
       return -next == ESC_v;
       }
 
+    /* When PCRE_UCP is set, these values get generated for \d etc. Find
+    their substitutions and process them. The result will always be either
+    -ESC_p or -ESC_P. Then fall through to process those values. */
+
+#ifdef SUPPORT_UCP
+    case ESC_du:
+    case ESC_DU:
+    case ESC_wu:
+    case ESC_WU:
+    case ESC_su:
+    case ESC_SU:
+      {
+      int temperrorcode = 0;
+      ptr = substitutes[-next - ESC_DU];
+      next = check_escape(&ptr, &temperrorcode, 0, options, FALSE);
+      if (temperrorcode != 0) return FALSE;
+      ptr++;    /* For compatibility */
+      }
+    /* Fall through */
+
+    case ESC_p:
+    case ESC_P:
+      {
+      int ptype, pdata, errorcodeptr;
+      BOOL negated;
+
+      ptr--;      /* Make ptr point at the p or P */
+      ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);
+      if (ptype < 0) return FALSE;
+      ptr++;      /* Point past the final curly ket */
+
+      /* If the property item is optional, we have to give up. (When generated
+      from \d etc by PCRE_UCP, this test will have been applied much earlier,
+      to the original \d etc. At this point, ptr will point to a zero byte. */
+
+      if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
+        strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
+          return FALSE;
+
+      /* Do the property check. */
+
+      return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated);
+      }
+#endif
+
     default:
     return FALSE;
     }
 
+  /* In principle, support for Unicode properties should be integrated here as
+  well. It means re-organizing the above code so as to get hold of the property
+  values before switching on the op-code. However, I wonder how many patterns
+  combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set,
+  these op-codes are never generated.) */
+
   case OP_DIGIT:
   return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
-         next == -ESC_h || next == -ESC_v;
+         next == -ESC_h || next == -ESC_v || next == -ESC_R;
 
   case OP_NOT_DIGIT:
   return next == -ESC_d;
 
   case OP_WHITESPACE:
-  return next == -ESC_S || next == -ESC_d || next == -ESC_w;
+  return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;
 
   case OP_NOT_WHITESPACE:
   return next == -ESC_s || next == -ESC_h || next == -ESC_v;
 
   case OP_HSPACE:
-  return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
+  return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
+         next == -ESC_w || next == -ESC_v || next == -ESC_R;
 
   case OP_NOT_HSPACE:
   return next == -ESC_h;
 
   /* Can't have \S in here because VT matches \S (Perl anomaly) */
+  case OP_ANYNL:
   case OP_VSPACE:
   return next == -ESC_V || next == -ESC_d || next == -ESC_w;
 
   case OP_NOT_VSPACE:
-  return next == -ESC_v;
+  return next == -ESC_v || next == -ESC_R;
 
   case OP_WORDCHAR:
-  return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
+  return next == -ESC_W || next == -ESC_s || next == -ESC_h ||
+         next == -ESC_v || next == -ESC_R;
 
   case OP_NOT_WORDCHAR:
   return next == -ESC_w || next == -ESC_d;
@@ -2685,6 +2922,7 @@ BOOL inescq = FALSE;
 BOOL groupsetfirstbyte = FALSE;
 const uschar *ptr = *ptrptr;
 const uschar *tempptr;
+const uschar *nestptr = NULL;
 uschar *previous = NULL;
 uschar *previous_callout = NULL;
 uschar *save_hwm = NULL;
@@ -2755,6 +2993,16 @@ for (;; ptr++)
 
   c = *ptr;
 
+  /* If we are at the end of a nested substitution, revert to the outer level
+  string. Nesting only happens one level deep. */
+
+  if (c == 0 && nestptr != NULL)
+    {
+    ptr = nestptr;
+    nestptr = NULL;
+    c = *ptr;
+    }
+
   /* If we are in the pre-compile phase, accumulate the length used for the
   previous cycle of this loop. */
 
@@ -2785,7 +3033,7 @@ for (;; ptr++)
       goto FAILED;
       }
 
-    *lengthptr += code - last_code;
+    *lengthptr += (int)(code - last_code);
     DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
 
     /* If "previous" is set and it is not at the start of the work space, move
@@ -2903,7 +3151,7 @@ for (;; ptr++)
         *errorcodeptr = ERR20;
         goto FAILED;
         }
-      *lengthptr += code - last_code;   /* To include callout length */
+      *lengthptr += (int)(code - last_code);   /* To include callout length */
       DPRINTF((">> end branch\n"));
       }
     return TRUE;
@@ -3108,7 +3356,7 @@ for (;; ptr++)
           ptr++;
           }
 
-        posix_class = check_posix_name(ptr, tempptr - ptr);
+        posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
         if (posix_class < 0)
           {
           *errorcodeptr = ERR30;
@@ -3122,10 +3370,25 @@ for (;; ptr++)
         if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
           posix_class = 0;
 
-        /* We build the bit map for the POSIX class in a chunk of local store
-        because we may be adding and subtracting from it, and we don't want to
-        subtract bits that may be in the main map already. At the end we or the
-        result into the bit map that is being built. */
+        /* When PCRE_UCP is set, some of the POSIX classes are converted to
+        different escape sequences that use Unicode properties. */
+
+#ifdef SUPPORT_UCP
+        if ((options & PCRE_UCP) != 0)
+          {
+          int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
+          if (posix_substitutes[pc] != NULL)
+            {
+            nestptr = tempptr + 1;
+            ptr = posix_substitutes[pc] - 1;
+            continue;
+            }
+          }
+#endif
+        /* In the non-UCP case, we build the bit map for the POSIX class in a
+        chunk of local store because we may be adding and subtracting from it,
+        and we don't want to subtract bits that may be in the main map already.
+        At the end we or the result into the bit map that is being built. */
 
         posix_class *= 3;
 
@@ -3169,19 +3432,18 @@ for (;; ptr++)
 
       /* Backslash may introduce a single character, or it may introduce one
       of the specials, which just set a flag. The sequence \b is a special
-      case. Inside a class (and only there) it is treated as backspace.
-      Elsewhere it marks a word boundary. Other escapes have preset maps ready
-      to 'or' into the one we are building. We assume they have more than one
-      character in them, so set class_charcount bigger than one. */
+      case. Inside a class (and only there) it is treated as backspace. We
+      assume that other escapes have more than one character in them, so set
+      class_charcount bigger than one. Unrecognized escapes fall through and
+      are either treated as literal characters (by default), or are faulted if
+      PCRE_EXTRA is set. */
 
       if (c == CHAR_BACKSLASH)
         {
         c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
         if (*errorcodeptr != 0) goto FAILED;
 
-        if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
-        else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
-        else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
+        if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */
         else if (-c == ESC_Q)            /* Handle start of quoted string */
           {
           if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
@@ -3198,10 +3460,20 @@ for (;; ptr++)
           register const uschar *cbits = cd->cbits;
           class_charcount += 2;     /* Greater than 1 is what matters */
 
-          /* Save time by not doing this in the pre-compile phase. */
-
-          if (lengthptr == NULL) switch (-c)
+          switch (-c)
             {
+#ifdef SUPPORT_UCP
+            case ESC_du:     /* These are the values given for \d etc */
+            case ESC_DU:     /* when PCRE_UCP is set. We replace the */
+            case ESC_wu:     /* escape sequence with an appropriate \p */
+            case ESC_WU:     /* or \P to test Unicode properties instead */
+            case ESC_su:     /* of the default ASCII testing. */
+            case ESC_SU:
+            nestptr = ptr;
+            ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
+            class_charcount -= 2;                /* Undo! */
+            continue;
+#endif
             case ESC_d:
             for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
             continue;
@@ -3231,20 +3503,7 @@ for (;; ptr++)
             classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
             continue;
 
-            default:    /* Not recognized; fall through */
-            break;      /* Need "default" setting to stop compiler warning. */
-            }
-
-          /* In the pre-compile phase, just do the recognition. */
-
-          else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
-                   c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
-
-          /* We need to deal with \H, \h, \V, and \v in both phases because
-          they use extra memory. */
-
-          if (-c == ESC_h)
-            {
+            case ESC_h:
             SETBIT(classbits, 0x09); /* VT */
             SETBIT(classbits, 0x20); /* SPACE */
             SETBIT(classbits, 0xa0); /* NSBP */
@@ -3268,10 +3527,8 @@ for (;; ptr++)
               }
 #endif
             continue;
-            }
 
-          if (-c == ESC_H)
-            {
+            case ESC_H:
             for (c = 0; c < 32; c++)
               {
               int x = 0xff;
@@ -3313,10 +3570,8 @@ for (;; ptr++)
               }
 #endif
             continue;
-            }
 
-          if (-c == ESC_v)
-            {
+            case ESC_v:
             SETBIT(classbits, 0x0a); /* LF */
             SETBIT(classbits, 0x0b); /* VT */
             SETBIT(classbits, 0x0c); /* FF */
@@ -3332,10 +3587,8 @@ for (;; ptr++)
               }
 #endif
             continue;
-            }
 
-          if (-c == ESC_V)
-            {
+            case ESC_V:
             for (c = 0; c < 32; c++)
               {
               int x = 0xff;
@@ -3365,38 +3618,38 @@ for (;; ptr++)
               }
 #endif
             continue;
-            }
-
-          /* We need to deal with \P and \p in both phases. */
 
 #ifdef SUPPORT_UCP
-          if (-c == ESC_p || -c == ESC_P)
-            {
-            BOOL negated;
-            int pdata;
-            int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
-            if (ptype < 0) goto FAILED;
-            class_utf8 = TRUE;
-            *class_utf8data++ = ((-c == ESC_p) != negated)?
-              XCL_PROP : XCL_NOTPROP;
-            *class_utf8data++ = ptype;
-            *class_utf8data++ = pdata;
-            class_charcount -= 2;   /* Not a < 256 character */
-            continue;
-            }
+            case ESC_p:
+            case ESC_P:
+              {
+              BOOL negated;
+              int pdata;
+              int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
+              if (ptype < 0) goto FAILED;
+              class_utf8 = TRUE;
+              *class_utf8data++ = ((-c == ESC_p) != negated)?
+                XCL_PROP : XCL_NOTPROP;
+              *class_utf8data++ = ptype;
+              *class_utf8data++ = pdata;
+              class_charcount -= 2;   /* Not a < 256 character */
+              continue;
+              }
 #endif
-          /* Unrecognized escapes are faulted if PCRE is running in its
-          strict mode. By default, for compatibility with Perl, they are
-          treated as literals. */
+            /* Unrecognized escapes are faulted if PCRE is running in its
+            strict mode. By default, for compatibility with Perl, they are
+            treated as literals. */
 
-          if ((options & PCRE_EXTRA) != 0)
-            {
-            *errorcodeptr = ERR7;
-            goto FAILED;
+            default:
+            if ((options & PCRE_EXTRA) != 0)
+              {
+              *errorcodeptr = ERR7;
+              goto FAILED;
+              }
+            class_charcount -= 2;  /* Undo the default count from above */
+            c = *ptr;              /* Get the final character and fall through */
+            break;
             }
-
-          class_charcount -= 2;  /* Undo the default count from above */
-          c = *ptr;              /* Get the final character and fall through */
           }
 
         /* Fall through if we have a single character (c >= 0). This may be
@@ -3466,14 +3719,11 @@ for (;; ptr++)
           d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
           if (*errorcodeptr != 0) goto FAILED;
 
-          /* \b is backspace; \X is literal X; \R is literal R; any other
-          special means the '-' was literal */
+          /* \b is backspace; any other special means the '-' was literal */
 
           if (d < 0)
             {
-            if (d == -ESC_b) d = CHAR_BS;
-            else if (d == -ESC_X) d = CHAR_X;
-            else if (d == -ESC_R) d = CHAR_R; else
+            if (d == -ESC_b) d = CHAR_BS; else
               {
               ptr = oldptr;
               goto LONE_SINGLE_CHARACTER;  /* A few lines below */
@@ -3639,35 +3889,23 @@ for (;; ptr++)
         }
       }
 
-    /* Loop until ']' reached. This "while" is the end of the "do" above. */
+    /* Loop until ']' reached. This "while" is the end of the "do" far above.
+    If we are at the end of an internal nested string, revert to the outer
+    string. */
+
+    while (((c = *(++ptr)) != 0 ||
+           (nestptr != NULL &&
+             (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) &&
+           (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
 
-    while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
+    /* Check for missing terminating ']' */
 
-    if (c == 0)                          /* Missing terminating ']' */
+    if (c == 0)
       {
       *errorcodeptr = ERR6;
       goto FAILED;
       }
 
-
-/* This code has been disabled because it would mean that \s counts as
-an explicit \r or \n reference, and that's not really what is wanted. Now
-we set the flag only if there is a literal "\r" or "\n" in the class. */
-
-#if 0
-    /* Remember whether \r or \n are in this class */
-
-    if (negate_class)
-      {
-      if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
-      }
-    else
-      {
-      if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
-      }
-#endif
-
-
     /* If class_charcount is 1, we saw precisely one character whose value is
     less than 256. As long as there were no characters >= 128 and there was no
     use of \p or \P, in other words, no use of any XCLASS features, we can
@@ -3731,13 +3969,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* If there are characters with values > 255, we have to compile an
     extended class, with its own opcode, unless there was a negated special
-    such as \S in the class, because in that case all characters > 255 are in
-    the class, so any that were explicitly given as well can be ignored. If
-    (when there are explicit characters > 255 that must be listed) there are no
-    characters < 256, we can omit the bitmap in the actual compiled code. */
+    such as \S in the class, and PCRE_UCP is not set, because in that case all
+    characters > 255 are in the class, so any that were explicitly given as
+    well can be ignored. If (when there are explicit characters > 255 that must
+    be listed) there are no characters < 256, we can omit the bitmap in the
+    actual compiled code. */
 
 #ifdef SUPPORT_UTF8
-    if (class_utf8 && !should_flip_negation)
+    if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))
       {
       *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
       *code++ = OP_XCLASS;
@@ -3763,10 +4002,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
       }
 #endif
 
-    /* If there are no characters > 255, set the opcode to OP_CLASS or
-    OP_NCLASS, depending on whether the whole class was negated and whether
-    there were negative specials such as \S in the class. Then copy the 32-byte
-    map into the code vector, negating it if necessary. */
+    /* If there are no characters > 255, or they are all to be included or
+    excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
+    whole class was negated and whether there were negative specials such as \S
+    (non-UCP) in the class. Then copy the 32-byte map into the code vector,
+    negating it if necessary. */
 
     *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
     if (negate_class)
@@ -3890,8 +4130,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1,
-            options, cd))
+          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -3912,7 +4151,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
       c = previous[1];
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd))
+          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -3936,7 +4175,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd))
+          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -4146,7 +4385,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
       {
       register int i;
       int ketoffset = 0;
-      int len = code - previous;
+      int len = (int)(code - previous);
       uschar *bralink = NULL;
 
       /* Repeating a DEFINE group is pointless */
@@ -4167,7 +4406,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         {
         register uschar *ket = previous;
         do ket += GET(ket, 1); while (*ket != OP_KET);
-        ketoffset = code - ket;
+        ketoffset = (int)(code - ket);
         }
 
       /* The case of a zero minimum is special because of the need to stick
@@ -4235,7 +4474,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           /* We chain together the bracket offset fields that have to be
           filled in later when the ends of the brackets are reached. */
 
-          offset = (bralink == NULL)? 0 : previous - bralink;
+          offset = (bralink == NULL)? 0 : (int)(previous - bralink);
           bralink = previous;
           PUTINC(previous, 0, offset);
           }
@@ -4344,7 +4583,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             {
             int offset;
             *code++ = OP_BRA;
-            offset = (bralink == NULL)? 0 : code - bralink;
+            offset = (bralink == NULL)? 0 : (int)(code - bralink);
             bralink = code;
             PUTINC(code, 0, offset);
             }
@@ -4365,7 +4604,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         while (bralink != NULL)
           {
           int oldlinkoffset;
-          int offset = code - bralink + 1;
+          int offset = (int)(code - bralink + 1);
           uschar *bra = code - offset;
           oldlinkoffset = GET(bra, 1);
           bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
@@ -4453,7 +4692,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 #endif
         }
 
-      len = code - tempcode;
+      len = (int)(code - tempcode);
       if (len > 0) switch (*tempcode)
         {
         case OP_STAR:  *tempcode = OP_POSSTAR; break;
@@ -4512,24 +4751,34 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* First deal with various "verbs" that can be introduced by '*'. */
 
-    if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
+    if (*(++ptr) == CHAR_ASTERISK &&
+         ((cd->ctypes[ptr[1]] & ctype_letter) != 0 || ptr[1] == ':'))
       {
       int i, namelen;
+      int arglen = 0;
       const char *vn = verbnames;
-      const uschar *name = ++ptr;
+      const uschar *name = ptr + 1;
+      const uschar *arg = NULL;
       previous = NULL;
       while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
+      namelen = (int)(ptr - name);
+
       if (*ptr == CHAR_COLON)
         {
-        *errorcodeptr = ERR59;   /* Not supported */
-        goto FAILED;
+        arg = ++ptr;
+        while ((cd->ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0
+          || *ptr == '_') ptr++;
+        arglen = (int)(ptr - arg);
         }
+
       if (*ptr != CHAR_RIGHT_PARENTHESIS)
         {
         *errorcodeptr = ERR60;
         goto FAILED;
         }
-      namelen = ptr - name;
+
+      /* Scan the table of verb names */
+
       for (i = 0; i < verbcount; i++)
         {
         if (namelen == verbs[i].len &&
@@ -4547,13 +4796,41 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               PUT2INC(code, 0, oc->number);
               }
             }
-          *code++ = verbs[i].op;
-          break;
+
+          /* Handle the cases with/without an argument */
+
+          if (arglen == 0)
+            {
+            if (verbs[i].op < 0)   /* Argument is mandatory */
+              {
+              *errorcodeptr = ERR66;
+              goto FAILED;
+              }
+            *code++ = verbs[i].op;
+            }
+
+          else
+            {
+            if (verbs[i].op_arg < 0)   /* Argument is forbidden */
+              {
+              *errorcodeptr = ERR59;
+              goto FAILED;
+              }
+            *code++ = verbs[i].op_arg;
+            *code++ = arglen;
+            memcpy(code, arg, arglen);
+            code += arglen;
+            *code++ = 0;
+            }
+
+          break;  /* Found verb, exit loop */
           }
+
         vn += verbs[i].len + 1;
         }
-      if (i < verbcount) continue;
-      *errorcodeptr = ERR60;
+
+      if (i < verbcount) continue;    /* Successfully handled a verb */
+      *errorcodeptr = ERR60;          /* Verb not recognized */
       goto FAILED;
       }
 
@@ -4672,7 +4949,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               recno * 10 + *ptr - CHAR_0 : -1;
           ptr++;
           }
-        namelen = ptr - name;
+        namelen = (int)(ptr - name);
 
         if ((terminator > 0 && *ptr++ != terminator) ||
             *ptr++ != CHAR_RIGHT_PARENTHESIS)
@@ -4868,8 +5145,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             goto FAILED;
             }
           *code++ = n;
-          PUT(code, 0, ptr - cd->start_pattern + 1);  /* Pattern offset */
-          PUT(code, LINK_SIZE, 0);                    /* Default length */
+          PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
+          PUT(code, LINK_SIZE, 0);                          /* Default length */
           code += 2 * LINK_SIZE;
           }
         previous = NULL;
@@ -4902,7 +5179,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           name = ++ptr;
 
           while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
-          namelen = ptr - name;
+          namelen = (int)(ptr - name);
 
           /* In the pre-compile phase, just do a syntax check. */
 
@@ -5032,7 +5309,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         NAMED_REF_OR_RECURSE:
         name = ++ptr;
         while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
-        namelen = ptr - name;
+        namelen = (int)(ptr - name);
 
         /* In the pre-compile phase, do a syntax check and set a dummy
         reference number. */
@@ -5201,7 +5478,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               of the group. */
 
               called = cd->start_code + recno;
-              PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
+              PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
               }
 
             /* If not a forward reference, and the subpattern is still open,
@@ -5225,7 +5502,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           code += 1 + LINK_SIZE;
 
           *code = OP_RECURSE;
-          PUT(code, 1, called - cd->start_code);
+          PUT(code, 1, (int)(called - cd->start_code));
           code += 1 + LINK_SIZE;
 
           *code = OP_KET;
@@ -5336,8 +5613,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         }     /* End of switch for character following (? */
       }       /* End of (? handling */
 
-    /* Opening parenthesis not followed by '?'. If PCRE_NO_AUTO_CAPTURE is set,
-    all unadorned brackets become non-capturing and behave like (?:...)
+    /* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE
+    is set, all unadorned brackets become non-capturing and behave like (?:...)
     brackets. */
 
     else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
@@ -5529,11 +5806,12 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* ===================================================================*/
     /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
-    are arranged to be the negation of the corresponding OP_values. For the
-    back references, the values are ESC_REF plus the reference number. Only
-    back references and those types that consume a character may be repeated.
-    We can test for values between ESC_b and ESC_Z for the latter; this may
-    have to change if any new ones are ever created. */
+    are arranged to be the negation of the corresponding OP_values in the
+    default case when PCRE_UCP is not set. For the back references, the values
+    are ESC_REF plus the reference number. Only back references and those types
+    that consume a character may be repeated. We can test for values between
+    ESC_b and ESC_Z for the latter; this may have to change if any new ones are
+    ever created. */
 
     case CHAR_BACKSLASH:
     tempptr = ptr;
@@ -5693,12 +5971,24 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 #endif
 
       /* For the rest (including \X when Unicode properties are supported), we
-      can obtain the OP value by negating the escape value. */
+      can obtain the OP value by negating the escape value in the default
+      situation when PCRE_UCP is not set. When it *is* set, we substitute
+      Unicode property tests. */
 
       else
         {
-        previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
-        *code++ = -c;
+#ifdef SUPPORT_UCP
+        if (-c >= ESC_DU && -c <= ESC_wu)
+          {
+          nestptr = ptr + 1;                   /* Where to resume */
+          ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
+          }
+        else
+#endif
+          {
+          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
+          *code++ = -c;
+          }
         }
       continue;
       }
@@ -6030,7 +6320,7 @@ for (;;)
     {
     if (lengthptr == NULL)
       {
-      int branch_length = code - last_branch;
+      int branch_length = (int)(code - last_branch);
       do
         {
         int prev_length = GET(last_branch, 1);
@@ -6044,7 +6334,7 @@ for (;;)
     /* Fill in the ket */
 
     *code = OP_KET;
-    PUT(code, 1, code - start_bracket);
+    PUT(code, 1, (int)(code - start_bracket));
     code += 1 + LINK_SIZE;
 
     /* If it was a capturing subpattern, check to see if it contained any
@@ -6059,9 +6349,9 @@ for (;;)
           code - start_bracket);
         *start_bracket = OP_ONCE;
         code += 1 + LINK_SIZE;
-        PUT(start_bracket, 1, code - start_bracket);
+        PUT(start_bracket, 1, (int)(code - start_bracket));
         *code = OP_KET;
-        PUT(code, 1, code - start_bracket);
+        PUT(code, 1, (int)(code - start_bracket));
         code += 1 + LINK_SIZE;
         length += 2 + 2*LINK_SIZE;
         }
@@ -6116,7 +6406,7 @@ for (;;)
   else
     {
     *code = OP_ALT;
-    PUT(code, 1, code - last_branch);
+    PUT(code, 1, (int)(code - last_branch));
     bc.current_branch = last_branch = code;
     code += 1 + LINK_SIZE;
     }
@@ -6435,7 +6725,7 @@ int length = 1;  /* For final END opcode */
 int firstbyte, reqbyte, newline;
 int errorcode = 0;
 int skipatstart = 0;
-BOOL utf8 = (options & PCRE_UTF8) != 0;
+BOOL utf8;
 size_t size;
 uschar *code;
 const uschar *codestart;
@@ -6505,6 +6795,8 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
 
   if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)
+    { skipatstart += 6; options |= PCRE_UCP; continue; }
 
   if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
@@ -6529,6 +6821,8 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
   else break;
   }
 
+utf8 = (options & PCRE_UTF8) != 0;
+
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */
 
 #ifdef SUPPORT_UTF8
@@ -6546,6 +6840,16 @@ if (utf8)
   }
 #endif
 
+/* Can't support UCP unless PCRE has been compiled to include the code. */
+
+#ifndef SUPPORT_UCP
+if ((options & PCRE_UCP) != 0)
+  {
+  errorcode = ERR67;
+  goto PCRE_EARLY_ERROR_RETURN;
+  }
+#endif
+
 /* Check validity of \R options. */
 
 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
@@ -6674,7 +6978,7 @@ regex compiled on a system with 4-byte pointers is run on another with 8-byte
 pointers. */
 
 re->magic_number = MAGIC_NUMBER;
-re->size = size;
+re->size = (int)size;
 re->options = cd->external_options;
 re->flags = cd->external_flags;
 re->dummy1 = 0;
@@ -6745,7 +7049,7 @@ while (errorcode == 0 && cd->hwm > cworkspace)
   recno = GET(codestart, offset);
   groupptr = _pcre_find_bracket(codestart, utf8, recno);
   if (groupptr == NULL) errorcode = ERR53;
-    else PUT(((uschar *)codestart), offset, groupptr - codestart);
+    else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));
   }
 
 /* Give an error if there's back reference to a non-existent capturing
@@ -6800,7 +7104,7 @@ if (errorcode != 0)
   {
   (pcre_free)(re);
   PCRE_EARLY_ERROR_RETURN:
-  *erroroffset = ptr - (const uschar *)pattern;
+  *erroroffset = (int)(ptr - (const uschar *)pattern);
   PCRE_EARLY_ERROR_RETURN2:
   *errorptr = find_error_text(errorcode);
   if (errorcodeptr != NULL) *errorcodeptr = errorcode;