1 files changed, 55 insertions, 8 deletions
diff --git a/src/regex.c b/src/regex.c
index 846c87041b1..ae80ad0cee8 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2530,6 +2530,7 @@ regex_compile (pattern, size, syntax, bufp)
   bufp->syntax = syntax;
   bufp->fastmap_accurate = 0;
   bufp->not_bol = bufp->not_eol = 0;
+  bufp->used_syntax = 0;
 
   /* Set `used' to zero, so that if we return an error, the pattern
      printer (for debugging) will think there's no pattern.  We reset it
@@ -2942,6 +2943,14 @@ regex_compile (pattern, size, syntax, bufp)
 			      SET_LIST_BIT (translated);
 			  }
 
+			/* In most cases the matching rule for char classes
+			   only uses the syntax table for multibyte chars,
+			   so that the content of the syntax-table is not
+			   hardcoded in the range_table.  SPACE and WORD are
+			   the two exceptions.  */
+			if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
+			  bufp->used_syntax = 1;
+
 			/* Repeat the loop. */
 			continue;
 		      }
@@ -3877,11 +3886,13 @@ analyse_first (p, pend, fastmap, multibyte)
 	  if (fastmap)
 	    {
 	      int c = RE_STRING_CHAR (p + 1, pend - p);
-
+	      /* When fast-scanning, the fastmap can be indexed either with
+		 a char (smaller than 256) or with the first byte of
+		 a char's byte sequence.  So we have to conservatively add
+		 both to the table.  */
 	      if (SINGLE_BYTE_CHAR_P (c))
 		fastmap[c] = 1;
-	      else
-		fastmap[p[1]] = 1;
+	      fastmap[p[1]] = 1;
 	    }
 	  break;
 
@@ -3899,6 +3910,10 @@ analyse_first (p, pend, fastmap, multibyte)
 	     So any that are not listed in the charset
 	     are possible matches, even in multibyte buffers.  */
 	  if (!fastmap) break;
+	  /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+	     because it will automatically be set when needed by virtue of
+	     being larger than the highest char of its charset (0xbf) but
+	     smaller than (1<<BYTEWIDTH).  */
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
 	       j < (1 << BYTEWIDTH); j++)
 	    fastmap[j] = 1;
@@ -3909,7 +3924,13 @@ analyse_first (p, pend, fastmap, multibyte)
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
 	       j >= 0; j--)
 	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-	      fastmap[j] = 1;
+	      {
+		fastmap[j] = 1;
+#ifdef emacs
+		if (j >= 0x80 && j < 0xa0)
+		  fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+	      }
 
 	  if ((not && multibyte)
 	      /* Any character set can possibly contain a character
@@ -4352,11 +4373,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 		    }
 		}
 	      else
-		while (range > lim && !fastmap[*d])
+		do
 		  {
-		    d++;
-		    range--;
-		  }
+		    re_char *d_start = d;
+		    while (range > lim && !fastmap[*d])
+		      {
+			d++;
+			range--;
+		      }
+#ifdef emacs
+		    if (multibyte && range > lim)
+		      {
+			/* Check that we are at the beginning of a char.  */
+			int at_boundary;
+			AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+			if (at_boundary)
+			  break;
+			else
+			  { /* We have matched an internal byte of a char
+			       rather than the leading byte, so it's a false
+			       positive: we should keep scanning.  */
+			    d++; range--;
+			  }
+		      }
+		    else
+#endif
+		      break;
+		  } while (1);
 
 	      startpos += irange - range;
 	    }
@@ -6197,6 +6240,10 @@ re_compile_pattern (pattern, length, bufp)
 {
   reg_errcode_t ret;
 
+#ifdef emacs
+  gl_state.current_syntax_table = current_buffer->syntax_table;
+#endif
+
   /* GNU code is written to assume at least RE_NREGS registers will be set
      (and at least one extra will be -1).  */
   bufp->regs_allocated = REGS_UNALLOCATED;