summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Stefan Monnier <monnier@iro.umontreal.ca> 2006-09-22 17:30:13 +0000
committer: Stefan Monnier <monnier@iro.umontreal.ca> 2006-09-22 17:30:13 +0000
commit: 4560a582d623dbf040f4176bdebb8107c12c2bb8 (patch)
tree: 4e9eed296a37bf4d9f85a6a8c96dddd3ff9469ed /src
parent: 3ffcda547185fe2950f0ffe108604a1a13dd7b8b (diff)
download: emacs-4560a582d623dbf040f4176bdebb8107c12c2bb8.tar.gz
(analyse_first): For eight-bit-control chars, mark both the
char's value and its leading byte in the fastmap.
(re_search_2): When fast-scanning without translation, be careful to
check that we only match the leading byte of a multibyte char.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog | 13
-rw-r--r-- src/regex.c | 50
2 files changed, 54 insertions, 9 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index b4ddda11f88..cec46b21557 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,14 @@
+2006-09-22 Stefan Monnier <monnier@iro.umontreal.ca>
+
+ * regex.c (analyse_first): For eight-bit-control chars, mark both the
+ char's value and its leading byte in the fastmap.
+ (re_search_2): When fast-scanning without translation, be careful to
+ check that we only match the leading byte of a multibyte char.
+
+ * charset.h (PREV_CHAR_BOUNDARY): Make it work from within a char's
+ byte sequence.
+ (AT_CHAR_BOUNDARY_P): New macro.
+
2006-09-22 Kenichi Handa <handa@m17n.org>
* fns.c (optimize_sub_char_table): Don't optimize a sub-char-table
@@ -271,7 +282,7 @@
2006-08-27 Martin Rudalics <rudalics@gmx.at>
- * xdisp.c (mouse_autoselect_window): Removed.
+ * xdisp.c (mouse_autoselect_window): Remove.
(Vmouse_autoselect_window): New variable. DEFVAR_LISP it.
* dispextern.h (mouse_autoselect_window): Remove extern.
diff --git a/src/regex.c b/src/regex.c
index 763b490c906..66e363e731c 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -3877,11 +3877,13 @@ analyse_first (p, pend, fastmap, multibyte)
if (fastmap)
{
int c = RE_STRING_CHAR (p + 1, pend - p);
-
+ /* When fast-scanning, the fastmap can be indexed either with
+ a char (smaller than 256) or with the first byte of
+ a char's byte sequence. So we have to conservatively add
+ both to the table. */
if (SINGLE_BYTE_CHAR_P (c))
fastmap[c] = 1;
- else
- fastmap[p[1]] = 1;
+ fastmap[p[1]] = 1;
}
break;
@@ -3899,6 +3901,10 @@ analyse_first (p, pend, fastmap, multibyte)
So any that are not listed in the charset
are possible matches, even in multibyte buffers. */
if (!fastmap) break;
+ /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+ because it will automatically be set when needed by virtue of
+ being larger than the highest char of its charset (0xbf) but
+ smaller than (1<<BYTEWIDTH). */
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
j < (1 << BYTEWIDTH); j++)
fastmap[j] = 1;
@@ -3909,7 +3915,13 @@ analyse_first (p, pend, fastmap, multibyte)
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
j >= 0; j--)
if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
- fastmap[j] = 1;
+ {
+ fastmap[j] = 1;
+#ifdef emacs
+ if (j >= 0x80 && j < 0xa0)
+ fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+ }
if ((not && multibyte)
/* Any character set can possibly contain a character
@@ -4352,11 +4364,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
}
}
else
- while (range > lim && !fastmap[*d])
+ do
{
- d++;
- range--;
- }
+ re_char *d_start = d;
+ while (range > lim && !fastmap[*d])
+ {
+ d++;
+ range--;
+ }
+#ifdef emacs
+ if (multibyte && range > lim)
+ {
+ /* Check that we are at the beginning of a char. */
+ int at_boundary;
+ AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+ if (at_boundary)
+ break;
+ else
+ { /* We have matched an internal byte of a char
+ rather than the leading byte, so it's a false
+ positive: we should keep scanning. */
+ d++; range--;
+ }
+ }
+ else
+#endif
+ break;
+ } while (1);
startpos += irange - range;
}