diff options
author | Mattias Engdegård <mattiase@acm.org> | 2019-06-28 10:20:55 +0200 |
---|---|---|
committer | Mattias Engdegård <mattiase@acm.org> | 2019-06-28 17:30:18 +0200 |
commit | a1f76adfb03c23bb4242928e8efe6193c301f0c1 (patch) | |
tree | 7e2a5c58656ffbe78d34dc58639d7cd5bf8f943a /src/regex-emacs.c | |
parent | aae5bf4438712c9fe761c5e4b5a871192852cd97 (diff) | |
download | emacs-a1f76adfb03c23bb4242928e8efe6193c301f0c1.tar.gz |
Correct regexp matching of raw bytes
Make regexp matching of raw bytes work in all combinations of unibyte
and multibyte patterns and targets, as exact strings and in character
alternatives (bug#3687).
* src/regex-emacs.c (analyze_first):
Include raw byte in fastmap when pattern is a multibyte exact string.
Include leading byte in fastmap for raw bytes in character alternatives.
(re_match_2_internal):
Decrement the byte count by the number of bytes in the pattern character,
not 1.
* test/src/regex-emacs-tests.el (regexp-unibyte-unibyte)
(regexp-multibyte-unibyte, regexp-unibyte-multibyte)
(regexp-multibyte-multibyte): New tests.
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r-- | src/regex-emacs.c | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index c353a78fb4f..5887eaa30c7 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -2794,6 +2794,7 @@ static int analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) { int j, k; + int nbits; bool not; /* If all elements for base leading-codes in fastmap is set, this @@ -2854,7 +2855,14 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) each byte is a character. Thus, this works in both cases. */ fastmap[p[1]] = 1; - if (! multibyte) + if (multibyte) + { + /* Cover the case of matching a raw char in a + multibyte regexp against unibyte. */ + if (CHAR_BYTE8_HEAD_P (p[1])) + fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1; + } + else { /* For the case of matching this unibyte regex against multibyte, we must set a leading code of @@ -2886,11 +2894,18 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte) case charset: if (!fastmap) break; not = (re_opcode_t) *(p - 1) == charset_not; - for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; - j >= 0; j--) + nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; + p++; + for (j = 0; j < nbits; j++) if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) fastmap[j] = 1; + /* To match raw bytes (in the 80..ff range) against multibyte + strings, add their leading bytes to the fastmap. */ + for (j = 0x80; j < nbits; j++) + if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) + fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1; + if (/* Any leading code can possibly start a character which doesn't match the specified set of characters. */ not @@ -4251,8 +4266,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, } p += pat_charlen; d++; + mcnt -= pat_charlen; } - while (--mcnt); + while (mcnt > 0); break; |