From fb7d53887851476c84f38ecc9a63901d5d620806 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka
Date: Wed, 30 Apr 2014 11:22:27 +0900
Subject: grep: make KWset and DFA agree about invalid sequences in patterns

See: http://bugs.gnu.org/17376
* src/dfa.c (dfambcache): Don't cache invalid sequences, because they
can't be represented by wide characters.
(dfambcache, mbs_to_wchar): Return WEOF for invalid sequences.
(ctok): New global variable.
(parse_bracket_exp, atom, match_anychar, match_mb_charset): Don't
allow WEOF.
(lex): Set 'ctok'.
* src/kwsearch.c (Fexecute):
* src/searchutils.c (is_mb_middle): Don't check here.
* tests/invalid-multibyte-infloop: Adjust to fixed behavior.
* tests/prefix-of-multibyte: Add test cases for this bug.
---
 src/searchutils.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/searchutils.c')

diff --git a/src/searchutils.c b/src/searchutils.c
index 6440f073..3c78f31c 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -228,7 +228,6 @@ is_mb_middle (const char **good, const char *buf, const char *end,
               size_t match_len)
 {
   const char *p = *good;
-  const char *prev = p;
   mbstate_t cur_state;
 
   if (using_utf8 () && buf - p > MB_CUR_MAX)
@@ -250,10 +249,6 @@ is_mb_middle (const char **good, const char *buf, const char *end,
       if (mbclen == (size_t) -2)
         mbclen = mbrlen (p, end - p, &cur_state);
 
-      /* Store the beginning of the previous complete multibyte character. */
-      if (mbclen != (size_t) -2)
-        prev = p;
-
       if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
         {
           /* An invalid sequence, or a truncated multibyte character.
@@ -264,11 +259,11 @@ is_mb_middle (const char **good, const char *buf, const char *end,
       p += mbclen;
     }
 
-  *good = prev;
+  *good = p;
 
   if (p > buf)
     return true;
 
   /* P == BUF here. */
-  return 0 < match_len && match_len < mbrlen (p, end - p, &cur_state);
+  return false;
 }
-- 
cgit v1.2.1