diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2010-03-29 08:54:30 +0200 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2010-03-29 10:23:22 +0200 |
commit | 6d952beebdac1d8f31ba20abb12a756a9877e50d (patch) | |
tree | 37535a05da2c18d2fbdec8caa04594d1d3408d17 | |
parent | c2b9a4fed35d198b8af8f38af877b8d1d52ec6f1 (diff) | |
download | grep-6d952beebdac1d8f31ba20abb12a756a9877e50d.tar.gz |
grep -F: fix a bug with SJIS character sets
Commit db9d6 would erroneously skip matches in SJIS character sets. In
this character set, low bytes (i.e. ASCII bytes) are also valid second
bytes of a double-byte character, so the search has to continue looking
for a match even after matching in the middle of a double-byte character.
* src/kwsearch.c: Ensure that beg is advanced by at least one byte,
but do not fail immediately after matching in the middle of a double-byte
character.
-rw-r--r-- | src/kwsearch.c | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/src/kwsearch.c b/src/kwsearch.c
index a20c3a86..973eb60d 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -105,14 +105,20 @@ Fexecute (char const *buf, size_t size, size_t *match_size,
             goto failure;
       len = kwsmatch.size[0];
 #ifdef MBS_SUPPORT
-      char const *s0 = mb_start;
       if (MB_CUR_MAX > 1
           && is_mb_middle (&mb_start, beg + offset, buf + size, len))
         {
-          if (mb_start == s0)
-            goto failure;
-          beg = mb_start - 1;
-          continue; /* It is a part of multibyte character. */
+          /* The match was a part of multibyte character, advance at least
+             one byte to ensure no infinite loop happens. */
+          mbstate_t s;
+          memset (&s, 0, sizeof s);
+          size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s);
+          if (mb_len == (size_t) -2)
+            goto failure;
+          beg = mb_start;
+          if (mb_len != (size_t) -1)
+            beg += mb_len - 1;
+          continue;
         }
 #endif /* MBS_SUPPORT */
       beg += offset;