summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paolo Bonzini <bonzini@gnu.org> 2010-03-29 08:54:30 +0200
committer Paolo Bonzini <bonzini@gnu.org> 2010-03-29 10:23:22 +0200
commit 6d952beebdac1d8f31ba20abb12a756a9877e50d (patch)
tree 37535a05da2c18d2fbdec8caa04594d1d3408d17
parent c2b9a4fed35d198b8af8f38af877b8d1d52ec6f1 (diff)
download grep-6d952beebdac1d8f31ba20abb12a756a9877e50d.tar.gz
grep -F: fix a bug with SJIS character sets
Commit db9d6 would erroneously skip matches in SJIS character sets. In these character sets, low bytes (i.e. ASCII bytes) are also valid second bytes of a double-byte character, so the search has to keep looking for a match even after a candidate match turns out to start in the middle of a double-byte character.

* src/kwsearch.c: Ensure that beg is advanced by at least one byte, but do not fail immediately after matching in the middle of a double-byte character.
-rw-r--r-- src/kwsearch.c 16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/kwsearch.c b/src/kwsearch.c
index a20c3a86..973eb60d 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -105,14 +105,20 @@ Fexecute (char const *buf, size_t size, size_t *match_size,
goto failure;
len = kwsmatch.size[0];
#ifdef MBS_SUPPORT
- char const *s0 = mb_start;
if (MB_CUR_MAX > 1 && is_mb_middle (&mb_start, beg + offset, buf + size,
len))
{
- if (mb_start == s0)
- goto failure;
- beg = mb_start - 1;
- continue; /* It is a part of multibyte character. */
+ /* The match was a part of multibyte character, advance at least
+ one byte to ensure no infinite loop happens. */
+ mbstate_t s;
+ memset (&s, 0, sizeof s);
+ size_t mb_len = mbrlen (mb_start, (buf + size) - (beg + offset), &s);
+ if (mb_len == (size_t) -2)
+ goto failure;
+ beg = mb_start;
+ if (mb_len != (size_t) -1)
+ beg += mb_len - 1;
+ continue;
}
#endif /* MBS_SUPPORT */
beg += offset;