summary | refs | log | tree | commit | diff
path: root/src/searchutils.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2020-09-09 12:43:11 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2020-09-09 12:44:37 -0700
commit: de6f36d9b6d702b14ac4ee58dfbcab740c7ca749 (patch)
tree: bcf2ca0f5fd0efa7b31b93874fc6911b39768fce /src/searchutils.c
parent: 1021a92aa915ac500b2be267dde6acf342b86038 (diff)
download: grep-de6f36d9b6d702b14ac4ee58dfbcab740c7ca749.tar.gz
grep: fix -w bug in UTF-8 locales
Problem reported by Mayo Fark (Bug#43225).

* src/searchutils.c (wordchar_prev): In a UTF-8 locale, do not assume
that an encoding-error byte cannot be part of a word constituent, as
this assumption is incorrect for the last byte of a multibyte word
constituent.
* tests/word-delim-multibyte: Add a test for the bug.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- src/searchutils.c 2
1 file changed, 1 insertion, 1 deletion
diff --git a/src/searchutils.c b/src/searchutils.c
index 84c319c7..c4bb8020 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -195,7 +195,7 @@ wordchar_prev (char const *buf, char const *cur, char const *end)
return 0;
unsigned char b = *--cur;
if (! localeinfo.multibyte
- || (localeinfo.using_utf8 && localeinfo.sbclen[b] != -2))
+ || (localeinfo.using_utf8 && localeinfo.sbclen[b] == 1))
return sbwordchar[b];
char const *p = buf;
cur -= mb_goback (&p, NULL, cur, end);