diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-05-05 23:53:59 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-05-05 23:55:58 -0700 |
commit | 94555dd281cdcd7530bc2c4466f0bbfd8d47d5c0 (patch) | |
tree | 55aadf31afc07a2f09fcaf223207811391ae00eb /src/searchutils.c | |
parent | 00024ede52a0d9cb04ac6549cabffab485f5e8aa (diff) | |
download | grep-94555dd281cdcd7530bc2c4466f0bbfd8d47d5c0.tar.gz |
grep: fix -w match next to a multibyte letter
* NEWS: Document this.
* src/dfasearch.c, src/kwsearch.c (WCHAR): Remove.
(wordchar): New static function.
* src/dfasearch.c (EGexecute):
* src/kwsearch.c (Fexecute): Use the new functions, so that the
code works correctly if a multibyte character adjacent to the
match has two or more bytes.
* src/search.h, src/searchutils.c (mb_prev_wc, mb_next_wc):
New functions.
* tests/word-delim-multibyte: Add a test for grep -w (which now
passes), and a test for \> (which still fails). The \< test also
still fails.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- | src/searchutils.c | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
index d92ede5a..5eb9a12f 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -263,3 +263,27 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
   *mb_start = p;
   return p == cur ? 0 : cur - p0;
 }
+
+/* In the buffer BUF, return the wide character that is encoded just
+   before CUR.  The buffer ends at END.  Return WEOF if there is no
+   wide character just before CUR (e.g. when CUR is at the start of BUF).  */
+wint_t
+mb_prev_wc (char const *buf, char const *cur, char const *end)
+{
+  if (cur == buf)               /* nothing precedes the buffer start */
+    return WEOF;
+  char const *p = buf;          /* scan position handed to mb_goback */
+  cur--;                        /* the byte just before the original CUR */
+  cur -= mb_goback (&p, cur, end);  /* presumably that character's first byte -- confirm */
+  return mb_next_wc (cur, end);     /* decode forward from the adjusted CUR */
+}
+
+/* Return the wide character that is encoded at CUR.  The buffer ends
+   at END.  Return WEOF if the bytes at CUR are invalid or incomplete.  */
+wint_t
+mb_next_wc (char const *cur, char const *end)
+{
+  wchar_t wc;                   /* filled in by mbrtowc on success */
+  mbstate_t mbs = { 0 };        /* zeroed, i.e. the initial conversion state */
+  return mbrtowc (&wc, cur, end - cur, &mbs) < (size_t) -2 ? wc : WEOF;
+} |