summary refs log tree commit diff
path: root/src/searchutils.c
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2014-05-05 23:53:59 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2014-05-05 23:55:58 -0700
commit 94555dd281cdcd7530bc2c4466f0bbfd8d47d5c0 (patch)
tree 55aadf31afc07a2f09fcaf223207811391ae00eb /src/searchutils.c
parent 00024ede52a0d9cb04ac6549cabffab485f5e8aa (diff)
download grep-94555dd281cdcd7530bc2c4466f0bbfd8d47d5c0.tar.gz
grep: fix -w match next to a multibyte letter
* NEWS: Document this. * src/dfasearch.c, src/kwsearch.c (WCHAR): Remove. (wordchar): New static function. * src/dfasearch.c (EGexecute): * src/kwsearch.c (Fexecute): Use the new functions, so that the code works correctly if a multibyte character adjacent to the match has two or more bytes. * src/search.h, src/searchutils.c (mb_prev_wc, mb_next_wc): New functions. * tests/word-delim-multibyte: Add a test for grep -w (which now passes), and a test for \> (which still fails). The \< test also still fails.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- src/searchutils.c 24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
index d92ede5a..5eb9a12f 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -263,3 +263,27 @@ mb_goback (char const **mb_start, char const *cur, char const *end)
*mb_start = p;
return p == cur ? 0 : cur - p0;
}
+
+/* In the buffer BUF, return the wide character that is encoded just
+ before CUR. The buffer ends at END. Return WEOF if CUR is at BUF,
+ or if mb_next_wc finds no wide character at the backed-up position. */
+wint_t
+mb_prev_wc (char const *buf, char const *cur, char const *end)
+{
+ if (cur == buf)
+ return WEOF; /* Nothing precedes the start of the buffer. */
+ char const *p = buf; /* Passed by address to mb_goback; not read again here. */
+ cur--; /* Move onto the byte just before the original CUR. */
+ cur -= mb_goback (&p, cur, end); /* Presumably backs up to that character's first byte -- confirm against mb_goback. */
+ return mb_next_wc (cur, end); /* Decode forward from the adjusted position. */
+}
+
+/* Return the wide character that is encoded at CUR. The buffer ends
+ at END. Return WEOF if mbrtowc cannot decode a complete character there. */
+wint_t
+mb_next_wc (char const *cur, char const *end)
+{
+ wchar_t wc;
+ mbstate_t mbs = { 0 }; /* Begin in the initial conversion state on every call. */
+ return mbrtowc (&wc, cur, end - cur, &mbs) < (size_t) -2 ? wc : WEOF; /* (size_t) -1 and (size_t) -2 are mbrtowc's invalid and incomplete results. */
+}