diff options
author | Norihiro Tanaka <noritnk@kcn.ne.jp> | 2016-08-31 23:44:08 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2016-09-01 08:52:47 -0700 |
commit | 960ad317db21e781b04010f4128bb149273a3327 (patch) | |
tree | 20d0faaef5877a432f12ef4843e8f851cc04c7b5 /src/searchutils.c | |
parent | dc9d5bf33857b6c84f233e7c0d3192993b287928 (diff) | |
download | grep-960ad317db21e781b04010f4128bb149273a3327.tar.gz |
grep: speed up -iF in multibyte locales
In a multibyte locale, if a pattern is composed only of single-byte
characters, all of their case counterparts are also single-byte
characters, and the pattern contains no invalid sequences, grep -iF
uses the fgrep matcher, the same as in a single-byte locale (Bug#23752).
* NEWS: Mention it.
* src/grep.c (lonesome_lower): New constant.
(fgrep_icase_available): New function.
(fgrep_to_grep_pattern): Simplify it.
(main): Use them.
* src/searchutils.c (kwsinit): New arg MB_TRANS; all uses changed.
Try the fgrep matcher for case-insensitive matching by grep -F in a
multibyte locale.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- | src/searchutils.c | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/src/searchutils.c b/src/searchutils.c index 8081d418..87f51a4a 100644 --- a/src/searchutils.c +++ b/src/searchutils.c @@ -25,15 +25,33 @@ #define NCHAR (UCHAR_MAX + 1) void -kwsinit (kwset_t *kwset) +kwsinit (kwset_t *kwset, bool mb_trans) { static char trans[NCHAR]; int i; - if (match_icase && MB_CUR_MAX == 1) + if (match_icase && (MB_CUR_MAX == 1 || mb_trans)) { - for (i = 0; i < NCHAR; ++i) - trans[i] = toupper (i); + if (MB_CUR_MAX == 1) + for (i = 0; i < NCHAR; ++i) + trans[i] = toupper (i); + else + for (i = 0; i < NCHAR; ++i) + { + wint_t wc = localeinfo.sbctowc[i]; + wint_t uwc = towupper (wc); + if (uwc != wc) + { + char s[MB_LEN_MAX]; + mbstate_t mbs = { 0 }; + size_t len = wcrtomb (s, uwc, &mbs); + if (len > 1) + abort (); + trans[i] = s[0]; + } + else + trans[i] = i; + } *kwset = kwsalloc (trans, false); } |