summaryrefslogtreecommitdiff
path: root/src/searchutils.c
diff options
context:
space:
mode:
authorNorihiro Tanaka <noritnk@kcn.ne.jp>2016-08-31 23:44:08 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2016-09-01 08:52:47 -0700
commit960ad317db21e781b04010f4128bb149273a3327 (patch)
tree20d0faaef5877a432f12ef4843e8f851cc04c7b5 /src/searchutils.c
parentdc9d5bf33857b6c84f233e7c0d3192993b287928 (diff)
downloadgrep-960ad317db21e781b04010f4128bb149273a3327.tar.gz
grep: speed up -iF in multibyte locales
In a multibyte locale, if a pattern is composed only of single-byte characters, all of their case counterparts are also single-byte characters, and the pattern contains no invalid sequences, grep -iF uses the fgrep matcher, the same as in a single-byte locale (Bug#23752). * NEWS: Mention it. * src/grep.c (lonesome_lower): New constant. (fgrep_icase_available): New function. (fgrep_to_grep_pattern): Simplify it. (main): Use them. * src/searchutils.c (kwsinit): New arg MB_TRANS; all uses changed. Try the fgrep matcher for case-insensitive matching by grep -F in a multibyte locale.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r--src/searchutils.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
index 8081d418..87f51a4a 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -25,15 +25,33 @@
#define NCHAR (UCHAR_MAX + 1)
void
-kwsinit (kwset_t *kwset)
+kwsinit (kwset_t *kwset, bool mb_trans)
{
static char trans[NCHAR];
int i;
- if (match_icase && MB_CUR_MAX == 1)
+ if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
{
- for (i = 0; i < NCHAR; ++i)
- trans[i] = toupper (i);
+ if (MB_CUR_MAX == 1)
+ for (i = 0; i < NCHAR; ++i)
+ trans[i] = toupper (i);
+ else
+ for (i = 0; i < NCHAR; ++i)
+ {
+ wint_t wc = localeinfo.sbctowc[i];
+ wint_t uwc = towupper (wc);
+ if (uwc != wc)
+ {
+ char s[MB_LEN_MAX];
+ mbstate_t mbs = { 0 };
+ size_t len = wcrtomb (s, uwc, &mbs);
+ if (len > 1)
+ abort ();
+ trans[i] = s[0];
+ }
+ else
+ trans[i] = i;
+ }
*kwset = kwsalloc (trans, false);
}