summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Norihiro Tanaka <noritnk@kcn.ne.jp>, 2014-12-07 20:16:41 +0900
committer: Jim Meyering <meyering@fb.com>, 2015-07-25 09:17:11 -0700
commit: bfa9df03034ccfb65da9950cf1e1207faef1213c (patch)
tree: e33eab9b28356849f85c5da7172400cb433f2a23
parent: ea0ebaaa6106ad38afa3cf858a1b54ec675afb05 (diff)
download: grep-bfa9df03034ccfb65da9950cf1e1207faef1213c.tar.gz
dfa: remove word delimiter support for multibyte locales
DFA supports word delimiter expressions, but it does not behave correctly for multibyte locales. Even if it were to be fixed, the DFA matcher's performance would be no better than that of regex. Thus, this change removes DFA support for word delimiter expressions in multibyte locales.

* src/dfa.c (dfa_supported): Return false also when a pattern uses any word delimiter expression in a multibyte locale.
-rw-r--r-- src/dfa.c 9
1 files changed, 9 insertions, 0 deletions
diff --git a/src/dfa.c b/src/dfa.c
index a28404be..d1e76e14 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3358,6 +3358,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
Here is the list of features that make this DFA matcher punt:
- [M-N]-range-in-MB-locale: regex is up to 25% faster on [a-z]
- back-reference: (.)\1
+ - word-delimiter-in-MB-locale: \<, \>, \b
*/
static inline char *
dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
@@ -3645,6 +3646,14 @@ dfa_supported (struct dfa const *d)
{
switch (d->tokens[i])
{
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ if (!d->multibyte)
+ continue;
+ /* fallthrough */
+
case BACKREF:
case MBCSET:
return false;