summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2016-12-23 12:43:46 -0800
committer Paul Eggert <eggert@cs.ucla.edu> 2016-12-23 17:22:54 -0800
commit 192d61e2828e13c4a2f1a81cd128721a229c88f9 (patch)
tree 835bafdaac9d9986e9fcac255d54c828763e1164
parent 4fa1971d98c79b56b466eff57117351dc395ee2a (diff)
download grep-192d61e2828e13c4a2f1a81cd128721a229c88f9.tar.gz
grep: speed up -wf in C locale
Problem reported by Norihiro Tanaka (Bug#22357#100).
This patch improves the performance on that benchmark on my platform so that grep is now only about 2x slower than grep 2.26, which means it is considerably faster than grep 2.25 and earlier.
* src/kwsearch.c (Fexecute): Use wordchars_size to boost performance for this case.
* src/search.h, src/searchutils.c (wordchars_size): New function.
-rw-r--r-- src/kwsearch.c 6
-rw-r--r-- src/search.h 1
-rw-r--r-- src/searchutils.c 9
3 files changed, 16 insertions, 0 deletions
diff --git a/src/kwsearch.c b/src/kwsearch.c
index b30dfd06..6005b600 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -150,6 +150,12 @@ Fexecute (char const *buf, size_t size, size_t *match_size,
break;
len = kwsmatch.size[0];
}
+
+ /* No word match was found at BEG. Skip past word constituents,
+ since they cannot precede the next match and not skipping
+ them could make things much slower. */
+ beg += wordchars_size (beg, buf + size);
+ mb_start = beg;
} /* for (beg in buf) */
return -1;
diff --git a/src/search.h b/src/search.h
index 6fe17975..1def4d6b 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,6 +48,7 @@ typedef signed char mb_len_map_t;
/* searchutils.c */
extern void wordinit (void);
extern kwset_t kwsinit (bool);
+extern size_t wordchars_size (char const *, char const *);
extern size_t wordchar_next (char const *, char const *);
extern bool wordchar_prev (char const *, char const *, char const *);
extern ptrdiff_t mb_goback (char const **, char const *, char const *);
diff --git a/src/searchutils.c b/src/searchutils.c
index e0a1db33..6f6ae0b7 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -146,6 +146,15 @@ wordchars_count (char const *buf, char const *end, bool countall)
return n;
}
+/* Return the size in bytes of the longest prefix of BUF that consists
+ solely of word constituents, as computed by wordchars_count with
+ COUNTALL set. The buffer ends at END. */
+size_t
+wordchars_size (char const *buf, char const *end)
+{
+ return wordchars_count (buf, end, true);
+}
+
/* If BUF starts with a word constituent, return the number of bytes
used to represent it; otherwise, return zero. The buffer ends at END. */
size_t