summaryrefslogtreecommitdiff
path: root/src/kwset.c
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2014-04-25 18:31:47 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2014-04-26 20:58:49 -0700
commit7600fde5bcf50872a89d427fea4add1f590ada36 (patch)
tree536c0824aece439f6480a7f1c0f081873c54aeed /src/kwset.c
parentd3d99505cb704563cedf4643301d54ecc717ae57 (diff)
downloadgrep-7600fde5bcf50872a89d427fea4add1f590ada36.tar.gz
kwset: improve performance when large Boyer-Moore key doesn't match
* src/kwset.c (bmexec): As a heuristic, prefer memchr to seeking by delta1 only when the latter doesn't advance much.
Diffstat (limited to 'src/kwset.c')
-rw-r--r--src/kwset.c13
1 files changed, 10 insertions, 3 deletions
diff --git a/src/kwset.c b/src/kwset.c
index f86ee03f..406fa0e0 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -570,6 +570,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
/* 11 is not a bug, the initial offset happens only once. */
for (ep = text + size - 11 * len; tp <= ep; )
{
+ char const *tp0 = tp;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
if (d != 0)
@@ -584,9 +585,14 @@ bmexec (kwset_t kwset, char const *text, size_t size)
d = d1[U(tp[-1])], tp += d;
if (d != 0)
{
- /* Typically memchr is faster than seeking by
- delta1 when there is no chance to match for
- a while. */
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+
+ /* As a heuristic, prefer memchr to seeking by
+ delta1 when the latter doesn't advance much. */
+ int advance_heuristic = 4 * sizeof (long);
+ if (advance_heuristic <= tp - tp0)
+ goto big_advance;
tp--;
tp = memchr_trans (tp, gc1, text + size - tp, trans);
if (! tp)
@@ -597,6 +603,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
}
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
return tp - text;
+ big_advance:;
}
/* Now we have only a few characters left to search. We