diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-04-25 18:31:47 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-04-26 20:58:49 -0700 |
commit | 7600fde5bcf50872a89d427fea4add1f590ada36 (patch) | |
tree | 536c0824aece439f6480a7f1c0f081873c54aeed /src/kwset.c | |
parent | d3d99505cb704563cedf4643301d54ecc717ae57 (diff) | |
download | grep-7600fde5bcf50872a89d427fea4add1f590ada36.tar.gz |
kwset: improve performance when large Boyer-Moore key doesn't match
* src/kwset.c (bmexec): As a heuristic, prefer memchr to seeking
by delta1 only when the latter doesn't advance much.
Diffstat (limited to 'src/kwset.c')
-rw-r--r-- | src/kwset.c | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/src/kwset.c b/src/kwset.c index f86ee03f..406fa0e0 100644 --- a/src/kwset.c +++ b/src/kwset.c @@ -570,6 +570,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) /* 11 is not a bug, the initial offset happens only once. */ for (ep = text + size - 11 * len; tp <= ep; ) { + char const *tp0 = tp; d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; if (d != 0) @@ -584,9 +585,14 @@ bmexec (kwset_t kwset, char const *text, size_t size) d = d1[U(tp[-1])], tp += d; if (d != 0) { - /* Typically memchr is faster than seeking by - delta1 when there is no chance to match for - a while. */ + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + + /* As a heuristic, prefer memchr to seeking by + delta1 when the latter doesn't advance much. */ + int advance_heuristic = 4 * sizeof (long); + if (advance_heuristic <= tp - tp0) + goto big_advance; tp--; tp = memchr_trans (tp, gc1, text + size - tp, trans); if (! tp) @@ -597,6 +603,7 @@ bmexec (kwset_t kwset, char const *text, size_t size) } if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset)) return tp - text; + big_advance:; } /* Now we have only a few characters left to search. We |