diff options
author | Norihiro Tanaka <noritnk@kcn.ne.jp> | 2014-05-09 15:26:38 +0900 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-05-09 14:49:30 -0700 |
commit | 523e085b6a0e6b8d19e912011dd62b15c05a14d7 (patch) | |
tree | c70462632786a371c4ffac1a1075a1628bd9c6f3 /src/dfasearch.c | |
parent | a0951492eb630fb26e33f02f440a24305db33b51 (diff) | |
download | grep-523e085b6a0e6b8d19e912011dd62b15c05a14d7.tar.gz |
grep: retry DFA superset after matching multiple lines
* src/dfasearch.c (EGexecute): Without this patch, the code reverts
to KWset when the DFA superset matches multiple lines.
However, if the DFA superset matches multiple lines, it most likely
also matches a single line, and reverting to KWset means dfafast
won't work effectively. Change the code so that it retries the DFA
superset immediately after it matches multiple lines. On my platform
this improves the performance of "LC_ALL=C grep '\(ab\)cd\1d' k" from
3.48 to 2.14 seconds realtime, where k contains the output of
"yes abcdabc | head -50000000".
Diffstat (limited to 'src/dfasearch.c')
-rw-r--r-- | src/dfasearch.c | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c index 42026660..9fb74496 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -284,26 +284,32 @@ EGexecute (char const *buf, size_t size, size_t *match_size, /* Try matching with the superset of DFA, if it's defined. */ if (superset && !exact_kwset_match) { - next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, - &count, NULL); - /* If there's no match, or if we've matched the sentinel, - we're done. */ - if (next_beg == NULL || next_beg == end) - continue; - - /* Narrow down to the line we've found. */ - if (count != 0) + while (true) { + next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, + &count, NULL); + /* If there's no match, or if we've matched the sentinel, + we're done. */ + if (next_beg == NULL || next_beg == end) + break; + + if (count == 0) + break; + count = 0; + /* If dfaexec may match in multiple lines, try to match in one line. */ - end = memrchr (buf, eol, next_beg - buf); - end++; - continue; + beg = memrchr (buf, eol, next_beg - buf); + beg = beg ? beg + 1 : buf; + dfa_beg = beg; } + if (next_beg == NULL || next_beg == end) + continue; + + /* Narrow down to the line we've found. */ end = memchr (next_beg, eol, buflim - next_beg); end = end ? end + 1 : buflim; } - /* Try matching with DFA. */ next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref); |