summaryrefslogtreecommitdiff
path: root/src/dfasearch.c
diff options
context:
space:
mode:
author: Norihiro Tanaka <noritnk@kcn.ne.jp> 2014-05-09 15:26:38 +0900
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-05-09 14:49:30 -0700
commit: 523e085b6a0e6b8d19e912011dd62b15c05a14d7 (patch)
tree: c70462632786a371c4ffac1a1075a1628bd9c6f3 /src/dfasearch.c
parent: a0951492eb630fb26e33f02f440a24305db33b51 (diff)
download: grep-523e085b6a0e6b8d19e912011dd62b15c05a14d7.tar.gz
grep: retry DFA superset after matching multiple lines
* src/dfasearch.c (EGexecute): Without this patch, the code reverts to KWset when the DFA superset matches multiple lines. However, if the DFA superset matches multiple lines, it most likely also matches a single line, and reverting to KWset means dfafast won't work effectively. Change the code so that it retries the DFA superset immediately after it matches multiple lines. On my platform this improves the performance of "LC_ALL=C grep '\(ab\)cd\1d' k" from 3.48 to 2.14 seconds realtime, where k contains the output of "yes abcdabc | head -50000000".
Diffstat (limited to 'src/dfasearch.c')
-rw-r--r-- src/dfasearch.c 32
1 files changed, 19 insertions, 13 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 42026660..9fb74496 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -284,26 +284,32 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
/* Try matching with the superset of DFA, if it's defined. */
if (superset && !exact_kwset_match)
{
- next_beg = dfaexec (superset, dfa_beg, (char *) end, 1,
- &count, NULL);
- /* If there's no match, or if we've matched the sentinel,
- we're done. */
- if (next_beg == NULL || next_beg == end)
- continue;
-
- /* Narrow down to the line we've found. */
- if (count != 0)
+ while (true)
{
+ next_beg = dfaexec (superset, dfa_beg, (char *) end, 1,
+ &count, NULL);
+ /* If there's no match, or if we've matched the sentinel,
+ we're done. */
+ if (next_beg == NULL || next_beg == end)
+ break;
+
+ if (count == 0)
+ break;
+ count = 0;
+
/* If dfaexec may match in multiple lines, try to
match in one line. */
- end = memrchr (buf, eol, next_beg - buf);
- end++;
- continue;
+ beg = memrchr (buf, eol, next_beg - buf);
+ beg = beg ? beg + 1 : buf;
+ dfa_beg = beg;
}
+ if (next_beg == NULL || next_beg == end)
+ continue;
+
+ /* Narrow down to the line we've found. */
end = memchr (next_beg, eol, buflim - next_beg);
end = end ? end + 1 : buflim;
}
-
/* Try matching with DFA. */
next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref);