diff options
author | Norihiro Tanaka <noritnk@kcn.ne.jp> | 2014-04-28 21:13:57 +0900 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-04-30 18:31:01 -0700 |
commit | 74e77131657a127f00e6ddc860968ccb3d2f6e03 (patch) | |
tree | 2387963a9a432fd17d6311d0945ec83733f1b07a | |
parent | d4c8de2f3e4f6dfaf964f04600d4bb37285d5623 (diff) | |
download | grep-74e77131657a127f00e6ddc860968ccb3d2f6e03.tar.gz |
grep: simplify superset
* src/dfa.h (dfahint): Remove decl.
(dfasuperset): New decl.
* src/dfa.c (dfahint): Remove.
(dfassbuild): Rename from dfasuperset.
(dfasuperset): New function. It returns the superset of D.
* src/dfasearch.c: Use dfasuperset instead of dfahint, and simplify.
-rw-r--r-- | src/dfa.c | 26 | ||||
-rw-r--r-- | src/dfa.h | 13 | ||||
-rw-r--r-- | src/dfasearch.c | 106 |
3 files changed, 69 insertions, 76 deletions
@@ -3395,24 +3395,12 @@ dfaexec (struct dfa *d, char const *begin, char *end, return (char *) p; } -/* Search through a buffer looking for a potential match for D. - Return the offset of the byte after the first potential match. - If there is no match, return (size_t) -1. If D lacks a superset - so it's not known whether there is a match, return (size_t) -2. - BEGIN points to the beginning of the buffer, and END points to the - first byte after its end. Store a sentinel byte (usually newline) - in *END, so the actual buffer must be one byte longer. If COUNT is - non-NULL, increment *COUNT once for each newline processed. */ -size_t -dfahint (struct dfa *d, char const *begin, char *end, size_t *count) +/* Return superset for D, which searchs through a buffer looking for a + potential match. */ +struct dfa * +dfasuperset (struct dfa *d) { - if (! d->superset) - return -2; - else - { - char const *match = dfaexec (d->superset, begin, end, 1, count, NULL); - return match ? match - begin : -1; - } + return d->superset; } bool @@ -3499,7 +3487,7 @@ dfaoptimize (struct dfa *d) } static void -dfasuperset (struct dfa *d) +dfassbuild (struct dfa *d) { size_t i, j; charclass ccl; @@ -3583,7 +3571,7 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) dfaparse (s, len, d); dfamust (d); dfaoptimize (d); - dfasuperset (d); + dfassbuild (d); dfaanalyze (d, searchflag); if (d->superset) dfaanalyze (d->superset, searchflag); @@ -71,16 +71,9 @@ extern void dfacomp (char const *, size_t, struct dfa *, int); extern char *dfaexec (struct dfa *d, char const *begin, char *end, int newline, size_t *count, int *backref); -/* Search through a buffer looking for a potential match for D. - Return the offset of the byte after the first potential match. - If there is no match, return (size_t) -1. If D lacks a superset - so it's not known whether there is a match, return (size_t) -2. - BEGIN points to the beginning of the buffer, and END points to the - first byte after its end. Store a sentinel byte (usually newline) - in *END, so the actual buffer must be one byte longer. If COUNT is - non-NULL, increment *COUNT once for each newline processed. */ -extern size_t dfahint (struct dfa *d, char const *begin, char *end, - size_t *count); +/* Return superset for D, which searchs through a buffer looking for a + potential match. */ +extern struct dfa *dfasuperset (struct dfa *d); /* Return true if the DFA is likely to be fast. */ extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE; diff --git a/src/dfasearch.c b/src/dfasearch.c index 7edc96b7..c1822b45 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -213,6 +213,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size, size_t len, best_len; struct kwsmatch kwsm; size_t i; + struct dfa *superset = dfasuperset (dfa); bool dfafast = dfaisfast (dfa); mb_start = buf; @@ -220,26 +221,37 @@ EGexecute (char const *buf, size_t size, size_t *match_size, for (beg = end = buf; end < buflim; beg = end) { + end = buflim; + if (!start_ptr) { - /* We don't care about an exact match. */ + char const *next_beg, *dfa_beg = beg; + size_t count = 0; + bool narrowed = false; + + /* Try matching with KWset, if it's defined. */ if (kwset) { + char const *prev_beg; + /* Find a possible match using the KWset matcher. */ size_t offset = kwsexec (kwset, beg - begline, buflim - beg + begline, &kwsm); if (offset == (size_t) -1) goto failure; match = beg + offset; - - /* Narrow down to the line containing the candidate. */ + prev_beg = beg; + + /* Narrow down to the line we've found. */ beg = memrchr (buf, eol, match - buf); beg = beg ? beg + 1 : buf; - end = memchr (match, eol, buflim - match); - end = end ? end + 1 : buflim; + dfa_beg = beg; if (kwsm.index < kwset_exact_matches) { + end = memchr (match, eol, buflim - match); + end = end ? end + 1 : buflim; + if (MB_CUR_MAX == 1) goto success; if (mb_start < beg) @@ -250,61 +262,63 @@ EGexecute (char const *buf, size_t size, size_t *match_size, /* The matched line starts in the middle of a multibyte character. Perform the DFA search starting from the beginning of the next character. */ - if (! dfaexec (dfa, mb_start, (char *) end, 0, NULL, - &backref)) - continue; + dfa_beg = mb_start; + narrowed = true; } else if (!dfafast) { - if (dfahint (dfa, beg, (char *) end, NULL) == (size_t) -1) - continue; - if (! dfaexec (dfa, beg, (char *) end, 0, NULL, &backref)) - continue; + end = memchr (match, eol, buflim - match); + end = end ? end + 1 : buflim; + narrowed = true; } } - if (!kwset || dfafast) + + /* Try matching with the superset of DFA, if it's defined. */ + if (superset && !(kwset && kwsm.index < kwset_exact_matches)) { - /* No good fixed strings or DFA is fast; use DFA. */ - size_t offset, count; - char const *next_beg; - count = 0; - offset = dfahint (dfa, beg, (char *) buflim, &count); - if (offset == (size_t) -1) - goto failure; - if (offset == (size_t) -2) + next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, &count, NULL); + /* If there's no match, or if we've matched the sentinel, + we're done. */ + if (next_beg == NULL || next_beg == end) + continue; + if (!narrowed) { - /* No use hint. */ - next_beg = dfaexec (dfa, beg, (char *) buflim, 0, - NULL, &backref); - /* If there's no match, or if we've matched the sentinel, - we're done. */ - if (next_beg == NULL || next_beg == buflim) - goto failure; + /* Narrow down to the line we've found. */ + if (count != 0) + { + beg = memrchr (buf, eol, next_beg - buf); + beg = beg ? beg + 1 : buf; + + /* If dfaexec may match in multiple lines, try to + match in one line. */ + end = beg; + continue; + } + end = memchr (next_beg, eol, buflim - next_beg); + end = end ? end + 1 : buflim; + narrowed = true; } - else - next_beg = beg + offset; + } + + /* Try matching with DFA. */ + next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref); + + /* If there's no match, or if we've matched the sentinel, + we're done. */ + if (next_beg == NULL || next_beg == end) + continue; + if (!narrowed) + { /* Narrow down to the line we've found. */ - beg = memrchr (buf, eol, next_beg - buf); - beg = beg ? beg + 1 : buf; if (count != 0) { - /* If dfahint may match in multiple lines, try to - match in one line. */ - end = beg; - continue; + beg = memrchr (buf, eol, next_beg - buf); + beg = beg ? beg + 1 : buf; } end = memchr (next_beg, eol, buflim - next_beg); end = end ? end + 1 : buflim; - if (offset != (size_t) -2) - { - next_beg = dfaexec (dfa, beg, (char *) end, 0, NULL, - &backref); - /* If there's no match, or if we've matched the sentinel, - we're done. */ - if (next_beg == NULL || next_beg == end) - continue; - } } + /* Successful, no backreferences encountered! */ if (!backref) goto success; @@ -314,8 +328,6 @@ EGexecute (char const *buf, size_t size, size_t *match_size, { /* We are looking for the leftmost (then longest) exact match. We will go through the outer loop only once. */ - beg = buf; - end = buflim; ptr = start_ptr; } |