summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNorihiro Tanaka <noritnk@kcn.ne.jp>2014-04-28 21:13:57 +0900
committerPaul Eggert <eggert@cs.ucla.edu>2014-04-30 18:31:01 -0700
commit74e77131657a127f00e6ddc860968ccb3d2f6e03 (patch)
tree2387963a9a432fd17d6311d0945ec83733f1b07a
parentd4c8de2f3e4f6dfaf964f04600d4bb37285d5623 (diff)
downloadgrep-74e77131657a127f00e6ddc860968ccb3d2f6e03.tar.gz
grep: simplify superset
* src/dfa.h (dfahint): Remove decl. (dfasuperset): New decl. * src/dfa.c (dfahint): Remove. (dfassbuild): Rename from dfasuperset. (dfasuperset): New function. It returns the superset of D. * src/dfasearch.c: Use dfasuperset instead of dfahint, and simplify.
-rw-r--r--src/dfa.c26
-rw-r--r--src/dfa.h13
-rw-r--r--src/dfasearch.c106
3 files changed, 69 insertions, 76 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 507d7f60..1bc69250 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3395,24 +3395,12 @@ dfaexec (struct dfa *d, char const *begin, char *end,
return (char *) p;
}
-/* Search through a buffer looking for a potential match for D.
- Return the offset of the byte after the first potential match.
- If there is no match, return (size_t) -1. If D lacks a superset
- so it's not known whether there is a match, return (size_t) -2.
- BEGIN points to the beginning of the buffer, and END points to the
- first byte after its end. Store a sentinel byte (usually newline)
- in *END, so the actual buffer must be one byte longer. If COUNT is
- non-NULL, increment *COUNT once for each newline processed. */
-size_t
-dfahint (struct dfa *d, char const *begin, char *end, size_t *count)
+/* Return superset for D, which searchs through a buffer looking for a
+ potential match. */
+struct dfa *
+dfasuperset (struct dfa *d)
{
- if (! d->superset)
- return -2;
- else
- {
- char const *match = dfaexec (d->superset, begin, end, 1, count, NULL);
- return match ? match - begin : -1;
- }
+ return d->superset;
}
bool
@@ -3499,7 +3487,7 @@ dfaoptimize (struct dfa *d)
}
static void
-dfasuperset (struct dfa *d)
+dfassbuild (struct dfa *d)
{
size_t i, j;
charclass ccl;
@@ -3583,7 +3571,7 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
dfaparse (s, len, d);
dfamust (d);
dfaoptimize (d);
- dfasuperset (d);
+ dfassbuild (d);
dfaanalyze (d, searchflag);
if (d->superset)
dfaanalyze (d->superset, searchflag);
diff --git a/src/dfa.h b/src/dfa.h
index fbcb191d..2de7ba83 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -71,16 +71,9 @@ extern void dfacomp (char const *, size_t, struct dfa *, int);
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
int newline, size_t *count, int *backref);
-/* Search through a buffer looking for a potential match for D.
- Return the offset of the byte after the first potential match.
- If there is no match, return (size_t) -1. If D lacks a superset
- so it's not known whether there is a match, return (size_t) -2.
- BEGIN points to the beginning of the buffer, and END points to the
- first byte after its end. Store a sentinel byte (usually newline)
- in *END, so the actual buffer must be one byte longer. If COUNT is
- non-NULL, increment *COUNT once for each newline processed. */
-extern size_t dfahint (struct dfa *d, char const *begin, char *end,
- size_t *count);
+/* Return superset for D, which searchs through a buffer looking for a
+ potential match. */
+extern struct dfa *dfasuperset (struct dfa *d);
/* Return true if the DFA is likely to be fast. */
extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 7edc96b7..c1822b45 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -213,6 +213,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
size_t len, best_len;
struct kwsmatch kwsm;
size_t i;
+ struct dfa *superset = dfasuperset (dfa);
bool dfafast = dfaisfast (dfa);
mb_start = buf;
@@ -220,26 +221,37 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
for (beg = end = buf; end < buflim; beg = end)
{
+ end = buflim;
+
if (!start_ptr)
{
- /* We don't care about an exact match. */
+ char const *next_beg, *dfa_beg = beg;
+ size_t count = 0;
+ bool narrowed = false;
+
+ /* Try matching with KWset, if it's defined. */
if (kwset)
{
+ char const *prev_beg;
+
/* Find a possible match using the KWset matcher. */
size_t offset = kwsexec (kwset, beg - begline,
buflim - beg + begline, &kwsm);
if (offset == (size_t) -1)
goto failure;
match = beg + offset;
-
- /* Narrow down to the line containing the candidate. */
+ prev_beg = beg;
+
+ /* Narrow down to the line we've found. */
beg = memrchr (buf, eol, match - buf);
beg = beg ? beg + 1 : buf;
- end = memchr (match, eol, buflim - match);
- end = end ? end + 1 : buflim;
+ dfa_beg = beg;
if (kwsm.index < kwset_exact_matches)
{
+ end = memchr (match, eol, buflim - match);
+ end = end ? end + 1 : buflim;
+
if (MB_CUR_MAX == 1)
goto success;
if (mb_start < beg)
@@ -250,61 +262,63 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
/* The matched line starts in the middle of a multibyte
character. Perform the DFA search starting from the
beginning of the next character. */
- if (! dfaexec (dfa, mb_start, (char *) end, 0, NULL,
- &backref))
- continue;
+ dfa_beg = mb_start;
+ narrowed = true;
}
else if (!dfafast)
{
- if (dfahint (dfa, beg, (char *) end, NULL) == (size_t) -1)
- continue;
- if (! dfaexec (dfa, beg, (char *) end, 0, NULL, &backref))
- continue;
+ end = memchr (match, eol, buflim - match);
+ end = end ? end + 1 : buflim;
+ narrowed = true;
}
}
- if (!kwset || dfafast)
+
+ /* Try matching with the superset of DFA, if it's defined. */
+ if (superset && !(kwset && kwsm.index < kwset_exact_matches))
{
- /* No good fixed strings or DFA is fast; use DFA. */
- size_t offset, count;
- char const *next_beg;
- count = 0;
- offset = dfahint (dfa, beg, (char *) buflim, &count);
- if (offset == (size_t) -1)
- goto failure;
- if (offset == (size_t) -2)
+ next_beg = dfaexec (superset, dfa_beg, (char *) end, 1, &count, NULL);
+ /* If there's no match, or if we've matched the sentinel,
+ we're done. */
+ if (next_beg == NULL || next_beg == end)
+ continue;
+ if (!narrowed)
{
- /* No use hint. */
- next_beg = dfaexec (dfa, beg, (char *) buflim, 0,
- NULL, &backref);
- /* If there's no match, or if we've matched the sentinel,
- we're done. */
- if (next_beg == NULL || next_beg == buflim)
- goto failure;
+ /* Narrow down to the line we've found. */
+ if (count != 0)
+ {
+ beg = memrchr (buf, eol, next_beg - buf);
+ beg = beg ? beg + 1 : buf;
+
+ /* If dfaexec may match in multiple lines, try to
+ match in one line. */
+ end = beg;
+ continue;
+ }
+ end = memchr (next_beg, eol, buflim - next_beg);
+ end = end ? end + 1 : buflim;
+ narrowed = true;
}
- else
- next_beg = beg + offset;
+ }
+
+ /* Try matching with DFA. */
+ next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref);
+
+ /* If there's no match, or if we've matched the sentinel,
+ we're done. */
+ if (next_beg == NULL || next_beg == end)
+ continue;
+ if (!narrowed)
+ {
/* Narrow down to the line we've found. */
- beg = memrchr (buf, eol, next_beg - buf);
- beg = beg ? beg + 1 : buf;
if (count != 0)
{
- /* If dfahint may match in multiple lines, try to
- match in one line. */
- end = beg;
- continue;
+ beg = memrchr (buf, eol, next_beg - buf);
+ beg = beg ? beg + 1 : buf;
}
end = memchr (next_beg, eol, buflim - next_beg);
end = end ? end + 1 : buflim;
- if (offset != (size_t) -2)
- {
- next_beg = dfaexec (dfa, beg, (char *) end, 0, NULL,
- &backref);
- /* If there's no match, or if we've matched the sentinel,
- we're done. */
- if (next_beg == NULL || next_beg == end)
- continue;
- }
}
+
/* Successful, no backreferences encountered! */
if (!backref)
goto success;
@@ -314,8 +328,6 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
{
/* We are looking for the leftmost (then longest) exact match.
We will go through the outer loop only once. */
- beg = buf;
- end = buflim;
ptr = start_ptr;
}