summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/grep.c29
-rw-r--r--src/grep.h3
-rw-r--r--src/search.c173
3 files changed, 111 insertions, 94 deletions
diff --git a/src/grep.c b/src/grep.c
index a0cf3ba8..28ae71be 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -761,6 +761,7 @@ print_line_middle (const char *beg, const char *lim)
{
size_t match_size;
size_t match_offset;
+ const char *cur = beg;
const char *mid = NULL;
char *buf; /* XXX */
const char *ibeg; /* XXX */
@@ -781,9 +782,9 @@ print_line_middle (const char *beg, const char *lim)
ibeg = beg;
}
- while ( lim > beg
- && ( (match_offset = execute(ibeg, lim - beg, &match_size, 1))
- != (size_t) -1))
+ while ( lim > cur
+ && ((match_offset = execute(ibeg, lim - beg, &match_size,
+ ibeg + (cur - beg))) != (size_t) -1))
{
char const *b = beg + match_offset;
@@ -798,7 +799,7 @@ print_line_middle (const char *beg, const char *lim)
/* XXX - Could really advance by one whole multi-octet character. */
match_size = 1;
if (!mid)
- mid = beg;
+ mid = cur;
}
else
{
@@ -809,11 +810,10 @@ print_line_middle (const char *beg, const char *lim)
PR_SGR_START(mlines_color);
if (mid)
{
- fwrite (mid, sizeof (char), (beg - mid) + match_offset, stdout);
+ cur = mid;
mid = NULL;
}
- else
- fwrite (beg, sizeof (char), match_offset, stdout);
+ fwrite (cur, sizeof (char), b - cur, stdout);
}
PR_SGR_START_IF(grep_color);
@@ -822,19 +822,18 @@ print_line_middle (const char *beg, const char *lim)
if (only_matching)
fputs("\n", stdout);
}
- beg = b + match_size;
- ibeg += match_offset + match_size; /* XXX */
+ cur = b + match_size;
}
if (buf)
free(buf); /* XXX */
if (only_matching)
- beg = lim;
+ cur = lim;
else if (mid)
- beg = mid;
+ cur = mid;
- return beg;
+ return cur;
}
static const char *
@@ -904,7 +903,8 @@ prpending (char const *lim)
size_t match_size;
--pending;
if (outleft
- || ((execute(lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1)
+ || ((execute(lastout, nl + 1 - lastout,
+ &match_size, NULL) == (size_t) -1)
== !out_invert))
prline (lastout, nl + 1, SEP_CHAR_CONTEXT);
else
@@ -994,7 +994,8 @@ grepbuf (char const *beg, char const *lim)
nlines = 0;
p = beg;
- while ((match_offset = execute(p, lim - p, &match_size, 0)) != (size_t) -1)
+ while ((match_offset = execute(p, lim - p, &match_size,
+ NULL)) != (size_t) -1)
{
char const *b = p + match_offset;
char const *endp = b + match_size;
diff --git a/src/grep.h b/src/grep.h
index 3384c40c..f8b6f454 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -34,7 +34,8 @@
(char const *pattern, size_t size)
#define EXECUTE_RET size_t
#define EXECUTE_ARGS \
- (char const *buf, size_t size, size_t *match_size, int exact)
+ (char const *buf, size_t size, size_t *match_size, char const *start_ptr)
+ /* start_ptr == NULL means the caller is not looking for an exact match. */
#ifdef GREP_PROGRAM
/* Function definitions. */
diff --git a/src/search.c b/src/search.c
index f06a75e2..206260d7 100644
--- a/src/search.c
+++ b/src/search.c
@@ -297,9 +297,9 @@ COMPILE_FCT(Ecompile)
EXECUTE_FCT(EGexecute)
{
- register char const *buflim, *beg, *end;
+ register char const *buflim, *beg, *end, *match, *best_match;
char eol = eolbyte;
- int backref, start, len;
+ int backref, start, len, best_len;
struct kwsmatch kwsm;
size_t i, ret_val;
#ifdef MBS_SUPPORT
@@ -310,6 +310,8 @@ EXECUTE_FCT(EGexecute)
{
char *case_buf = xmalloc(size);
memcpy(case_buf, buf, size);
+ if (start_ptr)
+ start_ptr = case_buf + (start_ptr - buf);
buf = case_buf;
}
if (kwset)
@@ -321,8 +323,9 @@ EXECUTE_FCT(EGexecute)
for (beg = end = buf; end < buflim; beg = end)
{
- if (!exact)
+ if (!start_ptr)
{
+ /* We don't care about an exact match. */
if (kwset)
{
/* Find a possible match using the KWset matcher. */
@@ -363,26 +366,38 @@ EXECUTE_FCT(EGexecute)
goto success;
}
else
- end = beg + size;
+ {
+ /* We are looking for the leftmost (then longest) exact match.
+ We will go through the outer loop only once. */
+ beg = start_ptr;
+ end = buflim;
+ }
/* If we've made it to this point, this means DFA has seen
a probable match, and we need to run it through Regex. */
+ best_match = end;
+ best_len = 0;
for (i = 0; i < pcount; i++)
{
patterns[i].regexbuf.not_eol = 0;
- if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
- end - beg - 1, 0,
- end - beg - 1, &(patterns[i].regs))))
+ if (0 <= (start = re_search (&(patterns[i].regexbuf),
+ buf, end - buf - 1,
+ beg - buf, end - beg - 1,
+ &(patterns[i].regs))))
{
len = patterns[i].regs.end[0] - start;
- if (exact)
- {
- *match_size = len;
- return start;
- }
+ match = buf + start;
+ if (match > best_match)
+ continue;
+ if (start_ptr && !match_words)
+ goto assess_pattern_match;
if ((!match_lines && !match_words)
|| (match_lines && len == end - beg - 1))
- goto success;
+ {
+ match = beg;
+ len = end - beg;
+ goto assess_pattern_match;
+ }
/* If -w, check if the match aligns with word boundaries.
We do this iteratively because:
(a) the line may contain more than one occurence of the
@@ -391,53 +406,75 @@ EXECUTE_FCT(EGexecute)
given point, and we may need to consider a shorter one to
find a word boundary. */
if (match_words)
- while (start >= 0)
+ while (match <= best_match)
{
- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+ if ((match == buf || !WCHAR ((unsigned char) match[-1]))
&& (len == end - beg - 1
- || !WCHAR ((unsigned char) beg[start + len])))
- goto success;
+ || !WCHAR ((unsigned char) match[len])))
+ goto assess_pattern_match;
if (len > 0)
{
/* Try a shorter length anchored at the same place. */
--len;
patterns[i].regexbuf.not_eol = 1;
- len = re_match (&(patterns[i].regexbuf), beg,
- start + len, start,
+ len = re_match (&(patterns[i].regexbuf),
+ buf, match + len - beg, match - buf,
&(patterns[i].regs));
}
if (len <= 0)
{
/* Try looking further on. */
- if (start == end - beg - 1)
+ if (match == end - 1)
break;
- ++start;
+ match++;
patterns[i].regexbuf.not_eol = 0;
- start = re_search (&(patterns[i].regexbuf), beg,
- end - beg - 1,
- start, end - beg - 1 - start,
+ start = re_search (&(patterns[i].regexbuf),
+ buf, end - buf - 1,
+ match - buf, end - match - 1,
&(patterns[i].regs));
+ if (start < 0)
+ break;
len = patterns[i].regs.end[0] - start;
+ match = buf + start;
}
- }
- }
+ } /* while (match <= best_match) */
+ continue;
+ assess_pattern_match:
+ if (!start_ptr)
+ {
+ /* Good enough for a non-exact match.
+ No need to look at further patterns, if any. */
+ beg = match;
+ goto success_in_len;
+ }
+ if (match < best_match || (match == best_match && len > best_len))
+ {
+ /* Best exact match: leftmost, then longest. */
+ best_match = match;
+ best_len = len;
+ }
+ } /* if re_search >= 0 */
} /* for Regex patterns. */
+ if (best_match < end)
+ {
+ /* We have found an exact match. We were just
+ waiting for the best one (leftmost then longest). */
+ beg = best_match;
+ len = best_len;
+ goto success_in_len;
+ }
} /* for (beg = end ..) */
failure:
-#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- if (match_icase)
- free((char*)buf);
- if (mb_properties)
- free(mb_properties);
- }
-#endif /* MBS_SUPPORT */
- return (size_t) -1;
+ ret_val = -1;
+ goto out;
success:
+ len = end - beg;
+ success_in_len:
+ *match_size = len;
ret_val = beg - buf;
+ out:
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
{
@@ -447,7 +484,6 @@ EXECUTE_FCT(EGexecute)
free(mb_properties);
}
#endif /* MBS_SUPPORT */
- *match_size = end - beg;
return ret_val;
}
#endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */
@@ -490,47 +526,27 @@ EXECUTE_FCT(Fexecute)
{
char *case_buf = xmalloc(size);
memcpy(case_buf, buf, size);
+ if (start_ptr)
+ start_ptr = case_buf + (start_ptr - buf);
buf = case_buf;
}
mb_properties = check_multibyte_string(buf, size);
}
#endif /* MBS_SUPPORT */
- for (beg = buf; beg <= buf + size; ++beg)
+ for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
{
size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
if (offset == (size_t) -1)
- {
-#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- if (match_icase)
- free ((char*)buf);
- free(mb_properties);
- }
-#endif /* MBS_SUPPORT */
- return offset;
- }
+ goto failure;
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
continue; /* It is a part of multibyte character. */
#endif /* MBS_SUPPORT */
beg += offset;
len = kwsmatch.size[0];
- if (exact)
- {
- *match_size = len;
- ret_val = beg - buf;
-#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- if (match_icase)
- free ((char*)buf);
- free(mb_properties);
- }
-#endif /* MBS_SUPPORT */
- return ret_val;
- }
+ if (start_ptr && !match_words)
+ goto success_in_beg_and_len;
if (match_lines)
{
if (beg > buf && beg[-1] != eol)
@@ -552,31 +568,29 @@ EXECUTE_FCT(Fexecute)
try = beg + offset;
len = kwsmatch.size[0];
}
- else
+ else if (!start_ptr)
goto success;
- }
+ else
+ goto success_in_beg_and_len;
+ } /* for (try) */
else
goto success;
- }
+ } /* for (beg in buf) */
-#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- if (match_icase)
- free((char*)buf);
- if (mb_properties)
- free(mb_properties);
- }
-#endif /* MBS_SUPPORT */
- return -1;
+ failure:
+ ret_val = -1;
+ goto out;
success:
end = memchr (beg + len, eol, (buf + size) - (beg + len));
end++;
while (buf < beg && beg[-1] != eol)
--beg;
- *match_size = end - beg;
+ len = end - beg;
+ success_in_beg_and_len:
+ *match_size = len;
ret_val = beg - buf;
+ out:
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1)
{
@@ -673,7 +687,8 @@ EXECUTE_FCT(Pexecute)
is just for performance improvement in pcre_exec. */
int sub[300];
- int e = pcre_exec (cre, extra, buf, size, 0, 0,
+ int e = pcre_exec (cre, extra, buf, size,
+ start_ptr ? (start_ptr - buf) : 0, 0,
sub, sizeof sub / sizeof *sub);
if (e <= 0)
@@ -697,7 +712,7 @@ EXECUTE_FCT(Pexecute)
char const *end = buf + sub[1];
char const *buflim = buf + size;
char eol = eolbyte;
- if (!exact)
+ if (!start_ptr)
{
/* FIXME: The case when '\n' is not found indicates a bug:
Since grep is line oriented, the match should never contain