summary | refs | log | tree | commit | diff
path: root/src/pcresearch.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-10-24 13:27:29 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-10-24 13:35:19 -0700
commit: 16f6616acaa4fcf44edbc3b56ca10fc06b07cf25 (patch)
tree: 7c29ee4edb115141168c73fa7a8eccdc2ff6e9af /src/pcresearch.c
parent: b2490802defe3c3bf7ef0036a4515d006a08a769 (diff)
download: grep-16f6616acaa4fcf44edbc3b56ca10fc06b07cf25.tar.gz
grep: fix grep -P crash
Reported by Shlomi Fish in: http://bugs.gnu.org/18806
Commit 9fa500407137f49f6edc3c6b4ee6c7096f0190c5 (2014-09-16) is a hack that I put in to speed up 'grep -P'. Unfortunately, not only is it a violation of modularity, it's also a bug magnet, as we have found out with Bug#18738 and Bug#18806. Remove the optimization instead of applying more bandaids. Perhaps we can think of a better way of doing the optimization, or perhaps we can just live with a slower grep -P (as -P is inherently slower anyway...).
* src/grep.c, src/grep.h (validated_boundary): Remove. All uses removed.
* src/pcresearch.c (Pexecute): Do not worry about validated_boundary.
Diffstat (limited to 'src/pcresearch.c')
-rw-r--r-- src/pcresearch.c 37
1 file changed, 14 insertions, 23 deletions
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fd5bdec..5451029e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -156,7 +156,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
char const *line_start = buf;
int e = PCRE_ERROR_NOMATCH;
char const *line_end;
- char const *validated = validated_boundary;
/* If the input type is unknown, the caller is still testing the
input, which means the current buffer cannot contain encoding
@@ -210,34 +209,28 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
int options = 0;
if (!bol)
options |= PCRE_NOTBOL;
- if (multiline || p + search_bytes <= validated)
+ if (multiline)
options |= PCRE_NO_UTF8_CHECK;
- int valid_bytes = validated - p;
- if (valid_bytes <= 0)
+ e = pcre_exec (cre, extra, p, search_bytes, 0,
+ options, sub, NSUB);
+ if (e != PCRE_ERROR_BADUTF8)
{
- e = pcre_exec (cre, extra, p, search_bytes, 0,
- options, sub, NSUB);
- if (e != PCRE_ERROR_BADUTF8)
+ if (0 < e && multiline && sub[1] - sub[0] != 0)
{
- validated = p + search_bytes;
- if (0 < e && multiline && sub[1] - sub[0] != 0)
+ char const *nl = memchr (p + sub[0], eolbyte,
+ sub[1] - sub[0]);
+ if (nl)
{
- char const *nl = memchr (p + sub[0], eolbyte,
- sub[1] - sub[0]);
- if (nl)
- {
- /* This match crosses a line boundary; reject it. */
- p += sub[0];
- line_end = nl;
- continue;
- }
+ /* This match crosses a line boundary; reject it. */
+ p += sub[0];
+ line_end = nl;
+ continue;
}
- break;
}
- valid_bytes = sub[0];
- validated = p + valid_bytes;
+ break;
}
+ int valid_bytes = sub[0];
/* Try to match the string before the encoding error.
Again, handle the empty-match case specially, for speed. */
@@ -263,8 +256,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
bol = true;
}
- validated_boundary = validated;
-
if (e <= 0)
{
switch (e)