diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-24 13:27:29 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-24 13:35:19 -0700 |
commit | 16f6616acaa4fcf44edbc3b56ca10fc06b07cf25 (patch) | |
tree | 7c29ee4edb115141168c73fa7a8eccdc2ff6e9af /src/pcresearch.c | |
parent | b2490802defe3c3bf7ef0036a4515d006a08a769 (diff) | |
download | grep-16f6616acaa4fcf44edbc3b56ca10fc06b07cf25.tar.gz |
grep: fix grep -P crash
Reported by Shlomi Fish in: http://bugs.gnu.org/18806
Commit 9fa500407137f49f6edc3c6b4ee6c7096f0190c5 (2014-09-16) is a
hack that I put in to speed up 'grep -P'. Unfortunately, not only
is it a violation of modularity, it's also a bug magnet, as we have
found out with Bug#18738 and Bug#18806. Remove the optimization
instead of applying more bandaids. Perhaps we can think of a
better way of doing the optimization, or perhaps we can just live
with a slower grep -P (as -P is inherently slower anyway...).
* src/grep.c, src/grep.h (validated_boundary):
Remove. All uses removed.
* src/pcresearch.c (Pexecute): Do not worry about validated_boundary.
Diffstat (limited to 'src/pcresearch.c')
-rw-r--r-- | src/pcresearch.c | 37 |
1 file changed, 14 insertions, 23 deletions
diff --git a/src/pcresearch.c b/src/pcresearch.c index 1fd5bdec..5451029e 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -156,7 +156,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size, char const *line_start = buf; int e = PCRE_ERROR_NOMATCH; char const *line_end; - char const *validated = validated_boundary; /* If the input type is unknown, the caller is still testing the input, which means the current buffer cannot contain encoding @@ -210,34 +209,28 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int options = 0; if (!bol) options |= PCRE_NOTBOL; - if (multiline || p + search_bytes <= validated) + if (multiline) options |= PCRE_NO_UTF8_CHECK; - int valid_bytes = validated - p; - if (valid_bytes <= 0) + e = pcre_exec (cre, extra, p, search_bytes, 0, + options, sub, NSUB); + if (e != PCRE_ERROR_BADUTF8) { - e = pcre_exec (cre, extra, p, search_bytes, 0, - options, sub, NSUB); - if (e != PCRE_ERROR_BADUTF8) + if (0 < e && multiline && sub[1] - sub[0] != 0) { - validated = p + search_bytes; - if (0 < e && multiline && sub[1] - sub[0] != 0) + char const *nl = memchr (p + sub[0], eolbyte, + sub[1] - sub[0]); + if (nl) { - char const *nl = memchr (p + sub[0], eolbyte, - sub[1] - sub[0]); - if (nl) - { - /* This match crosses a line boundary; reject it. */ - p += sub[0]; - line_end = nl; - continue; - } + /* This match crosses a line boundary; reject it. */ + p += sub[0]; + line_end = nl; + continue; } - break; } - valid_bytes = sub[0]; - validated = p + valid_bytes; + break; } + int valid_bytes = sub[0]; /* Try to match the string before the encoding error. Again, handle the empty-match case specially, for speed. */ @@ -263,8 +256,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size, bol = true; } - validated_boundary = validated; - if (e <= 0) { switch (e) |