summaryrefslogtreecommitdiff
path: root/src/pcresearch.c
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2014-09-15 18:33:19 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2014-09-16 18:23:50 -0700
commit9ea9254ea58456b84ed2f0c1481ca91cdd325bf7 (patch)
tree4b2c8473cf8fc000d971ee8e070cfe892a7730ae /src/pcresearch.c
parentaf3572e2651379566441c9d718dec7e809d3810d (diff)
downloadgrep-9ea9254ea58456b84ed2f0c1481ca91cdd325bf7.tar.gz
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing 'nsub' within Pexecute. (Pcompile, Pexecute): Use it. (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8 match failure. * tests/pcre-invalid-utf8-input: Test for this bug.
Diffstat (limited to 'src/pcresearch.c')
-rw-r--r--src/pcresearch.c32
1 files changed, 19 insertions, 13 deletions
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758c..c41f7ef7 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;