diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2015-07-03 08:10:54 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2015-07-03 08:20:31 -0700 |
commit | 14f8e489e3708f5592bdc0fea3a876302d93d4db (patch) | |
tree | de2871b33099f04beae3807418988876231ca92a /src/pcresearch.c | |
parent | cd85f6ad171e784a6177e590b24be41e05a6b605 (diff) | |
download | grep-14f8e489e3708f5592bdc0fea3a876302d93d4db.tar.gz |
grep: don't mishandle left context in -P
http://bugs.gnu.org/20957
* src/pcresearch.c (jit_exec): New arg SEARCH_OFFSET.
Caller changed.
(Pexecute): Pass the left context to pcre_exec, so that PCRE
regular-expression matching can see it.
* tests/pcre-context: New file, to test for this bug.
* tests/Makefile.am (TESTS): Add it.
Diffstat (limited to 'src/pcresearch.c')
-rw-r--r-- | src/pcresearch.c | 55 |
1 file changed, 34 insertions, 21 deletions
diff --git a/src/pcresearch.c b/src/pcresearch.c index aa05e20e..b1f83104 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -43,16 +43,18 @@ static pcre_extra *extra; static int jit_stack_size; # endif -/* Match the already-compiled PCRE pattern against the data in P, of - size SEARCH_BYTES, with options OPTIONS, and storing resulting - matches into SUB. Return the (nonnegative) match location or a - (negative) error number. */ +/* Match the already-compiled PCRE pattern against the data in SUBJECT, + of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with + options OPTIONS, and storing resulting matches into SUB. Return + the (nonnegative) match location or a (negative) error number. */ static int -jit_exec (char const *p, int search_bytes, int options, int *sub) +jit_exec (char const *subject, int search_bytes, int search_offset, + int options, int *sub) { while (true) { - int e = pcre_exec (cre, extra, p, search_bytes, 0, options, sub, NSUB); + int e = pcre_exec (cre, extra, subject, search_bytes, search_offset, + options, sub, NSUB); # if PCRE_STUDY_JIT_COMPILE if (e == PCRE_ERROR_JIT_STACKLIMIT @@ -187,6 +189,11 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int e = PCRE_ERROR_NOMATCH; char const *line_end; + /* The search address to pass to pcre_exec. This is the start of + the buffer, or just past the most-recently discovered encoding + error. */ + char const *subject = buf; + /* If the input type is unknown, the caller is still testing the input, which means the current buffer cannot contain encoding errors and a multiline search is typically more efficient. @@ -226,12 +233,13 @@ Pexecute (char const *buf, size_t size, size_t *match_size, bol = false; } + int search_offset = p - subject; + /* Check for an empty match; this is faster than letting pcre_exec do it. 
*/ - int search_bytes = line_end - p; - if (search_bytes == 0) + if (p == line_end) { - sub[0] = sub[1] = 0; + sub[0] = sub[1] = search_offset; e = empty_match[bol]; break; } @@ -242,17 +250,18 @@ Pexecute (char const *buf, size_t size, size_t *match_size, if (multiline) options |= PCRE_NO_UTF8_CHECK; - e = jit_exec (p, search_bytes, options, sub); + e = jit_exec (subject, line_end - subject, search_offset, + options, sub); if (e != PCRE_ERROR_BADUTF8) { if (0 < e && multiline && sub[1] - sub[0] != 0) { - char const *nl = memchr (p + sub[0], eolbyte, + char const *nl = memchr (subject + sub[0], eolbyte, sub[1] - sub[0]); if (nl) { /* This match crosses a line boundary; reject it. */ - p += sub[0]; + p = subject + sub[0]; line_end = nl; continue; } @@ -261,22 +270,26 @@ Pexecute (char const *buf, size_t size, size_t *match_size, } int valid_bytes = sub[0]; - /* Try to match the string before the encoding error. - Again, handle the empty-match case specially, for speed. */ - if (valid_bytes == 0) + /* Try to match the string before the encoding error. */ + if (valid_bytes < search_offset) + e = PCRE_ERROR_NOMATCH; + else if (valid_bytes == 0) { + /* Handle the empty-match case specially, for speed. + This optimization is valid if VALID_BYTES is zero, + which means SEARCH_OFFSET is also zero. */ sub[1] = 0; e = empty_match[bol]; } else - e = pcre_exec (cre, extra, p, valid_bytes, 0, - options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, - sub, NSUB); + e = jit_exec (subject, valid_bytes, search_offset, + options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub); + if (e != PCRE_ERROR_NOMATCH) break; /* Treat the encoding error as data that cannot match. 
*/ - p += valid_bytes + 1; + p = subject += valid_bytes + 1; bol = false; } @@ -315,8 +328,8 @@ Pexecute (char const *buf, size_t size, size_t *match_size, } else { - char const *matchbeg = p + sub[0]; - char const *matchend = p + sub[1]; + char const *matchbeg = subject + sub[0]; + char const *matchend = subject + sub[1]; char const *beg; char const *end; if (start_ptr) |