summary | refs | log | tree | commit | diff
path: root/src/pcresearch.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2015-07-03 08:10:54 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2015-07-03 08:20:31 -0700
commit: 14f8e489e3708f5592bdc0fea3a876302d93d4db (patch)
tree: de2871b33099f04beae3807418988876231ca92a /src/pcresearch.c
parent: cd85f6ad171e784a6177e590b24be41e05a6b605 (diff)
download: grep-14f8e489e3708f5592bdc0fea3a876302d93d4db.tar.gz
grep: don't mishandle left context in -P
http://bugs.gnu.org/20957
* src/pcresearch.c (jit_exec): New arg SEARCH_OFFSET. Caller changed.
(Pexecute): Pass the left context to pcre_exec, so that PCRE
regular-expression matching can see it.
* tests/pcre-context: New file, to test for this bug.
* tests/Makefile.am (TESTS): Add it.
Diffstat (limited to 'src/pcresearch.c')
-rw-r--r-- src/pcresearch.c 55
1 file changed, 34 insertions, 21 deletions
diff --git a/src/pcresearch.c b/src/pcresearch.c
index aa05e20e..b1f83104 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -43,16 +43,18 @@ static pcre_extra *extra;
static int jit_stack_size;
# endif
-/* Match the already-compiled PCRE pattern against the data in P, of
- size SEARCH_BYTES, with options OPTIONS, and storing resulting
- matches into SUB. Return the (nonnegative) match location or a
- (negative) error number. */
+/* Match the already-compiled PCRE pattern against the data in SUBJECT,
+ of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
+ options OPTIONS, and storing resulting matches into SUB. Return
+ the (nonnegative) match location or a (negative) error number. */
static int
-jit_exec (char const *p, int search_bytes, int options, int *sub)
+jit_exec (char const *subject, int search_bytes, int search_offset,
+ int options, int *sub)
{
while (true)
{
- int e = pcre_exec (cre, extra, p, search_bytes, 0, options, sub, NSUB);
+ int e = pcre_exec (cre, extra, subject, search_bytes, search_offset,
+ options, sub, NSUB);
# if PCRE_STUDY_JIT_COMPILE
if (e == PCRE_ERROR_JIT_STACKLIMIT
@@ -187,6 +189,11 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
int e = PCRE_ERROR_NOMATCH;
char const *line_end;
+ /* The search address to pass to pcre_exec. This is the start of
+ the buffer, or just past the most-recently discovered encoding
+ error. */
+ char const *subject = buf;
+
/* If the input type is unknown, the caller is still testing the
input, which means the current buffer cannot contain encoding
errors and a multiline search is typically more efficient.
@@ -226,12 +233,13 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
bol = false;
}
+ int search_offset = p - subject;
+
/* Check for an empty match; this is faster than letting
pcre_exec do it. */
- int search_bytes = line_end - p;
- if (search_bytes == 0)
+ if (p == line_end)
{
- sub[0] = sub[1] = 0;
+ sub[0] = sub[1] = search_offset;
e = empty_match[bol];
break;
}
@@ -242,17 +250,18 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
if (multiline)
options |= PCRE_NO_UTF8_CHECK;
- e = jit_exec (p, search_bytes, options, sub);
+ e = jit_exec (subject, line_end - subject, search_offset,
+ options, sub);
if (e != PCRE_ERROR_BADUTF8)
{
if (0 < e && multiline && sub[1] - sub[0] != 0)
{
- char const *nl = memchr (p + sub[0], eolbyte,
+ char const *nl = memchr (subject + sub[0], eolbyte,
sub[1] - sub[0]);
if (nl)
{
/* This match crosses a line boundary; reject it. */
- p += sub[0];
+ p = subject + sub[0];
line_end = nl;
continue;
}
@@ -261,22 +270,26 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
}
int valid_bytes = sub[0];
- /* Try to match the string before the encoding error.
- Again, handle the empty-match case specially, for speed. */
- if (valid_bytes == 0)
+ /* Try to match the string before the encoding error. */
+ if (valid_bytes < search_offset)
+ e = PCRE_ERROR_NOMATCH;
+ else if (valid_bytes == 0)
{
+ /* Handle the empty-match case specially, for speed.
+ This optimization is valid if VALID_BYTES is zero,
+ which means SEARCH_OFFSET is also zero. */
sub[1] = 0;
e = empty_match[bol];
}
else
- e = pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, NSUB);
+ e = jit_exec (subject, valid_bytes, search_offset,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
+
if (e != PCRE_ERROR_NOMATCH)
break;
/* Treat the encoding error as data that cannot match. */
- p += valid_bytes + 1;
+ p = subject += valid_bytes + 1;
bol = false;
}
@@ -315,8 +328,8 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
}
else
{
- char const *matchbeg = p + sub[0];
- char const *matchend = p + sub[1];
+ char const *matchbeg = subject + sub[0];
+ char const *matchend = subject + sub[1];
char const *beg;
char const *end;
if (start_ptr)