summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2016-01-15 23:04:23 -0800
committer: Paul Eggert <eggert@cs.ucla.edu> 2016-01-15 23:05:30 -0800
commit: 68d5d036cf6d4b02f3e6a2ced750231b523efae9 (patch)
tree: c5ef6e8db327894a9c567a957c2a4b8b52ccd499 /src
parent: ba10d3618d407245780e836e6a51255da145a316 (diff)
download: grep-68d5d036cf6d4b02f3e6a2ced750231b523efae9.tar.gz
grep: -x now supersedes -w more consistently
* NEWS, doc/grep.texi (Matching Control): Mention this.
* src/dfasearch.c (EGexecute):
* src/pcresearch.c (Pcompile): Don't get confused by -w if -x is
also present.
* src/pcresearch.c (Pcompile): Remove misleading comment about
non-UTF-8 multibyte locales, as PCRE doesn't support them.
Calculate buffer sizes more carefully; the old method allocated a
buffer slightly too big, seemingly due to luck.
* tests/backref-word, tests/pcre: Add tests for this bug.
Diffstat (limited to 'src')
-rw-r--r-- src/dfasearch.c | 6
-rw-r--r-- src/pcresearch.c | 24
2 files changed, 16 insertions, 14 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c
index a330eac0..e04a2dfd 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -363,14 +363,14 @@ EGexecute (char *buf, size_t size, size_t *match_size,
len = end - ptr;
goto assess_pattern_match;
}
- /* If -w, check if the match aligns with word boundaries.
- We do this iteratively because:
+ /* If -w and not -x, check whether the match aligns with
+ word boundaries. Do this iteratively because:
(a) the line may contain more than one occurrence of the
pattern, and
(b) Several alternatives in the pattern might be valid at a
given point, and we may need to consider a shorter one to
find a word boundary. */
- if (match_words)
+ if (!match_lines && match_words)
while (match <= best_match)
{
regoff_t shorter_len = 0;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fae94d8..3fee67a3 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -98,7 +98,13 @@ Pcompile (char const *pattern, size_t size)
#else
int e;
char const *ep;
- char *re = xnmalloc (4, size + 7);
+ static char const wprefix[] = "(?<!\\w)(?:";
+ static char const wsuffix[] = ")(?!\\w)";
+ static char const xprefix[] = "^(?:";
+ static char const xsuffix[] = ")$";
+ int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
+ sizeof xprefix - 1 + sizeof xsuffix - 1);
+ char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
int flags = (PCRE_MULTILINE
| (match_icase ? PCRE_CASELESS : 0));
char const *patlim = pattern + size;
@@ -120,20 +126,16 @@ Pcompile (char const *pattern, size_t size)
error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
*n = '\0';
- if (match_lines)
- strcpy (n, "^(?:");
if (match_words)
- strcpy (n, "(?<!\\w)(?:");
+ strcpy (n, wprefix);
+ if (match_lines)
+ strcpy (n, xprefix);
n += strlen (n);
/* The PCRE interface doesn't allow NUL bytes in the pattern, so
replace each NUL byte in the pattern with the four characters
"\000", removing a preceding backslash if there are an odd
- number of backslashes before the NUL.
-
- FIXME: This method does not work with some multibyte character
- encodings, notably Shift-JIS, where a multibyte character can end
- in a backslash byte. */
+ number of backslashes before the NUL. */
for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
{
memcpy (n, p, pnul - p);
@@ -149,9 +151,9 @@ Pcompile (char const *pattern, size_t size)
n += patlim - p;
*n = '\0';
if (match_words)
- strcpy (n, ")(?!\\w)");
+ strcpy (n, wsuffix);
if (match_lines)
- strcpy (n, ")$");
+ strcpy (n, xsuffix);
cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
if (!cre)