diff options
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | doc/grep.texi | 1 | ||||
-rw-r--r-- | src/dfasearch.c | 6 | ||||
-rw-r--r-- | src/pcresearch.c | 24 | ||||
-rwxr-xr-x | tests/backref-word | 4 | ||||
-rwxr-xr-x | tests/pcre | 5 |
6 files changed, 29 insertions, 16 deletions
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -32,9 +32,12 @@ GNU grep NEWS -*- outline -*-
   This partly reverts the --exclude-related change in 2.22.
   [bug introduced in grep-2.22]
 
-  --line-buffer is no longer ineffective when combined with -l
+  --line-buffer is no longer ineffective when combined with -l.
   [bug introduced in grep-2.5]
 
+  -xw is now equivalent to -x more consistently, with -P and with backrefs.
+  [bug only partially fixed in grep-2.19]
+
 
 * Noteworthy changes in release 2.22 (2015-11-01) [stable]
 
diff --git a/doc/grep.texi b/doc/grep.texi
index 76769b90..8883b275 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -233,6 +233,7 @@ Similarly,
 it must be either at the end of the line
 or followed by a non-word constituent character.
 Word-constituent characters are letters, digits, and the underscore.
+This option has no effect if @option{-x} is also specified.
 
 @item -x
 @itemx --line-regexp
diff --git a/src/dfasearch.c b/src/dfasearch.c
index a330eac0..e04a2dfd 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -363,14 +363,14 @@ EGexecute (char *buf, size_t size, size_t *match_size,
               len = end - ptr;
               goto assess_pattern_match;
             }
-          /* If -w, check if the match aligns with word boundaries.
-             We do this iteratively because:
+          /* If -w and not -x, check whether the match aligns with
+             word boundaries. Do this iteratively because:
              (a) the line may contain more than one occurrence of the
              pattern, and
              (b) Several alternatives in the pattern might be valid at a
              given point, and we may need to consider a shorter one to
              find a word boundary. */
-          if (match_words)
+          if (!match_lines && match_words)
             while (match <= best_match)
               {
                 regoff_t shorter_len = 0;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fae94d8..3fee67a3 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -98,7 +98,13 @@ Pcompile (char const *pattern, size_t size)
 #else
   int e;
   char const *ep;
-  char *re = xnmalloc (4, size + 7);
+  static char const wprefix[] = "(?<!\\w)(?:";
+  static char const wsuffix[] = ")(?!\\w)";
+  static char const xprefix[] = "^(?:";
+  static char const xsuffix[] = ")$";
+  int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
+                         sizeof xprefix - 1 + sizeof xsuffix - 1);
+  char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = (PCRE_MULTILINE
                | (match_icase ? PCRE_CASELESS : 0));
   char const *patlim = pattern + size;
@@ -120,20 +126,16 @@ Pcompile (char const *pattern, size_t size)
     error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
-  if (match_lines)
-    strcpy (n, "^(?:");
   if (match_words)
-    strcpy (n, "(?<!\\w)(?:");
+    strcpy (n, wprefix);
+  if (match_lines)
+    strcpy (n, xprefix);
   n += strlen (n);
 
   /* The PCRE interface doesn't allow NUL bytes in the pattern, so
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
-     number of backslashes before the NUL.
-
-     FIXME: This method does not work with some multibyte character
-     encodings, notably Shift-JIS, where a multibyte character can end
-     in a backslash byte. */
+     number of backslashes before the NUL. */
   for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
@@ -149,9 +151,9 @@ Pcompile (char const *pattern, size_t size)
   n += patlim - p;
   *n = '\0';
   if (match_words)
-    strcpy (n, ")(?!\\w)");
+    strcpy (n, wsuffix);
   if (match_lines)
-    strcpy (n, ")$");
+    strcpy (n, xsuffix);
 
   cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
   if (!cre)
diff --git a/tests/backref-word b/tests/backref-word
index 557c6d87..e5b5486a 100755
--- a/tests/backref-word
+++ b/tests/backref-word
@@ -9,6 +9,10 @@ for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
   out=out1-$LOC
   LC_ALL=$LOC grep -w '\(foo\) \1' exp1 > $out || fail=1
   compare exp1 $out || fail=1
+
+  LC_ALL=$LOC grep -wx '\(foo\) \1' exp1 > $out
+  test $? -eq 1 || fail=1
+  compare /dev/null $out || fail=1
 done
 
 Exit $fail
diff --git a/tests/pcre b/tests/pcre
--- a/tests/pcre
+++ b/tests/pcre
@@ -1,5 +1,5 @@
 #! /bin/sh
-# Ensure that with -P, \s matches a newline.
+# Simple PCRE tests.
 #
 # Copyright (C) 2001, 2006, 2009-2016 Free Software Foundation, Inc.
 #
@@ -15,4 +15,7 @@ fail=0
 echo | grep -P '\s*$' || fail=1
 echo | grep -zP '\s$' || fail=1
 
+echo '.ab' | grep -Pwx ab
+test $? -eq 1 || fail=1
+
 Exit $fail