grep: -x now supersedes -w more consistently

* NEWS, doc/grep.texi (Matching Control): Mention this. * src/dfasearch.c (EGexecute): * src/pcresearch.c (Pcompile): Don't get confused by -w if -x is also present. * src/pcresearch.c (Pcompile): Remove misleading comment about non-UTF-8 multibyte locales, as PCRE doesn't support them. Calculate buffer sizes more carefully; the old method allocated a buffer slightly too big, seemingly due to luck. * tests/backref-word, tests/pcre: Add tests for this bug.
author: Paul Eggert <eggert@cs.ucla.edu> 2016-01-15 23:04:23 -0800
committer: Paul Eggert <eggert@cs.ucla.edu> 2016-01-15 23:05:30 -0800
commit: 68d5d036cf6d4b02f3e6a2ced750231b523efae9 (patch)
tree: c5ef6e8db327894a9c567a957c2a4b8b52ccd499 /src
parent: ba10d3618d407245780e836e6a51255da145a316 (diff)
download: grep-68d5d036cf6d4b02f3e6a2ced750231b523efae9.tar.gz
2 files changed, 16 insertions, 14 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c
index a330eac0..e04a2dfd 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -363,14 +363,14 @@ EGexecute (char *buf, size_t size, size_t *match_size,
                   len = end - ptr;
                   goto assess_pattern_match;
                 }
-              /* If -w, check if the match aligns with word boundaries.
-                 We do this iteratively because:
+              /* If -w and not -x, check whether the match aligns with
+                 word boundaries.  Do this iteratively because:
                  (a) the line may contain more than one occurrence of the
                  pattern, and
                  (b) Several alternatives in the pattern might be valid at a
                  given point, and we may need to consider a shorter one to
                  find a word boundary.  */
-              if (match_words)
+              if (!match_lines && match_words)
                 while (match <= best_match)
                   {
                     regoff_t shorter_len = 0;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fae94d8..3fee67a3 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -98,7 +98,13 @@ Pcompile (char const *pattern, size_t size)
 #else
   int e;
   char const *ep;
-  char *re = xnmalloc (4, size + 7);
+  static char const wprefix[] = "(?<!\\w)(?:";
+  static char const wsuffix[] = ")(?!\\w)";
+  static char const xprefix[] = "^(?:";
+  static char const xsuffix[] = ")$";
+  int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
+                         sizeof xprefix - 1 + sizeof xsuffix - 1);
+  char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = (PCRE_MULTILINE
                | (match_icase ? PCRE_CASELESS : 0));
   char const *patlim = pattern + size;
@@ -120,20 +126,16 @@ Pcompile (char const *pattern, size_t size)
     error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
-  if (match_lines)
-    strcpy (n, "^(?:");
   if (match_words)
-    strcpy (n, "(?<!\\w)(?:");
+    strcpy (n, wprefix);
+  if (match_lines)
+    strcpy (n, xprefix);
   n += strlen (n);
 
   /* The PCRE interface doesn't allow NUL bytes in the pattern, so
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
-     number of backslashes before the NUL.
-
-     FIXME: This method does not work with some multibyte character
-     encodings, notably Shift-JIS, where a multibyte character can end
-     in a backslash byte.  */
+     number of backslashes before the NUL.  */
   for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
@@ -149,9 +151,9 @@ Pcompile (char const *pattern, size_t size)
   n += patlim - p;
   *n = '\0';
   if (match_words)
-    strcpy (n, ")(?!\\w)");
+    strcpy (n, wsuffix);
   if (match_lines)
-    strcpy (n, ")$");
+    strcpy (n, xsuffix);
 
   cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
   if (!cre)
author	Paul Eggert <eggert@cs.ucla.edu>	2016-01-15 23:04:23 -0800
committer	Paul Eggert <eggert@cs.ucla.edu>	2016-01-15 23:05:30 -0800
commit	68d5d036cf6d4b02f3e6a2ced750231b523efae9 (patch)
tree	c5ef6e8db327894a9c567a957c2a4b8b52ccd499 /src
parent	ba10d3618d407245780e836e6a51255da145a316 (diff)
download	grep-68d5d036cf6d4b02f3e6a2ced750231b523efae9.tar.gz