diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2016-01-15 23:04:23 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2016-01-15 23:05:30 -0800 |
commit | 68d5d036cf6d4b02f3e6a2ced750231b523efae9 (patch) | |
tree | c5ef6e8db327894a9c567a957c2a4b8b52ccd499 /src | |
parent | ba10d3618d407245780e836e6a51255da145a316 (diff) | |
download | grep-68d5d036cf6d4b02f3e6a2ced750231b523efae9.tar.gz |
grep: -x now supersedes -w more consistently
* NEWS, doc/grep.texi (Matching Control): Mention this.
* src/dfasearch.c (EGexecute):
* src/pcresearch.c (Pcompile):
Don't get confused by -w if -x is also present.
* src/pcresearch.c (Pcompile): Remove misleading comment about
non-UTF-8 multibyte locales, as PCRE doesn't support them.
Calculate buffer sizes more carefully; the old method
allocated a buffer slightly too big, seemingly due to luck.
* tests/backref-word, tests/pcre: Add tests for this bug.
Diffstat (limited to 'src')
-rw-r--r-- | src/dfasearch.c | 6 |
-rw-r--r-- | src/pcresearch.c | 24 |
2 files changed, 16 insertions, 14 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c index a330eac0..e04a2dfd 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -363,14 +363,14 @@ EGexecute (char *buf, size_t size, size_t *match_size, len = end - ptr; goto assess_pattern_match; } - /* If -w, check if the match aligns with word boundaries. - We do this iteratively because: + /* If -w and not -x, check whether the match aligns with + word boundaries. Do this iteratively because: (a) the line may contain more than one occurrence of the pattern, and (b) Several alternatives in the pattern might be valid at a given point, and we may need to consider a shorter one to find a word boundary. */ - if (match_words) + if (!match_lines && match_words) while (match <= best_match) { regoff_t shorter_len = 0; diff --git a/src/pcresearch.c b/src/pcresearch.c index 1fae94d8..3fee67a3 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -98,7 +98,13 @@ Pcompile (char const *pattern, size_t size) #else int e; char const *ep; - char *re = xnmalloc (4, size + 7); + static char const wprefix[] = "(?<!\\w)(?:"; + static char const wsuffix[] = ")(?!\\w)"; + static char const xprefix[] = "^(?:"; + static char const xsuffix[] = ")$"; + int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1, + sizeof xprefix - 1 + sizeof xsuffix - 1); + char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4); int flags = (PCRE_MULTILINE | (match_icase ? 
PCRE_CASELESS : 0)); char const *patlim = pattern + size; @@ -120,20 +126,16 @@ Pcompile (char const *pattern, size_t size) error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern")); *n = '\0'; - if (match_lines) - strcpy (n, "^(?:"); if (match_words) - strcpy (n, "(?<!\\w)(?:"); + strcpy (n, wprefix); + if (match_lines) + strcpy (n, xprefix); n += strlen (n); /* The PCRE interface doesn't allow NUL bytes in the pattern, so replace each NUL byte in the pattern with the four characters "\000", removing a preceding backslash if there are an odd - number of backslashes before the NUL. - - FIXME: This method does not work with some multibyte character - encodings, notably Shift-JIS, where a multibyte character can end - in a backslash byte. */ + number of backslashes before the NUL. */ for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1) { memcpy (n, p, pnul - p); @@ -149,9 +151,9 @@ Pcompile (char const *pattern, size_t size) n += patlim - p; *n = '\0'; if (match_words) - strcpy (n, ")(?!\\w)"); + strcpy (n, wsuffix); if (match_lines) - strcpy (n, ")$"); + strcpy (n, xsuffix); cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); if (!cre) |