diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2023-03-19 01:50:00 -0700 |
---|---|---|
committer | Jim Meyering <meyering@meta.com> | 2023-03-19 08:43:01 -0700 |
commit | 99330c2b1dc8b619dff8a5a6a35f524d382508c8 (patch) | |
tree | dba7c6cf4aa7081208994e4ce5c5f2d4c36329de /src/grep.c | |
parent | 373b4434ebc15f447ca6f96007ed6181c9a2a496 (diff) | |
download | grep-99330c2b1dc8b619dff8a5a6a35f524d382508c8.tar.gz |
grep: forward port to PCRE2 10.43
* doc/grep.texi: Document this.
* src/grep.c: Move recent changes into pcresearch.c.
(P_MATCHER_INDEX): Remove.
(pcre_pattern_expand_backslash_d): Move from here ...
* src/pcresearch.c: ... to here.
(PCRE2_EXTRA_ASCII_BSD): Default to 0.
(Pcompile): Use PCRE2_EXTRA_ASCII_BSD if available,
and expand \d to [0-9] otherwise.
Diffstat (limited to 'src/grep.c')
-rw-r--r-- | src/grep.c | 82 |
1 file changed, 1 insertion, 81 deletions
@@ -2089,8 +2089,7 @@ static struct
 #endif
 };
 /* Keep these in sync with the 'matchers' table. */
-enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0,
-       P_MATCHER_INDEX = 6 };
+enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
 
 /* Return the index of the matcher corresponding to M if available.
    MATCHER is the index of the previous matcher, or -1 if none.
@@ -2379,80 +2378,6 @@ fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
   *len_p = p - new_keys;
 }
 
-/* Replace each \d in *KEYS_P with [0-9], to ensure that \d matches only ASCII
-   digits. Now that we enable PCRE2_UCP for pcre regexps, \d would otherwise
-   match non-ASCII digits in some locales. Use \p{Nd} if you require to match
-   those. */
-static void
-pcre_pattern_expand_backslash_d (char **keys_p, idx_t *len_p)
-{
-  idx_t len = *len_p;
-  char *keys = *keys_p;
-  mbstate_t mb_state = { 0 };
-  char *new_keys = xnmalloc (len / 2 + 1, 5);
-  char *p = new_keys;
-  bool prev_backslash = false;
-
-  for (ptrdiff_t n; len; keys += n, len -= n)
-    {
-      n = mb_clen (keys, len, &mb_state);
-      switch (n)
-        {
-        case -2:
-          n = len;
-          FALLTHROUGH;
-        default:
-          if (prev_backslash)
-            {
-              prev_backslash = false;
-              *p++ = '\\';
-            }
-          p = mempcpy (p, keys, n);
-          break;
-
-        case -1:
-          if (prev_backslash)
-            {
-              prev_backslash = false;
-              *p++ = '\\';
-            }
-          memset (&mb_state, 0, sizeof mb_state);
-          n = 1;
-          FALLTHROUGH;
-        case 1:
-          if (prev_backslash)
-            {
-              prev_backslash = false;
-              switch (*keys)
-                {
-                case 'd':
-                  p = mempcpy (p, "[0-9]", 5);
-                  break;
-                default:
-                  *p++ = '\\';
-                  *p++ = *keys;
-                  break;
-                }
-            }
-          else
-            {
-              if (*keys == '\\')
-                prev_backslash = true;
-              else
-                *p++ = *keys;
-            }
-          break;
-        }
-    }
-
-  if (prev_backslash)
-    *p++ = '\\';
-  *p = '\n';
-  free (*keys_p);
-  *keys_p = new_keys;
-  *len_p = p - new_keys;
-}
-
 /* If it is easy, convert the MATCHER-style patterns KEYS (of size
    *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
    return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
@@ -3045,11 +2970,6 @@ main (int argc, char **argv)
       matcher = try_fgrep_pattern (matcher, keys, &keycc);
     }
 
-  /* If -P, replace each \d with [0-9].
-     Those who want to match non-ASCII digits must use \p{Nd}. */
-  if (matcher == P_MATCHER_INDEX)
-    pcre_pattern_expand_backslash_d (&keys, &keycc);
-
   execute = matchers[matcher].execute;
   compiled_pattern =
     matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,