summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2023-03-19 01:50:00 -0700
committer: Jim Meyering <meyering@meta.com> 2023-03-19 08:43:01 -0700
commit: 99330c2b1dc8b619dff8a5a6a35f524d382508c8 (patch)
tree: dba7c6cf4aa7081208994e4ce5c5f2d4c36329de /src
parent: 373b4434ebc15f447ca6f96007ed6181c9a2a496 (diff)
download: grep-99330c2b1dc8b619dff8a5a6a35f524d382508c8.tar.gz
grep: forward port to PCRE2 10.43
* doc/grep.texi: Document this.
* src/grep.c: Move recent changes into pcresearch.c.
(P_MATCHER_INDEX): Remove.
(pcre_pattern_expand_backslash_d): Move from here ...
* src/pcresearch.c: ... to here.
(PCRE2_EXTRA_ASCII_BSD): Default to 0.
(Pcompile): Use PCRE2_EXTRA_ASCII_BSD if available, and expand \d
to [0-9] otherwise.
Diffstat (limited to 'src')
-rw-r--r-- src/grep.c | 82
-rw-r--r-- src/pcresearch.c | 90
2 files changed, 88 insertions, 84 deletions
diff --git a/src/grep.c b/src/grep.c
index 6ba881ef..7547b641 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2089,8 +2089,7 @@ static struct
#endif
};
/* Keep these in sync with the 'matchers' table. */
-enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0,
- P_MATCHER_INDEX = 6 };
+enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
/* Return the index of the matcher corresponding to M if available.
MATCHER is the index of the previous matcher, or -1 if none.
@@ -2379,80 +2378,6 @@ fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
*len_p = p - new_keys;
}
-/* Replace each \d in *KEYS_P with [0-9], to ensure that \d matches only ASCII
- digits. Now that we enable PCRE2_UCP for pcre regexps, \d would otherwise
- match non-ASCII digits in some locales. Use \p{Nd} if you require to match
- those. */
-static void
-pcre_pattern_expand_backslash_d (char **keys_p, idx_t *len_p)
-{
- idx_t len = *len_p;
- char *keys = *keys_p;
- mbstate_t mb_state = { 0 };
- char *new_keys = xnmalloc (len / 2 + 1, 5);
- char *p = new_keys;
- bool prev_backslash = false;
-
- for (ptrdiff_t n; len; keys += n, len -= n)
- {
- n = mb_clen (keys, len, &mb_state);
- switch (n)
- {
- case -2:
- n = len;
- FALLTHROUGH;
- default:
- if (prev_backslash)
- {
- prev_backslash = false;
- *p++ = '\\';
- }
- p = mempcpy (p, keys, n);
- break;
-
- case -1:
- if (prev_backslash)
- {
- prev_backslash = false;
- *p++ = '\\';
- }
- memset (&mb_state, 0, sizeof mb_state);
- n = 1;
- FALLTHROUGH;
- case 1:
- if (prev_backslash)
- {
- prev_backslash = false;
- switch (*keys)
- {
- case 'd':
- p = mempcpy (p, "[0-9]", 5);
- break;
- default:
- *p++ = '\\';
- *p++ = *keys;
- break;
- }
- }
- else
- {
- if (*keys == '\\')
- prev_backslash = true;
- else
- *p++ = *keys;
- }
- break;
- }
- }
-
- if (prev_backslash)
- *p++ = '\\';
- *p = '\n';
- free (*keys_p);
- *keys_p = new_keys;
- *len_p = p - new_keys;
-}
-
/* If it is easy, convert the MATCHER-style patterns KEYS (of size
*LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
@@ -3045,11 +2970,6 @@ main (int argc, char **argv)
matcher = try_fgrep_pattern (matcher, keys, &keycc);
}
- /* If -P, replace each \d with [0-9].
- Those who want to match non-ASCII digits must use \p{Nd}. */
- if (matcher == P_MATCHER_INDEX)
- pcre_pattern_expand_backslash_d (&keys, &keycc);
-
execute = matchers[matcher].execute;
compiled_pattern =
matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 5b111bea..d3701816 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -35,6 +35,9 @@
# define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
# define pcre2_set_depth_limit pcre2_set_recursion_limit
#endif
+#ifndef PCRE2_EXTRA_ASCII_BSD
+# define PCRE2_EXTRA_ASCII_BSD 0
+#endif
struct pcre_comp
{
@@ -130,12 +133,89 @@ bad_utf8_from_pcre2 (int e)
#endif
}
+/* Replace each \d in *KEYS_P with [0-9], to ensure that \d matches only ASCII
+ digits. Now that we enable PCRE2_UCP for pcre regexps, \d would otherwise
+ match non-ASCII digits in some locales. Use \p{Nd} if you require to match
+ those. */
+static void
+pcre_pattern_expand_backslash_d (char **keys_p, idx_t *len_p)
+{
+ idx_t len = *len_p;
+ char *keys = *keys_p;
+ mbstate_t mb_state = { 0 };
+ char *new_keys = xnmalloc (len / 2 + 1, 5);
+ char *p = new_keys;
+ bool prev_backslash = false;
+
+ for (ptrdiff_t n; len; keys += n, len -= n)
+ {
+ n = mb_clen (keys, len, &mb_state);
+ switch (n)
+ {
+ case -2:
+ n = len;
+ FALLTHROUGH;
+ default:
+ if (prev_backslash)
+ {
+ prev_backslash = false;
+ *p++ = '\\';
+ }
+ p = mempcpy (p, keys, n);
+ break;
+
+ case -1:
+ if (prev_backslash)
+ {
+ prev_backslash = false;
+ *p++ = '\\';
+ }
+ memset (&mb_state, 0, sizeof mb_state);
+ n = 1;
+ FALLTHROUGH;
+ case 1:
+ if (prev_backslash)
+ {
+ prev_backslash = false;
+ switch (*keys)
+ {
+ case 'd':
+ p = mempcpy (p, "[0-9]", 5);
+ break;
+ default:
+ *p++ = '\\';
+ *p++ = *keys;
+ break;
+ }
+ }
+ else
+ {
+ if (*keys == '\\')
+ prev_backslash = true;
+ else
+ *p++ = *keys;
+ }
+ break;
+ }
+ }
+
+ if (prev_backslash)
+ *p++ = '\\';
+ *p = '\n';
+ free (*keys_p);
+ *keys_p = new_keys;
+ *len_p = p - new_keys;
+}
+
/* Compile the -P style PATTERN, containing SIZE bytes that are
followed by '\n'. Return a description of the compiled pattern. */
void *
Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
{
+ if (! PCRE2_EXTRA_ASCII_BSD)
+ pcre_pattern_expand_backslash_d (&pattern, &size);
+
PCRE2_SIZE e;
int ec;
int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
@@ -168,12 +248,16 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
if (rawmemchr (pattern, '\n') != patlim)
die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
+#ifdef PCRE2_EXTRA_MATCH_LINE
+ uint32_t extra_options = (PCRE2_EXTRA_ASCII_BSD
+ | (match_lines ? PCRE2_EXTRA_MATCH_LINE : 0));
+ pcre2_set_compile_extra_options (ccontext, extra_options);
+#endif
+
void *re_storage = NULL;
if (match_lines)
{
-#ifdef PCRE2_EXTRA_MATCH_LINE
- pcre2_set_compile_extra_options (ccontext, PCRE2_EXTRA_MATCH_LINE);
-#else
+#ifndef PCRE2_EXTRA_MATCH_LINE
static char const /* These sizes omit trailing NUL. */
xprefix[4] = "^(?:", xsuffix[2] = ")$";
idx_t re_size = size + sizeof xprefix + sizeof xsuffix;