summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marco Trevisan (Treviño) <mail@3v1n0.net> 2022-09-06 14:49:10 +0200
committer: Marco Trevisan (Treviño) <mail@3v1n0.net> 2022-09-12 13:55:39 +0200
commit: d639c4ec009537b743dcd2209184638d9f5d68b9 (patch)
tree: 691078ecccc8ad128f627c7b88d14002aadade57
parent: d4966911e6b35d8923bc6cd58e7cb8a1b0e09d4a (diff)
download: glib-d639c4ec009537b743dcd2209184638d9f5d68b9.tar.gz
regex: Do not mix PCRE2 Compile, Match, Newline and BSR flags
Since the PCRE2 port we were still trying to map the old GRegex flags (PCRE1 based) to the new PCRE2 ones, but in doing so we were also mixing flags with enums, leading to unexpected behavior when trying to extract the newline and BSR options from larger sets of flags. So, avoid doing any mapping: store the values as native PCRE2 flags internally and convert them back only when requested. This fixes some regressions in newline handling. Fixes: #2729 Fixes: #2688 Fixes: GNOME/gtksourceview#278
-rw-r--r-- glib/gregex.c 637
-rw-r--r-- glib/tests/regex.c 18
2 files changed, 341 insertions, 314 deletions
diff --git a/glib/gregex.c b/glib/gregex.c
index 08c43ef4b..2eb9b858e 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -3,6 +3,7 @@
* Copyright (C) 1999, 2000 Scott Wimer
* Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
* Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
@@ -110,62 +111,105 @@
* library written by Philip Hazel.
*/
-/* Signifies that flags have already been converted from pcre1 to pcre2. The
- * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
- * but it is not used in gregex, so we can reuse it for this flag.
- */
-#define G_REGEX_FLAGS_CONVERTED 0x04000000u
+#define G_REGEX_PCRE_GENERIC_MASK (PCRE2_ANCHORED | \
+ PCRE2_NO_UTF_CHECK | \
+ PCRE2_ENDANCHORED)
+
/* Mask of all the possible values for GRegexCompileFlags. */
-#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \
- PCRE2_MULTILINE | \
- PCRE2_DOTALL | \
- PCRE2_EXTENDED | \
- PCRE2_ANCHORED | \
- PCRE2_DOLLAR_ENDONLY | \
- PCRE2_UNGREEDY | \
- PCRE2_UTF | \
- PCRE2_NO_AUTO_CAPTURE | \
- PCRE2_FIRSTLINE | \
- PCRE2_DUPNAMES | \
- PCRE2_NEWLINE_CR | \
- PCRE2_NEWLINE_LF | \
- PCRE2_NEWLINE_CRLF | \
- PCRE2_NEWLINE_ANYCRLF | \
- PCRE2_BSR_ANYCRLF | \
- G_REGEX_FLAGS_CONVERTED)
-
-/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
-#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
-#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
- G_REGEX_FLAGS_CONVERTED)
+#define G_REGEX_COMPILE_MASK (G_REGEX_DEFAULT | \
+ G_REGEX_CASELESS | \
+ G_REGEX_MULTILINE | \
+ G_REGEX_DOTALL | \
+ G_REGEX_EXTENDED | \
+ G_REGEX_ANCHORED | \
+ G_REGEX_DOLLAR_ENDONLY | \
+ G_REGEX_UNGREEDY | \
+ G_REGEX_RAW | \
+ G_REGEX_NO_AUTO_CAPTURE | \
+ G_REGEX_OPTIMIZE | \
+ G_REGEX_FIRSTLINE | \
+ G_REGEX_DUPNAMES | \
+ G_REGEX_NEWLINE_CR | \
+ G_REGEX_NEWLINE_LF | \
+ G_REGEX_NEWLINE_CRLF | \
+ G_REGEX_NEWLINE_ANYCRLF | \
+ G_REGEX_BSR_ANYCRLF)
+
+#define G_REGEX_PCRE2_COMPILE_MASK (PCRE2_ALLOW_EMPTY_CLASS | \
+ PCRE2_ALT_BSUX | \
+ PCRE2_AUTO_CALLOUT | \
+ PCRE2_CASELESS | \
+ PCRE2_DOLLAR_ENDONLY | \
+ PCRE2_DOTALL | \
+ PCRE2_DUPNAMES | \
+ PCRE2_EXTENDED | \
+ PCRE2_FIRSTLINE | \
+ PCRE2_MATCH_UNSET_BACKREF | \
+ PCRE2_MULTILINE | \
+ PCRE2_NEVER_UCP | \
+ PCRE2_NEVER_UTF | \
+ PCRE2_NO_AUTO_CAPTURE | \
+ PCRE2_NO_AUTO_POSSESS | \
+ PCRE2_NO_DOTSTAR_ANCHOR | \
+ PCRE2_NO_START_OPTIMIZE | \
+ PCRE2_UCP | \
+ PCRE2_UNGREEDY | \
+ PCRE2_UTF | \
+ PCRE2_NEVER_BACKSLASH_C | \
+ PCRE2_ALT_CIRCUMFLEX | \
+ PCRE2_ALT_VERBNAMES | \
+ PCRE2_USE_OFFSET_LIMIT | \
+ PCRE2_EXTENDED_MORE | \
+ PCRE2_LITERAL | \
+ PCRE2_MATCH_INVALID_UTF | \
+ G_REGEX_PCRE_GENERIC_MASK)
+
+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF)
/* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \
- PCRE2_NOTBOL | \
- PCRE2_NOTEOL | \
- PCRE2_NOTEMPTY | \
- PCRE2_NEWLINE_CR | \
- PCRE2_NEWLINE_LF | \
- PCRE2_NEWLINE_CRLF | \
- PCRE2_NEWLINE_ANY | \
- PCRE2_NEWLINE_ANYCRLF | \
- PCRE2_BSR_ANYCRLF | \
- PCRE2_BSR_UNICODE | \
- PCRE2_PARTIAL_SOFT | \
- PCRE2_PARTIAL_HARD | \
- PCRE2_NOTEMPTY_ATSTART | \
- G_REGEX_FLAGS_CONVERTED)
-
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_DEFAULT | \
+ G_REGEX_MATCH_ANCHORED | \
+ G_REGEX_MATCH_NOTBOL | \
+ G_REGEX_MATCH_NOTEOL | \
+ G_REGEX_MATCH_NOTEMPTY | \
+ G_REGEX_MATCH_PARTIAL | \
+ G_REGEX_MATCH_NEWLINE_CR | \
+ G_REGEX_MATCH_NEWLINE_LF | \
+ G_REGEX_MATCH_NEWLINE_CRLF | \
+ G_REGEX_MATCH_NEWLINE_ANY | \
+ G_REGEX_MATCH_NEWLINE_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANY | \
+ G_REGEX_MATCH_PARTIAL_SOFT | \
+ G_REGEX_MATCH_PARTIAL_HARD | \
+ G_REGEX_MATCH_NOTEMPTY_ATSTART)
+
+#define G_REGEX_PCRE2_MATCH_MASK (PCRE2_NOTBOL |\
+ PCRE2_NOTEOL |\
+ PCRE2_NOTEMPTY |\
+ PCRE2_NOTEMPTY_ATSTART |\
+ PCRE2_PARTIAL_SOFT |\
+ PCRE2_PARTIAL_HARD |\
+ PCRE2_NO_JIT |\
+ PCRE2_COPY_MATCHED_SUBJECT |\
+ G_REGEX_PCRE_GENERIC_MASK)
+
+/* TODO: Support PCRE2_NEWLINE_NUL */
#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
PCRE2_NEWLINE_LF | \
PCRE2_NEWLINE_CRLF | \
PCRE2_NEWLINE_ANYCRLF)
-#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
- PCRE2_NEWLINE_LF | \
- PCRE2_NEWLINE_CRLF | \
- PCRE2_NEWLINE_ANYCRLF | \
- PCRE2_NEWLINE_ANY)
+#define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \
+ G_REGEX_NEWLINE_LF | \
+ G_REGEX_NEWLINE_CRLF | \
+ G_REGEX_NEWLINE_ANYCRLF)
+
+#define G_REGEX_MATCH_NEWLINE_MASK (G_REGEX_MATCH_NEWLINE_CR | \
+ G_REGEX_MATCH_NEWLINE_LF | \
+ G_REGEX_MATCH_NEWLINE_CRLF | \
+ G_REGEX_MATCH_NEWLINE_ANY | \
+ G_REGEX_MATCH_NEWLINE_ANYCRLF)
/* if the string is in UTF-8 use g_utf8_ functions, else use
* use just +/- 1. */
@@ -180,7 +224,7 @@ struct _GMatchInfo
{
gint ref_count; /* the ref count (atomic) */
GRegex *regex; /* the regex */
- GRegexMatchFlags match_opts; /* options used at match time on the regex */
+ uint32_t match_opts; /* pcre match options used at match time on the regex */
gint matches; /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */
gint n_subpatterns; /* total number of sub patterns in the regex */
gint pos; /* position in the string where last match left off */
@@ -206,9 +250,10 @@ struct _GRegex
gint ref_count; /* the ref count for the immutable part (atomic) */
gchar *pattern; /* the pattern */
pcre2_code *pcre_re; /* compiled form of the pattern */
- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
+ uint32_t compile_opts; /* options used at compile time on the pattern, pcre2 values */
GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
- GRegexMatchFlags match_opts; /* options used at match time on the regex */
+ uint32_t match_opts; /* pcre2 options used at match time on the regex */
+ GRegexMatchFlags orig_match_opts; /* options used as default match options, gregex values */
gint jit_options; /* options which were enabled for jit compiler */
JITStatus jit_status; /* indicates the status of jit compiler for this compiled regex */
};
@@ -225,197 +270,182 @@ static GList *split_replacement (const gchar *replacement,
GError **error);
static void free_interpolation_data (InterpolationData *data);
-static gint
-map_to_pcre2_compile_flags (gint pcre1_flags)
+static uint32_t
+get_pcre2_compile_options (GRegexCompileFlags compile_flags)
{
- /* Maps compile flags from pcre1 to pcre2 values
- */
- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
-
- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
- return pcre1_flags;
+ /* Maps compile flags to pcre2 values */
+ uint32_t pcre2_flags = 0;
- if (pcre1_flags & G_REGEX_CASELESS)
+ if (compile_flags & G_REGEX_CASELESS)
pcre2_flags |= PCRE2_CASELESS;
- if (pcre1_flags & G_REGEX_MULTILINE)
+ if (compile_flags & G_REGEX_MULTILINE)
pcre2_flags |= PCRE2_MULTILINE;
- if (pcre1_flags & G_REGEX_DOTALL)
+ if (compile_flags & G_REGEX_DOTALL)
pcre2_flags |= PCRE2_DOTALL;
- if (pcre1_flags & G_REGEX_EXTENDED)
+ if (compile_flags & G_REGEX_EXTENDED)
pcre2_flags |= PCRE2_EXTENDED;
- if (pcre1_flags & G_REGEX_ANCHORED)
+ if (compile_flags & G_REGEX_ANCHORED)
pcre2_flags |= PCRE2_ANCHORED;
- if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
+ if (compile_flags & G_REGEX_DOLLAR_ENDONLY)
pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
- if (pcre1_flags & G_REGEX_UNGREEDY)
+ if (compile_flags & G_REGEX_UNGREEDY)
pcre2_flags |= PCRE2_UNGREEDY;
- if (!(pcre1_flags & G_REGEX_RAW))
+ if (!(compile_flags & G_REGEX_RAW))
pcre2_flags |= PCRE2_UTF;
- if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
+ if (compile_flags & G_REGEX_NO_AUTO_CAPTURE)
pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
- if (pcre1_flags & G_REGEX_FIRSTLINE)
+ if (compile_flags & G_REGEX_FIRSTLINE)
pcre2_flags |= PCRE2_FIRSTLINE;
- if (pcre1_flags & G_REGEX_DUPNAMES)
+ if (compile_flags & G_REGEX_DUPNAMES)
pcre2_flags |= PCRE2_DUPNAMES;
- if (pcre1_flags & G_REGEX_NEWLINE_CR)
- pcre2_flags |= PCRE2_NEWLINE_CR;
- if (pcre1_flags & G_REGEX_NEWLINE_LF)
- pcre2_flags |= PCRE2_NEWLINE_LF;
- /* Check for exact match for a composite flag */
- if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
- pcre2_flags |= PCRE2_NEWLINE_CRLF;
- /* Check for exact match for a composite flag */
- if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
- if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
- pcre2_flags |= PCRE2_BSR_ANYCRLF;
-
- /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special
- * case to request JIT compilation */
- if (pcre1_flags & G_REGEX_OPTIMIZE)
- pcre2_flags |= 0;
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
- if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
- pcre2_flags |= 0;
-G_GNUC_END_IGNORE_DEPRECATIONS
-
- return pcre2_flags;
+
+ return pcre2_flags & G_REGEX_PCRE2_COMPILE_MASK;
}
-static gint
-map_to_pcre2_match_flags (gint pcre1_flags)
+static uint32_t
+get_pcre2_match_options (GRegexMatchFlags match_flags,
+ GRegexCompileFlags compile_flags)
{
- /* Maps match flags from pcre1 to pcre2 values
- */
- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
-
- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
- return pcre1_flags;
+ /* Maps match flags to pcre2 values */
+ uint32_t pcre2_flags = 0;
- if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
+ if (match_flags & G_REGEX_MATCH_ANCHORED)
pcre2_flags |= PCRE2_ANCHORED;
- if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
+ if (match_flags & G_REGEX_MATCH_NOTBOL)
pcre2_flags |= PCRE2_NOTBOL;
- if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
+ if (match_flags & G_REGEX_MATCH_NOTEOL)
pcre2_flags |= PCRE2_NOTEOL;
- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
+ if (match_flags & G_REGEX_MATCH_NOTEMPTY)
pcre2_flags |= PCRE2_NOTEMPTY;
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
- pcre2_flags |= PCRE2_NEWLINE_CR;
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
- pcre2_flags |= PCRE2_NEWLINE_LF;
- /* Check for exact match for a composite flag */
- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
- pcre2_flags |= PCRE2_NEWLINE_CRLF;
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
- pcre2_flags |= PCRE2_NEWLINE_ANY;
- /* Check for exact match for a composite flag */
- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
- if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
- pcre2_flags |= PCRE2_BSR_ANYCRLF;
- if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
- pcre2_flags |= PCRE2_BSR_UNICODE;
- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
+ if (match_flags & G_REGEX_MATCH_PARTIAL_SOFT)
pcre2_flags |= PCRE2_PARTIAL_SOFT;
- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
+ if (match_flags & G_REGEX_MATCH_PARTIAL_HARD)
pcre2_flags |= PCRE2_PARTIAL_HARD;
- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
+ if (match_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
- return pcre2_flags;
+ if (compile_flags & G_REGEX_RAW)
+ pcre2_flags |= PCRE2_NO_UTF_CHECK;
+
+ return pcre2_flags & G_REGEX_PCRE2_MATCH_MASK;
}
-static gint
-map_to_pcre1_compile_flags (gint pcre2_flags)
+static GRegexCompileFlags
+g_regex_compile_flags_from_pcre2 (uint32_t pcre2_flags)
{
- /* Maps compile flags from pcre2 to pcre1 values
- */
- gint pcre1_flags = 0;
-
- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
- return pcre2_flags;
+ GRegexCompileFlags compile_flags = G_REGEX_DEFAULT;
if (pcre2_flags & PCRE2_CASELESS)
- pcre1_flags |= G_REGEX_CASELESS;
+ compile_flags |= G_REGEX_CASELESS;
if (pcre2_flags & PCRE2_MULTILINE)
- pcre1_flags |= G_REGEX_MULTILINE;
+ compile_flags |= G_REGEX_MULTILINE;
if (pcre2_flags & PCRE2_DOTALL)
- pcre1_flags |= G_REGEX_DOTALL;
+ compile_flags |= G_REGEX_DOTALL;
if (pcre2_flags & PCRE2_EXTENDED)
- pcre1_flags |= G_REGEX_EXTENDED;
+ compile_flags |= G_REGEX_EXTENDED;
if (pcre2_flags & PCRE2_ANCHORED)
- pcre1_flags |= G_REGEX_ANCHORED;
+ compile_flags |= G_REGEX_ANCHORED;
if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
- pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
+ compile_flags |= G_REGEX_DOLLAR_ENDONLY;
if (pcre2_flags & PCRE2_UNGREEDY)
- pcre1_flags |= G_REGEX_UNGREEDY;
+ compile_flags |= G_REGEX_UNGREEDY;
if (!(pcre2_flags & PCRE2_UTF))
- pcre1_flags |= G_REGEX_RAW;
+ compile_flags |= G_REGEX_RAW;
if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
- pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
+ compile_flags |= G_REGEX_NO_AUTO_CAPTURE;
if (pcre2_flags & PCRE2_FIRSTLINE)
- pcre1_flags |= G_REGEX_FIRSTLINE;
+ compile_flags |= G_REGEX_FIRSTLINE;
if (pcre2_flags & PCRE2_DUPNAMES)
- pcre1_flags |= G_REGEX_DUPNAMES;
- if (pcre2_flags & PCRE2_NEWLINE_CR)
- pcre1_flags |= G_REGEX_NEWLINE_CR;
- if (pcre2_flags & PCRE2_NEWLINE_LF)
- pcre1_flags |= G_REGEX_NEWLINE_LF;
- /* Check for exact match for a composite flag */
- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
- pcre1_flags |= G_REGEX_NEWLINE_CRLF;
- /* Check for exact match for a composite flag */
- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
- pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
- if (pcre2_flags & PCRE2_BSR_ANYCRLF)
- pcre1_flags |= G_REGEX_BSR_ANYCRLF;
-
- return pcre1_flags;
+ compile_flags |= G_REGEX_DUPNAMES;
+
+ return compile_flags & G_REGEX_COMPILE_MASK;
}
-static gint
-map_to_pcre1_match_flags (gint pcre2_flags)
+static GRegexMatchFlags
+g_regex_match_flags_from_pcre2 (uint32_t pcre2_flags)
{
- /* Maps match flags from pcre2 to pcre1 values
- */
- gint pcre1_flags = 0;
-
- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
- return pcre2_flags;
+ GRegexMatchFlags match_flags = G_REGEX_MATCH_DEFAULT;
if (pcre2_flags & PCRE2_ANCHORED)
- pcre1_flags |= G_REGEX_MATCH_ANCHORED;
+ match_flags |= G_REGEX_MATCH_ANCHORED;
if (pcre2_flags & PCRE2_NOTBOL)
- pcre1_flags |= G_REGEX_MATCH_NOTBOL;
+ match_flags |= G_REGEX_MATCH_NOTBOL;
if (pcre2_flags & PCRE2_NOTEOL)
- pcre1_flags |= G_REGEX_MATCH_NOTEOL;
+ match_flags |= G_REGEX_MATCH_NOTEOL;
if (pcre2_flags & PCRE2_NOTEMPTY)
- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
- if (pcre2_flags & PCRE2_NEWLINE_CR)
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
- if (pcre2_flags & PCRE2_NEWLINE_LF)
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
- /* Check for exact match for a composite flag */
- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
- if (pcre2_flags & PCRE2_NEWLINE_ANY)
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
- /* Check for exact match for a composite flag */
- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
- if (pcre2_flags & PCRE2_BSR_ANYCRLF)
- pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
- if (pcre2_flags & PCRE2_BSR_UNICODE)
- pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
+ match_flags |= G_REGEX_MATCH_NOTEMPTY;
if (pcre2_flags & PCRE2_PARTIAL_SOFT)
- pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
+ match_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
if (pcre2_flags & PCRE2_PARTIAL_HARD)
- pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
+ match_flags |= G_REGEX_MATCH_PARTIAL_HARD;
if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
+ match_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
+
+ return (match_flags & G_REGEX_MATCH_MASK);
+}
+
+static uint32_t
+get_pcre2_newline_compile_options (GRegexCompileFlags compile_flags)
+{
+ compile_flags &= G_REGEX_COMPILE_NEWLINE_MASK;
+
+ switch (compile_flags)
+ {
+ case G_REGEX_NEWLINE_CR:
+ return PCRE2_NEWLINE_CR;
+ case G_REGEX_NEWLINE_LF:
+ return PCRE2_NEWLINE_LF;
+ case G_REGEX_NEWLINE_CRLF:
+ return PCRE2_NEWLINE_CRLF;
+ case G_REGEX_NEWLINE_ANYCRLF:
+ return PCRE2_NEWLINE_ANYCRLF;
+ default:
+ if (compile_flags != 0)
+ return 0;
+
+ return PCRE2_NEWLINE_ANY;
+ }
+}
+
+static uint32_t
+get_pcre2_newline_match_options (GRegexMatchFlags match_flags)
+{
+ switch (match_flags & G_REGEX_MATCH_NEWLINE_MASK)
+ {
+ case G_REGEX_MATCH_NEWLINE_CR:
+ return PCRE2_NEWLINE_CR;
+ case G_REGEX_MATCH_NEWLINE_LF:
+ return PCRE2_NEWLINE_LF;
+ case G_REGEX_MATCH_NEWLINE_CRLF:
+ return PCRE2_NEWLINE_CRLF;
+ case G_REGEX_MATCH_NEWLINE_ANY:
+ return PCRE2_NEWLINE_ANY;
+ case G_REGEX_MATCH_NEWLINE_ANYCRLF:
+ return PCRE2_NEWLINE_ANYCRLF;
+ default:
+ return 0;
+ }
+}
+
+static uint32_t
+get_pcre2_bsr_compile_options (GRegexCompileFlags compile_flags)
+{
+ if (compile_flags & G_REGEX_BSR_ANYCRLF)
+ return PCRE2_BSR_ANYCRLF;
- return pcre1_flags;
+ return PCRE2_BSR_UNICODE;
+}
+
+static uint32_t
+get_pcre2_bsr_match_options (GRegexMatchFlags match_flags)
+{
+ if (match_flags & G_REGEX_MATCH_BSR_ANYCRLF)
+ return PCRE2_BSR_ANYCRLF;
+
+ if (match_flags & G_REGEX_MATCH_BSR_ANY)
+ return PCRE2_BSR_UNICODE;
+
+ return 0;
}
static const gchar *
@@ -744,12 +774,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
/* GMatchInfo */
static GMatchInfo *
-match_info_new (const GRegex *regex,
- const gchar *string,
- gint string_len,
- gint start_position,
- gint match_options,
- gboolean is_dfa)
+match_info_new (const GRegex *regex,
+ const gchar *string,
+ gint string_len,
+ gint start_position,
+ GRegexMatchFlags match_options,
+ gboolean is_dfa)
{
GMatchInfo *match_info;
@@ -763,7 +793,8 @@ match_info_new (const GRegex *regex,
match_info->string_len = string_len;
match_info->matches = PCRE2_ERROR_NOMATCH;
match_info->pos = start_position;
- match_info->match_opts = match_options;
+ match_info->match_opts =
+ get_pcre2_match_options (match_options, regex->orig_compile_opts);
pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
&match_info->n_subpatterns);
@@ -824,8 +855,8 @@ recalc_match_offsets (GMatchInfo *match_info,
}
static void
-enable_jit_with_match_options (GRegex *regex,
- GRegexMatchFlags match_options)
+enable_jit_with_match_options (GRegex *regex,
+ uint32_t match_options)
{
gint old_jit_options, new_jit_options, retval;
@@ -1011,7 +1042,7 @@ g_match_info_next (GMatchInfo *match_info,
return FALSE;
}
- opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
+ opts = match_info->regex->match_opts | match_info->match_opts;
enable_jit_with_match_options (match_info->regex, opts);
if (match_info->regex->jit_status == JIT_STATUS_ENABLED)
@@ -1020,7 +1051,7 @@ g_match_info_next (GMatchInfo *match_info,
(PCRE2_SPTR8) match_info->string,
match_info->string_len,
match_info->pos,
- opts & ~G_REGEX_FLAGS_CONVERTED,
+ opts,
match_info->match_data,
match_info->match_context);
}
@@ -1030,7 +1061,7 @@ g_match_info_next (GMatchInfo *match_info,
(PCRE2_SPTR8) match_info->string,
match_info->string_len,
match_info->pos,
- opts & ~G_REGEX_FLAGS_CONVERTED,
+ opts,
match_info->match_data,
match_info->match_context);
}
@@ -1565,14 +1596,14 @@ g_regex_unref (GRegex *regex)
}
}
-/*
- * @match_options: (inout) (optional):
- */
-static pcre2_code *regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error);
+static pcre2_code * regex_compile (const gchar *pattern,
+ uint32_t compile_options,
+ uint32_t newline_options,
+ uint32_t bsr_options,
+ GError **error);
+
+static uint32_t get_pcre2_inline_compile_options (pcre2_code *re,
+ uint32_t compile_options);
/**
* g_regex_new:
@@ -1598,11 +1629,10 @@ g_regex_new (const gchar *pattern,
GRegex *regex;
pcre2_code *re;
static gsize initialised = 0;
- GRegexCompileFlags orig_compile_opts;
-
- orig_compile_opts = compile_options;
- compile_options = map_to_pcre2_compile_flags (compile_options);
- match_options = map_to_pcre2_match_flags (match_options);
+ uint32_t pcre_compile_options;
+ uint32_t pcre_match_options;
+ uint32_t newline_options;
+ uint32_t bsr_options;
g_return_val_if_fail (pattern != NULL, NULL);
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
@@ -1620,113 +1650,97 @@ g_regex_new (const gchar *pattern,
g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
}
- if (G_UNLIKELY (initialised != 1))
+ if (G_UNLIKELY (initialised != 1))
{
g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE,
_("PCRE library is compiled with incompatible options"));
return NULL;
}
- switch (compile_options & G_REGEX_NEWLINE_MASK)
+ pcre_compile_options = get_pcre2_compile_options (compile_options);
+ pcre_match_options = get_pcre2_match_options (match_options, compile_options);
+
+ newline_options = get_pcre2_newline_match_options (match_options);
+ if (newline_options == 0)
+ newline_options = get_pcre2_newline_compile_options (compile_options);
+
+ if (newline_options == 0)
{
- case 0: /* PCRE2_NEWLINE_ANY */
- case PCRE2_NEWLINE_CR:
- case PCRE2_NEWLINE_LF:
- case PCRE2_NEWLINE_CRLF:
- case PCRE2_NEWLINE_ANYCRLF:
- break;
- default:
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
"Invalid newline flags");
return NULL;
}
- re = regex_compile (pattern, compile_options, &compile_options,
- &match_options, error);
+ bsr_options = get_pcre2_bsr_match_options (match_options);
+ if (!bsr_options)
+ bsr_options = get_pcre2_bsr_compile_options (compile_options);
+
+ re = regex_compile (pattern, pcre_compile_options,
+ newline_options, bsr_options, error);
if (re == NULL)
return NULL;
+ pcre_compile_options |=
+ get_pcre2_inline_compile_options (re, pcre_compile_options);
+
regex = g_new0 (GRegex, 1);
regex->ref_count = 1;
regex->pattern = g_strdup (pattern);
regex->pcre_re = re;
- regex->compile_opts = compile_options;
- regex->orig_compile_opts = orig_compile_opts;
- regex->match_opts = match_options;
+ regex->compile_opts = pcre_compile_options;
+ regex->orig_compile_opts = compile_options;
+ regex->match_opts = pcre_match_options;
+ regex->orig_match_opts = match_options;
enable_jit_with_match_options (regex, regex->match_opts);
return regex;
}
-static gint
-extract_newline_options (const GRegexCompileFlags compile_options,
- const GRegexMatchFlags *match_options)
-{
- gint newline_options = PCRE2_NEWLINE_ANY;
-
- if (compile_options & G_REGEX_NEWLINE_MASK)
- newline_options = compile_options & G_REGEX_NEWLINE_MASK;
- if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
- newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
-
- return newline_options;
-}
-
-static gint
-extract_bsr_options (const GRegexCompileFlags compile_options,
- const GRegexMatchFlags *match_options)
-{
- gint bsr_options = PCRE2_BSR_UNICODE;
-
- if (compile_options & PCRE2_BSR_ANYCRLF)
- bsr_options = PCRE2_BSR_ANYCRLF;
- if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
- bsr_options = PCRE2_BSR_ANYCRLF;
- if (match_options && *match_options & PCRE2_BSR_UNICODE)
- bsr_options = PCRE2_BSR_UNICODE;
-
- return bsr_options;
-}
-
static pcre2_code *
-regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error)
+regex_compile (const gchar *pattern,
+ uint32_t compile_options,
+ uint32_t newline_options,
+ uint32_t bsr_options,
+ GError **error)
{
pcre2_code *re;
pcre2_compile_context *context;
const gchar *errmsg;
PCRE2_SIZE erroffset;
gint errcode;
- GRegexCompileFlags nonpcre_compile_options;
- uint32_t pcre_compile_options;
-
- nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
context = pcre2_compile_context_create (NULL);
/* set newline options */
- pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
+ if (pcre2_set_newline (context, newline_options) != 0)
+ {
+ g_set_error (error, G_REGEX_ERROR,
+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
+ "Invalid newline flags");
+ pcre2_compile_context_free (context);
+ return NULL;
+ }
/* set bsr options */
- pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
+ if (pcre2_set_bsr (context, bsr_options) != 0)
+ {
+ g_set_error (error, G_REGEX_ERROR,
+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
+ "Invalid BSR flags");
+ pcre2_compile_context_free (context);
+ return NULL;
+ }
/* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
if (compile_options & PCRE2_UTF)
- {
- compile_options |= PCRE2_NO_UTF_CHECK;
- if (match_options != NULL)
- *match_options |= PCRE2_NO_UTF_CHECK;
- }
+ compile_options |= PCRE2_NO_UTF_CHECK;
compile_options |= PCRE2_UCP;
/* compile the pattern */
re = pcre2_compile ((PCRE2_SPTR8) pattern,
PCRE2_ZERO_TERMINATED,
- compile_options & ~G_REGEX_FLAGS_CONVERTED,
+ compile_options,
&errcode,
&erroffset,
context);
@@ -1757,16 +1771,22 @@ regex_compile (const gchar *pattern,
return NULL;
}
+ return re;
+}
+
+static uint32_t
+get_pcre2_inline_compile_options (pcre2_code *re,
+ uint32_t compile_options)
+{
+ uint32_t pcre_compile_options;
+ uint32_t nonpcre_compile_options;
+
/* For options set at the beginning of the pattern, pcre puts them into
* compile options, e.g. "(?i)foo" will make the pcre structure store
* PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
+ nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
- compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
-
- /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
- if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
- compile_options &= ~PCRE2_NEWLINE_ANY;
-
+ compile_options = pcre_compile_options & G_REGEX_PCRE2_COMPILE_MASK;
compile_options |= nonpcre_compile_options;
if (!(compile_options & PCRE2_DUPNAMES))
@@ -1777,10 +1797,7 @@ regex_compile (const gchar *pattern,
compile_options |= PCRE2_DUPNAMES;
}
- if (compile_options_out != 0)
- *compile_options_out = compile_options;
-
- return re;
+ return compile_options;
}
/**
@@ -1942,7 +1959,7 @@ g_regex_get_compile_flags (const GRegex *regex)
break;
}
- return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
+ return g_regex_compile_flags_from_pcre2 (regex->compile_opts) | extra_flags;
}
/**
@@ -1958,9 +1975,15 @@ g_regex_get_compile_flags (const GRegex *regex)
GRegexMatchFlags
g_regex_get_match_flags (const GRegex *regex)
{
+ uint32_t flags;
+
g_return_val_if_fail (regex != NULL, 0);
- return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
+ flags = g_regex_match_flags_from_pcre2 (regex->match_opts);
+ flags |= (regex->orig_match_opts & G_REGEX_MATCH_NEWLINE_MASK);
+ flags |= (regex->orig_match_opts & (G_REGEX_MATCH_BSR_ANY | G_REGEX_MATCH_BSR_ANYCRLF));
+
+ return flags;
}
/**
@@ -1994,9 +2017,6 @@ g_regex_match_simple (const gchar *pattern,
GRegex *regex;
gboolean result;
- compile_options = map_to_pcre2_compile_flags (compile_options);
- match_options = map_to_pcre2_match_flags (match_options);
-
regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
if (!regex)
return FALSE;
@@ -2064,8 +2084,6 @@ g_regex_match (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
- match_options = map_to_pcre2_match_flags (match_options);
-
return g_regex_match_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -2149,8 +2167,6 @@ g_regex_match_full (const GRegex *regex,
GMatchInfo *info;
gboolean match_ok;
- match_options = map_to_pcre2_match_flags (match_options);
-
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
g_return_val_if_fail (start_position >= 0, FALSE);
@@ -2201,8 +2217,6 @@ g_regex_match_all (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
- match_options = map_to_pcre2_match_flags (match_options);
-
return g_regex_match_all_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -2274,8 +2288,8 @@ g_regex_match_all_full (const GRegex *regex,
gboolean done;
pcre2_code *pcre_re;
gboolean retval;
-
- match_options = map_to_pcre2_match_flags (match_options);
+ uint32_t newline_options;
+ uint32_t bsr_options;
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
@@ -2283,6 +2297,14 @@ g_regex_match_all_full (const GRegex *regex,
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
+ newline_options = get_pcre2_newline_match_options (match_options);
+ if (!newline_options)
+ newline_options = get_pcre2_newline_compile_options (regex->orig_compile_opts);
+
+ bsr_options = get_pcre2_bsr_match_options (match_options);
+ if (!bsr_options)
+ bsr_options = get_pcre2_bsr_compile_options (regex->orig_compile_opts);
+
/* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an
* optimization for normal regex matching, but results in omitting some
* shorter matches here, and an observable behaviour change.
@@ -2291,7 +2313,7 @@ g_regex_match_all_full (const GRegex *regex,
* codesearch.debian.net, so don't bother caching the recompiled RE. */
pcre_re = regex_compile (regex->pattern,
regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
- NULL, NULL, error);
+ newline_options, bsr_options, error);
if (pcre_re == NULL)
return FALSE;
@@ -2305,7 +2327,7 @@ g_regex_match_all_full (const GRegex *regex,
info->matches = pcre2_dfa_match (pcre_re,
(PCRE2_SPTR8) info->string, info->string_len,
info->pos,
- (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
+ (regex->match_opts | info->match_opts),
info->match_data,
info->match_context,
info->workspace, info->n_workspace);
@@ -2438,9 +2460,6 @@ g_regex_split_simple (const gchar *pattern,
GRegex *regex;
gchar **result;
- compile_options = map_to_pcre2_compile_flags (compile_options);
- match_options = map_to_pcre2_match_flags (match_options);
-
regex = g_regex_new (pattern, compile_options, 0, NULL);
if (!regex)
return NULL;
@@ -2484,8 +2503,6 @@ g_regex_split (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options)
{
- match_options = map_to_pcre2_match_flags (match_options);
-
return g_regex_split_full (regex, string, -1, 0,
match_options, 0, NULL);
}
@@ -2550,8 +2567,6 @@ g_regex_split_full (const GRegex *regex,
/* the returned array of char **s */
gchar **string_list;
- match_options = map_to_pcre2_match_flags (match_options);
-
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -3176,8 +3191,6 @@ g_regex_replace (const GRegex *regex,
GList *list;
GError *tmp_error = NULL;
- match_options = map_to_pcre2_match_flags (match_options);
-
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -3247,8 +3260,6 @@ g_regex_replace_literal (const GRegex *regex,
GRegexMatchFlags match_options,
GError **error)
{
- match_options = map_to_pcre2_match_flags (match_options);
-
g_return_val_if_fail (replacement != NULL, NULL);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
@@ -3337,8 +3348,6 @@ g_regex_replace_eval (const GRegex *regex,
gboolean done = FALSE;
GError *tmp_error = NULL;
- match_options = map_to_pcre2_match_flags (match_options);
-
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index abf27e619..ce946d059 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -1,6 +1,7 @@
/*
* Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org>
* Copyright (C) 2010 Red Hat, Inc.
+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
@@ -2355,7 +2356,13 @@ main (int argc, char *argv[])
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
+ TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY,
+ G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY);
+ TEST_NEW_CHECK_FLAGS ("a", 0, G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF,
+ G_REGEX_NEWLINE_ANYCRLF | G_REGEX_BSR_ANYCRLF,
+ G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF);
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
+ TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0);
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
@@ -2561,6 +2568,8 @@ main (int argc, char *argv[])
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, 0, "a\rb\rc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_LF, 0, "a\rb\rc", -1, 0, 0, FALSE);
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CRLF, 0, "a\rb\rc", -1, 0, 0, FALSE);
+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\nc", -1, 0, 0, TRUE);
+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\rc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\nb\nc", -1, 0, 0, FALSE);
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\nb\nc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\nb\nc", -1, 0, 0, FALSE);
@@ -2570,6 +2579,8 @@ main (int argc, char *argv[])
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\rb\rc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\rb\rc", -1, 0, 0, FALSE);
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\rc", -1, 0, 0, TRUE);
+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\nc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\nb\nc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\rb\rc", -1, 0, 0, TRUE);
@@ -2579,6 +2590,13 @@ main (int argc, char *argv[])
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\r\nb\r\nc", -1, 0, 0, TRUE);
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
+ /* See https://gitlab.gnome.org/GNOME/glib/-/issues/2729#note_1544130 */
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANY, "a", -1, 0, 0, TRUE);
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a", -1, 0, 0, TRUE);
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE);
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a", -1, 0, 0, TRUE);
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a", -1, 0, 0, TRUE);
+
TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);