summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml8
-rw-r--r--.gitlab-ci/coverity.Dockerfile2
-rw-r--r--.gitlab-ci/debian-stable.Dockerfile1
-rw-r--r--.gitlab-ci/fedora.Dockerfile1
-rw-r--r--.gitlab-ci/mingw.Dockerfile1
-rwxr-xr-x.gitlab-ci/test-msys2.sh1
-rw-r--r--docs/reference/glib/regex-syntax.xml46
-rw-r--r--glib/gregex.c1113
-rw-r--r--glib/gregex.h11
-rw-r--r--glib/meson.build10
-rw-r--r--glib/tests/meson.build4
-rw-r--r--glib/tests/regex.c175
-rw-r--r--meson.build41
-rw-r--r--po/sk.po2
-rw-r--r--subprojects/pcre.wrap11
-rw-r--r--subprojects/pcre2.wrap9
16 files changed, 828 insertions, 608 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8fe839a75..aeeabc36e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,10 +10,10 @@ cache:
- _ccache/
variables:
- FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v17"
- COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v6"
- DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v12"
- MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v8"
+ FEDORA_IMAGE: "registry.gitlab.gnome.org/gnome/glib/fedora:v18"
+ COVERITY_IMAGE: "registry.gitlab.gnome.org/gnome/glib/coverity:v7"
+ DEBIAN_IMAGE: "registry.gitlab.gnome.org/gnome/glib/debian-stable:v13"
+ MINGW_IMAGE: "registry.gitlab.gnome.org/gnome/glib/mingw:v9"
MESON_TEST_TIMEOUT_MULTIPLIER: 2
G_MESSAGES_DEBUG: all
MESON_COMMON_OPTIONS: "--buildtype debug --wrap-mode=nodownload --fatal-meson-warnings"
diff --git a/.gitlab-ci/coverity.Dockerfile b/.gitlab-ci/coverity.Dockerfile
index 5ee757e2d..241861ec9 100644
--- a/.gitlab-ci/coverity.Dockerfile
+++ b/.gitlab-ci/coverity.Dockerfile
@@ -1,4 +1,4 @@
-FROM registry.gitlab.gnome.org/gnome/glib/fedora:v17
+FROM registry.gitlab.gnome.org/gnome/glib/fedora:v18
ARG COVERITY_SCAN_PROJECT_NAME
ARG COVERITY_SCAN_TOKEN
diff --git a/.gitlab-ci/debian-stable.Dockerfile b/.gitlab-ci/debian-stable.Dockerfile
index 0622aa26b..5dcf2d3c2 100644
--- a/.gitlab-ci/debian-stable.Dockerfile
+++ b/.gitlab-ci/debian-stable.Dockerfile
@@ -26,7 +26,6 @@ RUN apt-get update -qq && apt-get install --no-install-recommends -qq -y \
libelf-dev \
libffi-dev \
libmount-dev \
- libpcre3-dev \
libpcre2-dev \
libselinux1-dev \
libxml2-utils \
diff --git a/.gitlab-ci/fedora.Dockerfile b/.gitlab-ci/fedora.Dockerfile
index b83a34c8c..2506f9386 100644
--- a/.gitlab-ci/fedora.Dockerfile
+++ b/.gitlab-ci/fedora.Dockerfile
@@ -43,7 +43,6 @@ RUN dnf -y update \
libxslt \
ncurses-compat-libs \
ninja-build \
- pcre-devel \
pcre2-devel \
"python3-dbusmock >= 0.18.3-2" \
python3-pip \
diff --git a/.gitlab-ci/mingw.Dockerfile b/.gitlab-ci/mingw.Dockerfile
index 5fe5e67de..b109122fc 100644
--- a/.gitlab-ci/mingw.Dockerfile
+++ b/.gitlab-ci/mingw.Dockerfile
@@ -43,7 +43,6 @@ RUN dnf -y install \
mingw64-zlib \
ncurses-compat-libs \
ninja-build \
- pcre-devel \
pcre2-devel \
python3 \
python3-pip \
diff --git a/.gitlab-ci/test-msys2.sh b/.gitlab-ci/test-msys2.sh
index d891ee008..a345fda6a 100755
--- a/.gitlab-ci/test-msys2.sh
+++ b/.gitlab-ci/test-msys2.sh
@@ -18,7 +18,6 @@ pacman --noconfirm -S --needed \
mingw-w64-$MSYS2_ARCH-gettext \
mingw-w64-$MSYS2_ARCH-libffi \
mingw-w64-$MSYS2_ARCH-meson \
- mingw-w64-$MSYS2_ARCH-pcre \
mingw-w64-$MSYS2_ARCH-pcre2 \
mingw-w64-$MSYS2_ARCH-python3 \
mingw-w64-$MSYS2_ARCH-python-pip \
diff --git a/docs/reference/glib/regex-syntax.xml b/docs/reference/glib/regex-syntax.xml
index 5dd92917a..0b413aa02 100644
--- a/docs/reference/glib/regex-syntax.xml
+++ b/docs/reference/glib/regex-syntax.xml
@@ -2442,52 +2442,6 @@ processing option does not affect the called subpattern.
</para>
</refsect1>
-<!-- Callouts are not supported by GRegex
-<refsect1>
-<title>Callouts</title>
-<para>
-Perl has a feature whereby using the sequence (?{...}) causes arbitrary
-Perl code to be obeyed in the middle of matching a regular expression.
-This makes it possible, amongst other things, to extract different substrings that match the same pair of parentheses when there is a repetition.
-</para>
-
-<para>
-PCRE provides a similar feature, but of course it cannot obey arbitrary
-Perl code. The feature is called "callout". The caller of PCRE provides
-an external function by putting its entry point in the global variable
-pcre_callout. By default, this variable contains NULL, which disables
-all calling out.
-</para>
-
-<para>
-Within a regular expression, (?C) indicates the points at which the
-external function is to be called. If you want to identify different
-callout points, you can put a number less than 256 after the letter C.
-The default value is zero. For example, this pattern has two callout
-points:
-</para>
-
-<programlisting>
-(?C1)abc(?C2)def
-</programlisting>
-
-<para>
-If the PCRE_AUTO_CALLOUT flag is passed to pcre_compile(), callouts are
-automatically installed before each item in the pattern. They are all
-numbered 255.
-</para>
-
-<para>
-During matching, when PCRE reaches a callout point (and pcre_callout is
-set), the external function is called. It is provided with the number
-of the callout, the position in the pattern, and, optionally, one item
-of data originally supplied by the caller of pcre_exec(). The callout
-function may cause matching to proceed, to backtrack, or to fail altogether. A complete description of the interface to the callout function
-is given in the pcrecallout documentation.
-</para>
-</refsect1>
--->
-
<refsect1>
<title>Copyright</title>
<para>
diff --git a/glib/gregex.c b/glib/gregex.c
index 5254d8d28..55672249c 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -24,7 +24,8 @@
#include <string.h>
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#include "gtypes.h"
#include "gregex.h"
@@ -109,87 +110,63 @@
* library written by Philip Hazel.
*/
+/* Signifies that flags have already been converted from pcre1 to pcre2. The
+ * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
+ * but it is not used in gregex, so we can reuse it for this flag.
+ */
+#define G_REGEX_FLAGS_CONVERTED 0x04000000u
/* Mask of all the possible values for GRegexCompileFlags. */
-#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \
- G_REGEX_MULTILINE | \
- G_REGEX_DOTALL | \
- G_REGEX_EXTENDED | \
- G_REGEX_ANCHORED | \
- G_REGEX_DOLLAR_ENDONLY | \
- G_REGEX_UNGREEDY | \
- G_REGEX_RAW | \
- G_REGEX_NO_AUTO_CAPTURE | \
- G_REGEX_OPTIMIZE | \
- G_REGEX_FIRSTLINE | \
- G_REGEX_DUPNAMES | \
- G_REGEX_NEWLINE_CR | \
- G_REGEX_NEWLINE_LF | \
- G_REGEX_NEWLINE_CRLF | \
- G_REGEX_NEWLINE_ANYCRLF | \
- G_REGEX_BSR_ANYCRLF | \
- G_REGEX_JAVASCRIPT_COMPAT)
+#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \
+ PCRE2_MULTILINE | \
+ PCRE2_DOTALL | \
+ PCRE2_EXTENDED | \
+ PCRE2_ANCHORED | \
+ PCRE2_DOLLAR_ENDONLY | \
+ PCRE2_UNGREEDY | \
+ PCRE2_UTF | \
+ PCRE2_NO_AUTO_CAPTURE | \
+ PCRE2_FIRSTLINE | \
+ PCRE2_DUPNAMES | \
+ PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_BSR_ANYCRLF | \
+ G_REGEX_FLAGS_CONVERTED)
/* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
-#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \
- G_REGEX_OPTIMIZE)
+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
+ G_REGEX_FLAGS_CONVERTED)
/* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
- G_REGEX_MATCH_NOTBOL | \
- G_REGEX_MATCH_NOTEOL | \
- G_REGEX_MATCH_NOTEMPTY | \
- G_REGEX_MATCH_PARTIAL | \
- G_REGEX_MATCH_NEWLINE_CR | \
- G_REGEX_MATCH_NEWLINE_LF | \
- G_REGEX_MATCH_NEWLINE_CRLF | \
- G_REGEX_MATCH_NEWLINE_ANY | \
- G_REGEX_MATCH_NEWLINE_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANY | \
- G_REGEX_MATCH_PARTIAL_SOFT | \
- G_REGEX_MATCH_PARTIAL_HARD | \
- G_REGEX_MATCH_NOTEMPTY_ATSTART)
-
-/* we rely on these flags having the same values */
-G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
-G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
-G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
-G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
-G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
-G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
-G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
-G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE);
-G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
-
-/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
- * it should be ok to reuse them for different things.
- */
-G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK);
-G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8);
+#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \
+ PCRE2_NOTBOL | \
+ PCRE2_NOTEOL | \
+ PCRE2_NOTEMPTY | \
+ PCRE2_PARTIAL_SOFT | \
+ PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANY | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_BSR_ANYCRLF | \
+ PCRE2_BSR_UNICODE | \
+ PCRE2_PARTIAL_SOFT | \
+ PCRE2_PARTIAL_HARD | \
+ PCRE2_NOTEMPTY_ATSTART | \
+ G_REGEX_FLAGS_CONVERTED)
+
+#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF)
+
+#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_NEWLINE_ANY)
/* if the string is in UTF-8 use g_utf8_ functions, else
* just use +/- 1. */
@@ -210,24 +187,26 @@ struct _GMatchInfo
gint pos; /* position in the string where last match left off */
gint n_offsets; /* number of offsets */
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 4,5 etc */
- gint *workspace; /* workspace for pcre_dfa_exec() */
+ gint *workspace; /* workspace for pcre2_dfa_match() */
gint n_workspace; /* number of workspace elements */
const gchar *string; /* string passed to the match function */
gssize string_len; /* length of string, in bytes */
+ pcre2_match_context *match_context;
+ pcre2_match_data *match_data;
};
struct _GRegex
{
gint ref_count; /* the ref count for the immutable part (atomic) */
gchar *pattern; /* the pattern */
- pcre *pcre_re; /* compiled form of the pattern */
- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */
+ pcre2_code *pcre_re; /* compiled form of the pattern */
+ GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
+ GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
GRegexMatchFlags match_opts; /* options used at match time on the regex */
- pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */
};
/* TRUE if ret is an error code, FALSE otherwise. */
-#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL)
+#define IS_PCRE2_ERROR(ret) ((ret) < PCRE2_ERROR_NOMATCH && (ret) != PCRE2_ERROR_PARTIAL)
typedef struct _InterpolationData InterpolationData;
static gboolean interpolation_list_needs_match (GList *list);
@@ -238,70 +217,249 @@ static GList *split_replacement (const gchar *replacement,
GError **error);
static void free_interpolation_data (InterpolationData *data);
+static gint
+map_to_pcre2_compile_flags (gint pcre1_flags)
+{
+ /* Maps compile flags from pcre1 to pcre2 values
+ */
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
+
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
+ return pcre1_flags;
+
+ if (pcre1_flags & G_REGEX_CASELESS)
+ pcre2_flags |= PCRE2_CASELESS;
+ if (pcre1_flags & G_REGEX_MULTILINE)
+ pcre2_flags |= PCRE2_MULTILINE;
+ if (pcre1_flags & G_REGEX_DOTALL)
+ pcre2_flags |= PCRE2_DOTALL;
+ if (pcre1_flags & G_REGEX_EXTENDED)
+ pcre2_flags |= PCRE2_EXTENDED;
+ if (pcre1_flags & G_REGEX_ANCHORED)
+ pcre2_flags |= PCRE2_ANCHORED;
+ if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
+ pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
+ if (pcre1_flags & G_REGEX_UNGREEDY)
+ pcre2_flags |= PCRE2_UNGREEDY;
+ if (!(pcre1_flags & G_REGEX_RAW))
+ pcre2_flags |= PCRE2_UTF;
+ if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
+ pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
+ if (pcre1_flags & G_REGEX_FIRSTLINE)
+ pcre2_flags |= PCRE2_FIRSTLINE;
+ if (pcre1_flags & G_REGEX_DUPNAMES)
+ pcre2_flags |= PCRE2_DUPNAMES;
+ if (pcre1_flags & G_REGEX_NEWLINE_CR)
+ pcre2_flags |= PCRE2_NEWLINE_CR;
+ if (pcre1_flags & G_REGEX_NEWLINE_LF)
+ pcre2_flags |= PCRE2_NEWLINE_LF;
+ /* Check for exact match for a composite flag */
+ if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
+ /* Check for exact match for a composite flag */
+ if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
+ if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
+
+ /* these are not available in pcre2 */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+ if (pcre1_flags & G_REGEX_OPTIMIZE)
+ pcre2_flags |= 0;
+ if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
+ pcre2_flags |= 0;
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+ return pcre2_flags;
+}
+
+static gint
+map_to_pcre2_match_flags (gint pcre1_flags)
+{
+ /* Maps match flags from pcre1 to pcre2 values
+ */
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
+
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
+ return pcre1_flags;
+
+ if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
+ pcre2_flags |= PCRE2_ANCHORED;
+ if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
+ pcre2_flags |= PCRE2_NOTBOL;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
+ pcre2_flags |= PCRE2_NOTEOL;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
+ pcre2_flags |= PCRE2_NOTEMPTY;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL)
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
+ pcre2_flags |= PCRE2_NEWLINE_CR;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
+ pcre2_flags |= PCRE2_NEWLINE_LF;
+ /* Check for exact match for a composite flag */
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
+ pcre2_flags |= PCRE2_NEWLINE_ANY;
+ /* Check for exact match for a composite flag */
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
+ pcre2_flags |= PCRE2_BSR_UNICODE;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
+ pcre2_flags |= PCRE2_PARTIAL_HARD;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
+ pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
+
+ return pcre2_flags;
+}
+
+static gint
+map_to_pcre1_compile_flags (gint pcre2_flags)
+{
+ /* Maps compile flags from pcre2 to pcre1 values
+ */
+ gint pcre1_flags = 0;
+
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
+ return pcre2_flags;
+
+ if (pcre2_flags & PCRE2_CASELESS)
+ pcre1_flags |= G_REGEX_CASELESS;
+ if (pcre2_flags & PCRE2_MULTILINE)
+ pcre1_flags |= G_REGEX_MULTILINE;
+ if (pcre2_flags & PCRE2_DOTALL)
+ pcre1_flags |= G_REGEX_DOTALL;
+ if (pcre2_flags & PCRE2_EXTENDED)
+ pcre1_flags |= G_REGEX_EXTENDED;
+ if (pcre2_flags & PCRE2_ANCHORED)
+ pcre1_flags |= G_REGEX_ANCHORED;
+ if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
+ pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
+ if (pcre2_flags & PCRE2_UNGREEDY)
+ pcre1_flags |= G_REGEX_UNGREEDY;
+ if (!(pcre2_flags & PCRE2_UTF))
+ pcre1_flags |= G_REGEX_RAW;
+ if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
+ pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
+ if (pcre2_flags & PCRE2_FIRSTLINE)
+ pcre1_flags |= G_REGEX_FIRSTLINE;
+ if (pcre2_flags & PCRE2_DUPNAMES)
+ pcre1_flags |= G_REGEX_DUPNAMES;
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
+ pcre1_flags |= G_REGEX_NEWLINE_CR;
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
+ pcre1_flags |= G_REGEX_NEWLINE_LF;
+ /* Check for exact match for a composite flag */
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
+ pcre1_flags |= G_REGEX_NEWLINE_CRLF;
+ /* Check for exact match for a composite flag */
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
+ pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
+ pcre1_flags |= G_REGEX_BSR_ANYCRLF;
+
+ return pcre1_flags;
+}
+
+static gint
+map_to_pcre1_match_flags (gint pcre2_flags)
+{
+ /* Maps match flags from pcre2 to pcre1 values
+ */
+ gint pcre1_flags = 0;
+
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
+ return pcre2_flags;
+
+ if (pcre2_flags & PCRE2_ANCHORED)
+ pcre1_flags |= G_REGEX_MATCH_ANCHORED;
+ if (pcre2_flags & PCRE2_NOTBOL)
+ pcre1_flags |= G_REGEX_MATCH_NOTBOL;
+ if (pcre2_flags & PCRE2_NOTEOL)
+ pcre1_flags |= G_REGEX_MATCH_NOTEOL;
+ if (pcre2_flags & PCRE2_NOTEMPTY)
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT)
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL;
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
+ /* Check for exact match for a composite flag */
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
+ if (pcre2_flags & PCRE2_NEWLINE_ANY)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
+ /* Check for exact match for a composite flag */
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_UNICODE)
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT)
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
+ if (pcre2_flags & PCRE2_PARTIAL_HARD)
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
+ if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
+
+ return pcre1_flags;
+}
static const gchar *
match_error (gint errcode)
{
switch (errcode)
{
- case PCRE_ERROR_NOMATCH:
+ case PCRE2_ERROR_NOMATCH:
/* not an error */
break;
- case PCRE_ERROR_NULL:
+ case PCRE2_ERROR_NULL:
/* NULL argument, this should not happen in GRegex */
g_warning ("A NULL argument was passed to PCRE");
break;
- case PCRE_ERROR_BADOPTION:
+ case PCRE2_ERROR_BADOPTION:
return "bad options";
- case PCRE_ERROR_BADMAGIC:
+ case PCRE2_ERROR_BADMAGIC:
return _("corrupted object");
- case PCRE_ERROR_UNKNOWN_OPCODE:
- return N_("internal error or corrupted object");
- case PCRE_ERROR_NOMEMORY:
+ case PCRE2_ERROR_NOMEMORY:
return _("out of memory");
- case PCRE_ERROR_NOSUBSTRING:
- /* not used by pcre_exec() */
+ case PCRE2_ERROR_NOSUBSTRING:
+ /* not used by pcre2_match() */
break;
- case PCRE_ERROR_MATCHLIMIT:
+ case PCRE2_ERROR_MATCHLIMIT:
return _("backtracking limit reached");
- case PCRE_ERROR_CALLOUT:
+ case PCRE2_ERROR_CALLOUT:
/* callouts are not implemented */
break;
- case PCRE_ERROR_BADUTF8:
- case PCRE_ERROR_BADUTF8_OFFSET:
+ case PCRE2_ERROR_BADUTFOFFSET:
/* we do not check if strings are valid */
break;
- case PCRE_ERROR_PARTIAL:
+ case PCRE2_ERROR_PARTIAL:
/* not an error */
break;
- case PCRE_ERROR_BADPARTIAL:
- return _("the pattern contains items not supported for partial matching");
- case PCRE_ERROR_INTERNAL:
+ case PCRE2_ERROR_INTERNAL:
return _("internal error");
- case PCRE_ERROR_BADCOUNT:
- /* negative ovecsize, this should not happen in GRegex */
- g_warning ("A negative ovecsize was passed to PCRE");
- break;
- case PCRE_ERROR_DFA_UITEM:
+ case PCRE2_ERROR_DFA_UITEM:
return _("the pattern contains items not supported for partial matching");
- case PCRE_ERROR_DFA_UCOND:
+ case PCRE2_ERROR_DFA_UCOND:
return _("back references as conditions are not supported for partial matching");
- case PCRE_ERROR_DFA_UMLIMIT:
- /* the match_field field is not used in GRegex */
- break;
- case PCRE_ERROR_DFA_WSSIZE:
+ case PCRE2_ERROR_DFA_WSSIZE:
/* handled expanding the workspace */
break;
- case PCRE_ERROR_DFA_RECURSE:
- case PCRE_ERROR_RECURSIONLIMIT:
+ case PCRE2_ERROR_DFA_RECURSE:
+ case PCRE2_ERROR_RECURSIONLIMIT:
return _("recursion limit reached");
- case PCRE_ERROR_BADNEWLINE:
- return _("invalid combination of newline flags");
- case PCRE_ERROR_BADOFFSET:
+ case PCRE2_ERROR_BADOFFSET:
return _("bad offset");
- case PCRE_ERROR_SHORTUTF8:
- return _("short utf8");
- case PCRE_ERROR_RECURSELOOP:
+ case PCRE2_ERROR_RECURSELOOP:
return _("recursion loop");
default:
break;
@@ -312,242 +470,263 @@ match_error (gint errcode)
static void
translate_compile_error (gint *errcode, const gchar **errmsg)
{
- /* Compile errors are created adding 100 to the error code returned
- * by PCRE.
- * If errcode is known we put the translatable error message in
- * erromsg. If errcode is unknown we put the generic
- * G_REGEX_ERROR_COMPILE error code in errcode and keep the
- * untranslated error message returned by PCRE.
+ /* If errcode is known we put the translatable error message in
+ * errmsg. If errcode is unknown we put the generic
+ * G_REGEX_ERROR_COMPILE error code in errcode.
* Note that there can be more PCRE errors with the same GRegexError
* and that some PCRE errors are useless for us.
*/
- *errcode += 100;
switch (*errcode)
{
- case G_REGEX_ERROR_STRAY_BACKSLASH:
+ case PCRE2_ERROR_END_BACKSLASH:
+ *errcode = G_REGEX_ERROR_STRAY_BACKSLASH;
*errmsg = _("\\ at end of pattern");
break;
- case G_REGEX_ERROR_MISSING_CONTROL_CHAR:
+ case PCRE2_ERROR_END_BACKSLASH_C:
+ *errcode = G_REGEX_ERROR_MISSING_CONTROL_CHAR;
*errmsg = _("\\c at end of pattern");
break;
- case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE:
+ case PCRE2_ERROR_UNKNOWN_ESCAPE:
+ case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
*errmsg = _("unrecognized character following \\");
break;
- case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER:
+ case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER:
+ *errcode = G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER;
*errmsg = _("numbers out of order in {} quantifier");
break;
- case G_REGEX_ERROR_QUANTIFIER_TOO_BIG:
+ case PCRE2_ERROR_QUANTIFIER_TOO_BIG:
+ *errcode = G_REGEX_ERROR_QUANTIFIER_TOO_BIG;
*errmsg = _("number too big in {} quantifier");
break;
- case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS:
+ case PCRE2_ERROR_MISSING_SQUARE_BRACKET:
+ *errcode = G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS;
*errmsg = _("missing terminating ] for character class");
break;
- case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS:
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS:
+ *errcode = G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS;
*errmsg = _("invalid escape sequence in character class");
break;
- case G_REGEX_ERROR_RANGE_OUT_OF_ORDER:
+ case PCRE2_ERROR_CLASS_RANGE_ORDER:
+ *errcode = G_REGEX_ERROR_RANGE_OUT_OF_ORDER;
*errmsg = _("range out of order in character class");
break;
- case G_REGEX_ERROR_NOTHING_TO_REPEAT:
+ case PCRE2_ERROR_QUANTIFIER_INVALID:
+ case PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT:
+ *errcode = G_REGEX_ERROR_NOTHING_TO_REPEAT;
*errmsg = _("nothing to repeat");
break;
- case 111: /* internal error: unexpected repeat */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("unexpected repeat");
- break;
- case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER:
+ case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
*errmsg = _("unrecognized character after (? or (?-");
break;
- case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS:
+ case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS:
+ *errcode = G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS;
*errmsg = _("POSIX named classes are supported only within a class");
break;
- case G_REGEX_ERROR_UNMATCHED_PARENTHESIS:
+ case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING:
+ *errcode = G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED;
+ *errmsg = _("POSIX collating elements are not supported");
+ break;
+ case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS:
+ case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS:
+ case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING:
+ *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
*errmsg = _("missing terminating )");
break;
- case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE:
+ case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE:
+ *errcode = G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE;
*errmsg = _("reference to non-existent subpattern");
break;
- case G_REGEX_ERROR_UNTERMINATED_COMMENT:
+ case PCRE2_ERROR_MISSING_COMMENT_CLOSING:
+ *errcode = G_REGEX_ERROR_UNTERMINATED_COMMENT;
*errmsg = _("missing ) after comment");
break;
- case G_REGEX_ERROR_EXPRESSION_TOO_LARGE:
+ case PCRE2_ERROR_PATTERN_TOO_LARGE:
+ *errcode = G_REGEX_ERROR_EXPRESSION_TOO_LARGE;
*errmsg = _("regular expression is too large");
break;
- case G_REGEX_ERROR_MEMORY_ERROR:
- *errmsg = _("failed to get memory");
- break;
- case 122: /* unmatched parentheses */
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
- *errmsg = _(") without opening (");
- break;
- case 123: /* internal error: code overflow */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("code overflow");
- break;
- case 124: /* "unrecognized character after (?<\0 */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
- *errmsg = _("unrecognized character after (?<");
+ case PCRE2_ERROR_MISSING_CONDITION_CLOSING:
+ *errcode = G_REGEX_ERROR_MALFORMED_CONDITION;
+ *errmsg = _("malformed number or name after (?(");
break;
- case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND:
+ case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH:
+ *errcode = G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND;
*errmsg = _("lookbehind assertion is not fixed length");
break;
- case G_REGEX_ERROR_MALFORMED_CONDITION:
- *errmsg = _("malformed number or name after (?(");
- break;
- case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES:
+ case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES:
+ *errcode = G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES;
*errmsg = _("conditional group contains more than two branches");
break;
- case G_REGEX_ERROR_ASSERTION_EXPECTED:
+ case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED:
+ *errcode = G_REGEX_ERROR_ASSERTION_EXPECTED;
*errmsg = _("assertion expected after (?(");
break;
- case 129:
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
- /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of)
- * sequences here, '(?-54' would be an example for the second group.
- */
- *errmsg = _("(?R or (?[+-]digits must be followed by )");
+ case PCRE2_ERROR_BAD_RELATIVE_REFERENCE:
+ *errcode = G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE;
+ *errmsg = _("a numbered reference must not be zero");
break;
- case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME:
+ case PCRE2_ERROR_UNKNOWN_POSIX_CLASS:
+ *errcode = G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME;
*errmsg = _("unknown POSIX class name");
break;
- case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED:
- *errmsg = _("POSIX collating elements are not supported");
- break;
- case G_REGEX_ERROR_HEX_CODE_TOO_LARGE:
+ case PCRE2_ERROR_CODE_POINT_TOO_BIG:
+ case PCRE2_ERROR_INVALID_HEXADECIMAL:
+ *errcode = G_REGEX_ERROR_HEX_CODE_TOO_LARGE;
*errmsg = _("character value in \\x{...} sequence is too large");
break;
- case G_REGEX_ERROR_INVALID_CONDITION:
- *errmsg = _("invalid condition (?(0)");
- break;
- case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND:
+ case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C:
+ *errcode = G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND;
*errmsg = _("\\C not allowed in lookbehind assertion");
break;
- case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */
- /* A number of Perl escapes are not handled by PCRE.
- * Therefore it explicitly raises ERR37.
- */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
- *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported");
- break;
- case G_REGEX_ERROR_INFINITE_LOOP:
- *errmsg = _("recursive call could loop indefinitely");
- break;
- case 141: /* unrecognized character after (?P\0 */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
- *errmsg = _("unrecognized character after (?P");
- break;
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR:
+ case PCRE2_ERROR_MISSING_NAME_TERMINATOR:
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR;
*errmsg = _("missing terminator in subpattern name");
break;
- case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME:
+ case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME:
+ *errcode = G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME;
*errmsg = _("two named subpatterns have the same name");
break;
- case G_REGEX_ERROR_MALFORMED_PROPERTY:
+ case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY:
+ *errcode = G_REGEX_ERROR_MALFORMED_PROPERTY;
*errmsg = _("malformed \\P or \\p sequence");
break;
- case G_REGEX_ERROR_UNKNOWN_PROPERTY:
+ case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY:
+ *errcode = G_REGEX_ERROR_UNKNOWN_PROPERTY;
*errmsg = _("unknown property name after \\P or \\p");
break;
- case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG:
+ case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG:
+ *errcode = G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG;
*errmsg = _("subpattern name is too long (maximum 32 characters)");
break;
- case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS:
+ case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS:
+ *errcode = G_REGEX_ERROR_TOO_MANY_SUBPATTERNS;
*errmsg = _("too many named subpatterns (maximum 10,000)");
break;
- case G_REGEX_ERROR_INVALID_OCTAL_VALUE:
+ case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG:
+ *errcode = G_REGEX_ERROR_INVALID_OCTAL_VALUE;
*errmsg = _("octal value is greater than \\377");
break;
- case 152: /* internal error: overran compiling workspace */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("overran compiling workspace");
- break;
- case 153: /* internal error: previously-checked referenced subpattern not found */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("previously-checked referenced subpattern not found");
- break;
- case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE:
+ case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES:
+ *errcode = G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE;
*errmsg = _("DEFINE group contains more than one branch");
break;
- case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS:
+ case PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE:
+ *errcode = G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS;
*errmsg = _("inconsistent NEWLINE options");
break;
- case G_REGEX_ERROR_MISSING_BACK_REFERENCE:
+ case PCRE2_ERROR_BACKSLASH_G_SYNTAX:
+ *errcode = G_REGEX_ERROR_MISSING_BACK_REFERENCE;
*errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or "
"number, or by a plain number");
break;
- case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE:
- *errmsg = _("a numbered reference must not be zero");
- break;
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN:
+ case PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED:
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN;
*errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)");
break;
- case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB:
+ case PCRE2_ERROR_VERB_UNKNOWN:
+ *errcode = G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB;
*errmsg = _("(*VERB) not recognized");
break;
- case G_REGEX_ERROR_NUMBER_TOO_BIG:
+ case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG:
+ *errcode = G_REGEX_ERROR_NUMBER_TOO_BIG;
*errmsg = _("number is too big");
break;
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME:
+ case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED:
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME;
*errmsg = _("missing subpattern name after (?&");
break;
- case G_REGEX_ERROR_MISSING_DIGIT:
- *errmsg = _("digit expected after (?+");
- break;
- case G_REGEX_ERROR_INVALID_DATA_CHARACTER:
- *errmsg = _("] is an invalid data character in JavaScript compatibility mode");
- break;
- case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
+ case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH:
+ *errcode = G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME;
*errmsg = _("different names for subpatterns of the same number are not allowed");
break;
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED:
+ case PCRE2_ERROR_MARK_MISSING_ARGUMENT:
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED;
*errmsg = _("(*MARK) must have an argument");
break;
- case G_REGEX_ERROR_INVALID_CONTROL_CHAR:
+ case PCRE2_ERROR_BACKSLASH_C_SYNTAX:
+ *errcode = G_REGEX_ERROR_INVALID_CONTROL_CHAR;
*errmsg = _( "\\c must be followed by an ASCII character");
break;
- case G_REGEX_ERROR_MISSING_NAME:
+ case PCRE2_ERROR_BACKSLASH_K_SYNTAX:
+ *errcode = G_REGEX_ERROR_MISSING_NAME;
*errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name");
break;
- case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS:
+ case PCRE2_ERROR_BACKSLASH_N_IN_CLASS:
+ *errcode = G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS;
*errmsg = _("\\N is not supported in a class");
break;
- case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES:
- *errmsg = _("too many forward references");
- break;
- case G_REGEX_ERROR_NAME_TOO_LONG:
+ case PCRE2_ERROR_VERB_NAME_TOO_LONG:
+ *errcode = G_REGEX_ERROR_NAME_TOO_LONG;
*errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)");
break;
- case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
- *errmsg = _("character value in \\u.... sequence is too large");
+ case PCRE2_ERROR_INTERNAL_CODE_OVERFLOW:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("code overflow");
break;
-
- case 116: /* erroffset passed as NULL */
- /* This should not happen as we never pass a NULL erroffset */
- g_warning ("erroffset passed as NULL");
- *errcode = G_REGEX_ERROR_COMPILE;
+ case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
+ *errmsg = _("unrecognized character after (?P");
break;
- case 117: /* unknown option bit(s) set */
- /* This should not happen as we check options before passing them
- * to pcre_compile2() */
- g_warning ("unknown option bit(s) set");
- *errcode = G_REGEX_ERROR_COMPILE;
+ case PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("overran compiling workspace");
break;
- case 132: /* this version of PCRE is compiled without UTF support */
- case 144: /* invalid UTF-8 string */
- case 145: /* support for \\P, \\p, and \\X has not been compiled */
- case 167: /* this version of PCRE is not compiled with Unicode property support */
- case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */
- case 174: /* invalid UTF-16 string */
- /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE
- * and we do not check if strings are valid */
- case 170: /* internal error: unknown opcode in find_fixedlength() */
+ case PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN:
*errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("previously-checked referenced subpattern not found");
break;
-
+ case PCRE2_ERROR_HEAP_FAILED:
+ case PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW:
+ case PCRE2_ERROR_UNICODE_NOT_SUPPORTED:
+ case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT:
+ case PCRE2_ERROR_NO_SURROGATES_IN_UTF16:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS:
+ case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE:
+ case PCRE2_ERROR_INTERNAL_STUDY_ERROR:
+ case PCRE2_ERROR_UTF_IS_DISABLED:
+ case PCRE2_ERROR_UCP_IS_DISABLED:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS:
+ case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("internal error");
+ break;
+ case PCRE2_ERROR_INVALID_SUBPATTERN_NAME:
+ case PCRE2_ERROR_CLASS_INVALID_RANGE:
+ case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE:
+ case PCRE2_ERROR_PARENTHESES_STACK_CHECK:
+ case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED:
+ case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG:
+ case PCRE2_ERROR_MISSING_CALLOUT_CLOSING:
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB:
+ case PCRE2_ERROR_NULL_PATTERN:
+ case PCRE2_ERROR_BAD_OPTIONS:
+ case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP:
+ case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE:
+ case PCRE2_ERROR_INVALID_OCTAL:
+ case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG:
+ case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG:
+ case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS:
+ case PCRE2_ERROR_VERSION_CONDITION_SYNTAX:
+ case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER:
+ case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER:
+ case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED:
+ case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP:
+ case PCRE2_ERROR_PATTERN_TOO_COMPLICATED:
+ case PCRE2_ERROR_LOOKBEHIND_TOO_LONG:
+ case PCRE2_ERROR_PATTERN_STRING_TOO_LONG:
+ case PCRE2_ERROR_BAD_LITERAL_OPTIONS:
default:
*errcode = G_REGEX_ERROR_COMPILE;
+ *errmsg = _("internal error");
+ break;
}
+
+ g_assert (*errcode != 0);
+ g_assert (*errmsg != NULL);
}
/* GMatchInfo */
@@ -570,12 +749,16 @@ match_info_new (const GRegex *regex,
match_info->regex = g_regex_ref ((GRegex *)regex);
match_info->string = string;
match_info->string_len = string_len;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
match_info->pos = start_position;
match_info->match_opts = match_options;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
+ &match_info->n_subpatterns);
+
+ match_info->match_context = pcre2_match_context_create (NULL);
+ pcre2_set_match_limit (match_info->match_context, 65536); /* should be plenty */
+ pcre2_set_recursion_limit (match_info->match_context, 64); /* should be plenty */
if (is_dfa)
{
@@ -595,9 +778,41 @@ match_info_new (const GRegex *regex,
match_info->offsets[0] = -1;
match_info->offsets[1] = -1;
+ match_info->match_data = pcre2_match_data_create_from_pattern (
+ match_info->regex->pcre_re,
+ NULL);
+
return match_info;
}
+static gboolean
+recalc_match_offsets (GMatchInfo *match_info,
+ GError **error)
+{
+ PCRE2_SIZE *ovector;
+ gint i;
+
+ if (pcre2_get_ovector_count (match_info->match_data) > G_MAXINT / 2)
+ {
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
+ _("Error while matching regular expression %s: %s"),
+ match_info->regex->pattern, _("code overflow"));
+ return FALSE;
+ }
+
+ match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2;
+ ovector = pcre2_get_ovector_pointer (match_info->match_data);
+ match_info->offsets = g_realloc_n (match_info->offsets,
+ match_info->n_offsets,
+ sizeof (gint));
+ for (i = 0; i < match_info->n_offsets; i++)
+ {
+ match_info->offsets[i] = (int) ovector[i];
+ }
+
+ return TRUE;
+}
+
/**
* g_match_info_get_regex:
* @match_info: a #GMatchInfo
@@ -669,6 +884,10 @@ g_match_info_unref (GMatchInfo *match_info)
if (g_atomic_int_dec_and_test (&match_info->ref_count))
{
g_regex_unref (match_info->regex);
+ if (match_info->match_context)
+ pcre2_match_context_free (match_info->match_context);
+ if (match_info->match_data)
+ pcre2_match_data_free (match_info->match_data);
g_free (match_info->offsets);
g_free (match_info->workspace);
g_free (match_info);
@@ -715,6 +934,7 @@ g_match_info_next (GMatchInfo *match_info,
{
gint prev_match_start;
gint prev_match_end;
+ gint opts;
g_return_val_if_fail (match_info != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
@@ -727,25 +947,29 @@ g_match_info_next (GMatchInfo *match_info,
{
/* we have reached the end of the string */
match_info->pos = -1;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
return FALSE;
}
- match_info->matches = pcre_exec (match_info->regex->pcre_re,
- match_info->regex->extra,
- match_info->string,
- match_info->string_len,
- match_info->pos,
- match_info->regex->match_opts | match_info->match_opts,
- match_info->offsets,
- match_info->n_offsets);
- if (IS_PCRE_ERROR (match_info->matches))
+ opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
+ match_info->matches = pcre2_match (match_info->regex->pcre_re,
+ (PCRE2_SPTR8) match_info->string,
+ match_info->string_len,
+ match_info->pos,
+ opts & ~G_REGEX_FLAGS_CONVERTED,
+ match_info->match_data,
+ match_info->match_context);
+
+ if (IS_PCRE2_ERROR (match_info->matches))
{
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
_("Error while matching regular expression %s: %s"),
match_info->regex->pattern, match_error (match_info->matches));
return FALSE;
}
+ else
+ if (!recalc_match_offsets (match_info, error))
+ return FALSE;
/* avoid infinite loops if the pattern is an empty string or something
* equivalent */
@@ -755,7 +979,7 @@ g_match_info_next (GMatchInfo *match_info,
{
/* we have reached the end of the string */
match_info->pos = -1;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
return FALSE;
}
@@ -833,10 +1057,10 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
{
g_return_val_if_fail (match_info, -1);
- if (match_info->matches == PCRE_ERROR_NOMATCH)
+ if (match_info->matches == PCRE2_ERROR_NOMATCH)
/* no match */
return 0;
- else if (match_info->matches < PCRE_ERROR_NOMATCH)
+ else if (match_info->matches < PCRE2_ERROR_NOMATCH)
/* error */
return -1;
else
@@ -891,7 +1115,7 @@ g_match_info_is_partial_match (const GMatchInfo *match_info)
{
g_return_val_if_fail (match_info != NULL, FALSE);
- return match_info->matches == PCRE_ERROR_PARTIAL;
+ return match_info->matches == PCRE2_ERROR_PARTIAL;
}
/**
@@ -988,8 +1212,6 @@ gchar *
g_match_info_fetch (const GMatchInfo *match_info,
gint match_num)
{
- /* we cannot use pcre_get_substring() because it allocates the
- * string using pcre_malloc(). */
gchar *match = NULL;
gint start, end;
@@ -1069,24 +1291,25 @@ g_match_info_fetch_pos (const GMatchInfo *match_info,
* Returns number of first matched subpattern with name @name.
* There may be more than one in case when DUPNAMES is used,
* and not all subpatterns with that name match;
- * pcre_get_stringnumber() does not work in that case.
+ * pcre2_substring_number_from_name() does not work in that case.
*/
static gint
get_matched_substring_number (const GMatchInfo *match_info,
const gchar *name)
{
gint entrysize;
- gchar *first, *last;
+ PCRE2_SPTR first, last;
guchar *entry;
- if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES))
- return pcre_get_stringnumber (match_info->regex->pcre_re, name);
+ if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES))
+ return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR8) name);
- /* This code is copied from pcre_get.c: get_first_set() */
- entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re,
- name,
- &first,
- &last);
+ /* This code is analogous to code from pcre2_substring.c:
+ * pcre2_substring_get_byname() */
+ entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re,
+ (PCRE2_SPTR8) name,
+ &first,
+ &last);
if (entrysize <= 0)
return entrysize;
@@ -1124,8 +1347,6 @@ gchar *
g_match_info_fetch_named (const GMatchInfo *match_info,
const gchar *name)
{
- /* we cannot use pcre_get_named_substring() because it allocates the
- * string using pcre_malloc(). */
gint num;
g_return_val_if_fail (match_info != NULL, NULL);
@@ -1207,8 +1428,6 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info,
gchar **
g_match_info_fetch_all (const GMatchInfo *match_info)
{
- /* we cannot use pcre_get_substring_list() because the returned value
- * isn't suitable for g_strfreev(). */
gchar **result;
gint i;
@@ -1266,9 +1485,7 @@ g_regex_unref (GRegex *regex)
{
g_free (regex->pattern);
if (regex->pcre_re != NULL)
- pcre_free (regex->pcre_re);
- if (regex->extra != NULL)
- pcre_free (regex->extra);
+ pcre2_code_free (regex->pcre_re);
g_free (regex);
}
}
@@ -1276,11 +1493,11 @@ g_regex_unref (GRegex *regex)
/*
* @match_options: (inout) (optional):
*/
-static pcre *regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error);
+static pcre2_code *regex_compile (const gchar *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexCompileFlags *compile_options_out,
+ GRegexMatchFlags *match_options,
+ GError **error);
/**
* g_regex_new:
@@ -1304,10 +1521,13 @@ g_regex_new (const gchar *pattern,
GError **error)
{
GRegex *regex;
- pcre *re;
- const gchar *errmsg;
- gboolean optimize = FALSE;
+ pcre2_code *re;
static gsize initialised = 0;
+ GRegexCompileFlags orig_compile_opts;
+
+ orig_compile_opts = compile_options;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
g_return_val_if_fail (pattern != NULL, NULL);
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
@@ -1316,17 +1536,13 @@ g_regex_new (const gchar *pattern,
if (g_once_init_enter (&initialised))
{
- int supports_utf8, supports_ucp;
+ int supports_utf8;
- pcre_config (PCRE_CONFIG_UTF8, &supports_utf8);
+ pcre2_config (PCRE2_CONFIG_UNICODE, &supports_utf8);
if (!supports_utf8)
g_critical (_("PCRE library is compiled without UTF8 support"));
- pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp);
- if (!supports_ucp)
- g_critical (_("PCRE library is compiled without UTF8 properties support"));
-
- g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2);
+ g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
}
if (G_UNLIKELY (initialised != 1))
@@ -1336,14 +1552,22 @@ g_regex_new (const gchar *pattern,
return NULL;
}
- /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK,
- * as we do not need to wrap PCRE_NO_UTF8_CHECK. */
- if (compile_options & G_REGEX_OPTIMIZE)
- optimize = TRUE;
+ switch (compile_options & G_REGEX_NEWLINE_MASK)
+ {
+ case 0: /* PCRE2_NEWLINE_ANY */
+ case PCRE2_NEWLINE_CR:
+ case PCRE2_NEWLINE_LF:
+ case PCRE2_NEWLINE_CRLF:
+ case PCRE2_NEWLINE_ANYCRLF:
+ break;
+ default:
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
+ "Invalid newline flags");
+ return NULL;
+ }
re = regex_compile (pattern, compile_options, &compile_options,
&match_options, error);
-
if (re == NULL)
return NULL;
@@ -1352,80 +1576,85 @@ g_regex_new (const gchar *pattern,
regex->pattern = g_strdup (pattern);
regex->pcre_re = re;
regex->compile_opts = compile_options;
+ regex->orig_compile_opts = orig_compile_opts;
regex->match_opts = match_options;
- if (optimize)
- {
- regex->extra = pcre_study (regex->pcre_re, 0, &errmsg);
- if (errmsg != NULL)
- {
- GError *tmp_error = g_error_new (G_REGEX_ERROR,
- G_REGEX_ERROR_OPTIMIZE,
- _("Error while optimizing "
- "regular expression %s: %s"),
- regex->pattern,
- errmsg);
- g_propagate_error (error, tmp_error);
-
- g_regex_unref (regex);
- return NULL;
- }
- }
-
return regex;
}
-static pcre *
-regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error)
+static gint
+extract_newline_options (const GRegexCompileFlags compile_options,
+ const GRegexMatchFlags *match_options)
+{
+ gint newline_options = PCRE2_NEWLINE_ANY;
+
+ if (compile_options & G_REGEX_NEWLINE_MASK)
+ newline_options = compile_options & G_REGEX_NEWLINE_MASK;
+ if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
+ newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
+
+ return newline_options;
+}
+
+static gint
+extract_bsr_options (const GRegexCompileFlags compile_options,
+ const GRegexMatchFlags *match_options)
+{
+ gint bsr_options = PCRE2_BSR_UNICODE;
+
+ if (compile_options & PCRE2_BSR_ANYCRLF)
+ bsr_options = PCRE2_BSR_ANYCRLF;
+ if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
+ bsr_options = PCRE2_BSR_ANYCRLF;
+ if (match_options && *match_options & PCRE2_BSR_UNICODE)
+ bsr_options = PCRE2_BSR_UNICODE;
+
+ return bsr_options;
+}
+
+static pcre2_code *
+regex_compile (const gchar *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexCompileFlags *compile_options_out,
+ GRegexMatchFlags *match_options,
+ GError **error)
{
- pcre *re;
+ pcre2_code *re;
+ pcre2_compile_context *context;
const gchar *errmsg;
- gint erroffset;
+ PCRE2_SIZE erroffset;
gint errcode;
GRegexCompileFlags nonpcre_compile_options;
unsigned long int pcre_compile_options;
nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
- /* In GRegex the string are, by default, UTF-8 encoded. PCRE
- * instead uses UTF-8 only if required with PCRE_UTF8. */
- if (compile_options & G_REGEX_RAW)
- {
- /* disable utf-8 */
- compile_options &= ~G_REGEX_RAW;
- }
- else
- {
- /* enable utf-8 */
- compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
+ context = pcre2_compile_context_create (NULL);
- if (match_options != NULL)
- *match_options |= PCRE_NO_UTF8_CHECK;
- }
+ /* set newline options */
+ pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
+
+ /* set bsr options */
+ pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
- /* PCRE_NEWLINE_ANY is the default for the internal PCRE but
- * not for the system one. */
- if (!(compile_options & G_REGEX_NEWLINE_CR) &&
- !(compile_options & G_REGEX_NEWLINE_LF))
+ /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
+ if (compile_options & PCRE2_UTF)
{
- compile_options |= PCRE_NEWLINE_ANY;
+ compile_options |= PCRE2_NO_UTF_CHECK;
+ if (match_options != NULL)
+ *match_options |= PCRE2_NO_UTF_CHECK;
}
- compile_options |= PCRE_UCP;
-
- /* PCRE_BSR_UNICODE is the default for the internal PCRE but
- * possibly not for the system one.
- */
- if (~compile_options & G_REGEX_BSR_ANYCRLF)
- compile_options |= PCRE_BSR_UNICODE;
+ compile_options |= PCRE2_UCP;
/* compile the pattern */
- re = pcre_compile2 (pattern, compile_options, &errcode,
- &errmsg, &erroffset, NULL);
+ re = pcre2_compile ((PCRE2_SPTR8) pattern,
+ PCRE2_ZERO_TERMINATED,
+ compile_options & ~G_REGEX_FLAGS_CONVERTED,
+ &errcode,
+ &erroffset,
+ context);
+ pcre2_compile_context_free (context);
/* if the compilation failed, set the error member and return
* immediately */
@@ -1442,7 +1671,7 @@ regex_compile (const gchar *pattern,
tmp_error = g_error_new (G_REGEX_ERROR, errcode,
_("Error while compiling regular "
- "expression %s at char %d: %s"),
+ "expression %s at char %" G_GSIZE_FORMAT ": %s"),
pattern, erroffset, errmsg);
g_propagate_error (error, tmp_error);
@@ -1451,22 +1680,22 @@ regex_compile (const gchar *pattern,
/* For options set at the beginning of the pattern, pcre puts them into
* compile options, e.g. "(?i)foo" will make the pcre structure store
- * PCRE_CASELESS even though it wasn't explicitly given for compilation. */
- pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options);
+ * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
+ pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
- /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */
- if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF)
- compile_options &= ~PCRE_NEWLINE_ANY;
+ /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
+ if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
+ compile_options &= ~PCRE2_NEWLINE_ANY;
compile_options |= nonpcre_compile_options;
- if (!(compile_options & G_REGEX_DUPNAMES))
+ if (!(compile_options & PCRE2_DUPNAMES))
{
gboolean jchanged = FALSE;
- pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged);
+ pcre2_pattern_info (re, PCRE2_INFO_JCHANGED, &jchanged);
if (jchanged)
- compile_options |= G_REGEX_DUPNAMES;
+ compile_options |= PCRE2_DUPNAMES;
}
if (compile_options_out != 0)
@@ -1511,8 +1740,7 @@ g_regex_get_max_backref (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_BACKREFMAX, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value);
return value;
}
@@ -1532,8 +1760,7 @@ g_regex_get_capture_count (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_CAPTURECOUNT, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value);
return value;
}
@@ -1553,8 +1780,7 @@ g_regex_get_has_cr_or_lf (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_HASCRORLF, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value);
return !!value;
}
@@ -1576,8 +1802,8 @@ g_regex_get_max_lookbehind (const GRegex *regex)
{
gint max_lookbehind;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND,
+ &max_lookbehind);
return max_lookbehind;
}
@@ -1599,9 +1825,47 @@ g_regex_get_max_lookbehind (const GRegex *regex)
GRegexCompileFlags
g_regex_get_compile_flags (const GRegex *regex)
{
+ gint extra_flags, info_value;
+
g_return_val_if_fail (regex != NULL, 0);
- return regex->compile_opts;
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+ /* Preserve original G_REGEX_OPTIMIZE */
+ extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE);
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+ /* Also include the newline options */
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value);
+ switch (info_value)
+ {
+ case PCRE2_NEWLINE_ANYCRLF:
+ extra_flags |= G_REGEX_NEWLINE_ANYCRLF;
+ break;
+ case PCRE2_NEWLINE_CRLF:
+ extra_flags |= G_REGEX_NEWLINE_CRLF;
+ break;
+ case PCRE2_NEWLINE_LF:
+ extra_flags |= G_REGEX_NEWLINE_LF;
+ break;
+ case PCRE2_NEWLINE_CR:
+ extra_flags |= G_REGEX_NEWLINE_CR;
+ break;
+ default:
+ break;
+ }
+
+ /* Also include the bsr options */
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BSR, &info_value);
+ switch (info_value)
+ {
+ case PCRE2_BSR_ANYCRLF:
+ extra_flags |= G_REGEX_BSR_ANYCRLF;
+ break;
+ default:
+ break;
+ }
+
+ return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
}
/**
@@ -1619,7 +1883,7 @@ g_regex_get_match_flags (const GRegex *regex)
{
g_return_val_if_fail (regex != NULL, 0);
- return regex->match_opts & G_REGEX_MATCH_MASK;
+ return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
}
/**
@@ -1653,6 +1917,9 @@ g_regex_match_simple (const gchar *pattern,
GRegex *regex;
gboolean result;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
+
regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
if (!regex)
return FALSE;
@@ -1720,6 +1987,8 @@ g_regex_match (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_match_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -1803,6 +2072,8 @@ g_regex_match_full (const GRegex *regex,
GMatchInfo *info;
gboolean match_ok;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
g_return_val_if_fail (start_position >= 0, FALSE);
@@ -1853,6 +2124,8 @@ g_regex_match_all (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_match_all_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -1922,39 +2195,29 @@ g_regex_match_all_full (const GRegex *regex,
{
GMatchInfo *info;
gboolean done;
- pcre *pcre_re;
- pcre_extra *extra;
+ pcre2_code *pcre_re;
gboolean retval;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
g_return_val_if_fail (start_position >= 0, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
-#ifdef PCRE_NO_AUTO_POSSESS
- /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which
- * is an optimization for normal regex matching, but results in omitting
- * some shorter matches here, and an observable behaviour change.
+ /* For PCRE2 we need to set PCRE2_NO_AUTO_POSSESS: auto-possessification is
+ * an optimization for normal regex matching, but results in omitting some
+ * shorter matches here, and an observable behaviour change.
*
* DFA matching is rather niche, and very rarely used according to
* codesearch.debian.net, so don't bother caching the recompiled RE. */
pcre_re = regex_compile (regex->pattern,
- regex->compile_opts | PCRE_NO_AUTO_POSSESS,
+ regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
NULL, NULL, error);
-
if (pcre_re == NULL)
return FALSE;
- /* Not bothering to cache the optimization data either, with similar
- * reasoning */
- extra = NULL;
-#else
- /* For PCRE < 8.33 the precompiled regex is fine. */
- pcre_re = regex->pcre_re;
- extra = regex->extra;
-#endif
-
info = match_info_new (regex, string, string_len, start_position,
match_options, TRUE);
@@ -1962,29 +2225,38 @@ g_regex_match_all_full (const GRegex *regex,
while (!done)
{
done = TRUE;
- info->matches = pcre_dfa_exec (pcre_re, extra,
- info->string, info->string_len,
- info->pos,
- regex->match_opts | match_options,
- info->offsets, info->n_offsets,
- info->workspace, info->n_workspace);
- if (info->matches == PCRE_ERROR_DFA_WSSIZE)
+ info->matches = pcre2_dfa_match (pcre_re,
+ (PCRE2_SPTR8) info->string, info->string_len,
+ info->pos,
+ (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
+ info->match_data,
+ info->match_context,
+ info->workspace, info->n_workspace);
+
+ if (!recalc_match_offsets (info, error))
+ return FALSE;
+
+ if (info->matches == PCRE2_ERROR_DFA_WSSIZE)
{
/* info->workspace is too small. */
info->n_workspace *= 2;
- info->workspace = g_realloc (info->workspace,
- info->n_workspace * sizeof (gint));
+ info->workspace = g_realloc_n (info->workspace,
+ info->n_workspace,
+ sizeof (gint));
done = FALSE;
}
else if (info->matches == 0)
{
/* info->offsets is too small. */
info->n_offsets *= 2;
- info->offsets = g_realloc (info->offsets,
- info->n_offsets * sizeof (gint));
+ info->offsets = g_realloc_n (info->offsets,
+ info->n_offsets,
+ sizeof (gint));
+ pcre2_match_data_free (info->match_data);
+ info->match_data = pcre2_match_data_create (info->n_offsets, NULL);
done = FALSE;
}
- else if (IS_PCRE_ERROR (info->matches))
+ else if (IS_PCRE2_ERROR (info->matches))
{
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
_("Error while matching regular expression %s: %s"),
@@ -1992,9 +2264,7 @@ g_regex_match_all_full (const GRegex *regex,
}
}
-#ifdef PCRE_NO_AUTO_POSSESS
- pcre_free (pcre_re);
-#endif
+ pcre2_code_free (pcre_re);
/* don’t assert that (info->matches <= info->n_subpatterns + 1) as that only
* holds true for a single match, rather than matching all */
@@ -2032,8 +2302,8 @@ g_regex_get_string_number (const GRegex *regex,
g_return_val_if_fail (regex != NULL, -1);
g_return_val_if_fail (name != NULL, -1);
- num = pcre_get_stringnumber (regex->pcre_re, name);
- if (num == PCRE_ERROR_NOSUBSTRING)
+ num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR8) name);
+ if (num == PCRE2_ERROR_NOSUBSTRING)
num = -1;
return num;
@@ -2088,6 +2358,9 @@ g_regex_split_simple (const gchar *pattern,
GRegex *regex;
gchar **result;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
+
regex = g_regex_new (pattern, compile_options, 0, NULL);
if (!regex)
return NULL;
@@ -2131,6 +2404,8 @@ g_regex_split (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_split_full (regex, string, -1, 0,
match_options, 0, NULL);
}
@@ -2195,6 +2470,8 @@ g_regex_split_full (const GRegex *regex,
/* the returned array of char **s */
gchar **string_list;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -2819,6 +3096,8 @@ g_regex_replace (const GRegex *regex,
GList *list;
GError *tmp_error = NULL;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -2888,6 +3167,8 @@ g_regex_replace_literal (const GRegex *regex,
GRegexMatchFlags match_options,
GError **error)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (replacement != NULL, NULL);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
@@ -2976,6 +3257,8 @@ g_regex_replace_eval (const GRegex *regex,
gboolean done = FALSE;
GError *tmp_error = NULL;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
diff --git a/glib/gregex.h b/glib/gregex.h
index 3fd61806f..7010d52ab 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -264,7 +264,9 @@ GQuark g_regex_error_quark (void);
* in the usual way).
* @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
* be used many times, then it may be worth the effort to optimize it
- * to improve the speed of matches.
+ * to improve the speed of matches. Since GLib 2.74, which uses libpcre2,
+ * patterns no longer need separate optimization, so this option is a no-op.
+ * Deprecated: 2.74
* @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
* first newline. Since: 2.34
* @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
@@ -287,7 +289,8 @@ GQuark g_regex_error_quark (void);
* is recognised. If this option is set, then "\R" only recognizes the newline
* characters '\r', '\n' and '\r\n'. Since: 2.34
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
- * JavaScript rather than PCRE. Since: 2.34
+ * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported,
+ * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74
*
* Flags specifying compile-time options.
*
@@ -308,7 +311,7 @@ typedef enum
G_REGEX_UNGREEDY = 1 << 9,
G_REGEX_RAW = 1 << 11,
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
- G_REGEX_OPTIMIZE = 1 << 13,
+ G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13,
G_REGEX_FIRSTLINE = 1 << 18,
G_REGEX_DUPNAMES = 1 << 19,
G_REGEX_NEWLINE_CR = 1 << 20,
@@ -316,7 +319,7 @@ typedef enum
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
G_REGEX_BSR_ANYCRLF = 1 << 23,
- G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+ G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25
} GRegexCompileFlags;
/**
diff --git a/glib/meson.build b/glib/meson.build
index 4f8240b24..6062c11a1 100644
--- a/glib/meson.build
+++ b/glib/meson.build
@@ -357,13 +357,13 @@ else
glib_dtrace_hdr = []
endif
-pcre_static_args = []
+pcre2_static_args = []
-if use_pcre_static_flag
- pcre_static_args = ['-DPCRE_STATIC']
+if use_pcre2_static_flag
+ pcre2_static_args = ['-DPCRE2_STATIC']
endif
-glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args
+glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre2_static_args + glib_hidden_visibility_args
libglib = library('glib-2.0',
glib_dtrace_obj, glib_dtrace_hdr,
sources : [deprecated_sources, glib_sources],
@@ -375,7 +375,7 @@ libglib = library('glib-2.0',
link_args : [noseh_link_args, glib_link_flags, win32_ldflags],
include_directories : configinc,
link_with: [charset_lib, gnulib_lib],
- dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
+ dependencies : [pcre2, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
c_args : glib_c_args,
objc_args : glib_c_args,
)
diff --git a/glib/tests/meson.build b/glib/tests/meson.build
index 9b3b3bfa4..193224d86 100644
--- a/glib/tests/meson.build
+++ b/glib/tests/meson.build
@@ -89,8 +89,8 @@ glib_tests = {
},
'refstring' : {},
'regex' : {
- 'dependencies' : [pcre],
- 'c_args' : use_pcre_static_flag ? ['-DPCRE_STATIC'] : [],
+ 'dependencies' : [pcre2],
+ 'c_args' : use_pcre2_static_flag ? ['-DPCRE2_STATIC'] : [],
},
'relation' : {},
'rwlock' : {},
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index c39d640fa..3355f64e5 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -27,7 +27,8 @@
#include <locale.h>
#include "glib.h"
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
/* U+20AC EURO SIGN (symbol, currency) */
#define EURO "\xe2\x82\xac"
@@ -1503,7 +1504,7 @@ test_properties (void)
gchar *str;
error = NULL;
- regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
res = g_regex_match (regex, "ppPP01", 0, &match);
g_assert (res);
str = g_match_info_fetch (match, 0);
@@ -1524,7 +1525,7 @@ test_class (void)
gchar *str;
error = NULL;
- regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match);
g_assert (res);
str = g_match_info_fetch (match, 0);
@@ -1570,7 +1571,7 @@ test_lookahead (void)
gint start, end;
error = NULL;
- regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "word1 word2: word3;", 0, &match);
@@ -1584,7 +1585,7 @@ test_lookahead (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar foobaz", 0, &match);
@@ -1599,7 +1600,7 @@ test_lookahead (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar foobaz", 0, &match);
@@ -1632,7 +1633,7 @@ test_lookbehind (void)
gint start, end;
error = NULL;
- regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar boobar", 0, &match);
@@ -1647,7 +1648,7 @@ test_lookbehind (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "don poo, and bullock poo", 0, &match);
@@ -1660,17 +1661,17 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex == NULL);
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
g_clear_error (&error);
- regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex == NULL);
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
g_clear_error (&error);
- regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match);
@@ -1682,7 +1683,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match);
@@ -1691,7 +1692,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1703,7 +1704,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1715,7 +1716,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1727,7 +1728,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match);
@@ -1752,7 +1753,7 @@ test_subpattern (void)
gint start;
error = NULL;
- regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1);
@@ -1770,7 +1771,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
@@ -1794,7 +1795,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "the white queen", 0, &match);
@@ -1814,7 +1815,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
@@ -1834,7 +1835,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1);
@@ -1852,7 +1853,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match);
@@ -1869,7 +1870,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match);
@@ -1896,7 +1897,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match);
@@ -1920,7 +1921,7 @@ test_condition (void)
gboolean res;
error = NULL;
- regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
@@ -1934,7 +1935,7 @@ test_condition (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
@@ -1947,7 +1948,7 @@ test_condition (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a[zzzzzz]b", 0, &match);
@@ -1962,7 +1963,7 @@ test_condition (void)
regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )"
"\\b (?&byte) (\\.(?&byte)){3} \\b",
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
+ G_REGEX_EXTENDED, 0, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "128.0.0.1", 0, &match);
@@ -1981,7 +1982,7 @@ test_condition (void)
regex = g_regex_new ("^(?(?=[^a-z]*[a-z])"
"\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$",
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
+ G_REGEX_EXTENDED, 0, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "01-abc-24", 0, &match);
@@ -2014,7 +2015,7 @@ test_recursion (void)
gint start;
error = NULL;
- regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "(middle)", 0, &match);
@@ -2031,7 +2032,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match);
@@ -2044,7 +2045,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match);
@@ -2053,7 +2054,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match);
@@ -2072,7 +2073,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2085,7 +2086,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2098,7 +2099,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2169,21 +2170,21 @@ test_max_lookbehind (void)
}
static gboolean
-pcre_ge (guint64 major, guint64 minor)
+pcre2_ge (guint64 major, guint64 minor)
{
- const char *version;
- gchar *ptr;
- guint64 pcre_major, pcre_minor;
+ gchar version[32];
+ const gchar *ptr;
+ guint64 pcre2_major, pcre2_minor;
- /* e.g. 8.35 2014-04-04 */
- version = pcre_version ();
+ /* e.g. 10.36 2020-12-04 */
+ pcre2_config (PCRE2_CONFIG_VERSION, version);
- pcre_major = g_ascii_strtoull (version, &ptr, 10);
+ pcre2_major = g_ascii_strtoull (version, (gchar **) &ptr, 10);
/* ptr points to ".MINOR (release date)" */
g_assert (ptr[0] == '.');
- pcre_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
+ pcre2_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
- return (pcre_major > major) || (pcre_major == major && pcre_minor >= minor);
+ return (pcre2_major > major) || (pcre2_major == major && pcre2_minor >= minor);
}
int
@@ -2205,18 +2206,26 @@ main (int argc, char *argv[])
g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind);
/* TEST_NEW(pattern, compile_opts, match_opts) */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL);
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
/* This gives "internal error: code overflow" with pcre 6.0 */
TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
@@ -2227,9 +2236,10 @@ main (int argc, char *argv[])
TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
- TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
@@ -2257,16 +2267,16 @@ main (int argc, char *argv[])
TEST_NEW_FAIL ("a{4,2}", 0, G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER);
TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG);
TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS);
- TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
+ TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER);
TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
- TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
+ TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
TEST_NEW_FAIL ("a[:alpha:]b", 0, G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS);
TEST_NEW_FAIL ("a(b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
- TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
+ TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT);
TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
@@ -2276,51 +2286,31 @@ main (int argc, char *argv[])
TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME);
TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED);
TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE);
- TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION);
+ TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND);
- TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP);
- if (pcre_ge (8, 37))
- {
- /* The expected errors changed here. */
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
- }
- else
- {
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
- }
-
- if (pcre_ge (8, 35))
- {
- /* The expected errors changed here. */
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
- }
- else
- {
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
- }
+ TEST_NEW ("(?!\\w)(?R)", 0, 0);
+ TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
+ TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
TEST_NEW_FAIL ("(?P<x>eks)(?P<x>eccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME);
-#if 0
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY);
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY);
-#endif
TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE);
TEST_NEW_FAIL ("^(?(DEFINE) abc | xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE);
TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS);
TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
- TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
- TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN);
+ TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
+ TEST_NEW ("abc(*FAIL:123)xyz", 0, 0);
TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB);
- TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG);
+ if (pcre2_ge (10, 37))
+ {
+ TEST_NEW ("(?i:A{1,}\\6666666666)", 0, 0);
+ }
TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
- TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT);
- TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER);
+ TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME);
TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED);
TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR);
TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME);
TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG);
- TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
/* These errors can't really be tested easily:
* G_REGEX_ERROR_EXPRESSION_TOO_LARGE
@@ -2476,7 +2466,15 @@ main (int argc, char *argv[])
TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
- TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, FALSE);
+ /* Due to PCRE2 only supporting newline settings passed to pcre2_compile (and
+ * not to pcre2_match also), we have to compile the pattern with the
+ * effective (combined from compile and match options) newline setting.
+ * However, this setting also affects how newlines are interpreted *inside*
+ * the pattern. With G_REGEX_EXTENDED, this changes where the comment
+ * (started with `#`) ends.
+ */
+ /* On PCRE1, this test expected no match; on PCRE2 it matches because of the above. */
+ TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE /*FALSE*/);
TEST_MATCH("a#\nb", G_REGEX_EXTENDED | G_REGEX_NEWLINE_CR, 0, "a", -1, 0, 0, TRUE);
TEST_MATCH("line\nbreak", G_REGEX_MULTILINE, 0, "this is a line\nbreak", -1, 0, 0, TRUE);
@@ -2489,21 +2487,19 @@ main (int argc, char *argv[])
* with pcre's internal tables. Bug #678273 */
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE);
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE);
-#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32)
- /* This would incorrectly fail to match in pcre < 8.32, so only assert
- * this for known-good pcre. */
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE);
-#endif
/* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */
TEST_MATCH_NEXT0("a", "x", -1, 0);
TEST_MATCH_NEXT0("a", "ax", -1, 1);
TEST_MATCH_NEXT0("a", "xa", 1, 0);
TEST_MATCH_NEXT0("a", "axa", 1, 2);
+ TEST_MATCH_NEXT1("", "", -1, 0, "", 0, 0);
TEST_MATCH_NEXT1("a", "a", -1, 0, "a", 0, 1);
TEST_MATCH_NEXT1("a", "xax", -1, 0, "a", 1, 2);
TEST_MATCH_NEXT1(EURO, ENG EURO, -1, 0, EURO, 2, 5);
TEST_MATCH_NEXT1("a*", "", -1, 0, "", 0, 0);
+ TEST_MATCH_NEXT2("", "a", -1, 0, "", 0, 0, "", 1, 1);
TEST_MATCH_NEXT2("a*", "aa", -1, 0, "aa", 0, 2, "", 2, 2);
TEST_MATCH_NEXT2(EURO "*", EURO EURO, -1, 0, EURO EURO, 0, 6, "", 6, 6);
TEST_MATCH_NEXT2("a", "axa", -1, 0, "a", 0, 1, "a", 2, 3);
@@ -2677,11 +2673,6 @@ main (int argc, char *argv[])
TEST_EXPAND("a", "a", "\\0130", FALSE, "X");
TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a");
TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX");
-#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34))
- /* PCRE >= 8.34 no longer allows this usage. */
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a");
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a");
-#endif
TEST_EXPAND(".", EURO, "\\0", FALSE, EURO);
TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO);
TEST_EXPAND("(?P<G>.)", EURO, "\\g<G>", FALSE, EURO);
@@ -2800,6 +2791,10 @@ main (int argc, char *argv[])
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "A", 1);
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "B", 2);
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "C", -1);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "A", 1);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "B", 2);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "C", 3);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "D", -1);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "A", 1);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "B", 3);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "C", -1);
diff --git a/meson.build b/meson.build
index 0edbc2bf0..00d38cbec 100644
--- a/meson.build
+++ b/meson.build
@@ -2017,37 +2017,38 @@ else
libiconv = dependency('iconv')
endif
-pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME
-if not pcre.found()
+pcre2 = dependency('libpcre2-8', version: '>= 10.32', required : false)
+if not pcre2.found()
if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl'
- # MSVC: Search for the PCRE library by the configuration, which corresponds
- # to the output of CMake builds of PCRE. Note that debugoptimized
+ # MSVC: Search for the PCRE2 library by the configuration, which corresponds
+ # to the output of CMake builds of PCRE2. Note that debugoptimized
# is really a Release build with .PDB files.
if vs_crt == 'debug'
- pcre = cc.find_library('pcred', required : false)
+ pcre2 = cc.find_library('pcre2d-8', required : false)
else
- pcre = cc.find_library('pcre', required : false)
+ pcre2 = cc.find_library('pcre2-8', required : false)
endif
endif
endif
# Try again with the fallback
-if not pcre.found()
- pcre = dependency('libpcre', required : true, fallback : ['pcre', 'pcre_dep'])
- use_pcre_static_flag = true
+if not pcre2.found()
+ pcre2 = dependency('libpcre2-8', required : true, fallback : ['pcre2', 'libpcre2_8'])
+ use_pcre2_static_flag = true
elif host_system == 'windows'
- pcre_static = cc.links('''#define PCRE_STATIC
- #include <pcre.h>
- int main() {
- void *p = NULL;
- pcre_free(p);
- return 0;
- }''',
- dependencies: pcre,
- name : 'Windows system PCRE is a static build')
- use_pcre_static_flag = pcre_static
+ pcre2_static = cc.links('''#define PCRE2_STATIC
+ #define PCRE2_CODE_UNIT_WIDTH 8
+ #include <pcre2.h>
+ int main() {
+ void *p = NULL;
+ pcre2_code_free(p);
+ return 0;
+ }''',
+ dependencies: pcre2,
+ name : 'Windows system PCRE2 is a static build')
+ use_pcre2_static_flag = pcre2_static
else
- use_pcre_static_flag = false
+ use_pcre2_static_flag = false
endif
# Import the gvdb sources as a subproject to avoid having the copylib in-tree
diff --git a/po/sk.po b/po/sk.po
index 8d6a1ced7..747ad2715 100644
--- a/po/sk.po
+++ b/po/sk.po
@@ -5630,7 +5630,7 @@ msgstr "zlý ofset"
msgid "short utf8"
msgstr "krátke utf8"
-# Ide o omyl programátora: case PCRE_ERROR_RECURSELOOP: return _("recursion loop");
+# Ide o omyl programátora: case PCRE2_ERROR_RECURSELOOP: return _("recursion loop");
#: glib/gregex.c:303
msgid "recursion loop"
msgstr "rekurzívna slučka"
diff --git a/subprojects/pcre.wrap b/subprojects/pcre.wrap
deleted file mode 100644
index 22f524303..000000000
--- a/subprojects/pcre.wrap
+++ /dev/null
@@ -1,11 +0,0 @@
-[wrap-file]
-directory = pcre-8.45
-source_url = https://sourceforge.net/projects/pcre/files/pcre/8.45/pcre-8.45.tar.bz2
-source_filename = pcre-8.45.tar.bz2
-source_hash = 4dae6fdcd2bb0bb6c37b5f97c33c2be954da743985369cddac3546e3218bffb8
-patch_filename = pcre_8.45-1_patch.zip
-patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.45-1/get_patch
-patch_hash = 821f9b0e5578c5b3983434465de93e30ddfceb92e331a0c9110aba6bf0634ffa
-
-[provide]
-libpcre = pcre_dep
diff --git a/subprojects/pcre2.wrap b/subprojects/pcre2.wrap
index d210eb65d..6196f1d0c 100644
--- a/subprojects/pcre2.wrap
+++ b/subprojects/pcre2.wrap
@@ -3,14 +3,13 @@ directory = pcre2-10.40
source_url = https://github.com/PhilipHazel/pcre2/releases/download/pcre2-10.40/pcre2-10.40.tar.bz2
source_filename = pcre2-10.40.tar.bz2
source_hash = 14e4b83c4783933dc17e964318e6324f7cae1bc75d8f3c79bc6969f00c159d68
-patch_filename = pcre2_10.40-2_patch.zip
-patch_url = https://wrapdb.mesonbuild.com/v2/pcre2_10.40-2/get_patch
-patch_hash = 6c1bbb3c00ec3917b3abee922dedc19ad8b3a199db65ba893a61241e3428bdbd
-wrapdb_version = 10.40-2
+patch_filename = pcre2_10.40-3_patch.zip
+patch_url = https://wrapdb.mesonbuild.com/v2/pcre2_10.40-3/get_patch
+patch_hash = 95391923529b4c1647a2cf88cd3b59cceb4f92393775e011f530e7865de0c7fb
+wrapdb_version = 10.40-3
[provide]
libpcre2-8 = -libpcre2_8
libpcre2-16 = -libpcre2_16
libpcre2-32 = -libpcre2_32
libpcre2-posix = -libpcre2_posix
-