summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Christian Hergert <chergert@redhat.com>, 2022-09-05 16:10:34 -0700
committer: Christian Hergert <chergert@redhat.com>, 2022-09-05 16:11:49 -0700
commit: fd0128f0fd957ce64baeb95d071b39f2f3cc679d (patch)
tree: b64ea1822b94875ed18ee06440b164da138c8ee7
parent: 0dae82fb1e21ce3dd2cf911d5b5e2318688754f8 (diff)
download: gtksourceview-wip/chergert/backport-implregex-to-gsv4.tar.gz
regex: backport from GRegex to ImplRegex (branch: wip/chergert/backport-implregex-to-gsv4)
This uses the same regex abstraction from GSV 5.x to use PCRE2 directly rather than indirectly through GRegex.
-rw-r--r-- gtksourceview/gtksourceregex.c 88
-rw-r--r-- gtksourceview/gtksourceregex.h 70
-rw-r--r-- gtksourceview/gtksourcesearchcontext.c 116
-rw-r--r-- gtksourceview/implregex-private.h 89
-rw-r--r-- gtksourceview/implregex.c 1141
-rw-r--r-- gtksourceview/meson.build 2
-rw-r--r-- meson.build 2
-rw-r--r-- subprojects/pcre2.wrap 10
8 files changed, 1375 insertions, 143 deletions
diff --git a/gtksourceview/gtksourceregex.c b/gtksourceview/gtksourceregex.c
index 77570837..ce7f9449 100644
--- a/gtksourceview/gtksourceregex.c
+++ b/gtksourceview/gtksourceregex.c
@@ -29,21 +29,23 @@
#include <glib/gi18n-lib.h>
#include "gtksourceutils-private.h"
+#include "implregex-private.h"
+
/*
* GRegex wrapper which adds a few features needed for syntax highlighting,
* in particular resolving "\%{...@start}" and forbidding the use of \C.
*/
/* Regex used to match "\%{...@start}". */
-static GRegex *
+static ImplRegex *
get_start_ref_regex (void)
{
- static GRegex *start_ref_regex = NULL;
+ static ImplRegex *start_ref_regex = NULL;
if (start_ref_regex == NULL)
{
- start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
- G_REGEX_OPTIMIZE, 0, NULL);
+ start_ref_regex = impl_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
+ G_REGEX_OPTIMIZE, 0, NULL);
}
return start_ref_regex;
@@ -57,8 +59,8 @@ struct _GtkSourceRegex
GRegexCompileFlags flags;
} info;
struct {
- GRegex *regex;
- GMatchInfo *match;
+ ImplRegex *regex;
+ ImplMatchInfo *match;
} regex;
} u;
@@ -105,16 +107,16 @@ find_single_byte_escape (const gchar *string)
* gtk_source_regex_new:
* @pattern: the regular expression.
* @flags: compile options for @pattern.
- * @error: location to store the error occuring, or %NULL to ignore errors.
+ * @error: location to store the error occurring, or %NULL to ignore errors.
*
* Creates a new regex.
*
* Returns: a newly-allocated #GtkSourceRegex.
*/
GtkSourceRegex *
-_gtk_source_regex_new (const gchar *pattern,
- GRegexCompileFlags flags,
- GError **error)
+_gtk_source_regex_new (const gchar *pattern,
+ GRegexCompileFlags flags,
+ GError **error)
{
GtkSourceRegex *regex;
@@ -132,7 +134,7 @@ _gtk_source_regex_new (const gchar *pattern,
regex = g_slice_new0 (GtkSourceRegex);
regex->ref_count = 1;
- if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
+ if (impl_regex_match (get_start_ref_regex (), pattern, 0, NULL))
{
regex->resolved = FALSE;
regex->u.info.pattern = g_strdup (pattern);
@@ -141,9 +143,9 @@ _gtk_source_regex_new (const gchar *pattern,
else
{
regex->resolved = TRUE;
- regex->u.regex.regex = g_regex_new (pattern,
- flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
- error);
+ regex->u.regex.regex = impl_regex_new (pattern,
+ flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
+ error);
if (regex->u.regex.regex == NULL)
{
@@ -170,9 +172,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
{
if (regex->resolved)
{
- g_regex_unref (regex->u.regex.regex);
+ impl_regex_unref (regex->u.regex.regex);
if (regex->u.regex.match)
- g_match_info_free (regex->u.regex.match);
+ impl_match_info_free (regex->u.regex.match);
}
else
{
@@ -188,27 +190,25 @@ struct RegexResolveData {
};
static gboolean
-replace_start_regex (const GMatchInfo *match_info,
- GString *expanded_regex,
- gpointer user_data)
+replace_start_regex (const ImplMatchInfo *match_info,
+ GString *expanded_regex,
+ gpointer user_data)
{
gchar *num_string, *subst, *subst_escaped, *escapes;
gint num;
struct RegexResolveData *data = user_data;
- escapes = g_match_info_fetch (match_info, 1);
- num_string = g_match_info_fetch (match_info, 2);
+ escapes = impl_match_info_fetch (match_info, 1);
+ num_string = impl_match_info_fetch (match_info, 2);
num = _gtk_source_utils_string_to_int (num_string);
if (num < 0)
{
- subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
- num_string);
+ subst = impl_match_info_fetch_named (data->start_regex->u.regex.match, num_string);
}
else
{
- subst = g_match_info_fetch (data->start_regex->u.regex.match,
- num);
+ subst = impl_match_info_fetch (data->start_regex->u.regex.match, num);
}
if (subst != NULL)
@@ -263,18 +263,18 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
data.start_regex = start_regex;
data.matched_text = matched_text;
- expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
- regex->u.info.pattern,
- -1, 0, 0,
- replace_start_regex,
- &data, NULL);
+ expanded_regex = impl_regex_replace_eval (get_start_ref_regex (),
+ regex->u.info.pattern,
+ -1, 0, 0,
+ replace_start_regex,
+ &data, NULL);
new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
if (new_regex == NULL || !new_regex->resolved)
{
_gtk_source_regex_unref (new_regex);
g_warning ("Regular expression %s cannot be expanded.",
regex->u.info.pattern);
- /* Returns a regex that nevers matches. */
+ /* Returns a regex that never matches. */
new_regex = _gtk_source_regex_new ("$never-match^", 0, NULL);
}
@@ -301,14 +301,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
if (regex->u.regex.match)
{
- g_match_info_free (regex->u.regex.match);
+ impl_match_info_free (regex->u.regex.match);
regex->u.regex.match = NULL;
}
- result = g_regex_match_full (regex->u.regex.regex, line,
- byte_length, byte_pos,
- 0, &regex->u.regex.match,
- NULL);
+ result = impl_regex_match_full (regex->u.regex.regex, line,
+ byte_length, byte_pos,
+ 0, &regex->u.regex.match,
+ NULL);
return result;
}
@@ -319,7 +319,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
{
g_assert (regex->resolved);
- return g_match_info_fetch (regex->u.regex.match, num);
+ return impl_match_info_fetch (regex->u.regex.match, num);
}
void
@@ -333,8 +333,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
- /* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
- if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
+ /* impl_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
+ if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
@@ -356,12 +356,12 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
gint *start_pos_p, /* byte offsets */
gint *end_pos_p) /* byte offsets */
{
- gint start_pos;
- gint end_pos;
+ gint start_pos = -1;
+ gint end_pos = -1;
g_assert (regex->resolved);
- if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
+ if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
{
start_pos = -1;
end_pos = -1;
@@ -384,7 +384,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
- if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
+ if (!impl_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
@@ -405,6 +405,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
{
g_assert (regex->resolved);
- return g_regex_get_pattern (regex->u.regex.regex);
+ return impl_regex_get_pattern (regex->u.regex.regex);
}
diff --git a/gtksourceview/gtksourceregex.h b/gtksourceview/gtksourceregex.h
index edf9d6b7..b70793f6 100644
--- a/gtksourceview/gtksourceregex.h
+++ b/gtksourceview/gtksourceregex.h
@@ -28,56 +28,46 @@
G_BEGIN_DECLS
GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_new (const gchar *pattern,
- GRegexCompileFlags flags,
- GError **error);
-
+GtkSourceRegex *_gtk_source_regex_new (const gchar *pattern,
+ GRegexCompileFlags flags,
+ GError **error);
GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_ref (GtkSourceRegex *regex);
-
+GtkSourceRegex *_gtk_source_regex_ref (GtkSourceRegex *regex);
GTK_SOURCE_INTERNAL
-void _gtk_source_regex_unref (GtkSourceRegex *regex);
-
+void _gtk_source_regex_unref (GtkSourceRegex *regex);
GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_resolve (GtkSourceRegex *regex,
- GtkSourceRegex *start_regex,
- const gchar *matched_text);
-
+GtkSourceRegex *_gtk_source_regex_resolve (GtkSourceRegex *regex,
+ GtkSourceRegex *start_regex,
+ const gchar *matched_text);
GTK_SOURCE_INTERNAL
-gboolean _gtk_source_regex_is_resolved (GtkSourceRegex *regex);
-
+gboolean _gtk_source_regex_is_resolved (GtkSourceRegex *regex);
GTK_SOURCE_INTERNAL
-gboolean _gtk_source_regex_match (GtkSourceRegex *regex,
- const gchar *line,
- gint byte_length,
- gint byte_pos);
-
+gboolean _gtk_source_regex_match (GtkSourceRegex *regex,
+ const gchar *line,
+ gint byte_length,
+ gint byte_pos);
GTK_SOURCE_INTERNAL
-gchar *_gtk_source_regex_fetch (GtkSourceRegex *regex,
- gint num);
-
+gchar *_gtk_source_regex_fetch (GtkSourceRegex *regex,
+ gint num);
GTK_SOURCE_INTERNAL
-void _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
- const gchar *text,
- gint num,
- gint *start_pos, /* character offsets */
- gint *end_pos); /* character offsets */
-
+void _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
+ const gchar *text,
+ gint num,
+ gint *start_pos,
+ gint *end_pos);
GTK_SOURCE_INTERNAL
-void _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
- gint num,
- gint *start_pos_p, /* byte offsets */
- gint *end_pos_p); /* byte offsets */
-
+void _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
+ gint num,
+ gint *start_pos_p,
+ gint *end_pos_p);
GTK_SOURCE_INTERNAL
-void _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
- const gchar *text,
- const gchar *name,
- gint *start_pos, /* character offsets */
- gint *end_pos); /* character offsets */
-
+void _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
+ const gchar *text,
+ const gchar *name,
+ gint *start_pos,
+ gint *end_pos);
GTK_SOURCE_INTERNAL
-const gchar *_gtk_source_regex_get_pattern (GtkSourceRegex *regex);
+const gchar *_gtk_source_regex_get_pattern (GtkSourceRegex *regex);
G_END_DECLS
diff --git a/gtksourceview/gtksourcesearchcontext.c b/gtksourceview/gtksourcesearchcontext.c
index 90811670..69a9c514 100644
--- a/gtksourceview/gtksourcesearchcontext.c
+++ b/gtksourceview/gtksourcesearchcontext.c
@@ -35,6 +35,8 @@
#include "gtksourceiter.h"
#include "gtksource-enumtypes.h"
+#include "implregex-private.h"
+
/**
* SECTION:searchcontext
* @Short_description: Search context
@@ -343,7 +345,7 @@ struct _GtkSourceSearchContextPrivate
*/
gint text_nb_lines;
- GRegex *regex;
+ ImplRegex *regex;
GError *regex_error;
gint occurrences_count;
@@ -569,7 +571,7 @@ regex_search_get_real_start (GtkSourceSearchContext *search,
GtkTextIter *real_start,
gint *start_pos)
{
- gint max_lookbehind = g_regex_get_max_lookbehind (search->priv->regex);
+ gint max_lookbehind = impl_regex_get_max_lookbehind (search->priv->regex);
gint i;
gchar *text;
@@ -614,35 +616,35 @@ regex_search_get_match_options (const GtkTextIter *real_start,
}
/* Get the @match_start and @match_end iters of the @match_info.
- * g_match_info_fetch_pos() returns byte positions. To get the iters, we need to
- * know the number of UTF-8 characters. A GMatchInfo can contain several matches
- * (with g_match_info_next()). So instead of calling g_utf8_strlen() each time
+ * impl_match_info_fetch_pos() returns byte positions. To get the iters, we need to
+ * know the number of UTF-8 characters. An ImplMatchInfo can contain several matches
+ * (with impl_match_info_next()). So instead of calling g_utf8_strlen() each time
* at the beginning of @subject, @iter and @iter_byte_pos are used to remember
* where g_utf8_strlen() stopped.
*/
static gboolean
-regex_search_fetch_match (GMatchInfo *match_info,
- const gchar *subject,
- gssize subject_length,
- GtkTextIter *iter,
- gint *iter_byte_pos,
- GtkTextIter *match_start,
- GtkTextIter *match_end)
-{
- gint start_byte_pos;
- gint end_byte_pos;
+regex_search_fetch_match (ImplMatchInfo *match_info,
+ const gchar *subject,
+ gssize subject_length,
+ GtkTextIter *iter,
+ gint *iter_byte_pos,
+ GtkTextIter *match_start,
+ GtkTextIter *match_end)
+{
+ gint start_byte_pos = 0;
+ gint end_byte_pos = 0;
gint nb_chars;
g_assert (*iter_byte_pos <= subject_length);
g_assert (match_start != NULL);
g_assert (match_end != NULL);
- if (!g_match_info_matches (match_info))
+ if (!impl_match_info_matches (match_info))
{
return FALSE;
}
- if (!g_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
+ if (!impl_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
{
g_warning ("Impossible to fetch regex match position.");
return FALSE;
@@ -715,7 +717,7 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
GRegexMatchFlags match_options;
gchar *subject;
gssize subject_length;
- GMatchInfo *match_info;
+ ImplMatchInfo *match_info;
GtkTextIter iter;
gint iter_byte_pos;
GtkTextIter m_start;
@@ -725,13 +727,13 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
subject = gtk_text_iter_get_visible_text (&real_start, &end);
subject_length = strlen (subject);
- g_regex_match_full (search->priv->regex,
- subject,
- subject_length,
- start_pos,
- match_options,
- &match_info,
- &search->priv->regex_error);
+ impl_regex_match_full (search->priv->regex,
+ subject,
+ subject_length,
+ start_pos,
+ match_options,
+ &match_info,
+ &search->priv->regex_error);
iter = real_start;
iter_byte_pos = 0;
@@ -744,13 +746,13 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
&m_start,
&m_end);
- if (!found && g_match_info_is_partial_match (match_info))
+ if (!found && impl_match_info_is_partial_match (match_info))
{
gtk_text_iter_forward_lines (&end, nb_lines);
nb_lines <<= 1;
g_free (subject);
- g_match_info_free (match_info);
+ impl_match_info_free (match_info);
continue;
}
@@ -789,7 +791,7 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
}
g_free (subject);
- g_match_info_free (match_info);
+ impl_match_info_free (match_info);
break;
}
@@ -1824,7 +1826,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
gchar *subject;
gssize subject_length;
GRegexMatchFlags match_options;
- GMatchInfo *match_info;
+ ImplMatchInfo *match_info;
GtkTextIter iter;
gint iter_byte_pos;
gboolean segment_finished;
@@ -1887,13 +1889,13 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
g_free (subject_escaped);
});
- g_regex_match_full (search->priv->regex,
- subject,
- subject_length,
- start_pos,
- match_options,
- &match_info,
- &search->priv->regex_error);
+ impl_regex_match_full (search->priv->regex,
+ subject,
+ subject_length,
+ start_pos,
+ match_options,
+ &match_info,
+ &search->priv->regex_error);
iter = real_start;
iter_byte_pos = 0;
@@ -1921,7 +1923,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
search->priv->occurrences_count++;
- g_match_info_next (match_info, &search->priv->regex_error);
+ impl_match_info_next (match_info, &search->priv->regex_error);
}
if (search->priv->regex_error != NULL)
@@ -1929,7 +1931,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
g_object_notify (G_OBJECT (search), "regex-error");
}
- if (g_match_info_is_partial_match (match_info))
+ if (impl_match_info_is_partial_match (match_info))
{
segment_finished = FALSE;
@@ -1953,15 +1955,15 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
}
g_free (subject);
- g_match_info_free (match_info);
+ impl_match_info_free (match_info);
return segment_finished;
}
static void
regex_search_scan_chunk (GtkSourceSearchContext *search,
- const GtkTextIter *chunk_start,
- const GtkTextIter *chunk_end)
+ const GtkTextIter *chunk_start,
+ const GtkTextIter *chunk_end)
{
GtkTextIter segment_start = *chunk_start;
@@ -2318,7 +2320,7 @@ update_regex (GtkSourceSearchContext *search)
if (search->priv->regex != NULL)
{
- g_regex_unref (search->priv->regex);
+ impl_regex_unref (search->priv->regex);
search->priv->regex = NULL;
}
@@ -2331,7 +2333,7 @@ update_regex (GtkSourceSearchContext *search)
if (search_text != NULL &&
gtk_source_search_settings_get_regex_enabled (search->priv->settings))
{
- GRegexCompileFlags compile_flags = G_REGEX_OPTIMIZE | G_REGEX_MULTILINE;
+ GRegexCompileFlags compile_flags = G_REGEX_MULTILINE;
gchar *pattern = (gchar *)search_text;
search->priv->text_nb_lines = 0;
@@ -2346,10 +2348,10 @@ update_regex (GtkSourceSearchContext *search)
pattern = g_strdup_printf ("\\b%s\\b", search_text);
}
- search->priv->regex = g_regex_new (pattern,
- compile_flags,
- G_REGEX_MATCH_NOTEMPTY,
- &search->priv->regex_error);
+ search->priv->regex = impl_regex_new (pattern,
+ compile_flags,
+ G_REGEX_MATCH_NOTEMPTY,
+ &search->priv->regex_error);
if (search->priv->regex_error != NULL)
{
@@ -2675,11 +2677,7 @@ gtk_source_search_context_finalize (GObject *object)
{
GtkSourceSearchContext *search = GTK_SOURCE_SEARCH_CONTEXT (object);
- if (search->priv->regex != NULL)
- {
- g_regex_unref (search->priv->regex);
- }
-
+ g_clear_pointer (&search->priv->regex, impl_regex_unref);
g_clear_error (&search->priv->regex_error);
G_OBJECT_CLASS (gtk_source_search_context_parent_class)->finalize (object);
@@ -3603,13 +3601,13 @@ regex_replace (GtkSourceSearchContext *search,
match_options = regex_search_get_match_options (&real_start, &real_end);
match_options |= G_REGEX_MATCH_ANCHORED;
- subject_replaced = g_regex_replace (search->priv->regex,
- subject,
- -1,
- start_pos,
- replace,
- match_options,
- &tmp_error);
+ subject_replaced = impl_regex_replace (search->priv->regex,
+ subject,
+ -1,
+ start_pos,
+ replace,
+ match_options,
+ &tmp_error);
if (tmp_error != NULL)
{
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
new file mode 100644
index 00000000..b0809414
--- /dev/null
+++ b/gtksourceview/implregex-private.h
@@ -0,0 +1,89 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert@redhat.com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _ImplRegex ImplRegex; /* compiled pattern; lifetime managed with impl_regex_ref()/impl_regex_unref() */
+typedef struct _ImplMatchInfo ImplMatchInfo; /* state of one match run; release with impl_match_info_free() */
+
+typedef gboolean (*ImplRegexEvalCallback) (const ImplMatchInfo *match_info,
+ GString *result,
+ gpointer user_data); /* called by impl_regex_replace_eval() once per match; returning TRUE stops further matches from being evaluated */
+
+
+ImplRegex *impl_regex_new (const char *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexMatchFlags match_options,
+ GError **error); /* returns NULL and sets @error when @pattern does not compile */
+gboolean impl_regex_match (const ImplRegex *regex,
+ const char *string,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info); /* impl_regex_match_full() with string_len -1, start position 0 and no GError */
+ImplRegex *impl_regex_ref (ImplRegex *regex); /* increments the reference count and returns @regex */
+void impl_regex_unref (ImplRegex *regex); /* drops one reference; frees @regex when the count reaches zero */
+void impl_match_info_free (ImplMatchInfo *match_info); /* accepts NULL */
+char *impl_match_info_fetch (const ImplMatchInfo *match_info,
+ int match_num); /* newly allocated copy of the text of group @match_num, or NULL */
+char *impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+ const char *name); /* newly allocated copy of the text of group @name, or NULL */
+char *impl_regex_replace_eval (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ gsize start_position,
+ GRegexMatchFlags match_options,
+ ImplRegexEvalCallback eval,
+ gpointer user_data,
+ GError **error); /* newly allocated result string, or NULL with @error set */
+char *impl_regex_replace (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ int start_position,
+ const char *replacement,
+ GRegexMatchFlags match_options,
+ GError **error);
+gboolean impl_regex_match_full (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ gsize start_position,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info,
+ GError **error); /* @string_len < 0 means NUL-terminated; a returned *@match_info is owned by the caller */
+gboolean impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+ int match_num,
+ int *start_pos,
+ int *end_pos);
+gboolean impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+ const char *name,
+ int *start_pos,
+ int *end_pos);
+gboolean impl_match_info_is_partial_match (const ImplMatchInfo *match_info);
+gboolean impl_match_info_matches (const ImplMatchInfo *match_info);
+gboolean impl_match_info_next (ImplMatchInfo *match_info,
+ GError **error);
+int impl_match_info_get_match_count (const ImplMatchInfo *match_info);
+const char *impl_regex_get_pattern (const ImplRegex *regex); /* borrowed pointer to the pattern text stored in @regex */
+int impl_regex_get_max_lookbehind (const ImplRegex *regex);
+
+G_END_DECLS
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
new file mode 100644
index 00000000..e524ac71
--- /dev/null
+++ b/gtksourceview/implregex.c
@@ -0,0 +1,1141 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 1999, 2000 Scott Wimer
+ * Copyright 2004, Matthias Clasen <mclasen@redhat.com>
+ * Copyright 2005 - 2007, Marco Barisione <marco@barisione.org>
+ * Copyright 2020 Christian Hergert <chergert@redhat.com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+/* Some code in this file is based upon GRegex from GLib */
+/* GRegex -- regular expression API wrapper around PCRE.
+ *
+ * Copyright (C) 1999, 2000 Scott Wimer
+ * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
+ * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#include <glib/gi18n.h>
+#include <string.h>
+
+#include "implregex-private.h"
+
+struct _ImplRegex
+{
+ int ref_count; /* plain (non-atomic) counter; starts at 1 in impl_regex_new() */
+ char *pattern; /* g_strdup() copy of the pattern text */
+ gsize compile_flags; /* result of translate_compile_flags(), passed to pcre2_compile() */
+ gsize match_flags; /* result of translate_match_flags() for the default match options */
+ pcre2_compile_context *context; /* owned; carries the newline convention */
+ pcre2_code *code; /* owned; the compiled pattern */
+ guint has_jit : 1; /* set when pcre2_jit_compile() returned 0 */
+};
+
+struct _ImplMatchInfo
+{
+ gsize compile_flags; /* not assigned by impl_match_info_new(); stays 0 from g_slice_new0() */
+ gsize match_flags; /* regex default match flags OR-ed with the per-call flags */
+ ImplRegex *regex; /* holds a reference taken with impl_regex_ref() */
+ const char *string; /* subject text; borrowed, not copied */
+ gsize string_len; /* length of @string in bytes */
+ pcre2_match_data *match_data; /* owned; created from the compiled pattern */
+ PCRE2_SIZE *offsets; /* ovector pointer obtained from @match_data */
+ int matches; /* initialised to PCRE2_ERROR_NOMATCH */
+ uint32_t n_subpatterns; /* PCRE2_INFO_CAPTURECOUNT of the pattern */
+ gssize pos; /* start position for matching, clamped to >= 0 */
+};
+
+/* If the regex was compiled with PCRE2_UTF, step with g_utf8_next_char(); otherwise advance by one byte. */
+#define NEXT_CHAR(re, s) ((!((re)->compile_flags & PCRE2_UTF)) ? ((s) + 1) : g_utf8_next_char (s))
+
+#define TAKE(f,gbit,pbit) /* if any bit of gbit is set in f: OR pbit into the enclosing scope's `ret` and clear gbit from f */ \
+ G_STMT_START { \
+ if (f & gbit) \
+ { \
+ ret |= pbit; \
+ f &= ~gbit; \
+ } \
+ } G_STMT_END
+
+/*
+ * translate_compile_flags:
+ * @flags: the #GRegexCompileFlags requested by the caller
+ *
+ * Converts GRegex compile flags into the option bits accepted by
+ * pcre2_compile(). PCRE2_UCP is always enabled, and UTF-8 mode is enabled
+ * unless %G_REGEX_RAW is given.
+ *
+ * Only flags that have a pcre2_compile() option bit are translated here.
+ * The newline and \R conventions are values for pcre2_set_newline() and
+ * pcre2_set_bsr() on the compile context; OR-ing PCRE2_NEWLINE_* or
+ * PCRE2_BSR_* into the option word would silently enable unrelated options
+ * such as PCRE2_ALLOW_EMPTY_CLASS (1) or PCRE2_ALT_BSUX (2).
+ *
+ * Asserts if @flags contains a flag that is not handled.
+ *
+ * Returns: the pcre2_compile() option bits.
+ */
+static gsize
+translate_compile_flags (GRegexCompileFlags flags)
+{
+	gsize ret = PCRE2_UCP;
+
+	/* UTF-8 is the default; G_REGEX_RAW opts out of it. */
+	if ((flags & G_REGEX_RAW) == 0)
+	{
+		ret |= (PCRE2_UTF | PCRE2_NO_UTF_CHECK);
+	}
+
+	/* Consume the flag in both cases so it cannot trip the assertion below. */
+	flags &= ~G_REGEX_RAW;
+
+	TAKE (flags, G_REGEX_ANCHORED, PCRE2_ANCHORED);
+	TAKE (flags, G_REGEX_CASELESS, PCRE2_CASELESS);
+	TAKE (flags, G_REGEX_EXTENDED, PCRE2_EXTENDED);
+	TAKE (flags, G_REGEX_DUPNAMES, PCRE2_DUPNAMES);
+	TAKE (flags, G_REGEX_MULTILINE, PCRE2_MULTILINE);
+
+	/* Newline (CR, LF, CRLF = CR|LF, ANYCRLF) and \R selection have no
+	 * option bit; they belong to the compile context. Just consume them.
+	 */
+	flags &= ~(G_REGEX_NEWLINE_CR |
+	           G_REGEX_NEWLINE_LF |
+	           G_REGEX_NEWLINE_ANYCRLF |
+	           G_REGEX_BSR_ANYCRLF);
+
+	/* JIT compilation is requested separately from the option bits. */
+	flags &= ~G_REGEX_OPTIMIZE;
+
+	g_assert (flags == 0);
+
+	return ret;
+}
+
+/*
+ * translate_match_flags:
+ * @flags: the #GRegexMatchFlags requested by the caller
+ *
+ * Maps each supported GRegex match flag onto its PCRE2 match option.
+ * Asserts if @flags contains a flag without a mapping.
+ *
+ * Returns: the PCRE2 match option bits.
+ */
+static gsize
+translate_match_flags (GRegexMatchFlags flags)
+{
+	static const struct {
+		GRegexMatchFlags glib_bit;
+		gsize            pcre2_bit;
+	} mapping[] = {
+		{ G_REGEX_MATCH_ANCHORED,     PCRE2_ANCHORED },
+		{ G_REGEX_MATCH_NOTBOL,       PCRE2_NOTBOL },
+		{ G_REGEX_MATCH_NOTEOL,       PCRE2_NOTEOL },
+		{ G_REGEX_MATCH_PARTIAL_SOFT, PCRE2_PARTIAL_SOFT },
+		{ G_REGEX_MATCH_PARTIAL_HARD, PCRE2_PARTIAL_HARD },
+		{ G_REGEX_MATCH_NOTEMPTY,     PCRE2_NOTEMPTY },
+	};
+	gsize translated = 0;
+	guint i;
+
+	for (i = 0; i < G_N_ELEMENTS (mapping); i++)
+	{
+		if ((flags & mapping[i].glib_bit) != 0)
+		{
+			translated |= mapping[i].pcre2_bit;
+			flags &= ~mapping[i].glib_bit;
+		}
+	}
+
+	/* Anything left over is a flag this wrapper does not know about. */
+	g_assert (flags == 0);
+
+	return translated;
+}
+
+/*
+ * set_regex_error:
+ * @error: (nullable): location for a #GError
+ * @rc: return code from a PCRE2 match call
+ *
+ * Treats every negative code other than PCRE2_ERROR_NOMATCH and
+ * PCRE2_ERROR_PARTIAL as a real failure and reports it through @error
+ * as %G_REGEX_ERROR_MATCH.
+ *
+ * Returns: %TRUE if @rc denotes an error, %FALSE otherwise.
+ */
+static gboolean
+set_regex_error (GError **error,
+                 int      rc)
+{
+	guchar errstr[128];
+
+	if (rc >= PCRE2_ERROR_NOMATCH || rc == PCRE2_ERROR_PARTIAL)
+	{
+		return FALSE;
+	}
+
+	if (error != NULL)
+	{
+		/* pcre2_get_error_message() leaves the buffer untouched when it
+		 * does not know the code, so fall back to a generic text rather
+		 * than reading uninitialised bytes. A truncated message is still
+		 * NUL-terminated by PCRE2.
+		 */
+		if (pcre2_get_error_message (rc, errstr, sizeof errstr) == PCRE2_ERROR_BADDATA)
+		{
+			g_snprintf ((gchar *)errstr, sizeof errstr, "unknown PCRE2 error %d", rc);
+		}
+
+		g_set_error_literal (error,
+		                     G_REGEX_ERROR,
+		                     G_REGEX_ERROR_MATCH,
+		                     (const gchar *)errstr);
+	}
+
+	return TRUE;
+}
+
+/*
+ * impl_regex_new:
+ * @pattern: the regular expression
+ * @compile_options: GRegex compile flags
+ * @match_options: default GRegex match flags stored with the regex
+ * @error: (nullable): location for a #GError
+ *
+ * Compiles @pattern with PCRE2. When %G_REGEX_OPTIMIZE is given, JIT
+ * compilation is attempted as well; a JIT failure is not an error.
+ *
+ * Returns: a new #ImplRegex with one reference, or %NULL with @error set
+ *   to %G_REGEX_ERROR_COMPILE if the pattern does not compile.
+ */
+ImplRegex *
+impl_regex_new (const char          *pattern,
+                GRegexCompileFlags   compile_options,
+                GRegexMatchFlags     match_options,
+                GError             **error)
+{
+	pcre2_compile_context *context;
+	ImplRegex *regex;
+	PCRE2_SIZE erroffset = 0;
+	int errnumber = -1;
+	guint newline_flags;
+
+	g_return_val_if_fail (pattern != NULL, NULL);
+
+	context = pcre2_compile_context_create (NULL);
+
+	regex = g_slice_new0 (ImplRegex);
+	regex->ref_count = 1;
+	regex->context = context;
+	regex->pattern = g_strdup (pattern);
+	regex->compile_flags = translate_compile_flags (compile_options);
+	regex->match_flags = translate_match_flags (match_options);
+
+	/* The GRegex newline flags share bits (CRLF is CR|LF, ANYCRLF contains
+	 * the CR bit), so compare the masked value exactly instead of testing
+	 * single bits; otherwise CRLF is mistaken for LF and ANYCRLF for CR.
+	 */
+	newline_flags = compile_options & (G_REGEX_NEWLINE_CR |
+	                                   G_REGEX_NEWLINE_LF |
+	                                   G_REGEX_NEWLINE_ANYCRLF);
+
+	if (newline_flags == G_REGEX_NEWLINE_CRLF)
+		pcre2_set_newline (context, PCRE2_NEWLINE_CRLF);
+	else if (newline_flags == G_REGEX_NEWLINE_ANYCRLF)
+		pcre2_set_newline (context, PCRE2_NEWLINE_ANYCRLF);
+	else if (newline_flags == G_REGEX_NEWLINE_LF)
+		pcre2_set_newline (context, PCRE2_NEWLINE_LF);
+	else if (newline_flags == G_REGEX_NEWLINE_CR)
+		pcre2_set_newline (context, PCRE2_NEWLINE_CR);
+	else
+		pcre2_set_newline (context, PCRE2_NEWLINE_ANY);
+
+	/* What \R matches is a compile-context setting as well. */
+	if (compile_options & G_REGEX_BSR_ANYCRLF)
+		pcre2_set_bsr (context, PCRE2_BSR_ANYCRLF);
+	else
+		pcre2_set_bsr (context, PCRE2_BSR_UNICODE);
+
+	regex->code = pcre2_compile ((PCRE2_SPTR)pattern,
+	                             PCRE2_ZERO_TERMINATED,
+	                             regex->compile_flags,
+	                             &errnumber,
+	                             &erroffset,
+	                             context);
+
+	if (regex->code == NULL)
+	{
+		char errmsg[128];
+
+		/* The buffer is left untouched for unknown error numbers. */
+		if (pcre2_get_error_message (errnumber, (guchar *)errmsg, sizeof errmsg) == PCRE2_ERROR_BADDATA)
+		{
+			g_snprintf (errmsg, sizeof errmsg, "unknown PCRE2 error %d", errnumber);
+		}
+
+		g_set_error (error,
+		             G_REGEX_ERROR,
+		             G_REGEX_ERROR_COMPILE,
+		             "%s: offset %d of pattern %s",
+		             errmsg,
+		             (int)erroffset,
+		             pattern);
+		impl_regex_unref (regex);
+		return NULL;
+	}
+
+	/* Now try to JIT the pattern for faster execution time */
+	if (compile_options & G_REGEX_OPTIMIZE)
+	{
+		regex->has_jit = pcre2_jit_compile (regex->code, PCRE2_JIT_COMPLETE) == 0;
+	}
+
+	return regex;
+}
+
+/*
+ * impl_regex_get_pattern:
+ * @regex: an #ImplRegex
+ *
+ * Returns: the pattern text stored in @regex; owned by @regex.
+ */
+const char *
+impl_regex_get_pattern (const ImplRegex *regex)
+{
+	const char *pattern;
+
+	g_return_val_if_fail (regex != NULL, NULL);
+
+	pattern = regex->pattern;
+
+	return pattern;
+}
+
+/*
+ * impl_regex_ref:
+ * @regex: an #ImplRegex with at least one reference
+ *
+ * Takes an additional reference on @regex. The counter is a plain
+ * integer, not an atomic.
+ *
+ * Returns: @regex.
+ */
+ImplRegex *
+impl_regex_ref (ImplRegex *regex)
+{
+	g_return_val_if_fail (regex != NULL, NULL);
+	g_return_val_if_fail (regex->ref_count > 0, NULL);
+
+	regex->ref_count += 1;
+
+	return regex;
+}
+
+/*
+ * impl_regex_unref:
+ * @regex: an #ImplRegex with at least one reference
+ *
+ * Releases one reference. When the last reference is dropped, the pattern
+ * copy, the compiled code and the compile context are freed along with
+ * the structure itself.
+ */
+void
+impl_regex_unref (ImplRegex *regex)
+{
+	g_return_if_fail (regex != NULL);
+	g_return_if_fail (regex->ref_count > 0);
+
+	regex->ref_count -= 1;
+
+	if (regex->ref_count != 0)
+	{
+		return;
+	}
+
+	if (regex->pattern != NULL)
+	{
+		g_free (regex->pattern);
+		regex->pattern = NULL;
+	}
+
+	if (regex->code != NULL)
+	{
+		pcre2_code_free (regex->code);
+		regex->code = NULL;
+	}
+
+	if (regex->context != NULL)
+	{
+		pcre2_compile_context_free (regex->context);
+		regex->context = NULL;
+	}
+
+	g_slice_free (ImplRegex, regex);
+}
+
+/*
+ * impl_match_info_new:
+ * @regex: the compiled regex; a reference is taken
+ * @match_options: per-call GRegex match flags, merged with the regex defaults
+ * @string: subject text; borrowed, must outlive the match info
+ * @string_len: length of @string in bytes, or a negative value if @string
+ *   is NUL-terminated
+ * @position: byte offset at which matching starts; negative values are
+ *   clamped to 0
+ *
+ * Allocates the state for one match run. No matching is performed here;
+ * the result starts out as PCRE2_ERROR_NOMATCH with unset offsets.
+ *
+ * Returns: a new #ImplMatchInfo, to be freed with impl_match_info_free().
+ */
+static ImplMatchInfo *
+impl_match_info_new (ImplRegex        *regex,
+                     GRegexMatchFlags  match_options,
+                     const char       *string,
+                     gssize            string_len,
+                     gssize            position)
+{
+	ImplMatchInfo *match_info;
+
+	g_assert (regex != NULL);
+	g_assert (string != NULL);
+
+	/* Resolve a negative length before comparing against strlen(): mixing
+	 * gssize with size_t would turn -1 into a huge unsigned value and make
+	 * the bounds assertion fail for a perfectly valid request.
+	 */
+	if (string_len < 0)
+	{
+		string_len = strlen (string);
+	}
+	else
+	{
+		g_assert ((gsize)string_len <= strlen (string));
+	}
+
+	match_info = g_slice_new0 (ImplMatchInfo);
+	match_info->regex = impl_regex_ref (regex);
+	match_info->match_flags = regex->match_flags | translate_match_flags (match_options);
+	match_info->pos = MAX (0, position);
+	match_info->matches = PCRE2_ERROR_NOMATCH;
+	match_info->string = string;
+	match_info->string_len = string_len;
+	match_info->match_data = pcre2_match_data_create_from_pattern (regex->code, NULL);
+
+	if (match_info->match_data == NULL)
+		g_error ("Failed to allocate match data");
+
+	pcre2_pattern_info (regex->code, PCRE2_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
+
+	/* PCRE2_UNSET is the all-ones PCRE2_SIZE, i.e. the same value the
+	 * former "-1" stores produced, spelled without a sign conversion.
+	 */
+	match_info->offsets = pcre2_get_ovector_pointer (match_info->match_data);
+	match_info->offsets[0] = PCRE2_UNSET;
+	match_info->offsets[1] = PCRE2_UNSET;
+
+	return match_info;
+}
+
+/*
+ * impl_match_info_free:
+ * @match_info: (nullable): the match info to release
+ *
+ * Frees the PCRE2 match data, drops the reference held on the regex and
+ * releases the structure. Passing %NULL is a no-op.
+ */
+void
+impl_match_info_free (ImplMatchInfo *match_info)
+{
+	if (match_info == NULL)
+	{
+		return;
+	}
+
+	if (match_info->match_data != NULL)
+	{
+		pcre2_match_data_free (match_info->match_data);
+		match_info->match_data = NULL;
+	}
+
+	if (match_info->regex != NULL)
+	{
+		impl_regex_unref (match_info->regex);
+		match_info->regex = NULL;
+	}
+
+	/* Scrub the remaining fields before handing the memory back. */
+	match_info->offsets = NULL;
+	match_info->string = NULL;
+	match_info->string_len = 0;
+	match_info->compile_flags = 0;
+	match_info->match_flags = 0;
+	match_info->matches = 0;
+	match_info->pos = 0;
+
+	g_slice_free (ImplMatchInfo, match_info);
+}
+
+/*
+ * impl_regex_match:
+ * @regex: a compiled #ImplRegex
+ * @string: NUL-terminated subject text
+ * @match_options: per-call match flags
+ * @match_info: (out) (optional): location for the match info
+ *
+ * Convenience wrapper around impl_regex_match_full() that scans the whole
+ * NUL-terminated @string from offset 0 and does not report a #GError.
+ *
+ * Returns: the result of impl_regex_match_full().
+ */
+gboolean
+impl_regex_match (const ImplRegex   *regex,
+                  const char        *string,
+                  GRegexMatchFlags   match_options,
+                  ImplMatchInfo    **match_info)
+{
+	gboolean matched;
+
+	g_return_val_if_fail (regex != NULL, FALSE);
+	g_return_val_if_fail (regex->code != NULL, FALSE);
+	g_return_val_if_fail (string != NULL, FALSE);
+
+	matched = impl_regex_match_full (regex, string, -1, 0, match_options, match_info, NULL);
+
+	return matched;
+}
+
+/*
+ * impl_match_info_fetch:
+ * @match_info: a match info holding a successful match
+ * @match_num: number of the group to fetch (0 is the whole match)
+ *
+ * Returns: a newly allocated copy of the group's text; an empty string
+ *   when the reported start position is -1; %NULL when the position
+ *   cannot be fetched.
+ */
+char *
+impl_match_info_fetch (const ImplMatchInfo *match_info,
+                       int                  match_num)
+{
+	int start = -1;
+	int stop = -1;
+
+	g_return_val_if_fail (match_info != NULL, NULL);
+	g_return_val_if_fail (match_info->string != NULL, NULL);
+	g_return_val_if_fail (match_info->offsets != NULL, NULL);
+	g_return_val_if_fail (impl_match_info_matches (match_info), NULL);
+	g_return_val_if_fail (match_num >= 0, NULL);
+
+	if (!impl_match_info_fetch_pos (match_info, match_num, &start, &stop))
+	{
+		return NULL;
+	}
+
+	/* The group exists but did not take part in the match. */
+	if (start == -1)
+	{
+		return g_strdup ("");
+	}
+
+	return g_strndup (&match_info->string[start], stop - start);
+}
+
+/*
+ * impl_match_info_fetch_named:
+ * @match_info: a match info
+ * @name: name of the group to fetch
+ *
+ * Returns: a newly allocated copy of the named group's text, or %NULL
+ *   when the position lookup fails or either offset is negative.
+ */
+char *
+impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+                             const char          *name)
+{
+	int start = -1;
+	int stop = -1;
+
+	g_return_val_if_fail (match_info != NULL, NULL);
+
+	if (!impl_match_info_fetch_named_pos (match_info, name, &start, &stop))
+	{
+		return NULL;
+	}
+
+	if (start < 0 || stop < 0)
+	{
+		return NULL;
+	}
+
+	return g_strndup (match_info->string + start, stop - start);
+}
+
+/*
+ * impl_regex_replace_eval:
+ * @regex: a compiled #ImplRegex
+ * @string: subject text
+ * @string_len: length of @string in bytes, or negative if NUL-terminated
+ * @start_position: byte offset at which matching starts
+ * @match_options: per-call match flags
+ * @eval: callback producing the replacement for each match
+ * @user_data: data passed to @eval
+ * @error: (nullable): location for a #GError
+ *
+ * Builds a new string from @string in which every match is replaced by
+ * whatever @eval appends to the result. Text between matches is copied
+ * verbatim. Evaluation stops once @eval returns %TRUE.
+ *
+ * Returns: a newly allocated string, or %NULL on error.
+ */
+char *
+impl_regex_replace_eval (const ImplRegex        *regex,
+                         const char             *string,
+                         gssize                  string_len,
+                         gsize                   start_position,
+                         GRegexMatchFlags        match_options,
+                         ImplRegexEvalCallback   eval,
+                         gpointer                user_data,
+                         GError                **error)
+{
+	/* Initialised because impl_regex_match_full() leaves it untouched when
+	 * it rejects its arguments.
+	 */
+	ImplMatchInfo *match_info = NULL;
+	GString *result;
+	gsize str_pos = 0;
+	gboolean done = FALSE;
+	GError *tmp_error = NULL;
+
+	g_return_val_if_fail (regex != NULL, NULL);
+	g_return_val_if_fail (string != NULL, NULL);
+	g_return_val_if_fail (eval != NULL, NULL);
+
+	if (string_len < 0)
+	{
+		string_len = strlen (string);
+	}
+
+	result = g_string_sized_new (string_len);
+
+	/* run down the string making matches. */
+	impl_regex_match_full (regex,
+	                       string,
+	                       string_len,
+	                       start_position,
+	                       match_options,
+	                       &match_info,
+	                       &tmp_error);
+
+	if (match_info == NULL)
+	{
+		/* The precondition checks in impl_regex_match_full() failed and
+		 * have already logged a critical; there is nothing to replace.
+		 */
+		g_clear_error (&tmp_error);
+		g_string_free (result, TRUE);
+		return NULL;
+	}
+
+	while (!done && impl_match_info_matches (match_info))
+	{
+		/* Copy the text between the previous match and this one. */
+		g_string_append_len (result,
+		                     string + str_pos,
+		                     match_info->offsets[0] - str_pos);
+		done = (*eval) (match_info, result, user_data);
+		str_pos = match_info->offsets[1];
+		impl_match_info_next (match_info, &tmp_error);
+
+		/* We already matched, so ignore future matches */
+		if (g_error_matches (tmp_error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH))
+		{
+			g_clear_error (&tmp_error);
+			break;
+		}
+	}
+
+	impl_match_info_free (match_info);
+
+	if (tmp_error != NULL)
+	{
+		g_propagate_error (error, tmp_error);
+		g_string_free (result, TRUE);
+		return NULL;
+	}
+
+	/* Append whatever follows the last match. */
+	g_string_append_len (result, string + str_pos, string_len - str_pos);
+
+	return g_string_free (result, FALSE);
+}
+
+/*
+ * impl_regex_match_full:
+ * @regex: compiled regex; must have a non-NULL compiled code
+ * @string: subject text
+ * @string_len: length of @string in bytes, or a negative value to use strlen()
+ * @start_position: byte offset at which the search starts
+ * @match_options: flags handed to impl_match_info_new()
+ * @match_info: optional out location that receives the new match info
+ * @error: return location for a #GError, or %NULL
+ *
+ * Creates a match info for @string and runs the first match attempt on it.
+ * When @match_info is non-NULL the match info is handed to the caller
+ * whether or not the attempt matched; otherwise it is freed here.
+ *
+ * Returns: the result of the first impl_match_info_next() call.
+ */
+gboolean
+impl_regex_match_full (const ImplRegex   *regex,
+                       const char        *string,
+                       gssize             string_len,
+                       gsize              start_position,
+                       GRegexMatchFlags   match_options,
+                       ImplMatchInfo    **match_info,
+                       GError           **error)
+{
+	ImplMatchInfo *info;
+	gboolean matched;
+
+	g_return_val_if_fail (regex != NULL, FALSE);
+	g_return_val_if_fail (regex->code != NULL, FALSE);
+	g_return_val_if_fail (string != NULL, FALSE);
+
+	if (string_len < 0)
+	{
+		string_len = strlen (string);
+	}
+
+	info = impl_match_info_new ((ImplRegex *)regex, match_options, string, string_len, start_position);
+	matched = impl_match_info_next (info, error);
+
+	if (match_info == NULL)
+	{
+		impl_match_info_free (info);
+	}
+	else
+	{
+		*match_info = info;
+	}
+
+	return matched;
+}
+
+enum /* Kinds of piece a parsed replacement string is split into; stored in InterpolationData.type. */
+{
+ REPL_TYPE_STRING, /* UTF-8 text (literal run, or a \x / octal escape) held in InterpolationData.text */
+ REPL_TYPE_CHARACTER, /* single char produced by an escape such as \t or \n, held in InterpolationData.c */
+ REPL_TYPE_SYMBOLIC_REFERENCE, /* \g<name>: the group name is held in InterpolationData.text */
+ REPL_TYPE_NUMERIC_REFERENCE, /* \g<N> or \N: the group number is held in InterpolationData.num */
+ REPL_TYPE_CHANGE_CASE /* \l \u \L \U \E: the new mode is held in InterpolationData.change_case */
+};
+
+typedef enum /* Case-conversion mode applied to text appended while interpolating a replacement. */
+{
+ CHANGE_CASE_NONE = 1 << 0, /* selected by \E: text is appended unchanged */
+ CHANGE_CASE_UPPER = 1 << 1, /* selected by \U: upper-case all following text */
+ CHANGE_CASE_LOWER = 1 << 2, /* selected by \L: lower-case all following text */
+ CHANGE_CASE_UPPER_SINGLE = 1 << 3, /* selected by \u: upper-case the next character only */
+ CHANGE_CASE_LOWER_SINGLE = 1 << 4, /* selected by \l: lower-case the next character only */
+ CHANGE_CASE_SINGLE_MASK = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE, /* either one-character mode */
+ CHANGE_CASE_LOWER_MASK = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE, /* any lower-casing mode */
+ CHANGE_CASE_UPPER_MASK = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE /* any upper-casing mode */
+} ChangeCase;
+
+typedef struct _InterpolationData /* One parsed piece of a replacement string; freed by free_interpolation_data(). */
+{
+ char *text; /* owned: UTF-8 text for REPL_TYPE_STRING, group name for REPL_TYPE_SYMBOLIC_REFERENCE */
+ int type; /* one of the REPL_TYPE_* values, selecting which other field is meaningful */
+ int num; /* group number for REPL_TYPE_NUMERIC_REFERENCE */
+ char c; /* character for REPL_TYPE_CHARACTER */
+ ChangeCase change_case; /* mode to switch to for REPL_TYPE_CHANGE_CASE */
+} InterpolationData;
+
+/*
+ * free_interpolation_data:
+ * @data: (nullable): piece to release
+ *
+ * Frees @data together with the text it owns. Like g_free(), a %NULL
+ * @data is accepted and ignored, so the function is safe to use as a
+ * destroy notifier on possibly-empty slots.
+ */
+static void
+free_interpolation_data (InterpolationData *data)
+{
+	if (data == NULL)
+	{
+		return;
+	}
+
+	g_free (data->text);
+	g_free (data);
+}
+
+/*
+ * expand_escape:
+ * @replacement: the complete replacement string; used only to build the
+ *   error message and to compute the error offset
+ * @p: points at the backslash, inside @replacement, that starts the escape
+ * @data: piece to fill in; only the fields relevant to the parsed escape
+ *   are written, so the caller is expected to pass a zeroed structure
+ * @error: return location for a #GError, or %NULL
+ *
+ * Parses a single backslash escape of a replacement string:
+ *  - \t \n \v \r \f \a \b \\   -> REPL_TYPE_CHARACTER
+ *  - \xHH, \x{H...}, octal      -> REPL_TYPE_STRING holding the UTF-8 form
+ *  - \l \u \L \U \E             -> REPL_TYPE_CHANGE_CASE
+ *  - \g<name>                   -> REPL_TYPE_SYMBOLIC_REFERENCE
+ *  - \g<N>, \N                  -> REPL_TYPE_NUMERIC_REFERENCE
+ *
+ * Returns: a pointer to the first character after the escape, or %NULL
+ *   with @error set to G_REGEX_ERROR_REPLACE when the escape is malformed.
+ */
+static const char *
+expand_escape (const char        *replacement,
+               const char        *p,
+               InterpolationData *data,
+               GError           **error)
+{
+	const char *q, *r;
+	int x, d, h, i;
+	const char *error_detail;
+	int base = 0;
+	GError *tmp_error = NULL;
+
+	p++;
+	switch (*p)
+	{
+	case 't':
+		p++;
+		data->c = '\t';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'n':
+		p++;
+		data->c = '\n';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'v':
+		p++;
+		data->c = '\v';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'r':
+		p++;
+		data->c = '\r';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'f':
+		p++;
+		data->c = '\f';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'a':
+		p++;
+		data->c = '\a';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'b':
+		p++;
+		data->c = '\b';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case '\\':
+		p++;
+		data->c = '\\';
+		data->type = REPL_TYPE_CHARACTER;
+		break;
+	case 'x':
+		p++;
+		x = 0;
+		if (*p == '{')
+		{
+			/* \x{H...}: any number of hex digits up to the closing brace. */
+			p++;
+			do
+			{
+				h = g_ascii_xdigit_value (*p);
+				if (h < 0)
+				{
+					error_detail = _("hexadecimal digit or “}” expected");
+					goto error;
+				}
+				x = x * 16 + h;
+				/* Stop as soon as the value leaves the Unicode range: this
+				 * keeps the accumulator from overflowing on long digit
+				 * runs and avoids emitting bytes that are not UTF-8. */
+				if (x > 0x10FFFF)
+				{
+					error_detail = _("character value in hexadecimal escape is too large");
+					goto error;
+				}
+				p++;
+			}
+			while (*p != '}');
+			p++;
+		}
+		else
+		{
+			/* \xHH: exactly two hex digits. */
+			for (i = 0; i < 2; i++)
+			{
+				h = g_ascii_xdigit_value (*p);
+				if (h < 0)
+				{
+					error_detail = _("hexadecimal digit expected");
+					goto error;
+				}
+				x = x * 16 + h;
+				p++;
+			}
+		}
+		data->type = REPL_TYPE_STRING;
+		data->text = g_new0 (gchar, 8);
+		g_unichar_to_utf8 (x, data->text);
+		break;
+	case 'l':
+		p++;
+		data->type = REPL_TYPE_CHANGE_CASE;
+		data->change_case = CHANGE_CASE_LOWER_SINGLE;
+		break;
+	case 'u':
+		p++;
+		data->type = REPL_TYPE_CHANGE_CASE;
+		data->change_case = CHANGE_CASE_UPPER_SINGLE;
+		break;
+	case 'L':
+		p++;
+		data->type = REPL_TYPE_CHANGE_CASE;
+		data->change_case = CHANGE_CASE_LOWER;
+		break;
+	case 'U':
+		p++;
+		data->type = REPL_TYPE_CHANGE_CASE;
+		data->change_case = CHANGE_CASE_UPPER;
+		break;
+	case 'E':
+		p++;
+		data->type = REPL_TYPE_CHANGE_CASE;
+		data->change_case = CHANGE_CASE_NONE;
+		break;
+	case 'g':
+		p++;
+		if (*p != '<')
+		{
+			error_detail = _("missing “<” in symbolic reference");
+			goto error;
+		}
+		/* q marks the first character of the reference, p is advanced to
+		 * the closing '>'. */
+		q = p + 1;
+		do
+		{
+			p++;
+			if (!*p)
+			{
+				error_detail = _("unfinished symbolic reference");
+				goto error;
+			}
+		}
+		while (*p != '>');
+		if (p - q == 0)
+		{
+			error_detail = _("zero-length symbolic reference");
+			goto error;
+		}
+		if (g_ascii_isdigit (*q))
+		{
+			x = 0;
+			do
+			{
+				h = g_ascii_digit_value (*q);
+				if (h < 0)
+				{
+					error_detail = _("digit expected");
+					p = q;
+					goto error;
+				}
+				/* Saturate instead of overflowing: a group number this
+				 * large is looked up like any other number and simply
+				 * fails to resolve to a captured group. */
+				if (x > (G_MAXINT - h) / 10)
+					x = G_MAXINT;
+				else
+					x = x * 10 + h;
+				q++;
+			}
+			while (q != p);
+			data->num = x;
+			data->type = REPL_TYPE_NUMERIC_REFERENCE;
+		}
+		else
+		{
+			r = q;
+			do
+			{
+				if (!g_ascii_isalnum (*r))
+				{
+					error_detail = _("illegal symbolic reference");
+					p = r;
+					goto error;
+				}
+				r++;
+			}
+			while (r != p);
+			data->text = g_strndup (q, p - q);
+			data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
+		}
+		p++;
+		break;
+	case '0':
+		/* if \0 is followed by a number is an octal number representing a
+		 * character, else it is a numeric reference. */
+		if (g_ascii_digit_value (*g_utf8_next_char (p)) >= 0)
+		{
+			base = 8;
+			p = g_utf8_next_char (p);
+		}
+		/* Fallthrough */
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+		/* Read up to three digits, tracking both the octal (x) and the
+		 * decimal (d) interpretation until the base is known. */
+		x = 0;
+		d = 0;
+		for (i = 0; i < 3; i++)
+		{
+			h = g_ascii_digit_value (*p);
+			if (h < 0)
+				break;
+			if (h > 7)
+			{
+				if (base == 8)
+					break;
+				else
+					base = 10;
+			}
+			if (i == 2 && base == 10)
+				break;
+			x = x * 8 + h;
+			d = d * 10 + h;
+			p++;
+		}
+		if (base == 8 || i == 3)
+		{
+			data->type = REPL_TYPE_STRING;
+			data->text = g_new0 (gchar, 8);
+			g_unichar_to_utf8 (x, data->text);
+		}
+		else
+		{
+			data->type = REPL_TYPE_NUMERIC_REFERENCE;
+			data->num = d;
+		}
+		break;
+	case 0:
+		error_detail = _("stray final “\\”");
+		goto error;
+	default:
+		error_detail = _("unknown escape sequence");
+		goto error;
+	}
+
+	return p;
+
+error:
+	/* G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu */
+	tmp_error = g_error_new (G_REGEX_ERROR,
+	                         G_REGEX_ERROR_REPLACE,
+	                         _("Error while parsing replacement "
+	                           "text “%s” at char %lu: %s"),
+	                         replacement,
+	                         (gulong)(p - replacement),
+	                         error_detail);
+	g_propagate_error (error, tmp_error);
+
+	return NULL;
+}
+
+/*
+ * split_replacement:
+ * @replacement: replacement string to parse
+ * @error: return location for a #GError, or %NULL
+ *
+ * Breaks @replacement into a list of InterpolationData pieces, in order:
+ * each backslash escape becomes the piece produced by expand_escape(),
+ * and each run of characters between escapes becomes one
+ * REPL_TYPE_STRING piece.
+ *
+ * Returns: the list of pieces (which is %NULL for an empty @replacement),
+ *   or %NULL when expand_escape() fails; in that case the pieces built so
+ *   far have been freed.
+ */
+static GList *
+split_replacement (const gchar  *replacement,
+                   GError      **error)
+{
+	GList *pieces = NULL;
+	const gchar *cursor = replacement;
+
+	while (*cursor != '\0')
+	{
+		InterpolationData *piece = g_new0 (InterpolationData, 1);
+
+		if (*cursor == '\\')
+		{
+			cursor = expand_escape (replacement, cursor, piece, error);
+			if (cursor == NULL)
+			{
+				g_list_free_full (pieces, (GDestroyNotify) free_interpolation_data);
+				free_interpolation_data (piece);
+
+				return NULL;
+			}
+		}
+		else
+		{
+			const gchar *run = cursor;
+
+			/* Swallow the literal run up to the next escape or the end. */
+			do
+			{
+				cursor++;
+			}
+			while (*cursor != '\\' && *cursor != '\0');
+
+			piece->text = g_strndup (run, cursor - run);
+			piece->type = REPL_TYPE_STRING;
+		}
+
+		pieces = g_list_prepend (pieces, piece);
+	}
+
+	/* Pieces were prepended, so restore source order. */
+	return g_list_reverse (pieces);
+}
+
+/* Change the case of c based on change_case: lower-case it for any mode in
+ * CHANGE_CASE_LOWER_MASK, upper-case it for any mode in
+ * CHANGE_CASE_UPPER_MASK, and return it untouched otherwise (that is, for
+ * CHANGE_CASE_NONE) instead of silently upper-casing it. */
+#define CHANGE_CASE(c, change_case) \
+	(((change_case) & CHANGE_CASE_LOWER_MASK) ? \
+		g_unichar_tolower (c) : \
+	 ((change_case) & CHANGE_CASE_UPPER_MASK) ? \
+		g_unichar_toupper (c) : \
+		(gunichar) (c))
+
+/*
+ * string_append:
+ * @string: destination buffer
+ * @text: UTF-8 text to append
+ * @change_case: in/out case-conversion mode
+ *
+ * Appends @text to @string, converting case as @change_case asks. Empty
+ * @text is ignored and leaves @change_case untouched. A one-character
+ * mode converts only the first character of @text and then resets
+ * @change_case to CHANGE_CASE_NONE; the other modes persist.
+ */
+static void
+string_append (GString     *string,
+               const gchar *text,
+               ChangeCase  *change_case)
+{
+	const gchar *cursor;
+	gunichar ch;
+
+	if (*text == '\0')
+	{
+		return;
+	}
+
+	if (*change_case == CHANGE_CASE_NONE)
+	{
+		g_string_append (string, text);
+		return;
+	}
+
+	if ((*change_case & CHANGE_CASE_SINGLE_MASK) != 0)
+	{
+		/* One-shot mode: convert the first character, copy the rest. */
+		ch = g_utf8_get_char (text);
+		g_string_append_unichar (string, CHANGE_CASE (ch, *change_case));
+		g_string_append (string, g_utf8_next_char (text));
+		*change_case = CHANGE_CASE_NONE;
+		return;
+	}
+
+	/* Persistent mode: convert every character. */
+	for (cursor = text; *cursor != '\0'; cursor = g_utf8_next_char (cursor))
+	{
+		ch = g_utf8_get_char (cursor);
+		g_string_append_unichar (string, CHANGE_CASE (ch, *change_case));
+	}
+}
+
+/*
+ * interpolate_replacement:
+ * @match_info: the current match
+ * @result: buffer the replacement text is appended to
+ * @data: the GList of InterpolationData pieces to expand
+ *
+ * Appends the expansion of every piece in @data to @result, resolving
+ * group references against @match_info. The case mode starts at
+ * CHANGE_CASE_NONE on every call. References for which the fetch returns
+ * %NULL contribute nothing.
+ *
+ * Returns: always %FALSE.
+ */
+static gboolean
+interpolate_replacement (const ImplMatchInfo *match_info,
+                         GString             *result,
+                         gpointer             data)
+{
+	ChangeCase mode = CHANGE_CASE_NONE;
+	GList *iter = data;
+
+	while (iter != NULL)
+	{
+		InterpolationData *piece = iter->data;
+		gchar *captured;
+
+		switch (piece->type)
+		{
+		case REPL_TYPE_STRING:
+			string_append (result, piece->text, &mode);
+			break;
+
+		case REPL_TYPE_CHARACTER:
+			g_string_append_c (result, CHANGE_CASE (piece->c, mode));
+			if (mode & CHANGE_CASE_SINGLE_MASK)
+				mode = CHANGE_CASE_NONE;
+			break;
+
+		case REPL_TYPE_NUMERIC_REFERENCE:
+		case REPL_TYPE_SYMBOLIC_REFERENCE:
+			if (piece->type == REPL_TYPE_NUMERIC_REFERENCE)
+				captured = impl_match_info_fetch (match_info, piece->num);
+			else
+				captured = impl_match_info_fetch_named (match_info, piece->text);
+
+			if (captured != NULL)
+			{
+				string_append (result, captured, &mode);
+				g_free (captured);
+			}
+			break;
+
+		case REPL_TYPE_CHANGE_CASE:
+			mode = piece->change_case;
+			break;
+
+		default:
+			g_warn_if_reached ();
+			break;
+		}
+
+		iter = iter->next;
+	}
+
+	return FALSE;
+}
+
+/*
+ * impl_regex_replace:
+ * @regex: compiled regex to search with
+ * @string: subject text
+ * @string_len: length of @string in bytes, forwarded to
+ *   impl_regex_replace_eval()
+ * @start_position: byte offset at which the search starts; must be >= 0
+ * @replacement: replacement text, which may contain backslash escapes
+ * @match_options: flags forwarded to impl_regex_replace_eval()
+ * @error: return location for a #GError, or %NULL
+ *
+ * Parses @replacement with split_replacement() and then replaces the
+ * matches of @regex in @string through impl_regex_replace_eval(), using
+ * interpolate_replacement() to expand the parsed pieces for each match.
+ *
+ * Returns: the string produced by impl_regex_replace_eval(), or %NULL
+ *   with @error set when @replacement cannot be parsed.
+ */
+char *
+impl_regex_replace (const ImplRegex   *regex,
+                    const char        *string,
+                    gssize             string_len,
+                    int                start_position,
+                    const char        *replacement,
+                    GRegexMatchFlags   match_options,
+                    GError           **error)
+{
+	GError *local_error = NULL;
+	GList *pieces;
+	char *replaced;
+
+	g_return_val_if_fail (regex != NULL, NULL);
+	g_return_val_if_fail (string != NULL, NULL);
+	g_return_val_if_fail (start_position >= 0, NULL);
+	g_return_val_if_fail (replacement != NULL, NULL);
+	g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+	pieces = split_replacement (replacement, &local_error);
+
+	if (local_error != NULL)
+	{
+		g_propagate_error (error, local_error);
+		return NULL;
+	}
+
+	replaced = impl_regex_replace_eval (regex,
+	                                    string, string_len, start_position,
+	                                    match_options,
+	                                    interpolate_replacement,
+	                                    pieces,
+	                                    &local_error);
+
+	/* The parsed pieces are only needed while the replacement runs. */
+	g_list_free_full (pieces, (GDestroyNotify) free_interpolation_data);
+
+	if (local_error != NULL)
+	{
+		g_propagate_error (error, local_error);
+	}
+
+	return replaced;
+}
+
+/*
+ * impl_match_info_fetch_pos:
+ * @match_info: a match info
+ * @match_num: capture group number, 0 being the whole match
+ * @start_pos: (nullable): receives the start offset of the group, or -1
+ * @end_pos: (nullable): receives the end offset of the group, or -1
+ *
+ * Looks up the recorded offsets of group @match_num. Groups numbered at
+ * or beyond the match count of the last attempt, but still within the
+ * number of subpatterns, are reported with both offsets set to -1.
+ *
+ * Returns: %FALSE when the last attempt did not match or @match_num is
+ *   out of range, %TRUE otherwise.
+ */
+gboolean
+impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+                           int                  match_num,
+                           int                 *start_pos,
+                           int                 *end_pos)
+{
+	gboolean captured;
+
+	g_return_val_if_fail (match_info != NULL, FALSE);
+	g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+	g_return_val_if_fail (match_info->offsets != NULL, FALSE);
+	g_return_val_if_fail (match_num >= 0, FALSE);
+
+	if (match_info->matches < 0)
+	{
+		return FALSE;
+	}
+
+	/* make sure the sub expression number they're requesting is less than
+	 * the total number of sub expressions in the regex. When matching all
+	 * (g_regex_match_all()), also compare against the number of matches */
+	if (match_num >= MAX (match_info->matches, match_info->n_subpatterns + 1))
+	{
+		return FALSE;
+	}
+
+	/* Offsets are only recorded for groups below the match count. */
+	captured = match_num < match_info->matches;
+
+	if (start_pos != NULL)
+	{
+		*start_pos = captured ? match_info->offsets[2 * match_num] : -1;
+	}
+
+	if (end_pos != NULL)
+	{
+		*end_pos = captured ? match_info->offsets[2 * match_num + 1] : -1;
+	}
+
+	return TRUE;
+}
+
+/*
+ * impl_match_info_fetch_named_pos:
+ * @match_info: a match info
+ * @name: name of the capture group; must not be %NULL
+ * @start_pos: receives the start offset of the group; must not be %NULL
+ * @end_pos: receives the end offset of the group; must not be %NULL
+ *
+ * Translates @name into a group number with
+ * pcre2_substring_number_from_name() and looks that group up with
+ * impl_match_info_fetch_pos().
+ *
+ * Returns: %FALSE when PCRE2 returns a negative number for @name or when
+ *   impl_match_info_fetch_pos() fails, %TRUE otherwise.
+ */
+gboolean
+impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+                                 const char          *name,
+                                 int                 *start_pos,
+                                 int                 *end_pos)
+{
+	int num;
+
+	g_return_val_if_fail (match_info != NULL, FALSE);
+	g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+	g_return_val_if_fail (match_info->regex != NULL, FALSE);
+	/* @name goes straight to PCRE2 below, so reject NULL up front. */
+	g_return_val_if_fail (name != NULL, FALSE);
+	g_return_val_if_fail (start_pos != NULL, FALSE);
+	g_return_val_if_fail (end_pos != NULL, FALSE);
+
+	num = pcre2_substring_number_from_name (match_info->regex->code, (PCRE2_SPTR)name);
+
+	if (num < 0)
+	{
+		return FALSE;
+	}
+
+	return impl_match_info_fetch_pos (match_info, num, start_pos, end_pos);
+}
+
+/*
+ * impl_match_info_matches:
+ * @match_info: a match info
+ *
+ * Tells whether the last match attempt recorded in @match_info succeeded.
+ * A match count of exactly 0 is treated as a precondition failure.
+ *
+ * Returns: %TRUE when the stored match count is not negative.
+ */
+gboolean
+impl_match_info_matches (const ImplMatchInfo *match_info)
+{
+	gboolean has_match;
+
+	g_return_val_if_fail (match_info != NULL, FALSE);
+	g_return_val_if_fail (match_info->matches != 0, FALSE);
+
+	has_match = !(match_info->matches < 0);
+
+	return has_match;
+}
+
+gboolean
+impl_match_info_next (ImplMatchInfo *match_info,
+ GError **error)
+{
+ gssize prev_match_start;
+ gssize prev_match_end;
+ /* Advance @match_info to the next match of its regex in its string,
+  * resuming at match_info->pos. Returns TRUE when the attempt produced a
+  * match (match_info->matches >= 0). Returns FALSE when the end of the
+  * string was passed, when nothing matched, or when set_regex_error()
+  * flags the PCRE2 result as an error. */
+ g_return_val_if_fail (match_info != NULL, FALSE);
+ g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+ g_return_val_if_fail (match_info->pos >= 0, FALSE);
+ /* Remember the span of the previous match; it is compared with the new
+  * one at the end to drop a repeated empty match. */
+ prev_match_start = match_info->offsets[0];
+ prev_match_end = match_info->offsets[1];
+
+ if (match_info->pos > match_info->string_len)
+ {
+ /* we have reached the end of the string */
+ match_info->pos = -1;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
+ return FALSE;
+ }
+ /* The JIT path is given only the per-match flags; the non-JIT path also
+  * ORs in the regex's own match flags and adds PCRE2_NO_UTF_CHECK when
+  * the pattern was compiled with PCRE2_UTF. */
+ if (match_info->regex->has_jit)
+ {
+ match_info->matches = pcre2_jit_match (match_info->regex->code,
+ (PCRE2_SPTR)match_info->string,
+ match_info->string_len,
+ match_info->pos,
+ match_info->match_flags,
+ match_info->match_data,
+ NULL);
+ }
+ else
+ {
+ gsize match_flags = match_info->regex->match_flags | match_info->match_flags;
+
+ if (match_info->regex->compile_flags & PCRE2_UTF)
+ match_flags |= PCRE2_NO_UTF_CHECK;
+
+ match_info->matches = pcre2_match (match_info->regex->code,
+ (PCRE2_SPTR)match_info->string,
+ match_info->string_len,
+ match_info->pos,
+ match_flags,
+ match_info->match_data,
+ NULL);
+ }
+ /* set_regex_error() is defined elsewhere; a TRUE return is treated as a
+  * failure here (presumably it has filled @error by then - confirm). */
+ if (set_regex_error (error, match_info->matches))
+ return FALSE;
+
+ /* avoid infinite loops if the pattern is an empty string or something
+ * equivalent */
+ if (match_info->pos == match_info->offsets[1])
+ {
+ if (match_info->pos > match_info->string_len)
+ {
+ /* we have reached the end of the string */
+ match_info->pos = -1;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
+ return FALSE;
+ }
+ /* NOTE(review): pos may equal string_len here, so NEXT_CHAR (defined
+  * elsewhere) is handed the address of string[string_len]; presumably
+  * that byte is a readable NUL terminator - confirm for callers that
+  * pass an explicit length. */
+ match_info->pos = NEXT_CHAR (match_info->regex, &match_info->string[match_info->pos]) -
+ match_info->string;
+
+
+ }
+ else
+ {
+ match_info->pos = match_info->offsets[1]; /* resume right after the match just found */
+ }
+
+ g_assert (match_info->matches <= (int)match_info->n_subpatterns + 1);
+
+ /* it's possible to get two identical matches when we are matching
+ * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
+ * the string is "RegExTest" we have:
+ * - search at position 0: match from 0 to 0
+ * - search at position 1: match from 3 to 3
+ * - search at position 3: match from 3 to 3 (duplicate)
+ * - search at position 4: match from 5 to 5
+ * - search at position 5: match from 5 to 5 (duplicate)
+ * - search at position 6: no match -> stop
+ * so we have to ignore the duplicates.
+ * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */
+ if (match_info->matches >= 0 &&
+ prev_match_start == match_info->offsets[0] &&
+ prev_match_end == match_info->offsets[1])
+ {
+ /* ignore this match and search the next one */
+ return impl_match_info_next (match_info, error);
+ }
+
+ return match_info->matches >= 0;
+}
+
+/*
+ * impl_regex_get_max_lookbehind:
+ * @regex: compiled regex; must have a non-NULL compiled code
+ *
+ * Queries PCRE2 for the PCRE2_INFO_MAXLOOKBEHIND value of @regex.
+ *
+ * Returns: the value PCRE2 stored, or 0 when it stored nothing or a
+ *   precondition is violated.
+ */
+int
+impl_regex_get_max_lookbehind (const ImplRegex *regex)
+{
+	uint32_t max_lookbehind = 0;
+
+	g_return_val_if_fail (regex != NULL, 0);
+	g_return_val_if_fail (regex->code != NULL, 0);
+
+	/* The status is not inspected: the 0 default stands if the query
+	 * leaves the output untouched. */
+	pcre2_pattern_info (regex->code, PCRE2_INFO_MAXLOOKBEHIND, &max_lookbehind);
+
+	return max_lookbehind;
+}
+
+/*
+ * impl_match_info_is_partial_match:
+ * @match_info: a match info
+ *
+ * Returns: %TRUE when the result stored by the last match attempt is
+ *   PCRE2_ERROR_PARTIAL, %FALSE otherwise.
+ */
+gboolean
+impl_match_info_is_partial_match (const ImplMatchInfo *match_info)
+{
+	gboolean partial;
+
+	g_return_val_if_fail (match_info != NULL, FALSE);
+
+	partial = (PCRE2_ERROR_PARTIAL == match_info->matches);
+
+	return partial;
+}
+
+/*
+ * impl_match_info_get_match_count:
+ * @match_info: a match info
+ *
+ * Returns: the match count stored by the last match attempt, with
+ *   negative (failure) values reported as 0.
+ */
+int
+impl_match_info_get_match_count (const ImplMatchInfo *match_info)
+{
+	g_return_val_if_fail (match_info != NULL, 0);
+
+	if (match_info->matches < 0)
+	{
+		return 0;
+	}
+
+	return match_info->matches;
+}
diff --git a/gtksourceview/meson.build b/gtksourceview/meson.build
index b6192604..c4ddb4a8 100644
--- a/gtksourceview/meson.build
+++ b/gtksourceview/meson.build
@@ -103,6 +103,7 @@ core_private_c = files([
'gtksourcepixbufhelper.c',
'gtksourceregex.c',
'gtksourceundomanagerdefault.c',
+ 'implregex.c',
])
core_c_args = [
@@ -119,6 +120,7 @@ core_deps = [
gio_dep,
gtk_dep,
libxml_dep,
+ pcre2_dep,
]
if config_h.has('OS_OSX')
diff --git a/meson.build b/meson.build
index 039d8390..0b76dd7f 100644
--- a/meson.build
+++ b/meson.build
@@ -79,6 +79,7 @@ gladeui_req = '>= 3.9'
introspection_req = '>= 1.42.0'
gtk_doc_req = '>= 1.25'
fribidi_req = '>= 0.19.7'
+pcre2_req = '>= 10.21'
glib_dep = dependency('glib-2.0', version: glib_req)
gobject_dep = dependency('gobject-2.0', version: glib_req)
@@ -86,6 +87,7 @@ gio_dep = dependency('gio-2.0', version: glib_req)
gtk_dep = dependency('gtk+-3.0', version: gtk_req)
libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id() != 'msvc')
fribidi_dep = dependency('fribidi', version: fribidi_req)
+pcre2_dep = dependency('libpcre2-8', version: pcre2_req, fallback : ['pcre2', 'libpcre2_8'])
gtk_quartz_dep = dependency('gtk+-quartz-3.0', version: gtk_doc_req, required: false)
diff --git a/subprojects/pcre2.wrap b/subprojects/pcre2.wrap
new file mode 100644
index 00000000..65417c61
--- /dev/null
+++ b/subprojects/pcre2.wrap
@@ -0,0 +1,10 @@
+[wrap-file]
+directory = pcre2-10.23
+
+source_url = https://github.com/PhilipHazel/pcre2/releases/download/pcre2-10.23/pcre2-10.23.zip
+source_filename = pcre2-10.23.zip
+source_hash = 6301a525a8a7e63a5fac0c2fbfa0374d3eb133e511d886771e097e427707094a
+
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/pcre2/10.23/1/get_zip
+patch_filename = pcre2-10.23-1-wrap.zip
+patch_hash = ad6b4f042a911d06805fbbeeb9ffed0a988b282561164d0624a3ce02e93d4e24