diff options
author | Christian Persch <chpe@gnome.org> | 2015-09-15 21:38:41 +0200 |
---|---|---|
committer | Christian Persch <chpe@gnome.org> | 2015-09-29 22:26:11 +0200 |
commit | eb45e6cbc7c747d0cf277acebfcbc61e96269b27 (patch) | |
tree | b02825b3d68dd099f69c2a66b216416d257abbae | |
parent | 9ca7b21abd4a4d4d65f3edcdb2476aca067773c1 (diff) | |
download | vte-eb45e6cbc7c747d0cf277acebfcbc61e96269b27.tar.gz |
lib: Add PCRE2 support
Add VteRegex wrapping PCRE2's pcre2_code_8* to add refcounting,
and add API to VteTerminal to use it for matching and searching.
-rw-r--r-- | configure.ac | 28 | ||||
-rw-r--r-- | doc/reference/vte-docs.xml | 7 | ||||
-rw-r--r-- | doc/reference/vte-sections.txt | 26 | ||||
-rw-r--r-- | src/Makefile.am | 33 | ||||
-rw-r--r-- | src/app.ui | 2 | ||||
-rw-r--r-- | src/app.vala | 111 | ||||
-rw-r--r-- | src/debug.c | 3 | ||||
-rw-r--r-- | src/debug.h | 5 | ||||
-rw-r--r-- | src/vte.cc | 834 | ||||
-rw-r--r-- | src/vte/vte.h | 5 | ||||
-rw-r--r-- | src/vte/vtedeprecated.h | 12 | ||||
-rw-r--r-- | src/vte/vteregex.h | 77 | ||||
-rw-r--r-- | src/vte/vteterminal.h | 15 | ||||
-rw-r--r-- | src/vteapp.c | 56 | ||||
-rw-r--r-- | src/vteinternal.hh | 27 | ||||
-rw-r--r-- | src/vtepcre2.h | 27 | ||||
-rw-r--r-- | src/vteregex.cc | 296 | ||||
-rw-r--r-- | src/vteregexinternal.hh | 20 |
18 files changed, 1407 insertions, 177 deletions
diff --git a/configure.ac b/configure.ac index ee34914b..9c147e84 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,9 @@ GLIB_REQUIRED=2.40.0 GIO_REQUIRED=2.40.0 PANGO_REQUIRED=1.22.0 GNUTLS_REQUIRED=3.2.7 +PCRE2_REQUIRED=10.00 + +# GNUTLS AC_MSG_CHECKING([whether gnutls support is requested]) AC_ARG_WITH([gnutls], @@ -215,6 +218,28 @@ fi AM_CONDITIONAL([WITH_GNUTLS],[test "$with_gnutls" = "yes"]) +# PCRE2 + +AC_MSG_CHECKING([whether PCRE2 support is requested]) +AC_ARG_WITH([pcre2], + [AS_HELP_STRING([--without-pcre2],[Disable pcre2 support])], + [],[with_pcre2=yes]) +AC_MSG_RESULT([$with_pcre2]) + +PCRE2_PKGS= +if test "$with_pcre2" = "yes"; then + PCRE2_PKGS="libpcre2-8 >= $PCRE2_REQUIRED" + + PKG_CHECK_MODULES([PCRE2],[$PCRE2_PKGS],, + [AC_MSG_ERROR([PCRE2 requested but libpcre2-8 not found. Use --without-pcre2 to disable PCRE2])]) + + AC_DEFINE([WITH_PCRE2],[1],[Define to 1 to enable pcre2 support]) +fi + +AM_CONDITIONAL([WITH_PCRE2],[test "$with_pcre2" = "yes"]) + +# GLIB tools + AC_DEFINE(GDK_MULTIHEAD_SAFE,1,[Force use of GDK multihead-safe APIs.]) AC_PATH_PROG([GLIB_GENMARSHAL],[glib-genmarshal]) @@ -244,7 +269,7 @@ AC_CHECK_FUNCS([ceil floor round]) # Search for the required modules. 
-VTE_PKGS="glib-2.0 >= $GLIB_REQUIRED gobject-2.0 pango >= $PANGO_REQUIRED gtk+-$GTK_API_VERSION >= $GTK_REQUIRED gobject-2.0 gio-2.0 gio-unix-2.0 zlib $GNUTLS_PKGS" +VTE_PKGS="glib-2.0 >= $GLIB_REQUIRED gobject-2.0 pango >= $PANGO_REQUIRED gtk+-$GTK_API_VERSION >= $GTK_REQUIRED gobject-2.0 gio-2.0 gio-unix-2.0 zlib $GNUTLS_PKGS $PCRE2_PKGS" PKG_CHECK_MODULES([VTE],[$VTE_PKGS]) AC_SUBST([VTE_PKGS]) @@ -404,6 +429,7 @@ cat <<EOF | tee -a config.log Configuration for libvte $VERSION for gtk+-$GTK_API_VERSION GNUTLS: $with_gnutls + PCRE2: $with_pcre2 Installing Glade catalogue: $enable_glade_catalogue Debugging: $enable_debug Introspection: $enable_introspection diff --git a/doc/reference/vte-docs.xml b/doc/reference/vte-docs.xml index 4111c6cc..0bf076e6 100644 --- a/doc/reference/vte-docs.xml +++ b/doc/reference/vte-docs.xml @@ -63,6 +63,9 @@ <xi:include href="xml/vte-terminal.xml"/> </chapter> <chapter> + <xi:include href="xml/vte-regex.xml"/> + </chapter> + <chapter> <xi:include href="xml/vte-pty.xml"/> </chapter> <chapter> @@ -87,6 +90,10 @@ <title>Index of new symbols in 0.40</title> <xi:include href="xml/api-index-0.40.xml"><xi:fallback /></xi:include> </index> + <index id="api-index-0-44" role="0.44"> + <title>Index of new symbols in 0.44</title> + <xi:include href="xml/api-index-0.44.xml"><xi:fallback /></xi:include> + </index> <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include> diff --git a/doc/reference/vte-sections.txt b/doc/reference/vte-sections.txt index 8fe08a72..ecdaae34 100644 --- a/doc/reference/vte-sections.txt +++ b/doc/reference/vte-sections.txt @@ -53,7 +53,7 @@ vte_terminal_get_text vte_terminal_get_text_include_trailing_spaces vte_terminal_get_text_range vte_terminal_get_cursor_position -vte_terminal_match_add_gregex +vte_terminal_match_add_regex vte_terminal_match_remove vte_terminal_match_remove_all vte_terminal_match_check @@ -69,9 +69,9 @@ vte_terminal_get_word_char_exceptions vte_terminal_write_contents_sync 
vte_terminal_search_find_next vte_terminal_search_find_previous -vte_terminal_search_get_gregex +vte_terminal_search_get_regex vte_terminal_search_get_wrap_around -vte_terminal_search_set_gregex +vte_terminal_search_set_regex vte_terminal_search_set_wrap_around <SUBSECTION> @@ -118,6 +118,9 @@ vte_terminal_get_current_file_uri <SUBSECTION Deprecated> vte_terminal_match_set_cursor +vte_terminal_match_add_gregex +vte_terminal_search_get_gregex +vte_terminal_search_set_gregex <SUBSECTION Private> VteCharAttributes @@ -126,6 +129,23 @@ VteTerminalClassPrivate </SECTION> <SECTION> +<FILE>vte-regex</FILE> +<TITLE>VteRegex</TITLE> +VteRegex +vte_regex_ref +vte_regex_unref +vte_regex_new +vte_regex_jit +vte_regex_new_pcre +vte_regex_get_pcre + +<SUBSECTION Standard> +VTE_TYPE_REGEX +vte_regex_get_type +VTE_REGEX_ERROR +vte_regex_error_quark + +<SECTION> <FILE>vte-pty</FILE> <TITLE>Vte PTY</TITLE> VtePtyFlags diff --git a/src/Makefile.am b/src/Makefile.am index 4bbff1ec..f74a6da5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,6 +16,7 @@ header_HEADERS = \ vte/vteglobals.h \ vte/vtemacros.h \ vte/vtepty.h \ + vte/vteregex.h \ vte/vteterminal.h \ $(NULL) @@ -42,6 +43,7 @@ libvte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_la_SOURCES = \ vte/vteglobals.h \ vte/vtemacros.h \ vte/vtepty.h \ + vte/vteregex.h \ vte/vteterminal.h \ buffer.h \ caps.cc \ @@ -70,7 +72,10 @@ libvte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_la_SOURCES = \ vtedraw.h \ vteint.h \ vteinternal.hh \ + vtepcre2.h \ vtepty-private.h \ + vteregex.cc \ + vteregexinternal.hh \ vterowdata.cc \ vterowdata.h \ vteseq.cc \ @@ -189,7 +194,7 @@ Vte_@VTE_API_VERSION_U@_gir_LIBS = libvte-$(VTE_API_VERSION).la Vte_@VTE_API_VERSION_U@_gir_EXPORT_PACKAGES = vte-$(VTE_API_VERSION) Vte_@VTE_API_VERSION_U@_gir_SCANNERFLAGS = --c-include "vte/vte.h" Vte_@VTE_API_VERSION_U@_gir_FILES = \ - $(filter-out vte/vtedeprecated.h,$(header_HEADERS)) \ + $(header_HEADERS) \ $(nodist_header_HEADERS) \ vte.cc \ 
vtetypebuiltins.cc \ @@ -234,25 +239,26 @@ endif # HAVE_INTROSPECTION # C Test application testvte_SOURCES = \ - vteapp.c \ - debug.c \ - debug.h \ - $(NULL) + vteapp.c \ + vtepcre2.h \ + debug.c \ + debug.h \ + $(NULL) testvte_CPPFLAGS = \ - -DGLIB_DISABLE_DEPRECATION_WARNINGS \ - -DGDK_DISABLE_DEPRECATION_WARNINGS \ + -DGLIB_DISABLE_DEPRECATION_WARNINGS \ + -DGDK_DISABLE_DEPRECATION_WARNINGS \ -I$(srcdir)/vte \ -I$(builddir)/vte \ - $(AM_CPPFLAGS) + $(AM_CPPFLAGS) testvte_CFLAGS = \ - $(VTE_CFLAGS) \ - $(AM_CFLAGS) + $(VTE_CFLAGS) \ + $(AM_CFLAGS) testvte_LDADD = \ - libvte-$(VTE_API_VERSION).la \ - $(VTE_LIBS) + libvte-$(VTE_API_VERSION).la \ + $(VTE_LIBS) # VALA Test application @@ -305,6 +311,9 @@ vte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_LDADD = \ if HAVE_GTK_3_16 vte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_VALAFLAGS += -D GTK_3_16 endif +if WITH_PCRE2 +vte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_VALAFLAGS += -D WITH_PCRE2 +endif CLEANFILES += \ app.c \ @@ -35,7 +35,7 @@ <property name="visible">True</property> <property name="can_focus">False</property> <property name="show_close_button">True</property> - <property name="decoration_layout">icon:close</property> + <property name="decoration_layout">:close</property> <child> <object class="GtkButton" id="copy_button"> <property name="visible">True</property> diff --git a/src/app.vala b/src/app.vala index e6ee9342..0b8d1561 100644 --- a/src/app.vala +++ b/src/app.vala @@ -35,8 +35,13 @@ class SearchPopover : Gtk.Popover [GtkChild] private Gtk.Button reveal_button; [GtkChild] private Gtk.Revealer revealer; - private GLib.RegexCompileFlags regex_flags = 0; - private GLib.Regex? regex = null; + private bool regex_caseless = false; + private bool regex_multiline = false; + private string? regex_pattern = null; + private GLib.Regex? regex_gregex = null; +#if WITH_PCRE2 + private Vte.Regex? 
regex_regex = null; +#endif public SearchPopover(Vte.Terminal term, Gtk.Widget relative_to) @@ -66,9 +71,18 @@ class SearchPopover : Gtk.Popover update_sensitivity(); } + private bool have_regex() + { + return regex_gregex != null +#if WITH_PCRE2 + || regex_regex != null +#endif + ; + } + private void update_sensitivity() { - bool can_search = regex != null; + bool can_search = have_regex(); search_prev_button.set_sensitive(can_search); search_next_button.set_sensitive(can_search); @@ -76,19 +90,21 @@ class SearchPopover : Gtk.Popover private void update_regex() { - GLib.RegexCompileFlags flags; string search_text; - string pattern; + string pattern = null; + bool caseless = false; + bool multiline = false; + GLib.Regex? gregex = null; +#if WITH_PCRE2 + Vte.Regex? regex = null; +#endif search_text = search_entry.get_text(); - flags = GLib.RegexCompileFlags.OPTIMIZE; - - if (!match_case_checkbutton.active) - flags |= GLib.RegexCompileFlags.CASELESS; + caseless = !match_case_checkbutton.active; if (regex_checkbutton.active) { pattern = search_text; - flags |= GLib.RegexCompileFlags.MULTILINE; + multiline = true; } else { pattern = GLib.Regex.escape_string(search_text); } @@ -96,15 +112,49 @@ class SearchPopover : Gtk.Popover if (entire_word_checkbutton.active) pattern = "\\b" + pattern + "\\b"; - if (regex != null && - regex_flags == flags && - pattern == regex.get_pattern()) + if (caseless == regex_caseless && + multiline == regex_multiline && + pattern == regex_pattern) return; - regex_flags = flags; + regex_pattern = null; + regex_caseless = caseless; + regex_multiline = multiline; + if (search_text.length != 0) { try { - regex = new GLib.Regex(pattern, flags, 0); +#if WITH_PCRE2 + if (!App.Options.no_pcre) { + uint32 flags; + + flags = 0x40080000u /* PCRE2_UTF | PCRE2_NO_UTF_CHECK */; + if (caseless) + flags |= 0x00000008u; /* PCRE2_CASELESS */ + if (multiline) + flags |= 0x00000400u; /* PCRE2_MULTILINE */ + regex = new Vte.Regex(pattern, pattern.length, flags); 
+ + try { + regex.jit(0x00000001u /* PCRE2_JIT_COMPLETE */); + regex.jit(0x00000002u /* PCRE2_JIT_PARTIAL_SOFT */); + } catch (Error e) { + printerr("JITing regex \"%s\" failed: %s\n", pattern, e.message); + } + } else +#endif /* WITH_PCRE2 */ + { + GLib.RegexCompileFlags flags; + + flags = GLib.RegexCompileFlags.OPTIMIZE; + if (caseless) + flags |= GLib.RegexCompileFlags.CASELESS; + if (multiline) + flags |= GLib.RegexCompileFlags.MULTILINE; + + gregex = new GLib.Regex(pattern, flags, 0); + } + + regex_pattern = pattern; } catch (Error e) { regex = null; } @@ -112,13 +162,19 @@ class SearchPopover : Gtk.Popover regex = null; } - terminal.search_set_gregex(regex, 0); +#if WITH_PCRE2 + if (regex != null) + terminal.search_set_regex(regex, 0); + else +#endif + terminal.search_set_gregex(gregex, 0); + update_sensitivity(); } private void search(bool backward) { - if (regex == null) + if (!have_regex()) return; if (backward) @@ -318,11 +374,23 @@ class Window : Gtk.ApplicationWindow for (int i = 0; i < dingus.length; ++i) { try { - GLib.Regex regex; int tag; +#if WITH_PCRE2 + if (!App.Options.no_pcre) { + Vte.Regex regex; + + regex = new Vte.Regex(dingus[i], dingus[i].length, + 0x40080008u /* PCRE2_UTF | PCRE2_NO_UTF_CHECK | PCRE2_CASELESS */); + tag = terminal.match_add_regex(regex, 0); + } else +#endif + { + GLib.Regex regex; + + regex = new GLib.Regex(dingus[i], GLib.RegexCompileFlags.OPTIMIZE, 0); + tag = terminal.match_add_gregex(regex, 0); + } - regex = new GLib.Regex(dingus[i], GLib.RegexCompileFlags.OPTIMIZE, 0); - tag = terminal.match_add_gregex(regex, 0); terminal.match_set_cursor_type(tag, cursors[i % cursors.length]); } catch (Error e) { printerr("Failed to compile regex \"%s\": %s\n", dingus[i], e.message); @@ -723,6 +791,7 @@ class App : Gtk.Application public static bool no_context_menu = false; public static bool no_double_buffer = false; public static bool no_geometry_hints = false; + public static bool no_pcre = false; public static bool no_rewrap = 
false; public static bool no_shell = false; public static bool object_notifications = false; @@ -887,6 +956,8 @@ class App : Gtk.Application "Add environment variable to the child\'s environment", "VAR=VALUE" }, { "font", 'f', 0, OptionArg.STRING, ref font_string, "Specify a font to use", null }, + { "gregex", 0, 0, OptionArg.NONE, ref no_pcre, + "Use GRegex instead of PCRE2", null }, { "geometry", 'g', 0, OptionArg.STRING, ref geometry, "Set the size (in characters) and position", "GEOMETRY" }, { "highlight-background-color", 0, 0, OptionArg.STRING, ref hl_bg_color_string, diff --git a/src/debug.c b/src/debug.c index 6ed9341f..b8dc3956 100644 --- a/src/debug.c +++ b/src/debug.c @@ -51,7 +51,8 @@ _vte_debug_init(void) { "pangocairo", VTE_DEBUG_PANGOCAIRO }, { "widget-size", VTE_DEBUG_WIDGET_SIZE }, { "style", VTE_DEBUG_STYLE }, - { "resize", VTE_DEBUG_RESIZE } + { "resize", VTE_DEBUG_RESIZE }, + { "regex", VTE_DEBUG_REGEX } }; _vte_debug_flags = g_parse_debug_string (g_getenv("VTE_DEBUG"), diff --git a/src/debug.h b/src/debug.h index a8cfbefb..51b3b554 100644 --- a/src/debug.h +++ b/src/debug.h @@ -60,7 +60,8 @@ typedef enum { VTE_DEBUG_PANGOCAIRO = 1 << 20, VTE_DEBUG_WIDGET_SIZE = 1 << 21, VTE_DEBUG_STYLE = 1 << 22, - VTE_DEBUG_RESIZE = 1 << 23 + VTE_DEBUG_RESIZE = 1 << 23, + VTE_DEBUG_REGEX = 1 << 24 } VteDebugFlags; void _vte_debug_init(void); @@ -72,7 +73,7 @@ static inline gboolean _vte_debug_on(guint flags) G_GNUC_CONST G_GNUC_UNUSED; static inline gboolean _vte_debug_on(guint flags) { - return (_vte_debug_flags & flags) == flags; + return (_vte_debug_flags & flags) != 0; } #ifdef VTE_DEBUG @@ -29,6 +29,8 @@ #include <math.h> +#include <glib.h> + #include <vte/vte.h> #include "vte-private.h" @@ -56,6 +58,11 @@ #include "vtepty.h" #include "vtepty-private.h" +#ifdef WITH_PCRE2 +#include "vtepcre2.h" +#include "vteregexinternal.hh" +#endif + #ifdef HAVE_LOCALE_H #include <locale.h> #endif @@ -111,6 +118,13 @@ static gboolean vte_cell_is_selected(VteTerminal 
*terminal, glong col, glong row, gpointer data); static void vte_terminal_extend_selection(VteTerminal *terminal, long x, long y, gboolean always_grow, gboolean force); +static char *vte_terminal_get_text_range_full(VteTerminal *terminal, + glong start_row, glong start_col, + glong end_row, glong end_col, + VteSelectionFunc is_selected, + gpointer user_data, + GArray *attributes, + gsize *ret_len); static char *vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal, glong start_row, glong start_col, @@ -120,13 +134,15 @@ static char *vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal, VteSelectionFunc is_selected, gpointer data, GArray *attributes, - gboolean include_trailing_spaces); + gboolean include_trailing_spaces, + gsize *ret_len); static char *vte_terminal_get_text_maybe_wrapped(VteTerminal *terminal, gboolean wrap, VteSelectionFunc is_selected, gpointer data, GArray *attributes, - gboolean include_trailing_spaces); + gboolean include_trailing_spaces, + gsize *ret_len); static void _vte_terminal_disconnect_pty_read(VteTerminal *terminal); static void _vte_terminal_disconnect_pty_write(VteTerminal *terminal); static void vte_terminal_stop_processing (VteTerminal *terminal); @@ -1150,13 +1166,24 @@ regex_match_clear_cursor (struct vte_match_regex *regex) } static void +regex_and_flags_clear(struct vte_regex_and_flags *regex) +{ + if (regex->mode == VTE_REGEX_PCRE2) { + vte_regex_unref(regex->pcre.regex); + regex->pcre.regex = NULL; + } else if (regex->mode == VTE_REGEX_GREGEX) { + g_regex_unref(regex->gregex.regex); + regex->gregex.regex = NULL; + } + regex->mode = VTE_REGEX_UNDECIDED; +} + +static void regex_match_clear (struct vte_match_regex *regex) { + regex_and_flags_clear(&regex->regex); regex_match_clear_cursor(regex); - g_regex_unref(regex->regex); - regex->regex = NULL; - regex->tag = -1; } @@ -1258,58 +1285,114 @@ vte_terminal_cursor_new(VteTerminal *terminal, GdkCursorType cursor_type) return cursor; } +static int 
+vte_terminal_match_add_internal(VteTerminal *terminal, + struct vte_match_regex *new_regex_match) +{ + VteTerminalPrivate *pvt = terminal->pvt; + struct vte_match_regex *regex_match; + guint ret, len; + + /* Search for a hole. */ + len = pvt->match_regexes->len; + for (ret = 0; ret < len; ret++) { + regex_match = &g_array_index(pvt->match_regexes, + struct vte_match_regex, + ret); + if (regex_match->tag == -1) { + break; + } + } + + /* Set the tag to the insertion point. */ + new_regex_match->tag = ret; + + if (ret < len) { + /* Overwrite. */ + g_array_index(pvt->match_regexes, + struct vte_match_regex, + ret) = *new_regex_match; + } else { + /* Append. */ + g_array_append_vals(pvt->match_regexes, new_regex_match, 1); + } + + return ret; +} + /** * vte_terminal_match_add_gregex: * @terminal: a #VteTerminal - * @regex: a #GRegex - * @flags: the #GRegexMatchFlags to use when matching the regex + * @gregex: a #GRegex + * @gflags: the #GRegexMatchFlags to use when matching the regex + * + * Adds the regular expression @regex to the list of matching expressions. When the + * user moves the mouse cursor over a section of displayed text which matches + * this expression, the text will be highlighted. + * + * Returns: an integer associated with this expression, or -1 if @gregex could not be + * transformed into a #VteRegex or @flags were incompatible + * + * Deprecated: 0.44: Use vte_terminal_match_add_regex() or vte_terminal_match_add_regex_full() instead. 
+ */ +int +vte_terminal_match_add_gregex(VteTerminal *terminal, + GRegex *gregex, + GRegexMatchFlags gflags) +{ + struct vte_match_regex new_regex_match; + + g_return_val_if_fail(VTE_IS_TERMINAL(terminal), -1); + g_return_val_if_fail(gregex != NULL, -1); + + /* Can't mix GRegex and PCRE2 */ + g_return_val_if_fail(terminal->pvt->match_regex_mode != VTE_REGEX_PCRE2, -1); + terminal->pvt->match_regex_mode = VTE_REGEX_GREGEX; + + new_regex_match.regex.mode = VTE_REGEX_GREGEX; + new_regex_match.regex.gregex.regex = g_regex_ref(gregex); + new_regex_match.regex.gregex.match_flags = gflags; + new_regex_match.cursor_mode = VTE_REGEX_CURSOR_GDKCURSORTYPE; + new_regex_match.cursor.cursor_type = VTE_DEFAULT_CURSOR; + + return vte_terminal_match_add_internal(terminal, &new_regex_match); +} + +/** + * vte_terminal_match_add_regex: + * @terminal: a #VteTerminal + * @regex: (transfer none): a #VteRegex + * @flags: PCRE2 match flags, or 0 * * Adds the regular expression @regex to the list of matching expressions. When the * user moves the mouse cursor over a section of displayed text which matches * this expression, the text will be highlighted. * * Returns: an integer associated with this expression + * + * Since: 0.44 */ int -vte_terminal_match_add_gregex(VteTerminal *terminal, GRegex *regex, GRegexMatchFlags flags) +vte_terminal_match_add_regex(VteTerminal *terminal, + VteRegex *regex, + guint32 flags) { - VteTerminalPrivate *pvt; - struct vte_match_regex new_regex_match, *regex_match; - guint ret, len; + struct vte_match_regex new_regex_match; g_return_val_if_fail(VTE_IS_TERMINAL(terminal), -1); g_return_val_if_fail(regex != NULL, -1); - pvt = terminal->pvt; + /* Can't mix GRegex and PCRE2 */ + g_return_val_if_fail(terminal->pvt->match_regex_mode != VTE_REGEX_GREGEX, -1); + terminal->pvt->match_regex_mode = VTE_REGEX_PCRE2; - /* Search for a hole. 
*/ - len = pvt->match_regexes->len; - for (ret = 0; ret < len; ret++) { - regex_match = &g_array_index(pvt->match_regexes, - struct vte_match_regex, - ret); - if (regex_match->tag == -1) { - break; - } - } - - /* Set the tag to the insertion point. */ - new_regex_match.regex = g_regex_ref(regex); - new_regex_match.match_flags = flags; - new_regex_match.tag = ret; + new_regex_match.regex.mode = VTE_REGEX_PCRE2; + new_regex_match.regex.pcre.regex = vte_regex_ref(regex); + new_regex_match.regex.pcre.match_flags = flags; new_regex_match.cursor_mode = VTE_REGEX_CURSOR_GDKCURSORTYPE; new_regex_match.cursor.cursor_type = VTE_DEFAULT_CURSOR; - if (ret < pvt->match_regexes->len) { - /* Overwrite. */ - g_array_index(pvt->match_regexes, - struct vte_match_regex, - ret) = new_regex_match; - } else { - /* Append. */ - g_array_append_val(pvt->match_regexes, new_regex_match); - } - return new_regex_match.tag; + return vte_terminal_match_add_internal(terminal, &new_regex_match); } /** @@ -1390,10 +1473,307 @@ vte_terminal_match_set_cursor_name(VteTerminal *terminal, vte_terminal_match_hilite_clear(terminal); } +#ifdef WITH_PCRE2 + +/* creates a pcre match context with appropriate limits */ +static pcre2_match_context_8 * +create_match_context(void) +{ + pcre2_match_context_8 *match_context; + + match_context = pcre2_match_context_create_8(NULL /* general context */); + pcre2_set_match_limit_8(match_context, 65536); /* should be plenty */ + pcre2_set_recursion_limit_8(match_context, 64); /* should be plenty */ + + return match_context; +} + /* Check if a given cell on the screen contains part of a matched string. If * it does, return the string, and store the match tag in the optional tag * argument. 
*/ static char * +vte_terminal_match_check_internal_pcre(VteTerminal *terminal, + glong column, + glong row, + int *tag, + int *start, + int *end) +{ + guint i; + struct vte_match_regex *regex = NULL; + struct _VteCharAttributes *attr = NULL; + gssize line_length, offset, sattr, eattr, start_blank, end_blank, position; + gchar *line, eol; + pcre2_match_data_8 *match_data; + pcre2_match_context_8 *match_context; + gsize *ovector; + + _vte_debug_print(VTE_DEBUG_REGEX, + "Checking for pcre match at (%ld,%ld).\n", row, column); + + /* Identical with vte_terminal_match_check_internal_gregex until END */ + if (tag != NULL) { + *tag = -1; + } + if (start != NULL) { + *start = 0; + } + if (end != NULL) { + *end = 0; + } + /* Map the pointer position to a portion of the string. */ + eattr = terminal->pvt->match_attributes->len; + for (offset = eattr; offset--; ) { + attr = &g_array_index(terminal->pvt->match_attributes, + struct _VteCharAttributes, + offset); + if (row < attr->row) { + eattr = offset; + } + if (row == attr->row && + column == attr->column && + terminal->pvt->match_contents[offset] != ' ') { + break; + } + } + + _VTE_DEBUG_IF(VTE_DEBUG_REGEX) { + if (offset < 0) + g_printerr("Cursor is not on a character.\n"); + else { + gunichar c; + char utf[7]; + c = g_utf8_get_char (terminal->pvt->match_contents + offset); + utf[g_unichar_to_utf8(g_unichar_isprint(c) ? c : 0xFFFD, utf)] = 0; + + g_printerr("Cursor is on character U+%04X '%s' at %d.\n", + c, utf, offset); + } + } + + /* If the pointer isn't on a matchable character, bug out. */ + if (offset < 0) { + return NULL; + } + + /* If the pointer is on a newline, bug out. */ + if ((g_ascii_isspace(terminal->pvt->match_contents[offset])) || + (terminal->pvt->match_contents[offset] == '\0')) { + _vte_debug_print(VTE_DEBUG_EVENTS, + "Cursor is on whitespace.\n"); + return NULL; + } + + /* Snip off any final newlines. 
*/ + while (terminal->pvt->match_contents[eattr] == '\n' || + terminal->pvt->match_contents[eattr] == '\0') { + eattr--; + } + /* and scan forwards to find the end of this line */ + while (!(terminal->pvt->match_contents[eattr] == '\n' || + terminal->pvt->match_contents[eattr] == '\0')) { + eattr++; + } + + /* find the start of row */ + if (row == 0) { + sattr = 0; + } else { + for (sattr = offset; sattr > 0; sattr--) { + attr = &g_array_index(terminal->pvt->match_attributes, + struct _VteCharAttributes, + sattr); + if (row > attr->row) { + break; + } + } + } + /* Scan backwards to find the start of this line */ + while (sattr > 0 && + ! (terminal->pvt->match_contents[sattr] == '\n' || + terminal->pvt->match_contents[sattr] == '\0')) { + sattr--; + } + /* and skip any initial newlines. */ + while (terminal->pvt->match_contents[sattr] == '\n' || + terminal->pvt->match_contents[sattr] == '\0') { + sattr++; + } + if (eattr <= sattr) { /* blank line */ + return NULL; + } + if (eattr <= offset || sattr > offset) { + /* nothing to match on this line */ + return NULL; + } + offset -= sattr; + eattr -= sattr; + + /* END identical */ + + line = terminal->pvt->match_contents + sattr; + line_length = eattr; + + /* temporarily shorten the contents to this row */ + // FIXME obsolete + eol = line[eattr]; + line[eattr] = '\0'; + + start_blank = 0; + end_blank = eattr; + + // _vte_debug_print(VTE_DEBUG_REGEX, "Cursor offset: %" G_GSSIZE_FORMAT " in line with length %" G_GSSIZE_FORMAT "): %*s\n", + // offset, line_length, -(int)line_length, line); + + match_context = create_match_context(); + match_data = pcre2_match_data_create_8(256 /* should be plenty */, NULL /* general context */); + + /* Now iterate over each regex we need to match against. 
*/ + for (i = 0; i < terminal->pvt->match_regexes->len; i++) { + int (* match_fn) (const pcre2_code_8 *, + PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE, uint32_t, + pcre2_match_data_8 *, pcre2_match_context_8 *); + int r = 0; + + regex = &g_array_index(terminal->pvt->match_regexes, + struct vte_match_regex, + i); + /* Skip holes. */ + if (regex->tag < 0) { + continue; + } + + g_assert_cmpint(regex->regex.mode, ==, VTE_REGEX_PCRE2); + + if (_vte_regex_get_jited(regex->regex.pcre.regex)) + match_fn = pcre2_jit_match_8; + else + match_fn = pcre2_match_8; + + /* We'll only match the first item in the buffer which + * matches, so we'll have to skip each match until we + * stop getting matches. */ + + position = 0; + while (position < line_length && + ((r = match_fn(vte_regex_get_pcre(regex->regex.pcre.regex), + (PCRE2_SPTR8)line, line_length , /* subject, length */ + position, /* start offset */ + regex->regex.pcre.match_flags | + PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY | PCRE2_PARTIAL_SOFT /* FIXME: HARD? */, + match_data, + match_context)) >= 0 || r == PCRE2_ERROR_PARTIAL)) { + gsize ko = offset; + gsize rm_so, rm_eo; + gssize sblank=G_MINSSIZE, eblank=G_MAXSSIZE; + + ovector = pcre2_get_ovector_pointer_8(match_data); + rm_so = ovector[0]; + rm_eo = ovector[1]; + if (G_UNLIKELY(rm_so == PCRE2_UNSET || rm_eo == PCRE2_UNSET)) + break; + + /* The offsets should be "sane". 
We set NOTEMPTY, but check anyway */ + if (G_UNLIKELY(rm_so == rm_eo)) { + /* rm_so is before the end of subject string's length, so this is safe */ + position = g_utf8_next_char(line + rm_eo) - line; + continue; + } + + _VTE_DEBUG_IF(VTE_DEBUG_REGEX) { + gchar *match; + struct _VteCharAttributes *_sattr, *_eattr; + match = g_strndup(line + rm_so, rm_eo - rm_so); + _sattr = &g_array_index(terminal->pvt->match_attributes, + struct _VteCharAttributes, + rm_so); + _eattr = &g_array_index(terminal->pvt->match_attributes, + struct _VteCharAttributes, + rm_eo - 1); + g_printerr("%s match `%s' from %d(%ld,%ld) to %d(%ld,%ld) (%d).\n", + r == PCRE2_ERROR_PARTIAL ? "Partial":"Full", + match, + rm_so, + _sattr->column, + _sattr->row, + rm_eo - 1, + _eattr->column, + _eattr->row, + offset); + g_free(match); + } + + /* advance position */ + position = rm_eo; + + /* FIXME: do handle newline / partial matches at end of line/start of next line */ + if (r == PCRE2_ERROR_PARTIAL) + continue; + + /* If the pointer is in this substring, then we're done. 
*/ + if (ko >= rm_so && ko < rm_eo) { + gchar *result; + if (tag != NULL) { + *tag = regex->tag; + } + if (start != NULL) { + *start = sattr + rm_so; + } + if (end != NULL) { + *end = sattr + rm_eo - 1; + } + + result = g_strndup(line + rm_so, rm_eo - rm_so); + + vte_terminal_set_cursor_from_regex_match(terminal, regex); + + // FIXME obsolete + line[eattr] = eol; + + pcre2_match_data_free_8(match_data); + pcre2_match_context_free_8(match_context); + return result; + + } + + if (ko > rm_eo - 1 && rm_eo > sblank) { + sblank = rm_eo; + } + if (ko < rm_so && rm_so < eblank) { + eblank = rm_so; + } + + if (sblank > start_blank) { + start_blank = sblank; + } + if (eblank < end_blank) { + end_blank = eblank; + } + } + + if (G_UNLIKELY(r < PCRE2_ERROR_PARTIAL)) + _vte_debug_print(VTE_DEBUG_REGEX, "Unexpected pcre2_match error code: %d\n", r); + } + + pcre2_match_data_free_8(match_data); + pcre2_match_context_free_8(match_context); + + // FIXME obsolete + line[eattr] = eol; + + // FIXME: WTF is this doing and why? 
+ if (start != NULL) { + *start = sattr + start_blank; + } + if (end != NULL) { + *end = sattr + end_blank - 1; + } + return NULL; +} + +#endif /* WITH_PCRE2 */ + +static char * vte_terminal_match_check_internal_gregex(VteTerminal *terminal, long column, glong row, int *tag, int *start, int *end) @@ -1403,12 +1783,14 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, int offset; struct vte_match_regex *regex = NULL; struct _VteCharAttributes *attr = NULL; - gssize sattr, eattr; + gssize line_length, sattr, eattr; gchar *line, eol; GMatchInfo *match_info; - _vte_debug_print(VTE_DEBUG_EVENTS, + _vte_debug_print(VTE_DEBUG_REGEX, "Checking for gregex match at (%ld,%ld).\n", row, column); + + /* Identical with vte_terminal_match_check_internal_pcre until END */ if (tag != NULL) { *tag = -1; } @@ -1434,13 +1816,18 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, } } - _VTE_DEBUG_IF(VTE_DEBUG_EVENTS) { + _VTE_DEBUG_IF(VTE_DEBUG_REGEX) { if (offset < 0) g_printerr("Cursor is not on a character.\n"); - else - g_printerr("Cursor is on character '%c' at %d.\n", - g_utf8_get_char (terminal->pvt->match_contents + offset), - offset); + else { + gunichar c; + char utf[7]; + c = g_utf8_get_char (terminal->pvt->match_contents + offset); + utf[g_unichar_to_utf8(g_unichar_isprint(c) ? c : 0xFFFD, utf)] = 0; + + g_printerr("Cursor is on character U+%04X '%s' at %d.\n", + c, utf, offset); + } } /* If the pointer isn't on a matchable character, bug out. */ @@ -1451,7 +1838,7 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, /* If the pointer is on a newline, bug out. 
*/ if ((g_ascii_isspace(terminal->pvt->match_contents[offset])) || (terminal->pvt->match_contents[offset] == '\0')) { - _vte_debug_print(VTE_DEBUG_EVENTS, + _vte_debug_print(VTE_DEBUG_REGEX, "Cursor is on whitespace.\n"); return NULL; } @@ -1501,14 +1888,21 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, offset -= sattr; eattr -= sattr; + /* END identical */ + /* temporarily shorten the contents to this row */ line = terminal->pvt->match_contents + sattr; + line_length = eattr; + eol = line[eattr]; line[eattr] = '\0'; start_blank = 0; end_blank = eattr; + // _vte_debug_print(VTE_DEBUG_REGEX, "Cursor offset: %" G_GSSIZE_FORMAT " in line with length %" G_GSSIZE_FORMAT "): %*s\n", + // offset, line_length, -(int)line_length, line); + /* Now iterate over each regex we need to match against. */ for (i = 0; i < terminal->pvt->match_regexes->len; i++) { regex = &g_array_index(terminal->pvt->match_regexes, @@ -1518,12 +1912,15 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, if (regex->tag < 0) { continue; } + + g_assert_cmpint(regex->regex.mode, ==, VTE_REGEX_GREGEX); + /* We'll only match the first item in the buffer which * matches, so we'll have to skip each match until we * stop getting matches. */ - if (!g_regex_match_full(regex->regex, + if (!g_regex_match_full(regex->regex.gregex.regex, line, -1, 0, - regex->match_flags, + regex->regex.gregex.match_flags, &match_info, NULL)) { g_match_info_free(match_info); @@ -1539,7 +1936,7 @@ vte_terminal_match_check_internal_gregex(VteTerminal *terminal, /* The offsets should be "sane". 
*/ g_assert(rm_so < eattr); g_assert(rm_eo <= eattr); - _VTE_DEBUG_IF(VTE_DEBUG_MISC) { + _VTE_DEBUG_IF(VTE_DEBUG_REGEX) { gchar *match; struct _VteCharAttributes *_sattr, *_eattr; match = g_strndup(line + rm_so, rm_eo - rm_so); @@ -1618,11 +2015,20 @@ vte_terminal_match_check_internal(VteTerminal *terminal, long column, glong row, int *tag, int *start, int *end) { - if (terminal->pvt->match_contents == NULL) { + VteTerminalPrivate *pvt = terminal->pvt; + + if (pvt->match_contents == NULL) { vte_terminal_match_contents_refresh(terminal); } - return vte_terminal_match_check_internal_gregex(terminal, column, row, tag, start, end); +#ifdef WITH_PCRE2 + if (G_LIKELY(pvt->match_regex_mode == VTE_REGEX_PCRE2)) + return vte_terminal_match_check_internal_pcre(terminal, column, row, tag, start, end); +#endif + if (pvt->match_regex_mode == VTE_REGEX_GREGEX) + return vte_terminal_match_check_internal_gregex(terminal, column, row, tag, start, end); + + return NULL; } static gboolean @@ -1674,7 +2080,7 @@ vte_terminal_match_check(VteTerminal *terminal, glong column, glong row, char *ret; g_return_val_if_fail(VTE_IS_TERMINAL(terminal), NULL); delta = terminal->pvt->screen->scroll_delta; - _vte_debug_print(VTE_DEBUG_EVENTS, + _vte_debug_print(VTE_DEBUG_EVENTS | VTE_DEBUG_REGEX, "Checking for match at (%ld,%ld).\n", row, column); if (rowcol_inside_match (terminal, row + delta, column)) { @@ -1689,7 +2095,7 @@ vte_terminal_match_check(VteTerminal *terminal, glong column, glong row, column, row + delta, tag, NULL, NULL); } - _VTE_DEBUG_IF(VTE_DEBUG_EVENTS) { + _VTE_DEBUG_IF(VTE_DEBUG_EVENTS | VTE_DEBUG_REGEX) { if (ret != NULL) g_printerr("Matched `%s'.\n", ret); } return ret; @@ -3554,6 +3960,12 @@ vte_get_features (void) #else "-GNUTLS" #endif + " " +#ifdef WITH_PCRE2 + "+PCRE2" +#else + "-PCRE2" +#endif ; } @@ -6122,6 +6534,24 @@ vte_terminal_get_text_range(VteTerminal *terminal, GArray *attributes) { g_return_val_if_fail(VTE_IS_TERMINAL(terminal), NULL); + return 
vte_terminal_get_text_range_full(terminal, + start_row, start_col, + end_row, end_col, + is_selected, user_data, + attributes, + NULL); +} + +static char * +vte_terminal_get_text_range_full(VteTerminal *terminal, + glong start_row, glong start_col, + glong end_row, glong end_col, + VteSelectionFunc is_selected, + gpointer user_data, + GArray *attributes, + gsize *ret_len) +{ + g_return_val_if_fail(VTE_IS_TERMINAL(terminal), NULL); return vte_terminal_get_text_range_maybe_wrapped(terminal, start_row, start_col, end_row, end_col, @@ -6129,7 +6559,8 @@ vte_terminal_get_text_range(VteTerminal *terminal, is_selected, user_data, attributes, - FALSE); + FALSE, + ret_len); } static char * @@ -6140,7 +6571,8 @@ vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal, VteSelectionFunc is_selected, gpointer data, GArray *attributes, - gboolean include_trailing_spaces) + gboolean include_trailing_spaces, + gsize *ret_len) { glong col, row, last_empty, last_emptycol, last_nonempty, last_nonemptycol; const VteCell *pcell = NULL; @@ -6265,6 +6697,8 @@ vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal, } /* Sanity check. 
*/ g_assert(attributes == NULL || string->len == attributes->len); + if (ret_len) + *ret_len = string->len; return g_string_free(string, FALSE); } @@ -6274,7 +6708,8 @@ vte_terminal_get_text_maybe_wrapped(VteTerminal *terminal, VteSelectionFunc is_selected, gpointer data, GArray *attributes, - gboolean include_trailing_spaces) + gboolean include_trailing_spaces, + gsize *ret_len) { long start_row, start_col, end_row, end_col; start_row = terminal->pvt->screen->scroll_delta; @@ -6288,7 +6723,8 @@ vte_terminal_get_text_maybe_wrapped(VteTerminal *terminal, is_selected, data, attributes, - include_trailing_spaces); + include_trailing_spaces, + ret_len); } /** @@ -6318,7 +6754,8 @@ vte_terminal_get_text(VteTerminal *terminal, is_selected, user_data, attributes, - FALSE); + FALSE, + NULL); } /** @@ -6350,7 +6787,8 @@ vte_terminal_get_text_include_trailing_spaces(VteTerminal *terminal, is_selected, user_data, attributes, - TRUE); + TRUE, + NULL); } /* @@ -8475,11 +8913,15 @@ vte_terminal_init(VteTerminal *terminal) _vte_terminal_save_cursor(terminal, &terminal->pvt->alternate_screen); /* Matching data. 
*/ + pvt->match_regex_mode = VTE_REGEX_UNDECIDED; pvt->match_regexes = g_array_new(FALSE, TRUE, sizeof(struct vte_match_regex)); pvt->match_tag = -1; vte_terminal_match_hilite_clear(terminal); + /* Search data */ + pvt->search_regex.mode = VTE_REGEX_UNDECIDED; + /* Rendering data */ pvt->draw = _vte_draw_new(); @@ -8825,8 +9267,7 @@ vte_terminal_finalize(GObject *object) g_array_free(terminal->pvt->match_regexes, TRUE); } - if (terminal->pvt->search_regex) - g_regex_unref (terminal->pvt->search_regex); + regex_and_flags_clear(&terminal->pvt->search_regex); if (terminal->pvt->search_attrs) g_array_free (terminal->pvt->search_attrs, TRUE); @@ -13217,31 +13658,94 @@ vte_terminal_write_contents_sync (VteTerminal *terminal, /* TODO Add properties & signals */ /** + * vte_terminal_search_set_regex: + * @terminal: a #VteTerminal + * @regex: (allow-none): a #VteRegex, or %NULL + * @flags: PCRE2 match flags, or 0 + * + * Sets the regex to search for. Unsets the search regex when passed %NULL. + * + * Since: 0.44 + */ +void +vte_terminal_search_set_regex (VteTerminal *terminal, + VteRegex *regex, + guint32 flags) +{ + struct vte_regex_and_flags *search_regex; + + g_return_if_fail(VTE_IS_TERMINAL(terminal)); + + search_regex = &terminal->pvt->search_regex; + + if (search_regex->mode == VTE_REGEX_PCRE2 && + search_regex->pcre.regex == regex && + search_regex->pcre.match_flags == flags) + return; + + regex_and_flags_clear(search_regex); + + if (regex != NULL) { + search_regex->mode = VTE_REGEX_PCRE2; + search_regex->pcre.regex = vte_regex_ref(regex); + search_regex->pcre.match_flags = flags; + } + + _vte_invalidate_all (terminal); +} + +/** + * vte_terminal_search_get_regex: + * @terminal: a #VteTerminal + * + * Returns: (transfer none): the search #VteRegex regex set in @terminal, or %NULL + * + * Since: 0.44 + */ +VteRegex * +vte_terminal_search_get_regex(VteTerminal *terminal) +{ + g_return_val_if_fail(VTE_IS_TERMINAL(terminal), NULL); + + if 
(G_LIKELY(terminal->pvt->search_regex.mode == VTE_REGEX_PCRE2)) + return terminal->pvt->search_regex.pcre.regex; + else + return NULL; +} + +/** * vte_terminal_search_set_gregex: * @terminal: a #VteTerminal - * @regex: (allow-none): a #GRegex, or %NULL - * @flags: flags from #GRegexMatchFlags + * @gregex: (allow-none): a #GRegex, or %NULL + * @gflags: flags from #GRegexMatchFlags * * Sets the #GRegex regex to search for. Unsets the search regex when passed %NULL. + * + * Deprecated: 0.44: use vte_terminal_search_set_regex() instead. */ void vte_terminal_search_set_gregex (VteTerminal *terminal, - GRegex *regex, - GRegexMatchFlags flags) + GRegex *gregex, + GRegexMatchFlags gflags) { + struct vte_regex_and_flags *search_regex; + g_return_if_fail(VTE_IS_TERMINAL(terminal)); - if (terminal->pvt->search_regex == regex) - return; + search_regex = &terminal->pvt->search_regex; - if (terminal->pvt->search_regex) { - g_regex_unref (terminal->pvt->search_regex); - terminal->pvt->search_regex = NULL; - } + if (search_regex->mode == VTE_REGEX_GREGEX && + search_regex->gregex.regex == gregex && + search_regex->gregex.match_flags == gflags) + return; - if (regex) - terminal->pvt->search_regex = g_regex_ref (regex); - terminal->pvt->search_match_flags = flags; + regex_and_flags_clear(search_regex); + + if (gregex != NULL) { + search_regex->mode = VTE_REGEX_PCRE2; + search_regex->gregex.regex = g_regex_ref(gregex); + search_regex->gregex.match_flags = gflags; + } _vte_invalidate_all (terminal); } @@ -13251,13 +13755,18 @@ vte_terminal_search_set_gregex (VteTerminal *terminal, * @terminal: a #VteTerminal * * Returns: (transfer none): the search #GRegex regex set in @terminal, or %NULL + * + * Deprecated: 0.44: use vte_terminal_search_get_regex() instead. 
*/ GRegex * vte_terminal_search_get_gregex (VteTerminal *terminal) { g_return_val_if_fail(VTE_IS_TERMINAL(terminal), NULL); - return terminal->pvt->search_regex; + if (G_LIKELY(terminal->pvt->search_regex.mode == VTE_REGEX_GREGEX)) + return terminal->pvt->search_regex.gregex.regex; + else + return NULL; } /** @@ -13292,15 +13801,18 @@ vte_terminal_search_get_wrap_around (VteTerminal *terminal) } static gboolean -vte_terminal_search_rows (VteTerminal *terminal, - long start_row, - long end_row, - gboolean backward) +vte_terminal_search_rows(VteTerminal *terminal, +#ifdef WITH_PCRE2 + pcre2_match_context_8 *match_context, + pcre2_match_data_8 *match_data, +#endif + long start_row, + long end_row, + gboolean backward) { VteTerminalPrivate *pvt; char *row_text; - GMatchInfo *match_info; - GError *error = NULL; + gsize row_text_length; int start, end; long start_col, end_col; gchar *word; @@ -13310,26 +13822,75 @@ vte_terminal_search_rows (VteTerminal *terminal, pvt = terminal->pvt; - row_text = vte_terminal_get_text_range (terminal, start_row, 0, end_row, -1, NULL, NULL, NULL); + row_text = vte_terminal_get_text_range_full (terminal, start_row, 0, end_row, -1, NULL, NULL, NULL, &row_text_length); + +#ifdef WITH_PCRE2 + if (G_LIKELY(pvt->search_regex.mode == VTE_REGEX_PCRE2)) { + int (* match_fn) (const pcre2_code_8 *, + PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE, uint32_t, + pcre2_match_data_8 *, pcre2_match_context_8 *); + gsize *ovector, so, eo; + int r; + + if (_vte_regex_get_jited(pvt->search_regex.pcre.regex)) + match_fn = pcre2_jit_match_8; + else + match_fn = pcre2_match_8; + + r = match_fn(vte_regex_get_pcre(pvt->search_regex.pcre.regex), + (PCRE2_SPTR8)row_text, row_text_length , /* subject, length */ + 0, /* start offset */ + pvt->search_regex.pcre.match_flags | + PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY | PCRE2_PARTIAL_SOFT /* FIXME: HARD? 
*/, + match_data, + match_context); + + if (r == PCRE2_ERROR_NOMATCH) + return FALSE; + // FIXME: handle partial matches (PCRE2_ERROR_PARTIAL) + if (r < 0) + return FALSE; + + ovector = pcre2_get_ovector_pointer_8(match_data); + so = ovector[0]; + eo = ovector[1]; + if (G_UNLIKELY(so == PCRE2_UNSET || eo == PCRE2_UNSET)) + return FALSE; + + start = so; + end = eo; + word = g_strndup(row_text, end - start); + } else +#endif /* WITH_PCRE2 */ + { + GMatchInfo *match_info; + GError *error = NULL; - g_regex_match_full (pvt->search_regex, row_text, -1, 0, - (GRegexMatchFlags)(pvt->search_match_flags | G_REGEX_MATCH_NOTEMPTY), - &match_info, &error); - if (error) { - g_printerr ("Error while matching: %s\n", error->message); - g_error_free (error); - g_match_info_free (match_info); - g_free (row_text); - return TRUE; - } + g_assert_cmpuint(pvt->search_regex.mode, ==, VTE_REGEX_GREGEX); + + g_regex_match_full (pvt->search_regex.gregex.regex, row_text, row_text_length, 0, + (GRegexMatchFlags)(pvt->search_regex.gregex.match_flags | G_REGEX_MATCH_NOTEMPTY), + &match_info, &error); + if (error) { + g_printerr ("Error while matching: %s\n", error->message); + g_error_free (error); + g_match_info_free (match_info); + g_free (row_text); + return TRUE; + } - if (!g_match_info_matches (match_info)) { - g_match_info_free (match_info); - g_free (row_text); - return FALSE; - } + if (!g_match_info_matches (match_info)) { + g_match_info_free (match_info); + g_free (row_text); + return FALSE; + } - word = g_match_info_fetch (match_info, 0); + word = g_match_info_fetch (match_info, 0); + /* This gives us the offset in the buffer */ + g_match_info_fetch_pos (match_info, 0, &start, &end); + + g_match_info_free (match_info); + } /* Fetch text again, with attributes */ g_free (row_text); @@ -13338,9 +13899,6 @@ vte_terminal_search_rows (VteTerminal *terminal, attrs = pvt->search_attrs; row_text = vte_terminal_get_text_range (terminal, start_row, 0, end_row, -1, NULL, NULL, attrs); - /* This 
gives us the offset in the buffer */ - g_match_info_fetch_pos (match_info, 0, &start, &end); - ca = &g_array_index (attrs, VteCharAttributes, start); start_row = ca->row; start_col = ca->column; @@ -13350,7 +13908,6 @@ vte_terminal_search_rows (VteTerminal *terminal, g_free (word); g_free (row_text); - g_match_info_free (match_info); _vte_terminal_select_text (terminal, start_col, start_row, end_col, end_row, 0, 0); /* Quite possibly the math here should not access adjustment directly... */ @@ -13369,6 +13926,10 @@ vte_terminal_search_rows (VteTerminal *terminal, static gboolean vte_terminal_search_rows_iter (VteTerminal *terminal, +#ifdef WITH_PCRE2 + pcre2_match_context_8 *match_context, + pcre2_match_data_8 *match_data, +#endif long start_row, long end_row, gboolean backward) @@ -13386,7 +13947,11 @@ vte_terminal_search_rows_iter (VteTerminal *terminal, row = _vte_terminal_find_row_data (terminal, iter_start_row); } while (row && row->attr.soft_wrapped); - if (vte_terminal_search_rows (terminal, iter_start_row, iter_end_row, backward)) + if (vte_terminal_search_rows (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + iter_start_row, iter_end_row, backward)) return TRUE; } } else { @@ -13399,7 +13964,11 @@ vte_terminal_search_rows_iter (VteTerminal *terminal, iter_end_row++; } while (row && row->attr.soft_wrapped); - if (vte_terminal_search_rows (terminal, iter_start_row, iter_end_row, backward)) + if (vte_terminal_search_rows (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + iter_start_row, iter_end_row, backward)) return TRUE; } } @@ -13414,11 +13983,16 @@ vte_terminal_search_find (VteTerminal *terminal, VteTerminalPrivate *pvt; long buffer_start_row, buffer_end_row; long last_start_row, last_end_row; + gboolean match_found = TRUE; +#ifdef WITH_PCRE2 + pcre2_match_context_8 *match_context = NULL; + pcre2_match_data_8 *match_data = NULL; +#endif g_return_val_if_fail(VTE_IS_TERMINAL(terminal), FALSE); pvt = terminal->pvt; - if 
(!pvt->search_regex) + if (pvt->search_regex.mode == VTE_REGEX_UNDECIDED) return FALSE; /* TODO @@ -13426,6 +14000,13 @@ vte_terminal_search_find (VteTerminal *terminal, * Moreover, the whole search thing is implemented very inefficiently. */ +#ifdef WITH_PCRE2 + if (G_LIKELY(pvt->search_regex.mode == VTE_REGEX_PCRE2)) { + match_context = create_match_context(); + match_data = pcre2_match_data_create_8(256 /* should be plenty */, NULL /* general context */); + } +#endif + buffer_start_row = _vte_ring_delta (terminal->pvt->screen->row_data); buffer_end_row = _vte_ring_next (terminal->pvt->screen->row_data); @@ -13442,11 +14023,19 @@ vte_terminal_search_find (VteTerminal *terminal, /* If search fails, we make an empty selection at the last searched * position... */ if (backward) { - if (vte_terminal_search_rows_iter (terminal, buffer_start_row, last_start_row, backward)) - return TRUE; + if (vte_terminal_search_rows_iter (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + buffer_start_row, last_start_row, backward)) + goto found; if (pvt->search_wrap_around && - vte_terminal_search_rows_iter (terminal, last_end_row, buffer_end_row, backward)) - return TRUE; + vte_terminal_search_rows_iter (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + last_end_row, buffer_end_row, backward)) + goto found; if (pvt->has_selection) { if (pvt->search_wrap_around) _vte_terminal_select_empty_at (terminal, @@ -13457,12 +14046,21 @@ vte_terminal_search_find (VteTerminal *terminal, -1, buffer_start_row - 1); } + match_found = FALSE; } else { - if (vte_terminal_search_rows_iter (terminal, last_end_row, buffer_end_row, backward)) - return TRUE; + if (vte_terminal_search_rows_iter (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + last_end_row, buffer_end_row, backward)) + goto found; if (pvt->search_wrap_around && - vte_terminal_search_rows_iter (terminal, buffer_start_row, last_start_row, backward)) - return TRUE; + 
vte_terminal_search_rows_iter (terminal, +#ifdef WITH_PCRE2 + match_context, match_data, +#endif + buffer_start_row, last_start_row, backward)) + goto found; if (pvt->has_selection) { if (pvt->search_wrap_around) _vte_terminal_select_empty_at (terminal, @@ -13473,9 +14071,19 @@ vte_terminal_search_find (VteTerminal *terminal, -1, buffer_end_row); } + match_found = FALSE; } - return FALSE; + found: + +#ifdef WITH_PCRE2 + if (match_data) + pcre2_match_data_free_8(match_data); + if (match_context) + pcre2_match_context_free_8(match_context); +#endif + + return match_found; } /** diff --git a/src/vte/vte.h b/src/vte/vte.h index 6da222db..85d267b7 100644 --- a/src/vte/vte.h +++ b/src/vte/vte.h @@ -26,13 +26,14 @@ #include "vteenums.h" #include "vteglobals.h" #include "vtepty.h" +#include "vteregex.h" #include "vteterminal.h" #include "vtetypebuiltins.h" #include "vteversion.h" -#ifndef VTE_DISABLE_DEPRECATED +#if !defined(VTE_DISABLE_DEPRECATED) || defined(VTE_COMPILATION) #include "vtedeprecated.h" -#endif /* VTE_DISABLE_DEPRECATED */ +#endif /* !VTE_DISABLE_DEPRECATED */ #undef __VTE_VTE_H_INSIDE__ diff --git a/src/vte/vtedeprecated.h b/src/vte/vtedeprecated.h index 9c1f69b9..0d41e48d 100644 --- a/src/vte/vtedeprecated.h +++ b/src/vte/vtedeprecated.h @@ -32,10 +32,22 @@ G_BEGIN_DECLS _VTE_DEPRECATED +int vte_terminal_match_add_gregex(VteTerminal *terminal, + GRegex *gregex, + GRegexMatchFlags gflags) _VTE_GNUC_NONNULL(1) _VTE_GNUC_NONNULL(2); + +_VTE_DEPRECATED void vte_terminal_match_set_cursor(VteTerminal *terminal, int tag, GdkCursor *cursor) _VTE_GNUC_NONNULL(1); +_VTE_DEPRECATED +void vte_terminal_search_set_gregex (VteTerminal *terminal, + GRegex *gregex, + GRegexMatchFlags gflags) _VTE_GNUC_NONNULL(1); + +_VTE_DEPRECATED +GRegex *vte_terminal_search_get_gregex (VteTerminal *terminal) _VTE_GNUC_NONNULL(1); _VTE_DEPRECATED void vte_pty_close (VtePty *pty) _VTE_GNUC_NONNULL(1); diff --git a/src/vte/vteregex.h b/src/vte/vteregex.h new file mode 100644 index 
00000000..a248e9e3 --- /dev/null +++ b/src/vte/vteregex.h @@ -0,0 +1,77 @@ +/* + * Copyright © 2015 Christian Persch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __VTE_VTE_REGEX_H__ +#define __VTE_VTE_REGEX_H__ + +#if !defined (__VTE_VTE_H_INSIDE__) && !defined (VTE_COMPILATION) +#error "Only <vte/vte.h> can be included directly." +#endif + +#include <glib.h> +#include <glib-object.h> + +G_BEGIN_DECLS + +#if !defined(_PCRE2_H) && !defined(__GI_SCANNER__) +typedef struct pcre2_real_code_8 pcre2_code_8; +#endif /* !_PCRE2_H */ + +typedef struct _VteRegex VteRegex; + +#define VTE_TYPE_REGEX (vte_regex_get_type()) +GType vte_regex_get_type (void); + +#define VTE_REGEX_ERROR (vte_regex_error_quark()) +GQuark vte_regex_error_quark (void); + +/* This is PCRE2_NO_UTF_CHECK | PCRE2_UTF */ +#define VTE_REGEX_FLAGS_DEFAULT (0x00080000u | 0x40000000u) + +typedef enum { + /* Negative values are PCRE2 errors */ + + /* VTE specific values */ + VTE_REGEX_ERROR_INCOMPATIBLE = G_MAXINT-1, + VTE_REGEX_ERROR_NOT_SUPPORTED = G_MAXINT +} VteRegexError; + +VteRegex *vte_regex_ref (VteRegex *regex) _VTE_GNUC_NONNULL(1); + +VteRegex *vte_regex_unref (VteRegex *regex) _VTE_GNUC_NONNULL(1); + +VteRegex *vte_regex_new (const char *pattern, + gssize pattern_length, + guint32 flags, + GError **error) _VTE_GNUC_NONNULL(1); + +gboolean vte_regex_jit (VteRegex 
*regex, + guint32 flags, + GError **error) _VTE_GNUC_NONNULL(1); + +#ifndef __GI_SCANNER__ + +VteRegex *vte_regex_new_pcre (pcre2_code_8 *code, + GError **error) _VTE_GNUC_NONNULL(1); + +const pcre2_code_8 *vte_regex_get_pcre (VteRegex *regex) _VTE_GNUC_NONNULL(1); + +#endif + +G_END_DECLS + +#endif /* __VTE_VTE_REGEX_H__ */ diff --git a/src/vte/vteterminal.h b/src/vte/vteterminal.h index dbe3ffed..fae0f23f 100644 --- a/src/vte/vteterminal.h +++ b/src/vte/vteterminal.h @@ -31,6 +31,7 @@ #include "vteenums.h" #include "vtemacros.h" #include "vtepty.h" +#include "vteregex.h" #if defined(VTE_COMPILATION) && defined(__cplusplus) class VteTerminalPrivate; @@ -289,9 +290,9 @@ void vte_terminal_get_cursor_position(VteTerminal *terminal, /* Add a matching expression, returning the tag the widget assigns to that * expression. */ -int vte_terminal_match_add_gregex(VteTerminal *terminal, - GRegex *regex, - GRegexMatchFlags flags) _VTE_GNUC_NONNULL(1); +int vte_terminal_match_add_regex(VteTerminal *terminal, + VteRegex *regex, + guint32 flags) _VTE_GNUC_NONNULL(1) _VTE_GNUC_NONNULL(2); /* Set the cursor to be used when the pointer is over a given match. 
*/ void vte_terminal_match_set_cursor_type(VteTerminal *terminal, int tag, @@ -313,10 +314,10 @@ char *vte_terminal_match_check_event(VteTerminal *terminal, GdkEvent *event, int *tag) _VTE_GNUC_NONNULL(1) _VTE_GNUC_NONNULL(2) G_GNUC_MALLOC; -void vte_terminal_search_set_gregex (VteTerminal *terminal, - GRegex *regex, - GRegexMatchFlags flags) _VTE_GNUC_NONNULL(1); -GRegex *vte_terminal_search_get_gregex (VteTerminal *terminal) _VTE_GNUC_NONNULL(1); +void vte_terminal_search_set_regex (VteTerminal *terminal, + VteRegex *regex, + guint32 flags) _VTE_GNUC_NONNULL(1); +VteRegex *vte_terminal_search_get_regex (VteTerminal *terminal) _VTE_GNUC_NONNULL(1); void vte_terminal_search_set_wrap_around (VteTerminal *terminal, gboolean wrap_around) _VTE_GNUC_NONNULL(1); gboolean vte_terminal_search_get_wrap_around (VteTerminal *terminal) _VTE_GNUC_NONNULL(1); diff --git a/src/vteapp.c b/src/vteapp.c index a5f79782..dfc06044 100644 --- a/src/vteapp.c +++ b/src/vteapp.c @@ -33,6 +33,10 @@ #undef VTE_DISABLE_DEPRECATED #include <vte/vte.h> +#ifdef WITH_PCRE2 +#include "vtepcre2.h" +#endif + #include <glib/gi18n.h> #define DINGUS1 "(((gopher|news|telnet|nntp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+(:[0-9]*)?" 
@@ -525,24 +529,47 @@ parse_color (const gchar *value, static void add_dingus (VteTerminal *terminal, - char **dingus) + char **dingus, + gboolean use_gregex) { const GdkCursorType cursors[] = { GDK_GUMBY, GDK_HAND1 }; - GRegex *regex; - GError *error; int id, i; for (i = 0; dingus[i]; ++i) { - error = NULL; - if (!(regex = g_regex_new(dingus[i], G_REGEX_OPTIMIZE, 0, &error))) { + GRegex *gregex = NULL; + GError *error = NULL; +#ifdef WITH_PCRE2 + VteRegex *regex = NULL; + + if (!use_gregex) + regex = vte_regex_new(dingus[i], -1, + PCRE2_UTF | PCRE2_NO_UTF_CHECK, + &error); + else +#endif + gregex = g_regex_new(dingus[i], G_REGEX_OPTIMIZE, 0, &error); + + if (error) { g_warning("Failed to compile regex '%s': %s\n", dingus[i], error->message); g_error_free(error); continue; } - id = vte_terminal_match_add_gregex(terminal, regex, 0); - g_regex_unref (regex); +#ifdef WITH_PCRE2 + if (!use_gregex) + id = vte_terminal_match_add_regex(terminal, regex, 0); + else +#endif + id = vte_terminal_match_add_gregex(terminal, gregex, 0); + +#ifdef WITH_PCRE2 + if (regex) + vte_regex_unref(regex); +#endif + if (gregex) + g_regex_unref (gregex); + vte_terminal_match_set_cursor_type(terminal, id, cursors[i % G_N_ELEMENTS(cursors)]); } @@ -568,8 +595,8 @@ main(int argc, char **argv) console = FALSE, keep = FALSE, icon_title = FALSE, shell = TRUE, reverse = FALSE, use_geometry_hints = TRUE, - use_scrolled_window = FALSE, - show_object_notifications = FALSE, rewrap = TRUE; + use_scrolled_window = FALSE, use_gregex = FALSE, + show_object_notifications = FALSE, rewrap = TRUE; char *geometry = NULL; gint lines = -1; const char *message = "Launching interactive shell...\r\n"; @@ -603,6 +630,13 @@ main(int argc, char **argv) G_OPTION_ARG_STRING_ARRAY, &dingus, "Add regex highlight", NULL }, +#ifdef WITH_PCRE2 + { + "gregex", 0, 0, + G_OPTION_ARG_NONE, &use_gregex, + "Use GRegex instead of PCRE2", NULL + }, +#endif { "no-rewrap", 'R', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &rewrap, @@ 
-976,10 +1010,10 @@ main(int argc, char **argv) /* Match "abcdefg". */ if (!no_builtin_dingus) { - add_dingus (terminal, (char **) builtin_dingus); + add_dingus (terminal, (char **) builtin_dingus, use_gregex); } if (dingus) { - add_dingus (terminal, dingus); + add_dingus (terminal, dingus, use_gregex); g_strfreev (dingus); } diff --git a/src/vteinternal.hh b/src/vteinternal.hh index 04d8104b..9175b8f5 100644 --- a/src/vteinternal.hh +++ b/src/vteinternal.hh @@ -21,6 +21,12 @@ #include <glib.h> typedef enum { + VTE_REGEX_UNDECIDED, + VTE_REGEX_PCRE2, + VTE_REGEX_GREGEX +} VteRegexMode; + +typedef enum { VTE_REGEX_CURSOR_GDKCURSOR, VTE_REGEX_CURSOR_GDKCURSORTYPE, VTE_REGEX_CURSOR_NAME @@ -36,11 +42,24 @@ typedef enum { MOUSE_TRACKING_ALL_MOTION_TRACKING } MouseTrackingMode; +struct vte_regex_and_flags { + VteRegexMode mode; + union { /* switched on @mode */ + struct { + VteRegex *regex; + guint32 match_flags; + } pcre; + struct { + GRegex *regex; + GRegexMatchFlags match_flags; + } gregex; + }; +}; + /* A match regex, with a tag. */ struct vte_match_regex { gint tag; - GRegex *regex; - GRegexMatchFlags match_flags; + struct vte_regex_and_flags regex; VteRegexCursorMode cursor_mode; union { GdkCursor *cursor; @@ -316,6 +335,7 @@ public: /* State variables for handling match checks. */ char *match_contents; GArray *match_attributes; + VteRegexMode match_regex_mode; GArray *match_regexes; char *match; int match_tag; @@ -323,8 +343,7 @@ public: gboolean show_match; /* Search data. 
*/ - GRegex *search_regex; - GRegexMatchFlags search_match_flags; + struct vte_regex_and_flags search_regex; gboolean search_wrap_around; GArray *search_attrs; /* Cache attrs */ diff --git a/src/vtepcre2.h b/src/vtepcre2.h new file mode 100644 index 00000000..ddca5ff6 --- /dev/null +++ b/src/vtepcre2.h @@ -0,0 +1,27 @@ +/* + * Copyright © 2015 Christian Persch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#define PCRE2_CODE_UNIT_WIDTH 0 +#include <pcre2.h> + +/* Assert compatibility of PCRE2 and GLib types */ +G_STATIC_ASSERT(sizeof(PCRE2_UCHAR8) == sizeof (guint8)); +G_STATIC_ASSERT(sizeof(PCRE2_SIZE) == sizeof (gsize)); +G_STATIC_ASSERT(PCRE2_UNSET == (gsize)-1); +G_STATIC_ASSERT(PCRE2_ZERO_TERMINATED == (gsize)-1); diff --git a/src/vteregex.cc b/src/vteregex.cc new file mode 100644 index 00000000..2e578f18 --- /dev/null +++ b/src/vteregex.cc @@ -0,0 +1,296 @@ +/* + * Copyright © 2015 Christian Persch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * SECTION: vte-regex + * @short_description: Regex for matching and searching. Uses PCRE2 internally. + * + * Since: 0.44 + */ + +#include "config.h" + +#include "vtemacros.h" +#include "vteregex.h" +#include "vteregexinternal.hh" + +#ifdef WITH_PCRE2 +#include "vtepcre2.h" +#endif /* WITH_PCRE2 */ + +struct _VteRegex { + volatile int ref_count; +#ifdef WITH_PCRE2 + pcre2_code_8 *code; +#endif /* WITH_PCRE2 */ +}; + +#ifdef WITH_PCRE2 +#define DEFAULT_COMPILE_OPTIONS (PCRE2_UTF) +#define JIT_OPTIONS (PCRE2_JIT_COMPLETE) +#define DEFAULT_MATCH_OPTIONS (0) +#else +#define DEFAULT_COMPILE_OPTIONS (0 +#define JIT_OPTIONS (0) +#define DEFAULT_MATCH_OPTIONS (0) +#endif /* WITH_PCRE2 */ + +#ifdef WITH_PCRE2 + +static VteRegex * +regex_new(pcre2_code_8 *code) +{ + VteRegex *regex; + + regex = g_slice_new(VteRegex); + regex->ref_count = 1; + regex->code = code; + + return regex; +} + +static void +regex_free(VteRegex *regex) +{ + pcre2_code_free_8(regex->code); + g_slice_free(VteRegex, regex); +} + +static gboolean +set_gerror_from_pcre_error(int errcode, + GError **error) +{ + if (errcode < 0) { + PCRE2_UCHAR8 buf[128]; + int n; + + n = pcre2_get_error_message_8(errcode, buf, sizeof (buf)); + g_assert(n >= 0); + g_set_error_literal(error, VTE_REGEX_ERROR, errcode, (const char*)buf); + return FALSE; + } + + return TRUE; +} + +#else + +static void * +set_unsupported_error(GError **error) +{ + g_set_error_literal(error, VTE_REGEX_ERROR, VTE_REGEX_ERROR_NOT_SUPPORTED, + "PCRE2 not supported"); + return NULL; +} + +#endif /* WITH_PCRE2 */ + 
+G_DEFINE_BOXED_TYPE(VteRegex, vte_regex, + vte_regex_ref, (GBoxedFreeFunc)vte_regex_unref) + +G_DEFINE_QUARK(vte-regex-error, vte_regex_error) + +/** + * vte_regex_ref: + * @regex: (transfer none): a #VteRegex + * + * Increases the reference count of @regex by one. + * + * Returns: @regex + */ +VteRegex * +vte_regex_ref(VteRegex *regex) +{ + g_return_val_if_fail (regex, NULL); + +#ifdef WITH_PCRE2 + g_atomic_int_inc (&regex->ref_count); +#endif + return regex; +} + +/** + * vte_regex_unref: + * @regex: (transfer full): a #VteRegex + * + * Decreases the reference count of @regex by one, and frees @regex + * if the refcount reaches zero. + * + * Returns: %NULL + */ +VteRegex * +vte_regex_unref(VteRegex *regex) +{ + g_return_val_if_fail (regex, NULL); + +#ifdef WITH_PCRE2 + if (g_atomic_int_dec_and_test (&regex->ref_count)) + regex_free (regex); +#endif + return NULL; +} + +/** + * vte_regex_new: + * @pattern: a regex pattern string + * @pattern_length: the length of @pattern in bytes, or -1 if the + * string is NUL-terminated and the length is unknown + * @flags: PCRE2 compile flags + * @error: (allow-none): return location for a #GError, or %NULL + * + * Compiles @pattern into a regex. @flags must include %PCRE2_UTF. 
+ * + * Returns: (transfer full): a newly created #VteRegex, or %NULL with @error filled in + */ +VteRegex * +vte_regex_new(const char *pattern, + gssize pattern_length, + guint32 flags, + GError **error) +{ +#ifdef WITH_PCRE2 + pcre2_code_8 *code; + int r, errcode; + guint32 v; + PCRE2_SIZE erroffset; + + g_return_val_if_fail(pattern != NULL, NULL); + g_return_val_if_fail(pattern_length >= -1, NULL); + g_return_val_if_fail(error == NULL || *error == NULL, NULL); + g_return_val_if_fail(flags & PCRE2_UTF, NULL); + + /* Check library compatibility */ + r = pcre2_config_8(PCRE2_CONFIG_UNICODE, &v); + if (r != 0 || v != 1) { + g_set_error(error, VTE_REGEX_ERROR, VTE_REGEX_ERROR_INCOMPATIBLE, + "PCRE2 library was built without unicode support"); + return NULL; + } + + code = pcre2_compile_8((PCRE2_SPTR8)pattern, + pattern_length >= 0 ? pattern_length : PCRE2_ZERO_TERMINATED, + (uint32_t)flags | PCRE2_NO_UTF_CHECK, + &errcode, &erroffset, + NULL); + + if (code == 0) { + set_gerror_from_pcre_error(errcode, error); + g_prefix_error(error, "Failed to compile pattern to regex at %" G_GSIZE_FORMAT ":", + erroffset); + return NULL; + } + + return regex_new(code); +#else + return set_unsupported_error(error); +#endif /* WITH_PCRE2 */ +} + +/** + * vte_regex_new_pcre: + * @code: a #pcre2_code_8 + * + * Creates a new #VteRegex for @code. @code must have been compiled with + * %PCRE2_UTF. + * + * Returns: (transfer full): a newly created #VteRegex, or %NULL if VTE + * was not compiled with PCRE2 support. 
+ */ +VteRegex * +vte_regex_new_pcre(pcre2_code_8 *code, + GError **error) +{ +#ifdef WITH_PCRE2 + guint32 flags; + + g_return_val_if_fail(code != NULL, NULL); + g_return_val_if_fail(error == NULL || *error == NULL, NULL); + + pcre2_pattern_info_8(code, PCRE2_INFO_ALLOPTIONS, &flags); + g_return_val_if_fail(flags & PCRE2_UTF, NULL); + + return regex_new(code); +#else + return set_unsupported_error(error); +#endif +} + +/** + * vte_regex_get_pcre: + * @regex: a #VteRegex + * + * + * Returns: the #pcre2_code_8 from @regex + */ +const pcre2_code_8 * +vte_regex_get_pcre(VteRegex *regex) +{ +#ifdef WITH_PCRE2 + g_return_val_if_fail(regex != NULL, NULL); + + return regex->code; +#else + return NULL; +#endif +} + +/** + * vte_regex_jit: + * @regex: a #VteRegex + * + * If the platform supports JITing, JIT compiles @regex. + * + * Returns: %TRUE if JITing succeeded, or %FALSE with @error filled in + */ +gboolean +vte_regex_jit(VteRegex *regex, + guint flags, + GError **error) +{ +#ifdef WITH_PCRE2 + int r; + + g_return_val_if_fail(regex != NULL, FALSE); + + r = pcre2_jit_compile_8(regex->code, flags); + + return set_gerror_from_pcre_error(r, error); +#else + return set_unsupported_error(error); +#endif /* WITH_PCRE2 */ +} + +/* + * _vte_regex_get_jited: + * + * Note: We can't tell if the regex has been JITed for a particular mode, + * just if it has been JITed at all. 
+ * + * Returns: %TRUE iff the regex has been JITed + */ +gboolean +_vte_regex_get_jited(VteRegex *regex) +{ + PCRE2_SIZE s; + int r; + + g_return_val_if_fail(regex != NULL, FALSE); + + r = pcre2_pattern_info_8(regex->code, PCRE2_INFO_JITSIZE, &s); + + return r == 0 && s != 0; +} diff --git a/src/vteregexinternal.hh b/src/vteregexinternal.hh new file mode 100644 index 00000000..cd5a653d --- /dev/null +++ b/src/vteregexinternal.hh @@ -0,0 +1,20 @@ +/* + * Copyright © 2015 Christian Persch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +gboolean _vte_regex_get_jited(VteRegex *regex); |