/* Source: src/vteregex.cc (blob 5df61523811b952e308cbcde3648930544d3597b) */
/*
 * Copyright © 2015 Christian Persch
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * SECTION: vte-regex
 * @short_description: Regex for matching and searching. Uses PCRE2 internally.
 *
 * Since: 0.44
 */

#include "config.h"

#include "vtemacros.h"
#include "vteenums.h"
#include "vteregex.h"
#include "vteregexinternal.hh"

#ifdef WITH_PCRE2
#include "vtepcre2.h"
#endif /* WITH_PCRE2 */

/* Reference-counted wrapper around a compiled PCRE2 pattern. */
struct _VteRegex {
        /* Reference count; set to 1 by regex_new() and changed with the
         * g_atomic_int_*() helpers in vte_regex_ref()/vte_regex_unref(). */
        volatile int ref_count;
#ifdef WITH_PCRE2
        /* Compiled pattern; released with pcre2_code_free_8() in regex_free(). */
        pcre2_code_8 *code;
#endif /* WITH_PCRE2 */
};

/* Default option sets. Without PCRE2 every set is empty (0), so code that
 * references these macros still compiles in that configuration. */
#ifdef WITH_PCRE2
#define DEFAULT_COMPILE_OPTIONS (PCRE2_UTF)
#define JIT_OPTIONS (PCRE2_JIT_COMPLETE)
#define DEFAULT_MATCH_OPTIONS (0)
#else
#define DEFAULT_COMPILE_OPTIONS (0)
#define JIT_OPTIONS (0)
#define DEFAULT_MATCH_OPTIONS (0)
#endif /* WITH_PCRE2 */

#ifdef WITH_PCRE2

/*
 * regex_new:
 * @code: the compiled pattern to store in the new regex
 *
 * Allocates a #VteRegex with the slice allocator, stores @code in it and
 * gives it an initial reference count of one.
 *
 * Returns: the newly allocated #VteRegex
 */
static VteRegex *
regex_new(pcre2_code_8 *code)
{
        VteRegex *self = g_slice_new(VteRegex);

        self->code = code;
        self->ref_count = 1;

        return self;
}

/*
 * regex_free:
 * @regex: the #VteRegex to destroy
 *
 * Frees the compiled pattern held by @regex, then returns the
 * #VteRegex itself to the slice allocator.
 */
static void
regex_free(VteRegex *regex)
{
        pcre2_code_8 *compiled = regex->code;

        pcre2_code_free_8(compiled);
        g_slice_free(VteRegex, regex);
}

/*
 * set_gerror_from_pcre_error:
 * @errcode: a PCRE2 return code
 * @error: return location for a #GError
 *
 * Translates a negative PCRE2 return code into a #GError in the
 * %VTE_REGEX_ERROR domain, using the PCRE2 error message as text.
 * Codes that are zero or positive are treated as success and leave
 * @error untouched.
 *
 * Returns: %TRUE if @errcode is not negative, %FALSE with @error set otherwise
 */
static gboolean
set_gerror_from_pcre_error(int errcode,
                           GError **error)
{
        PCRE2_UCHAR8 message[128];
        int len;

        /* Only negative codes are reported as errors here. */
        if (errcode >= 0)
                return TRUE;

        len = pcre2_get_error_message_8(errcode, message, sizeof (message));
        g_assert(len >= 0);
        g_set_error_literal(error, VTE_REGEX_ERROR, errcode, (const char*)message);

        return FALSE;
}

#else

/*
 * set_unsupported_error:
 * @error: return location for a #GError
 *
 * Sets @error to %VTE_REGEX_ERROR_NOT_SUPPORTED. Used by the public entry
 * points when the library is built without PCRE2.
 *
 * Returns: always %FALSE, so callers can return the result directly
 */
static gboolean
set_unsupported_error(GError **error)
{
        static const char message[] = "PCRE2 not supported";

        g_set_error_literal(error,
                            VTE_REGEX_ERROR,
                            VTE_REGEX_ERROR_NOT_SUPPORTED,
                            message);

        return FALSE;
}

#endif /* WITH_PCRE2 */

/* Registers VteRegex as a boxed GType whose copy function takes a reference
 * (vte_regex_ref) and whose free function drops one (vte_regex_unref).
 * NOTE(review): both functions are defined further down in this file, so
 * their prototypes presumably come from vteregex.h — confirm. */
G_DEFINE_BOXED_TYPE(VteRegex, vte_regex,
                    vte_regex_ref, (GBoxedFreeFunc)vte_regex_unref)

/* Defines vte_regex_error_quark(), the quark for the "vte-regex-error"
 * error domain. */
G_DEFINE_QUARK(vte-regex-error, vte_regex_error)

/**
 * vte_regex_ref:
 * @regex: (transfer none): a #VteRegex
 *
 * Atomically adds one reference to @regex.
 *
 * Returns: @regex
 */
VteRegex *
vte_regex_ref(VteRegex *regex)
{
        g_return_val_if_fail (regex, NULL);

#ifdef WITH_PCRE2
        /* The count is only maintained when PCRE2 support is compiled in. */
        g_atomic_int_inc (&regex->ref_count);
#endif

        return regex;
}

/**
 * vte_regex_unref:
 * @regex: (transfer full): a #VteRegex
 *
 * Decreases the reference count of @regex by one, and frees @regex
 * if the refcount reaches zero.
 *
 * Returns: %NULL
 */
VteRegex *
vte_regex_unref(VteRegex *regex)
{
        g_return_val_if_fail (regex, NULL);

#ifdef WITH_PCRE2
        /* Atomic decrement; the caller that drops the last reference frees. */
        if (g_atomic_int_dec_and_test (&regex->ref_count))
                regex_free (regex);
#endif
        return NULL;
}

/**
 * vte_regex_new:
 * @pattern: a regex pattern string
 * @pattern_length: the length of @pattern in bytes, or -1 if the
 *  string is NUL-terminated and the length is unknown
 * @flags: PCRE2 compile flags
 * @error: (allow-none): return location for a #GError, or %NULL
 *
 * Compiles @pattern into a regex. @flags must include %PCRE2_UTF.
 *
 * The pattern is compiled with %PCRE2_NO_UTF_CHECK added to @flags, so
 * PCRE2 does not validate that @pattern is well-formed UTF-8.
 *
 * On a compile failure @error is set in the %VTE_REGEX_ERROR domain with
 * the PCRE2 compile error code as its code, and a message containing the
 * offset into @pattern at which the error was detected.
 *
 * Returns: (transfer full): a newly created #VteRegex, or %NULL with @error filled in
 */
VteRegex *
vte_regex_new(const char *pattern,
              gssize      pattern_length,
              guint32     flags,
              GError    **error)
{
#ifdef WITH_PCRE2
        pcre2_code_8 *code;
        int r, errcode;
        guint32 v;
        PCRE2_SIZE erroffset;

        g_return_val_if_fail(pattern != NULL, NULL);
        g_return_val_if_fail(pattern_length >= -1, NULL);
        g_return_val_if_fail(error == NULL || *error == NULL, NULL);
        g_return_val_if_fail(flags & PCRE2_UTF, NULL);

        /* Check library compatibility */
        r = pcre2_config_8(PCRE2_CONFIG_UNICODE, &v);
        if (r != 0 || v != 1) {
                g_set_error_literal(error, VTE_REGEX_ERROR, VTE_REGEX_ERROR_INCOMPATIBLE,
                                    "PCRE2 library was built without unicode support");
                return NULL;
        }

        code = pcre2_compile_8((PCRE2_SPTR8)pattern,
                               pattern_length >= 0 ? (PCRE2_SIZE)pattern_length : PCRE2_ZERO_TERMINATED,
                               (uint32_t)flags | PCRE2_NO_UTF_CHECK,
                               &errcode, &erroffset,
                               NULL);

        if (code == NULL) {
                PCRE2_UCHAR8 buf[128];

                /* pcre2_compile() reports failures with POSITIVE error codes,
                 * which set_gerror_from_pcre_error() treats as success; going
                 * through it would leave @error unset, and g_prefix_error()
                 * does nothing on an unset error. Build the error here.
                 *
                 * The buffer starts out empty so that it is a valid string
                 * even if PCRE2 does not know the code; a message that does
                 * not fit is truncated but still NUL-terminated. */
                buf[0] = 0;
                (void) pcre2_get_error_message_8(errcode, buf, sizeof (buf));

                g_set_error_literal(error, VTE_REGEX_ERROR, errcode, (const char*)buf);
                g_prefix_error(error, "Failed to compile pattern to regex at %" G_GSIZE_FORMAT ":",
                               erroffset);
                return NULL;
        }

        return regex_new(code);
#else
        set_unsupported_error(error);
        return NULL;
#endif /* WITH_PCRE2 */
}

/**
 * vte_regex_new_pcre:
 * @code: a #pcre2_code_8
 * @error: (allow-none): return location for a #GError, or %NULL
 *
 * Creates a new #VteRegex for @code. @code must have been compiled with
 * %PCRE2_UTF.
 *
 * On success the returned #VteRegex stores @code and frees it with
 * pcre2_code_free() when the last reference is dropped. If %NULL is
 * returned, @code is left untouched.
 *
 * Returns: (transfer full): a newly created #VteRegex, or %NULL if VTE
 *   was not compiled with PCRE2 support (with @error filled in) or if
 *   @code fails the precondition checks.
 */
VteRegex *
vte_regex_new_pcre(pcre2_code_8 *code,
                   GError      **error)
{
#ifdef WITH_PCRE2
        /* Initialised so the option test below never reads an indeterminate
         * value, even when the precondition checks are compiled out. */
        guint32 flags = 0;
        int r;

        g_return_val_if_fail(code != NULL, NULL);
        g_return_val_if_fail(error == NULL || *error == NULL, NULL);

        /* pcre2_pattern_info() only fills in @flags when it returns 0. */
        r = pcre2_pattern_info_8(code, PCRE2_INFO_ALLOPTIONS, &flags);
        g_return_val_if_fail(r == 0, NULL);
        g_return_val_if_fail(flags & PCRE2_UTF, NULL);

        return regex_new(code);
#else
        set_unsupported_error(error);
        return NULL;
#endif
}

/**
 * vte_regex_get_pcre:
 * @regex: a #VteRegex
 *
 * Gives read-only access to the compiled pattern stored in @regex.
 *
 * Returns: the #pcre2_code_8 from @regex, or %NULL when built without
 *   PCRE2 support
 */
const pcre2_code_8 *
vte_regex_get_pcre(VteRegex *regex)
{
#ifndef WITH_PCRE2
        return NULL;
#else
        g_return_val_if_fail(regex != NULL, NULL);

        return regex->code;
#endif
}

/**
 * vte_regex_jit:
 * @regex: a #VteRegex
 * @flags: JIT options, passed unchanged to pcre2_jit_compile()
 * @error: (allow-none): return location for a #GError, or %NULL
 *
 * If the platform supports JITing, JIT compiles @regex.
 *
 * Returns: %TRUE if JITing succeeded, or %FALSE with @error filled in
 */
gboolean
vte_regex_jit(VteRegex *regex,
              guint     flags,
              GError  **error)
{
#ifndef WITH_PCRE2
        return set_unsupported_error(error);
#else
        int rc;

        g_return_val_if_fail(regex != NULL, FALSE);

        /* A negative result is turned into a GError by the helper. */
        rc = pcre2_jit_compile_8(regex->code, flags);
        return set_gerror_from_pcre_error(rc, error);
#endif /* !WITH_PCRE2 */
}

/*
 * _vte_regex_get_jited:
 * @regex: a #VteRegex
 *
 * Queries the size of the JIT-compiled code of @regex; a non-zero size
 * means the regex has been JITed.
 *
 * Note: We can't tell if the regex has been JITed for a particular mode,
 * just if it has been JITed at all.
 *
 * Returns: %TRUE iff the regex has been JITed
 */
gboolean
_vte_regex_get_jited(VteRegex *regex)
{
#ifndef WITH_PCRE2
        return FALSE;
#else
        PCRE2_SIZE jit_size;

        g_return_val_if_fail(regex != NULL, FALSE);

        /* jit_size is only meaningful when the query itself succeeded. */
        if (pcre2_pattern_info_8(regex->code, PCRE2_INFO_JITSIZE, &jit_size) != 0)
                return FALSE;

        return jit_size != 0;
#endif
}