summaryrefslogtreecommitdiff
path: root/src/core/ngx_regex.c
diff options
context:
space:
mode:
authorMaxim Dounin <mdounin@mdounin.ru>2021-12-25 01:07:15 +0300
committerMaxim Dounin <mdounin@mdounin.ru>2021-12-25 01:07:15 +0300
commitc6fec0b027569a4e0b1d8aaee7dea0f2e4d6052b (patch)
tree2ac67a198959d1bf38efc6179bebee93f8b627e6 /src/core/ngx_regex.c
parentcddb22cefe8f1c132e00553cb012ca9ab3264c18 (diff)
downloadnginx-c6fec0b027569a4e0b1d8aaee7dea0f2e4d6052b.tar.gz
PCRE2 library support.
The PCRE2 library is now used by default if found, instead of the original PCRE library. If needed for some reason, this can be disabled with the --without-pcre2 configure option. To make it possible to specify paths to the library and include files via --with-cc-opt / --with-ld-opt, the library is first tested without any additional paths and options. If this fails, the pcre2-config script is used. Similarly to the original PCRE library, it is now possible to build PCRE2 from sources with nginx configure, by using the --with-pcre= option. It automatically detects if PCRE or PCRE2 sources are provided. Note that compiling PCRE2 10.33 and later requires inttypes.h. When compiling on Windows with MSVC, inttypes.h is only available starting with MSVC 2013. In older versions some replacement needs to be provided ("echo '#include <stdint.h>' > pcre2-10.xx/src/inttypes.h" is good enough for MSVC 2010). The interface on nginx side remains unchanged.
Diffstat (limited to 'src/core/ngx_regex.c')
-rw-r--r--src/core/ngx_regex.c331
1 files changed, 325 insertions, 6 deletions
diff --git a/src/core/ngx_regex.c b/src/core/ngx_regex.c
index 461b4be82..5ae4f780b 100644
--- a/src/core/ngx_regex.c
+++ b/src/core/ngx_regex.c
@@ -18,8 +18,13 @@ typedef struct {
static ngx_inline void ngx_regex_malloc_init(ngx_pool_t *pool);
static ngx_inline void ngx_regex_malloc_done(void);
+#if (NGX_PCRE2)
+static void * ngx_libc_cdecl ngx_regex_malloc(size_t size, void *data);
+static void ngx_libc_cdecl ngx_regex_free(void *p, void *data);
+#else
static void * ngx_libc_cdecl ngx_regex_malloc(size_t size);
static void ngx_libc_cdecl ngx_regex_free(void *p);
+#endif
static void ngx_regex_cleanup(void *data);
static ngx_int_t ngx_regex_module_init(ngx_cycle_t *cycle);
@@ -67,15 +72,24 @@ ngx_module_t ngx_regex_module = {
};
-static ngx_pool_t *ngx_regex_pool;
-static ngx_list_t *ngx_regex_studies;
+static ngx_pool_t *ngx_regex_pool;
+static ngx_list_t *ngx_regex_studies;
+static ngx_uint_t ngx_regex_direct_alloc;
+
+#if (NGX_PCRE2)
+static pcre2_compile_context *ngx_regex_compile_context;
+static pcre2_match_data *ngx_regex_match_data;
+static ngx_uint_t ngx_regex_match_data_size;
+#endif
void
ngx_regex_init(void)
{
+#if !(NGX_PCRE2)
pcre_malloc = ngx_regex_malloc;
pcre_free = ngx_regex_free;
+#endif
}
@@ -83,6 +97,7 @@ static ngx_inline void
ngx_regex_malloc_init(ngx_pool_t *pool)
{
ngx_regex_pool = pool;
+ ngx_regex_direct_alloc = (pool == NULL) ? 1 : 0;
}
@@ -90,9 +105,146 @@ static ngx_inline void
ngx_regex_malloc_done(void)
{
ngx_regex_pool = NULL;
+ ngx_regex_direct_alloc = 0;
}
+#if (NGX_PCRE2)
+
+ngx_int_t
+ngx_regex_compile(ngx_regex_compile_t *rc)
+{
+ int n, errcode;
+ char *p;
+ u_char errstr[128];
+ size_t erroff;
+ pcre2_code *re;
+ ngx_regex_elt_t *elt;
+ pcre2_general_context *gctx;
+ pcre2_compile_context *cctx;
+
+ if (ngx_regex_compile_context == NULL) {
+ /*
+ * Allocate a compile context if not yet allocated. This uses
+ * direct allocations from heap, so the result can be cached
+ * even at runtime.
+ */
+
+ ngx_regex_malloc_init(NULL);
+
+ gctx = pcre2_general_context_create(ngx_regex_malloc, ngx_regex_free,
+ NULL);
+ if (gctx == NULL) {
+ ngx_regex_malloc_done();
+ goto nomem;
+ }
+
+ cctx = pcre2_compile_context_create(gctx);
+ if (cctx == NULL) {
+ pcre2_general_context_free(gctx);
+ ngx_regex_malloc_done();
+ goto nomem;
+ }
+
+ ngx_regex_compile_context = cctx;
+
+ pcre2_general_context_free(gctx);
+ ngx_regex_malloc_done();
+ }
+
+ ngx_regex_malloc_init(rc->pool);
+
+ re = pcre2_compile(rc->pattern.data, rc->pattern.len,
+ (uint32_t) rc->options, &errcode, &erroff,
+ ngx_regex_compile_context);
+
+ /* ensure that there is no current pool */
+ ngx_regex_malloc_done();
+
+ if (re == NULL) {
+ pcre2_get_error_message(errcode, errstr, 128);
+
+ if ((size_t) erroff == rc->pattern.len) {
+ rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
+ "pcre2_compile() failed: %s in \"%V\"",
+ errstr, &rc->pattern)
+ - rc->err.data;
+
+ } else {
+ rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
+ "pcre2_compile() failed: %s in \"%V\" at \"%s\"",
+ errstr, &rc->pattern, rc->pattern.data + erroff)
+ - rc->err.data;
+ }
+
+ return NGX_ERROR;
+ }
+
+ rc->regex = re;
+
+ /* do not study at runtime */
+
+ if (ngx_regex_studies != NULL) {
+ elt = ngx_list_push(ngx_regex_studies);
+ if (elt == NULL) {
+ goto nomem;
+ }
+
+ elt->regex = rc->regex;
+ elt->name = rc->pattern.data;
+ }
+
+ n = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &rc->captures);
+ if (n < 0) {
+ p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_CAPTURECOUNT) failed: %d";
+ goto failed;
+ }
+
+ if (rc->captures == 0) {
+ return NGX_OK;
+ }
+
+ n = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &rc->named_captures);
+ if (n < 0) {
+ p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMECOUNT) failed: %d";
+ goto failed;
+ }
+
+ if (rc->named_captures == 0) {
+ return NGX_OK;
+ }
+
+ n = pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &rc->name_size);
+ if (n < 0) {
+ p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMEENTRYSIZE) failed: %d";
+ goto failed;
+ }
+
+ n = pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &rc->names);
+ if (n < 0) {
+ p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMETABLE) failed: %d";
+ goto failed;
+ }
+
+ return NGX_OK;
+
+failed:
+
+ rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, p, &rc->pattern, n)
+ - rc->err.data;
+ return NGX_ERROR;
+
+nomem:
+
+ rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
+ "regex \"%V\" compilation failed: no memory",
+ &rc->pattern)
+ - rc->err.data;
+ return NGX_ERROR;
+}
+
+#else
+
ngx_int_t
ngx_regex_compile(ngx_regex_compile_t *rc)
{
@@ -195,6 +347,74 @@ nomem:
return NGX_ERROR;
}
+#endif
+
+
+#if (NGX_PCRE2)
+
+ngx_int_t
+ngx_regex_exec(ngx_regex_t *re, ngx_str_t *s, int *captures, ngx_uint_t size)
+{
+ size_t *ov;
+ ngx_int_t rc;
+ ngx_uint_t n, i;
+
+ /*
+ * The pcre2_match() function might allocate memory for backtracking
+ * frames, typical allocations are from 40k and above. So the allocator
+ * is configured to do direct allocations from heap during matching.
+ */
+
+ ngx_regex_malloc_init(NULL);
+
+ if (ngx_regex_match_data == NULL
+ || size > ngx_regex_match_data_size)
+ {
+ /*
+ * Allocate a match data if not yet allocated or smaller than
+ * needed.
+ */
+
+ if (ngx_regex_match_data) {
+ pcre2_match_data_free(ngx_regex_match_data);
+ }
+
+ ngx_regex_match_data_size = size;
+ ngx_regex_match_data = pcre2_match_data_create(size / 3, NULL);
+
+ if (ngx_regex_match_data == NULL) {
+ rc = PCRE2_ERROR_NOMEMORY;
+ goto failed;
+ }
+ }
+
+ rc = pcre2_match(re, s->data, s->len, 0, 0, ngx_regex_match_data, NULL);
+
+ if (rc < 0) {
+ goto failed;
+ }
+
+ n = pcre2_get_ovector_count(ngx_regex_match_data);
+ ov = pcre2_get_ovector_pointer(ngx_regex_match_data);
+
+ if (n > size / 3) {
+ n = size / 3;
+ }
+
+ for (i = 0; i < n; i++) {
+ captures[i * 2] = ov[i * 2];
+ captures[i * 2 + 1] = ov[i * 2 + 1];
+ }
+
+failed:
+
+ ngx_regex_malloc_done();
+
+ return rc;
+}
+
+#endif
+
ngx_int_t
ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log)
@@ -229,6 +449,35 @@ ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log)
}
+#if (NGX_PCRE2)
+
+static void * ngx_libc_cdecl
+ngx_regex_malloc(size_t size, void *data)
+{
+ if (ngx_regex_pool) {
+ return ngx_palloc(ngx_regex_pool, size);
+ }
+
+ if (ngx_regex_direct_alloc) {
+ return ngx_alloc(size, ngx_cycle->log);
+ }
+
+ return NULL;
+}
+
+
+static void ngx_libc_cdecl
+ngx_regex_free(void *p, void *data)
+{
+ if (ngx_regex_direct_alloc) {
+ ngx_free(p);
+ }
+
+ return;
+}
+
+#else
+
static void * ngx_libc_cdecl
ngx_regex_malloc(size_t size)
{
@@ -246,11 +495,13 @@ ngx_regex_free(void *p)
return;
}
+#endif
+
static void
ngx_regex_cleanup(void *data)
{
-#if (NGX_HAVE_PCRE_JIT)
+#if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT)
ngx_regex_conf_t *rcf = data;
ngx_uint_t i;
@@ -275,12 +526,17 @@ ngx_regex_cleanup(void *data)
/*
* The PCRE JIT compiler uses mmap for its executable codes, so we
* have to explicitly call the pcre_free_study() function to free
- * this memory.
+ * this memory. In PCRE2, we call the pcre2_code_free() function
+ * for the same reason.
*/
+#if (NGX_PCRE2)
+ pcre2_code_free(elts[i].regex);
+#else
if (elts[i].regex->extra != NULL) {
pcre_free_study(elts[i].regex->extra);
}
+#endif
}
#endif
@@ -290,6 +546,26 @@ ngx_regex_cleanup(void *data)
*/
ngx_regex_studies = NULL;
+
+#if (NGX_PCRE2)
+
+ /*
+ * Free compile context and match data. If needed at runtime by
+ * the new cycle, these will be re-allocated.
+ */
+
+ if (ngx_regex_compile_context) {
+ pcre2_compile_context_free(ngx_regex_compile_context);
+ ngx_regex_compile_context = NULL;
+ }
+
+ if (ngx_regex_match_data) {
+ pcre2_match_data_free(ngx_regex_match_data);
+ ngx_regex_match_data = NULL;
+ ngx_regex_match_data_size = 0;
+ }
+
+#endif
}
@@ -297,7 +573,9 @@ static ngx_int_t
ngx_regex_module_init(ngx_cycle_t *cycle)
{
int opt;
+#if !(NGX_PCRE2)
const char *errstr;
+#endif
ngx_uint_t i;
ngx_list_part_t *part;
ngx_regex_elt_t *elts;
@@ -307,10 +585,16 @@ ngx_regex_module_init(ngx_cycle_t *cycle)
rcf = (ngx_regex_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_regex_module);
-#if (NGX_HAVE_PCRE_JIT)
+#if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT)
+
if (rcf->pcre_jit) {
+#if (NGX_PCRE2)
+ opt = 1;
+#else
opt = PCRE_STUDY_JIT_COMPILE;
+#endif
}
+
#endif
ngx_regex_malloc_init(cycle->pool);
@@ -330,6 +614,23 @@ ngx_regex_module_init(ngx_cycle_t *cycle)
i = 0;
}
+#if (NGX_PCRE2)
+
+ if (opt) {
+ int n;
+
+ n = pcre2_jit_compile(elts[i].regex, PCRE2_JIT_COMPLETE);
+
+ if (n != 0) {
+ ngx_log_error(NGX_LOG_INFO, cycle->log, 0,
+ "pcre2_jit_compile() failed: %d in \"%s\", "
+ "ignored",
+ n, elts[i].name);
+ }
+ }
+
+#else
+
elts[i].regex->extra = pcre_study(elts[i].regex->code, opt, &errstr);
if (errstr != NULL) {
@@ -353,11 +654,15 @@ ngx_regex_module_init(ngx_cycle_t *cycle)
}
}
#endif
+#endif
}
ngx_regex_malloc_done();
ngx_regex_studies = NULL;
+#if (NGX_PCRE2)
+ ngx_regex_compile_context = NULL;
+#endif
return NGX_OK;
}
@@ -415,7 +720,21 @@ ngx_regex_pcre_jit(ngx_conf_t *cf, void *post, void *data)
return NGX_CONF_OK;
}
-#if (NGX_HAVE_PCRE_JIT)
+#if (NGX_PCRE2)
+ {
+ int r;
+ uint32_t jit;
+
+ jit = 0;
+ r = pcre2_config(PCRE2_CONFIG_JIT, &jit);
+
+ if (r != 0 || jit != 1) {
+ ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
+ "PCRE2 library does not support JIT");
+ *fp = 0;
+ }
+ }
+#elif (NGX_HAVE_PCRE_JIT)
{
int jit, r;