summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2019-03-19 13:06:21 +0100
committerNikita Popov <nikita.ppv@gmail.com>2019-03-19 13:06:21 +0100
commit4fe3d108af2b0a1e8810959fe1ea7a0b52291ad7 (patch)
treec6325dd574d98e6fa37987af558e08f5b932fe4b
parent38b16274d16f3a90dd42e340b8429041c6aa7365 (diff)
downloadphp-git-4fe3d108af2b0a1e8810959fe1ea7a0b52291ad7.tar.gz
Don't create a new array for empty/null match every time
If PREG_OFFSET_CAPTURE is used, unmatched subpatterns will be either [null, -1] or ['', -1] depending on PREG_UNMATCHED_AS_NULL mode. Instead of creating a new array like this every time, cache it inside a global (per-request -- could make it immutable though). Additionally check whether the subpattern is an empty string or single character string and use an existing interned string in that case. Empty / single-char subpatterns are common, so let's avoid allocating strings for them.
-rw-r--r--ext/pcre/php_pcre.c78
-rw-r--r--ext/pcre/php_pcre.h3
2 files changed, 62 insertions, 19 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 106936e936..04e5d858c3 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -259,6 +259,8 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */
pcre_globals->backtrack_limit = 0;
pcre_globals->recursion_limit = 0;
pcre_globals->error_code = PHP_PCRE_NO_ERROR;
+ ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
+ ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
#ifdef HAVE_PCRE_JIT_SUPPORT
pcre_globals->jit = 1;
#endif
@@ -460,6 +462,15 @@ static PHP_RINIT_FUNCTION(pcre)
/* }}} */
#endif
+static PHP_RSHUTDOWN_FUNCTION(pcre)
+{
+ zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
+ zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
+ ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
+ ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
+ return SUCCESS;
+}
+
/* {{{ static pcre_clean_cache */
static int pcre_clean_cache(zval *data, void *arg)
{
@@ -937,26 +948,57 @@ PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
}
}/*}}}*/
+static void init_unmatched_null_pair() {
+ zval tmp;
+ zval *pair = &PCRE_G(unmatched_null_pair);
+ array_init_size(pair, 2);
+ ZVAL_NULL(&tmp);
+ zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
+ ZVAL_LONG(&tmp, -1);
+ zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
+}
+
+static void init_unmatched_empty_pair() {
+ zval tmp;
+ zval *pair = &PCRE_G(unmatched_empty_pair);
+ array_init_size(pair, 2);
+ ZVAL_EMPTY_STRING(&tmp);
+ zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
+ ZVAL_LONG(&tmp, -1);
+ zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
+}
+
/* {{{ add_offset_pair */
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;
- array_init_size(&match_pair, 2);
-
/* Add (match, offset) to the return value */
if (PCRE2_UNSET == offset) {
if (unmatched_as_null) {
- ZVAL_NULL(&tmp);
+ if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
+ init_unmatched_null_pair();
+ }
+ ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
} else {
- ZVAL_EMPTY_STRING(&tmp);
+ if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
+ init_unmatched_empty_pair();
+ }
+ ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
}
} else {
- ZVAL_STRINGL(&tmp, str, len);
+ array_init_size(&match_pair, 2);
+ if (len == 0) {
+ ZVAL_EMPTY_STRING(&tmp);
+ } else if (len == 1) {
+ ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str));
+ } else {
+ ZVAL_STRINGL(&tmp, str, len);
+ }
+ zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
+ ZVAL_LONG(&tmp, offset);
+ zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
}
- zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
- ZVAL_LONG(&tmp, offset);
- zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
if (name) {
Z_ADDREF(match_pair);
@@ -975,6 +1017,10 @@ static inline void populate_match_value(
} else {
ZVAL_EMPTY_STRING(val);
}
+ } else if (start_offset == end_offset) {
+ ZVAL_EMPTY_STRING(val);
+ } else if (start_offset + 1 == end_offset) {
+ ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
} else {
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
}
@@ -1223,16 +1269,10 @@ matched:
}
} else {
for (i = 0; i < count; i++) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(&match_sets[i]);
- } else {
- add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
+ zval val;
+ populate_match_value(
+ &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
+ zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
}
}
mark = pcre2_get_mark(match_data);
@@ -2955,7 +2995,7 @@ zend_module_entry pcre_module_entry = {
#else
NULL,
#endif
- NULL,
+ PHP_RSHUTDOWN(pcre),
PHP_MINFO(pcre),
PHP_PCRE_VERSION,
PHP_MODULE_GLOBALS(pcre),
diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h
index 0377ce77a8..eca3ca5fdf 100644
--- a/ext/pcre/php_pcre.h
+++ b/ext/pcre/php_pcre.h
@@ -75,6 +75,9 @@ ZEND_BEGIN_MODULE_GLOBALS(pcre)
zend_bool jit;
#endif
int error_code;
+ /* Used for unmatched subpatterns in OFFSET_CAPTURE mode */
+ zval unmatched_null_pair;
+ zval unmatched_empty_pair;
ZEND_END_MODULE_GLOBALS(pcre)
PHPAPI ZEND_EXTERN_MODULE_GLOBALS(pcre)