summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Popov <nikic@php.net>2016-12-15 17:22:08 +0100
committerNikita Popov <nikic@php.net>2017-01-02 23:31:27 +0100
commit593b6cd0a87401d72aedfcbad8bf9b1b149d94f4 (patch)
tree7da3fafbf2c1bd4e9435b02505d6f15e0ffc3bf4
parentb06fb88cf9bcaea72dd87dd4273228243c5a5b0b (diff)
downloadphp-git-593b6cd0a87401d72aedfcbad8bf9b1b149d94f4.tar.gz
Optimize get_browser() using prefix and contains checks
Avoid expensive regex compilations by checking for prefix and contained segments beforehand.
-rw-r--r--ext/standard/browscap.c248
1 files changed, 185 insertions, 63 deletions
diff --git a/ext/standard/browscap.c b/ext/standard/browscap.c
index 14ca27a95e..71491bcb45 100644
--- a/ext/standard/browscap.c
+++ b/ext/standard/browscap.c
@@ -27,6 +27,8 @@
#include "zend_ini_scanner.h"
#include "zend_globals.h"
+#define BROWSCAP_NUM_CONTAINS 5
+
typedef struct {
zend_string *key;
zend_string *value;
@@ -34,9 +36,13 @@ typedef struct {
typedef struct {
zend_string *pattern;
- zend_string *regex;
+ zend_string *parent;
uint32_t kv_start;
uint32_t kv_end;
+ /* We ensure that the length fits in 16 bits, so this is fine */
+ uint16_t contains_start[BROWSCAP_NUM_CONTAINS];
+ uint8_t contains_len[BROWSCAP_NUM_CONTAINS];
+ uint8_t prefix_len;
} browscap_entry;
typedef struct {
@@ -67,7 +73,9 @@ static void browscap_entry_dtor(zval *zvalue)
{
browscap_entry *entry = Z_PTR_P(zvalue);
zend_string_release(entry->pattern);
- zend_string_release(entry->regex);
+ if (entry->parent) {
+ zend_string_release(entry->parent);
+ }
efree(entry);
}
@@ -75,10 +83,55 @@ static void browscap_entry_dtor_persistent(zval *zvalue)
{
browscap_entry *entry = Z_PTR_P(zvalue);
zend_string_release(entry->pattern);
- zend_string_release(entry->regex);
+ if (entry->parent) {
+ zend_string_release(entry->parent);
+ }
pefree(entry, 1);
}
+static inline zend_bool is_placeholder(char c) {
+ return c == '?' || c == '*';
+}
+
+/* Length of prefix not containing any wildcards */
+static uint8_t browscap_compute_prefix_len(zend_string *pattern) {
+ size_t i;
+ for (i = 0; i < ZSTR_LEN(pattern); i++) {
+ if (is_placeholder(ZSTR_VAL(pattern)[i])) {
+ break;
+ }
+ }
+ return MIN(i, UINT8_MAX);
+}
+
+static size_t browscap_compute_contains(
+ zend_string *pattern, size_t start_pos,
+ uint16_t *contains_start, uint8_t *contains_len) {
+ size_t i = start_pos;
+ /* Find first non-placeholder character after prefix */
+ for (; i < ZSTR_LEN(pattern); i++) {
+ if (!is_placeholder(ZSTR_VAL(pattern)[i])) {
+ /* Skip the case of a single non-placeholder character.
+ * Let's try to find something longer instead. */
+ if (i + 1 < ZSTR_LEN(pattern) &&
+ !is_placeholder(ZSTR_VAL(pattern)[i + 1])) {
+ break;
+ }
+ }
+ }
+ *contains_start = i;
+
+ /* Find first placeholder character after that */
+ for (; i < ZSTR_LEN(pattern); i++) {
+ if (is_placeholder(ZSTR_VAL(pattern)[i])) {
+ break;
+ }
+ }
+ *contains_len = MIN(i - *contains_start, UINT8_MAX);
+ return i;
+}
+
+/* Length of regex, including escapes, anchors, etc. */
static size_t browscap_compute_regex_len(zend_string *pattern) {
size_t i, len = ZSTR_LEN(pattern);
for (i = 0; i < ZSTR_LEN(pattern); i++) {
@@ -227,12 +280,17 @@ static HashTable *browscap_entry_to_array(browser_data *bdata, browscap_entry *e
ALLOC_HASHTABLE(ht);
zend_hash_init(ht, 8, NULL, ZVAL_PTR_DTOR, 0);
- ZVAL_STR_COPY(&tmp, entry->regex);
+ ZVAL_STR(&tmp, browscap_convert_pattern(entry->pattern, 0));
zend_hash_str_add(ht, "browser_name_regex", sizeof("browser_name_regex")-1, &tmp);
ZVAL_STR_COPY(&tmp, entry->pattern);
zend_hash_str_add(ht, "browser_name_pattern", sizeof("browser_name_pattern")-1, &tmp);
+ if (entry->parent) {
+ ZVAL_STR_COPY(&tmp, entry->parent);
+ zend_hash_str_add(ht, "parent", sizeof("parent")-1, &tmp);
+ }
+
for (i = entry->kv_start; i < entry->kv_end; i++) {
ZVAL_STR_COPY(&tmp, bdata->kv[i].value);
zend_hash_add(ht, bdata->kv[i].key, &tmp);
@@ -256,17 +314,6 @@ static void php_browscap_parser_cb(zval *arg1, zval *arg2, zval *arg3, int callb
if (ctx->current_entry != NULL && arg2) {
zend_string *new_key, *new_value;
- /* parent entry can not be same as current section -> causes infinite loop! */
- if (!strcasecmp(Z_STRVAL_P(arg1), "parent") &&
- ctx->current_section_name != NULL &&
- !strcasecmp(ZSTR_VAL(ctx->current_section_name), Z_STRVAL_P(arg2))
- ) {
- zend_error(E_CORE_ERROR, "Invalid browscap ini file: "
- "'Parent' value cannot be same as the section name: %s "
- "(in file %s)", ZSTR_VAL(ctx->current_section_name), INI_STR("browscap"));
- return;
- }
-
/* Set proper value for true/false settings */
if ((Z_STRLEN_P(arg2) == 2 && !strncasecmp(Z_STRVAL_P(arg2), "on", sizeof("on") - 1)) ||
(Z_STRLEN_P(arg2) == 3 && !strncasecmp(Z_STRVAL_P(arg2), "yes", sizeof("yes") - 1)) ||
@@ -284,28 +331,61 @@ static void php_browscap_parser_cb(zval *arg1, zval *arg2, zval *arg3, int callb
new_value = browscap_intern_str(ctx, Z_STR_P(arg2));
}
- new_key = browscap_intern_str_ci(ctx, Z_STR_P(arg1), persistent);
- browscap_add_kv(bdata, new_key, new_value, persistent);
- ctx->current_entry->kv_end = bdata->kv_used;
+ if (!strcasecmp(Z_STRVAL_P(arg1), "parent")) {
+ /* parent entry can not be same as current section -> causes infinite loop! */
+ if (ctx->current_section_name != NULL &&
+ !strcasecmp(ZSTR_VAL(ctx->current_section_name), Z_STRVAL_P(arg2))
+ ) {
+ zend_error(E_CORE_ERROR, "Invalid browscap ini file: "
+ "'Parent' value cannot be same as the section name: %s "
+ "(in file %s)", ZSTR_VAL(ctx->current_section_name), INI_STR("browscap"));
+ return;
+ }
+
+ if (ctx->current_entry->parent) {
+ zend_string_release(ctx->current_entry->parent);
+ }
+ ctx->current_entry->parent = new_value;
+ } else {
+ new_key = browscap_intern_str_ci(ctx, Z_STR_P(arg1), persistent);
+ browscap_add_kv(bdata, new_key, new_value, persistent);
+ ctx->current_entry->kv_end = bdata->kv_used;
+ }
}
break;
- case ZEND_INI_PARSER_SECTION: {
- zval processed;
- zval unprocessed;
+ case ZEND_INI_PARSER_SECTION:
+ {
+ browscap_entry *entry;
+ zend_string *pattern = Z_STR_P(arg1);
+ size_t pos;
+ int i;
+
+ if (ZSTR_LEN(pattern) > UINT16_MAX) {
+ php_error_docref(NULL, E_WARNING,
+ "Skipping excessively long pattern of length %zd", ZSTR_LEN(pattern));
+ break;
+ }
+
+ entry = ctx->current_entry
+ = pemalloc(sizeof(browscap_entry), persistent);
+ zend_hash_update_ptr(bdata->htab, pattern, entry);
- ctx->current_entry = pemalloc(sizeof(browscap_entry), persistent);
- zend_hash_update_ptr(bdata->htab, Z_STR_P(arg1), ctx->current_entry);
+ if (ctx->current_section_name) {
+ zend_string_release(ctx->current_section_name);
+ }
+ ctx->current_section_name = zend_string_copy(pattern);
- if (ctx->current_section_name) {
- zend_string_release(ctx->current_section_name);
- }
- ctx->current_section_name = zend_string_copy(Z_STR_P(arg1));
+ entry->pattern = zend_string_copy(pattern);
+ entry->kv_end = entry->kv_start = bdata->kv_used;
+ entry->parent = NULL;
- ctx->current_entry->regex = browscap_convert_pattern(Z_STR_P(arg1), persistent);
- ctx->current_entry->pattern = zend_string_copy(Z_STR_P(arg1));
- ctx->current_entry->kv_end = ctx->current_entry->kv_start = bdata->kv_used;
+ pos = entry->prefix_len = browscap_compute_prefix_len(pattern);
+ for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
+ pos = browscap_compute_contains(pattern, pos,
+ &entry->contains_start[i], &entry->contains_len[i]);
}
break;
+ }
}
}
/* }}} */
@@ -455,32 +535,79 @@ PHP_MSHUTDOWN_FUNCTION(browscap) /* {{{ */
}
/* }}} */
+static inline size_t browscap_get_minimum_length(browscap_entry *entry) {
+ size_t len = entry->prefix_len;
+ int i;
+ for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
+ len += entry->contains_len[i];
+ }
+ return len;
+}
+
static int browser_reg_compare(
zval *entry_zv, int num_args, va_list args, zend_hash_key *key) /* {{{ */
{
browscap_entry *entry = Z_PTR_P(entry_zv);
- char *lookup_browser_name = va_arg(args, char *);
- int lookup_browser_length = va_arg(args, int);
+ zend_string *agent_name = va_arg(args, zend_string *);
browscap_entry **found_entry_ptr = va_arg(args, browscap_entry **);
browscap_entry *found_entry = *found_entry_ptr;
+ ALLOCA_FLAG(use_heap);
+ zend_string *pattern_lc, *regex;
+ const char *cur;
+ int i;
pcre *re;
int re_options;
pcre_extra *re_extra;
- /* See if we have an exact match, if so, we're done... */
- if (found_entry) {
- if (!strcasecmp(ZSTR_VAL(found_entry->pattern), lookup_browser_name)) {
- return 0;
+ /* Agent name too short */
+ if (ZSTR_LEN(agent_name) < browscap_get_minimum_length(entry)) {
+ return 0;
+ }
+
+ /* Quickly discard patterns where the prefix doesn't match. */
+ if (zend_binary_strcasecmp(
+ ZSTR_VAL(agent_name), entry->prefix_len,
+ ZSTR_VAL(entry->pattern), entry->prefix_len) != 0) {
+ return 0;
+ }
+
+ /* Lowercase the pattern, the agent name is already lowercase */
+ ZSTR_ALLOCA_ALLOC(pattern_lc, ZSTR_LEN(entry->pattern), use_heap);
+ zend_str_tolower_copy(ZSTR_VAL(pattern_lc), ZSTR_VAL(entry->pattern), ZSTR_LEN(entry->pattern));
+
+ /* Check if the agent contains the "contains" portions */
+ cur = ZSTR_VAL(agent_name) + entry->prefix_len;
+ for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
+ if (entry->contains_len[i] != 0) {
+ cur = zend_memnstr(cur,
+ ZSTR_VAL(pattern_lc) + entry->contains_start[i],
+ entry->contains_len[i],
+ ZSTR_VAL(agent_name) + ZSTR_LEN(agent_name));
+ if (!cur) {
+ ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
+ return 0;
+ }
+ cur += entry->contains_len[i];
}
}
- re = pcre_get_compiled_regex(entry->regex, &re_extra, &re_options);
+ /* See if we have an exact match, if so, we're done... */
+ if (zend_string_equals(agent_name, pattern_lc)) {
+ *found_entry_ptr = entry;
+ ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
+ return ZEND_HASH_APPLY_STOP;
+ }
+
+ regex = browscap_convert_pattern(entry->pattern, 0);
+ re = pcre_get_compiled_regex(regex, &re_extra, &re_options);
if (re == NULL) {
+ ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
+ zend_string_release(regex);
return 0;
}
- if (pcre_exec(re, re_extra, lookup_browser_name, lookup_browser_length, 0, re_options, NULL, 0) == 0) {
+ if (pcre_exec(re, re_extra, ZSTR_VAL(agent_name), ZSTR_LEN(agent_name), 0, re_options, NULL, 0) == 0) {
/* If we've found a possible browser, we need to do a comparison of the
number of characters changed in the user agent being checked versus
the previous match found and the current match. */
@@ -523,6 +650,8 @@ static int browser_reg_compare(
}
}
+ ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
+ zend_string_release(regex);
return 0;
}
/* }}} */
@@ -537,12 +666,8 @@ static void browscap_zval_copy_ctor(zval *p) /* {{{ */
Get information about the capabilities of a browser. If browser_name is omitted or null, HTTP_USER_AGENT is used. Returns an object by default; if return_array is true, returns an array. */
PHP_FUNCTION(get_browser)
{
- char *agent_name = NULL;
- size_t agent_name_len = 0;
+ zend_string *agent_name = NULL, *lookup_browser_name;
zend_bool return_array = 0;
- zval *agent, *parent_agent_name_zv, *http_user_agent;
- zend_string *parent_agent_name;
- char *lookup_browser_name;
browser_data *bdata;
browscap_entry *found_entry = NULL;
HashTable *agent_ht;
@@ -562,27 +687,29 @@ PHP_FUNCTION(get_browser)
bdata = &global_bdata;
}
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!b", &agent_name, &agent_name_len, &return_array) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S!b", &agent_name, &return_array) == FAILURE) {
return;
}
if (agent_name == NULL) {
- if ((Z_TYPE(PG(http_globals)[TRACK_VARS_SERVER]) == IS_ARRAY || zend_is_auto_global_str(ZEND_STRL("_SERVER"))) &&
- (http_user_agent = zend_hash_str_find(Z_ARRVAL_P(&PG(http_globals)[TRACK_VARS_SERVER]), "HTTP_USER_AGENT", sizeof("HTTP_USER_AGENT")-1)) == NULL
- ) {
+ zval *http_user_agent = NULL;
+ if (Z_TYPE(PG(http_globals)[TRACK_VARS_SERVER]) == IS_ARRAY
+ || zend_is_auto_global_str(ZEND_STRL("_SERVER"))) {
+ http_user_agent = zend_hash_str_find(
+ Z_ARRVAL_P(&PG(http_globals)[TRACK_VARS_SERVER]),
+ "HTTP_USER_AGENT", sizeof("HTTP_USER_AGENT")-1);
+ }
+ if (http_user_agent == NULL) {
php_error_docref(NULL, E_WARNING, "HTTP_USER_AGENT variable is not set, cannot determine user agent name");
RETURN_FALSE;
}
- agent_name = Z_STRVAL_P(http_user_agent);
- agent_name_len = Z_STRLEN_P(http_user_agent);
+ agent_name = Z_STR_P(http_user_agent);
}
- lookup_browser_name = estrndup(agent_name, agent_name_len);
- php_strtolower(lookup_browser_name, agent_name_len);
-
- found_entry = zend_hash_str_find_ptr(bdata->htab, lookup_browser_name, agent_name_len);
+ lookup_browser_name = zend_string_tolower(agent_name);
+ found_entry = zend_hash_find_ptr(bdata->htab, lookup_browser_name);
if (found_entry == NULL) {
- zend_hash_apply_with_arguments(bdata->htab, browser_reg_compare, 3, lookup_browser_name, agent_name_len, &found_entry);
+ zend_hash_apply_with_arguments(bdata->htab, browser_reg_compare, 2, lookup_browser_name, &found_entry);
if (found_entry == NULL) {
found_entry = zend_hash_str_find_ptr(bdata->htab,
@@ -602,11 +729,9 @@ PHP_FUNCTION(get_browser)
object_and_properties_init(return_value, zend_standard_class_def, agent_ht);
}
- parent_agent_name_zv = zend_hash_str_find(agent_ht, "parent", sizeof("parent")-1);
- parent_agent_name = parent_agent_name_zv ? Z_STR_P(parent_agent_name_zv) : NULL;
-
- while (parent_agent_name) {
- if ((found_entry = zend_hash_find_ptr(bdata->htab, parent_agent_name)) == NULL) {
+ while (found_entry->parent) {
+ found_entry = zend_hash_find_ptr(bdata->htab, found_entry->parent);
+ if (found_entry == NULL) {
break;
}
@@ -617,14 +742,11 @@ PHP_FUNCTION(get_browser)
zend_hash_merge(Z_OBJPROP_P(return_value), agent_ht, (copy_ctor_func_t) browscap_zval_copy_ctor, 0);
}
- parent_agent_name_zv = zend_hash_str_find(agent_ht, "parent", sizeof("parent")-1);
- parent_agent_name = parent_agent_name_zv ? Z_STR_P(parent_agent_name_zv) : NULL;
-
zend_hash_destroy(agent_ht);
efree(agent_ht);
}
- efree(lookup_browser_name);
+ zend_string_release(lookup_browser_name);
}
/* }}} */