summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2018-10-29 19:51:26 +0100
committerNikita Popov <nikita.ppv@gmail.com>2018-10-29 20:29:22 +0100
commita7d6b2c1fb57bb77554d15d753cb44c69872e3cf (patch)
tree5b68c4f3b89e56c3939008a8491077d527c312e3 /ext/mbstring/mbstring.c
parentea2e67876a55ffce2d38bff920ce7afdc7aff847 (diff)
downloadphp-git-a7d6b2c1fb57bb77554d15d753cb44c69872e3cf.tar.gz
Use zend_string for mbstring last encoding cache
Saves us a string duplication, as well as case-insensitive string comparisons for the likely case of an interned string encoding.
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r--ext/mbstring/mbstring.c206
1 files changed, 92 insertions, 114 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index e86369cbeb..6f3e5c65e7 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -683,24 +683,25 @@ static const sapi_post_entry mbstr_post_entries[] = {
};
/* }}} */
-static const mbfl_encoding *php_mb_get_encoding(const char *encoding_name) {
+static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
if (encoding_name) {
const mbfl_encoding *encoding;
- if (MBSTRG(last_used_encoding_name)
- && !strcasecmp(encoding_name, MBSTRG(last_used_encoding_name))) {
+ zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
+ if (last_encoding_name && (last_encoding_name == encoding_name
+ || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
return MBSTRG(last_used_encoding);
}
- encoding = mbfl_name2encoding(encoding_name);
+ encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
if (!encoding) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding_name);
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", ZSTR_VAL(encoding_name));
return NULL;
}
- if (MBSTRG(last_used_encoding_name)) {
- efree(MBSTRG(last_used_encoding_name));
+ if (last_encoding_name) {
+ zend_string_release(last_encoding_name);
}
- MBSTRG(last_used_encoding_name) = estrdup(encoding_name);
+ MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
MBSTRG(last_used_encoding) = encoding;
return encoding;
} else {
@@ -1732,7 +1733,7 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
MBSTRG(http_input_identify_string) = NULL;
if (MBSTRG(last_used_encoding_name)) {
- efree(MBSTRG(last_used_encoding_name));
+ zend_string_release(MBSTRG(last_used_encoding_name));
MBSTRG(last_used_encoding_name) = NULL;
}
@@ -2288,15 +2289,16 @@ PHP_FUNCTION(mb_strlen)
{
size_t n;
mbfl_string string;
- char *str, *enc_name = NULL;
- size_t str_len, enc_name_len;
+ char *str;
+ size_t str_len;
+ zend_string *enc_name = NULL;
mbfl_string_init(&string);
ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(str, str_len)
Z_PARAM_OPTIONAL
- Z_PARAM_STRING(enc_name, enc_name_len)
+ Z_PARAM_STR(enc_name)
ZEND_PARSE_PARAMETERS_END();
string.val = (unsigned char *) str;
@@ -2323,13 +2325,13 @@ PHP_FUNCTION(mb_strpos)
int reverse = 0;
zend_long offset = 0;
mbfl_string haystack, needle;
- char *enc_name = NULL;
- size_t enc_name_len, n;
+ zend_string *enc_name = NULL;
+ size_t n;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
return;
}
@@ -2385,54 +2387,39 @@ PHP_FUNCTION(mb_strpos)
PHP_FUNCTION(mb_strrpos)
{
mbfl_string haystack, needle;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
zval *zoffset = NULL;
- zend_long offset = 0, str_flg, n;
- char *enc_name2 = NULL;
- size_t enc_name_len2;
+ zend_long offset = 0, n;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name) == FAILURE) {
return;
}
if (zoffset) {
if (Z_TYPE_P(zoffset) == IS_STRING) {
- enc_name2 = Z_STRVAL_P(zoffset);
- enc_name_len2 = Z_STRLEN_P(zoffset);
- str_flg = 1;
-
- if (enc_name2 != NULL) {
- switch (*enc_name2) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case ' ':
- case '-':
- case '.':
- break;
- default :
- str_flg = 0;
- break;
- }
- }
-
- if (str_flg) {
- convert_to_long_ex(zoffset);
- offset = Z_LVAL_P(zoffset);
- } else {
- enc_name = enc_name2;
- enc_name_len = enc_name_len2;
+ switch (Z_STRVAL_P(zoffset)[0]) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case ' ':
+ case '-':
+ case '.':
+ convert_to_long_ex(zoffset);
+ offset = Z_LVAL_P(zoffset);
+ break;
+ default :
+ enc_name = Z_STR_P(zoffset);
+ break;
}
} else {
convert_to_long_ex(zoffset);
@@ -2471,10 +2458,9 @@ PHP_FUNCTION(mb_stripos)
size_t n = (size_t) -1;
zend_long offset = 0;
mbfl_string haystack, needle;
- const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
- size_t from_encoding_len;
+ zend_string *from_encoding = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
return;
}
@@ -2500,10 +2486,10 @@ PHP_FUNCTION(mb_strripos)
size_t n = (size_t) -1;
zend_long offset = 0;
mbfl_string haystack, needle;
- const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ zend_string *from_encoding = NULL;
size_t from_encoding_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
return;
}
@@ -2523,14 +2509,13 @@ PHP_FUNCTION(mb_strstr)
{
size_t n;
mbfl_string haystack, needle, result, *ret = NULL;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
zend_bool part = 0;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
return;
}
@@ -2578,14 +2563,13 @@ PHP_FUNCTION(mb_strrchr)
{
size_t n;
mbfl_string haystack, needle, result, *ret = NULL;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
zend_bool part = 0;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name) == FAILURE) {
return;
}
@@ -2636,11 +2620,11 @@ PHP_FUNCTION(mb_stristr)
zend_bool part = 0;
size_t from_encoding_len, n;
mbfl_string haystack, needle, result, *ret = NULL;
- const char *from_encoding = NULL;
+ zend_string *from_encoding = NULL;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
return;
}
@@ -2688,13 +2672,12 @@ PHP_FUNCTION(mb_strrichr)
{
zend_bool part = 0;
size_t n;
- size_t from_encoding_len;
mbfl_string haystack, needle, result, *ret = NULL;
- const char *from_encoding = NULL;
+ zend_string *from_encoding = NULL;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding) == FAILURE) {
return;
}
@@ -2737,13 +2720,12 @@ PHP_FUNCTION(mb_substr_count)
{
size_t n;
mbfl_string haystack, needle;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) {
return;
}
@@ -2771,14 +2753,15 @@ PHP_FUNCTION(mb_substr_count)
Returns part of a string */
PHP_FUNCTION(mb_substr)
{
- char *str, *encoding = NULL;
+ char *str;
+ zend_string *encoding;
zend_long from, len;
size_t mblen, real_from, real_len;
- size_t str_len, encoding_len;
+ size_t str_len;
zend_bool len_is_null = 1;
mbfl_string string, result, *ret;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) {
return;
}
@@ -2842,15 +2825,14 @@ PHP_FUNCTION(mb_substr)
Returns part of a string */
PHP_FUNCTION(mb_strcut)
{
- char *encoding = NULL;
+ zend_string *encoding = NULL;
zend_long from, len;
- size_t encoding_len;
zend_bool len_is_null = 1;
mbfl_string string, result, *ret;
mbfl_string_init(&string);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) {
return;
}
@@ -2905,12 +2887,11 @@ PHP_FUNCTION(mb_strwidth)
{
size_t n;
mbfl_string string;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
mbfl_string_init(&string);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S", (char **)&string.val, &string.len, &enc_name) == FAILURE) {
return;
}
@@ -2933,12 +2914,13 @@ PHP_FUNCTION(mb_strwidth)
Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)
{
- char *str, *trimmarker = NULL, *encoding = NULL;
+ char *str, *trimmarker = NULL;
+ zend_string *encoding = NULL;
zend_long from, width, swidth;
- size_t str_len, trimmarker_len, encoding_len;
+ size_t str_len, trimmarker_len;
mbfl_string string, result, marker, *ret;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) {
return;
}
@@ -3268,17 +3250,17 @@ static char *mbstring_convert_case(
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
{
- const char *from_encoding = NULL;
+ zend_string *from_encoding = NULL;
char *str;
- size_t str_len, from_encoding_len;
+ size_t str_len;
zend_long case_mode = 0;
char *newstr;
size_t ret_len;
const mbfl_encoding *enc;
RETVAL_FALSE;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
- &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len,
+ &case_mode, &from_encoding) == FAILURE) {
return;
}
@@ -3307,15 +3289,15 @@ PHP_FUNCTION(mb_convert_case)
*/
PHP_FUNCTION(mb_strtoupper)
{
- const char *from_encoding = NULL;
+ zend_string *from_encoding = NULL;
char *str;
- size_t str_len, from_encoding_len;
+ size_t str_len;
char *newstr;
size_t ret_len;
const mbfl_encoding *enc;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
- &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
+ &from_encoding) == FAILURE) {
return;
}
@@ -3341,15 +3323,15 @@ PHP_FUNCTION(mb_strtoupper)
*/
PHP_FUNCTION(mb_strtolower)
{
- const char *from_encoding = NULL;
+ zend_string *from_encoding = NULL;
char *str;
- size_t str_len, from_encoding_len;
+ size_t str_len;
char *newstr;
size_t ret_len;
const mbfl_encoding *enc;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
- &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len,
+ &from_encoding) == FAILURE) {
return;
}
@@ -3589,12 +3571,11 @@ PHP_FUNCTION(mb_convert_kana)
mbfl_string string, result, *ret;
char *optstr = NULL;
size_t optstr_len;
- char *encname = NULL;
- size_t encname_len;
+ zend_string *encname = NULL;
mbfl_string_init(&string);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sS", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) {
return;
}
@@ -4824,7 +4805,7 @@ PHP_FUNCTION(mb_check_encoding)
/* }}} */
-static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc_name)
+static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name)
{
const mbfl_encoding *enc;
enum mbfl_no_encoding no_enc;
@@ -4836,7 +4817,7 @@ static inline zend_long php_mb_ord(const char* str, size_t str_len, const char*
no_enc = enc->no_encoding;
if (php_mb_is_unsupported_no_encoding(no_enc)) {
- php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", ZSTR_VAL(enc_name));
return -1;
}
@@ -4879,16 +4860,15 @@ static inline zend_long php_mb_ord(const char* str, size_t str_len, const char*
/* {{{ proto int|false mb_ord([string str[, string encoding]]) */
PHP_FUNCTION(mb_ord)
{
- char* str;
+ char *str;
size_t str_len;
- char* enc = NULL;
- size_t enc_len;
+ zend_string *enc = NULL;
zend_long cp;
ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(str, str_len)
Z_PARAM_OPTIONAL
- Z_PARAM_STRING(enc, enc_len)
+ Z_PARAM_STR(enc)
ZEND_PARSE_PARAMETERS_END();
cp = php_mb_ord(str, str_len, enc);
@@ -4902,7 +4882,7 @@ PHP_FUNCTION(mb_ord)
/* }}} */
-static inline zend_string *php_mb_chr(zend_long cp, const char *enc_name)
+static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name)
{
const mbfl_encoding *enc;
enum mbfl_no_encoding no_enc;
@@ -4917,7 +4897,7 @@ static inline zend_string *php_mb_chr(zend_long cp, const char *enc_name)
no_enc = enc->no_encoding;
if (php_mb_is_unsupported_no_encoding(no_enc)) {
- php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", ZSTR_VAL(enc_name));
return NULL;
}
@@ -4990,14 +4970,13 @@ static inline zend_string *php_mb_chr(zend_long cp, const char *enc_name)
PHP_FUNCTION(mb_chr)
{
zend_long cp;
- char* enc = NULL;
- size_t enc_len;
+ zend_string *enc = NULL;
zend_string* ret;
ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_LONG(cp)
Z_PARAM_OPTIONAL
- Z_PARAM_STRING(enc, enc_len)
+ Z_PARAM_STR(enc)
ZEND_PARSE_PARAMETERS_END();
ret = php_mb_chr(cp, enc);
@@ -5024,14 +5003,13 @@ PHP_FUNCTION(mb_scrub)
const mbfl_encoding *enc;
char* str;
size_t str_len;
- char *enc_name = NULL;
- size_t enc_name_len;
+ zend_string *enc_name = NULL;
char *ret;
ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(str, str_len)
Z_PARAM_OPTIONAL
- Z_PARAM_STRING(enc_name, enc_name_len)
+ Z_PARAM_STR(enc_name)
ZEND_PARSE_PARAMETERS_END();
enc = php_mb_get_encoding(enc_name);
@@ -5161,7 +5139,7 @@ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nby
/* {{{ MBSTRING_API int php_mb_stripos()
*/
-MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const char *from_encoding)
+MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding)
{
size_t n = (size_t) -1;
mbfl_string haystack, needle;