diff options
author | Dmitry Stogov <dmitry@zend.com> | 2021-02-19 15:42:21 +0300 |
---|---|---|
committer | Dmitry Stogov <dmitry@zend.com> | 2021-02-19 15:42:21 +0300 |
commit | 5e015425263c28d40fd49ee386135f02d0e76975 (patch) | |
tree | de733e8dd302ccdf11670eba201337455154ccbd /Zend | |
parent | be9200998fe276f87455f5b4e000d5dd28fd249d (diff) | |
download | php-git-5e015425263c28d40fd49ee386135f02d0e76975.tar.gz |
Improve basename(). Avoid calling mblen() for ASCII compatible locales.
Diffstat (limited to 'Zend')
-rw-r--r-- | Zend/zend_globals.h | 4 | ||||
-rw-r--r-- | Zend/zend_operators.c | 87 | ||||
-rw-r--r-- | Zend/zend_operators.h | 9 |
3 files changed, 88 insertions, 12 deletions
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 825fad833c..e9b24fc0e3 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -95,6 +95,10 @@ struct _zend_compiler_globals { bool skip_shebang; bool increment_lineno; + bool variable_width_locale; /* UTF-8, Shift-JIS, Big5, ISO 2022, EUC, etc */ + bool ascii_compatible_locale; /* locale uses ASCII characters as singletons */ + /* and don't use them as lead/trail units */ + zend_string *doc_comment; uint32_t extra_fn_flags; diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 0cdb3aa085..a23dad9e1e 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -30,12 +30,21 @@ #include "zend_exceptions.h" #include "zend_closures.h" +#include <locale.h> +#ifdef HAVE_LANGINFO_H +# include <langinfo.h> +#endif + #ifdef __SSE2__ #include <emmintrin.h> #endif +#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER) +/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */ +#define ZEND_USE_TOLOWER_L 1 +#endif + #ifdef ZEND_USE_TOLOWER_L -#include <locale.h> static _locale_t current_locale = NULL; /* this is true global! may lead to strange effects on ZTS, but so may setlocale() */ #define zend_tolower(c) _tolower_l(c, current_locale) @@ -2537,13 +2546,85 @@ ZEND_API bool ZEND_FASTCALL zend_object_is_true(zval *op) /* {{{ */ } /* }}} */ -#ifdef ZEND_USE_TOLOWER_L ZEND_API void zend_update_current_locale(void) /* {{{ */ { +#ifdef ZEND_USE_TOLOWER_L +# if defined(ZEND_WIN32) && defined(_MSC_VER) current_locale = _get_current_locale(); +# else + current_locale = uselocale(0); +# endif +#endif +#if defined(ZEND_WIN32) && defined(_MSC_VER) + if (MB_CUR_MAX > 1) { + unsigned int cp = ___lc_codepage_func(); + CG(variable_width_locale) = 1; + // TODO: EUC-* are also ASCII compatible ??? + CG(ascii_compatible_locale) = + cp == 65001; /* UTF-8 */ + } else { + CG(variable_width_locale) = 0; + CG(ascii_compatible_locale) = 1; + } +#elif defined(MB_CUR_MAX) + /* Check if current locale uses variable width encoding */ + if (MB_CUR_MAX > 1) { +#if HAVE_NL_LANGINFO + const char *charmap = nl_langinfo(CODESET); +#else + char buf[16]; + const char *charmap = NULL; + const char *locale = setlocale(LC_CTYPE, NULL); + + if (locale) { + const char *dot = strchr(locale, '.'); + const char *modifier; + + if (dot) { + dot++; + modifier = strchr(dot, '@'); + if (!modifier) { + charmap = dot; + } else if (modifier - dot < sizeof(buf)) { + memcpy(buf, dot, modifier - dot); + buf[modifier - dot] = '\0'; + charmap = buf; + } + } + } +#endif + CG(variable_width_locale) = 1; + CG(ascii_compatible_locale) = 0; + + if (charmap) { + size_t len = strlen(charmap); + static const char *ascii_compatible_charmaps[] = { + "utf-8", + "utf8", + // TODO: EUC-* are also ASCII compatible ??? + NULL + }; + const char **p; + /* Check if current locale is ASCII compatible */ + for (p = ascii_compatible_charmaps; *p; p++) { + if (zend_binary_strcasecmp(charmap, len, *p, strlen(*p)) == 0) { + CG(ascii_compatible_locale) = 1; + break; + } + } + } + + } else { + CG(variable_width_locale) = 0; + CG(ascii_compatible_locale) = 1; + } +#else + /* We can't determine current charset. Assume the worst case */ + CG(variable_width_locale) = 1; + CG(ascii_compatible_locale) = 0; +#endif } /* }}} */ -#endif static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */ { unsigned char *p = (unsigned char*)str; diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h index b3ad598b74..8996a3d959 100644 --- a/Zend/zend_operators.h +++ b/Zend/zend_operators.h @@ -450,16 +450,7 @@ ZEND_API zend_long ZEND_FASTCALL zend_atol(const char *str, size_t str_len); #define convert_to_object_ex(zv) convert_to_object(zv) #define convert_scalar_to_number_ex(zv) convert_scalar_to_number(zv) -#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER) -/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */ -#define ZEND_USE_TOLOWER_L 1 -#endif - -#ifdef ZEND_USE_TOLOWER_L ZEND_API void zend_update_current_locale(void); -#else -#define zend_update_current_locale() -#endif /* The offset in bytes between the value and type fields of a zval */ #define ZVAL_OFFSETOF_TYPE \ |