summaryrefslogtreecommitdiff
path: root/Zend
diff options
context:
space:
mode:
authorDmitry Stogov <dmitry@zend.com>2021-02-19 15:42:21 +0300
committerDmitry Stogov <dmitry@zend.com>2021-02-19 15:42:21 +0300
commit5e015425263c28d40fd49ee386135f02d0e76975 (patch)
treede733e8dd302ccdf11670eba201337455154ccbd /Zend
parentbe9200998fe276f87455f5b4e000d5dd28fd249d (diff)
downloadphp-git-5e015425263c28d40fd49ee386135f02d0e76975.tar.gz
Improve basename(). Avoid calling mblen() for ASCII compatible locales.
Diffstat (limited to 'Zend')
-rw-r--r--Zend/zend_globals.h4
-rw-r--r--Zend/zend_operators.c87
-rw-r--r--Zend/zend_operators.h9
3 files changed, 88 insertions, 12 deletions
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h
index 825fad833c..e9b24fc0e3 100644
--- a/Zend/zend_globals.h
+++ b/Zend/zend_globals.h
@@ -95,6 +95,10 @@ struct _zend_compiler_globals {
bool skip_shebang;
bool increment_lineno;
+ bool variable_width_locale; /* UTF-8, Shift-JIS, Big5, ISO 2022, EUC, etc */
+ bool ascii_compatible_locale; /* locale uses ASCII characters as singletons */
+ /* and don't use them as lead/trail units */
+
zend_string *doc_comment;
uint32_t extra_fn_flags;
diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c
index 0cdb3aa085..a23dad9e1e 100644
--- a/Zend/zend_operators.c
+++ b/Zend/zend_operators.c
@@ -30,12 +30,21 @@
#include "zend_exceptions.h"
#include "zend_closures.h"
+#include <locale.h>
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
#ifdef __SSE2__
#include <emmintrin.h>
#endif
+#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER)
+/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */
+#define ZEND_USE_TOLOWER_L 1
+#endif
+
#ifdef ZEND_USE_TOLOWER_L
-#include <locale.h>
static _locale_t current_locale = NULL;
/* this is true global! may lead to strange effects on ZTS, but so may setlocale() */
#define zend_tolower(c) _tolower_l(c, current_locale)
@@ -2537,13 +2546,85 @@ ZEND_API bool ZEND_FASTCALL zend_object_is_true(zval *op) /* {{{ */
}
/* }}} */
-#ifdef ZEND_USE_TOLOWER_L
ZEND_API void zend_update_current_locale(void) /* {{{ */
{
+#ifdef ZEND_USE_TOLOWER_L
+# if defined(ZEND_WIN32) && defined(_MSC_VER)
current_locale = _get_current_locale();
+# else
+ current_locale = uselocale(0);
+# endif
+#endif
+#if defined(ZEND_WIN32) && defined(_MSC_VER)
+ if (MB_CUR_MAX > 1) {
+ unsigned int cp = ___lc_codepage_func();
+ CG(variable_width_locale) = 1;
+ // TODO: EUC-* are also ASCII compatible ???
+ CG(ascii_compatible_locale) =
+ cp == 65001; /* UTF-8 */
+ } else {
+ CG(variable_width_locale) = 0;
+ CG(ascii_compatible_locale) = 1;
+ }
+#elif defined(MB_CUR_MAX)
+ /* Check if current locale uses variable width encoding */
+ if (MB_CUR_MAX > 1) {
+#if HAVE_NL_LANGINFO
+ const char *charmap = nl_langinfo(CODESET);
+#else
+ char buf[16];
+ const char *charmap = NULL;
+ const char *locale = setlocale(LC_CTYPE, NULL);
+
+ if (locale) {
+ const char *dot = strchr(locale, '.');
+ const char *modifier;
+
+ if (dot) {
+ dot++;
+ modifier = strchr(dot, '@');
+ if (!modifier) {
+ charmap = dot;
+ } else if (modifier - dot < sizeof(buf)) {
+ memcpy(buf, dot, modifier - dot);
+ buf[modifier - dot] = '\0';
+ charmap = buf;
+ }
+ }
+ }
+#endif
+ CG(variable_width_locale) = 1;
+ CG(ascii_compatible_locale) = 0;
+
+ if (charmap) {
+ size_t len = strlen(charmap);
+ static const char *ascii_compatible_charmaps[] = {
+ "utf-8",
+ "utf8",
+ // TODO: EUC-* are also ASCII compatible ???
+ NULL
+ };
+ const char **p;
+ /* Check if current locale is ASCII compatible */
+ for (p = ascii_compatible_charmaps; *p; p++) {
+ if (zend_binary_strcasecmp(charmap, len, *p, strlen(*p)) == 0) {
+ CG(ascii_compatible_locale) = 1;
+ break;
+ }
+ }
+ }
+
+ } else {
+ CG(variable_width_locale) = 0;
+ CG(ascii_compatible_locale) = 1;
+ }
+#else
+ /* We can't determine current charset. Assume the worst case */
+ CG(variable_width_locale) = 1;
+ CG(ascii_compatible_locale) = 0;
+#endif
}
/* }}} */
-#endif
static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */ {
unsigned char *p = (unsigned char*)str;
diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h
index b3ad598b74..8996a3d959 100644
--- a/Zend/zend_operators.h
+++ b/Zend/zend_operators.h
@@ -450,16 +450,7 @@ ZEND_API zend_long ZEND_FASTCALL zend_atol(const char *str, size_t str_len);
#define convert_to_object_ex(zv) convert_to_object(zv)
#define convert_scalar_to_number_ex(zv) convert_scalar_to_number(zv)
-#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER)
-/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */
-#define ZEND_USE_TOLOWER_L 1
-#endif
-
-#ifdef ZEND_USE_TOLOWER_L
ZEND_API void zend_update_current_locale(void);
-#else
-#define zend_update_current_locale()
-#endif
/* The offset in bytes between the value and type fields of a zval */
#define ZVAL_OFFSETOF_TYPE \