summary refs log tree commit diff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- ext/mbstring/mbstring.c 199
1 files changed, 96 insertions, 103 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index e7e5c2abe5..c73b9d8f93 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -151,9 +151,16 @@ static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
+ mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_9
};
+static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
+ mbfl_no_encoding_ascii,
+ mbfl_no_encoding_utf8,
+ mbfl_no_encoding_koi8u
+};
+
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8
@@ -168,6 +175,7 @@ static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
+ { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};
@@ -360,7 +368,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_list_encodings, 0, 0, 0)
+ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
@@ -1035,75 +1043,72 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
enum mbfl_no_encoding no_encoding;
- const char *enc_name = NULL;
- uint enc_name_len = 0;
-
- no_encoding = new_value ? mbfl_name2no_encoding(new_value):
- mbfl_no_encoding_invalid;
+ const char *enc_name = NULL;
+ uint enc_name_len = 0;
+
+ no_encoding = new_value ? mbfl_name2no_encoding(new_value):
+ mbfl_no_encoding_invalid;
if (no_encoding != mbfl_no_encoding_invalid) {
- enc_name = new_value;
- enc_name_len = new_value_length;
- } else {
- switch (MBSTRG(language)) {
- case mbfl_no_language_uni:
- enc_name = "UTF-8";
- enc_name_len = sizeof("UTF-8") - 1;
- break;
- case mbfl_no_language_japanese:
- enc_name = "EUC-JP";
- enc_name_len = sizeof("EUC-JP") - 1;
- break;
- case mbfl_no_language_korean:
- enc_name = "EUC-KR";
- enc_name_len = sizeof("EUC-KR") - 1;
- break;
- case mbfl_no_language_simplified_chinese:
- enc_name = "EUC-CN";
- enc_name_len = sizeof("EUC-CN") - 1;
- break;
- case mbfl_no_language_traditional_chinese:
- enc_name = "EUC-TW";
- enc_name_len = sizeof("EUC-TW") - 1;
- break;
- case mbfl_no_language_russian:
- enc_name = "KOI8-R";
- enc_name_len = sizeof("KOI8-R") - 1;
- break;
- case mbfl_no_language_german:
- enc_name = "ISO-8859-15";
- enc_name_len = sizeof("ISO-8859-15") - 1;
- break;
- case mbfl_no_language_armenian:
- enc_name = "ArmSCII-8";
- enc_name_len = sizeof("ArmSCII-8") - 1;
- break;
- case mbfl_no_language_turkish:
- enc_name = "ISO-8859-9";
- enc_name_len = sizeof("ISO-8859-9") - 1;
- break;
- default:
- enc_name = "ISO-8859-1";
- enc_name_len = sizeof("ISO-8859-1") - 1;
- break;
- }
- no_encoding = mbfl_name2no_encoding(enc_name);
- }
- MBSTRG(internal_encoding) = no_encoding;
- MBSTRG(current_internal_encoding) = no_encoding;
+ enc_name = new_value;
+ enc_name_len = new_value_length;
+ } else {
+ switch (MBSTRG(language)) {
+ case mbfl_no_language_uni:
+ enc_name = "UTF-8";
+ enc_name_len = sizeof("UTF-8") - 1;
+ break;
+ case mbfl_no_language_japanese:
+ enc_name = "EUC-JP";
+ enc_name_len = sizeof("EUC-JP") - 1;
+ break;
+ case mbfl_no_language_korean:
+ enc_name = "EUC-KR";
+ enc_name_len = sizeof("EUC-KR") - 1;
+ break;
+ case mbfl_no_language_simplified_chinese:
+ enc_name = "EUC-CN";
+ enc_name_len = sizeof("EUC-CN") - 1;
+ break;
+ case mbfl_no_language_traditional_chinese:
+ enc_name = "EUC-TW";
+ enc_name_len = sizeof("EUC-TW") - 1;
+ break;
+ case mbfl_no_language_russian:
+ enc_name = "KOI8-R";
+ enc_name_len = sizeof("KOI8-R") - 1;
+ break;
+ case mbfl_no_language_german:
+ enc_name = "ISO-8859-15";
+ enc_name_len = sizeof("ISO-8859-15") - 1;
+ break;
+ case mbfl_no_language_armenian:
+ enc_name = "ArmSCII-8";
+ enc_name_len = sizeof("ArmSCII-8") - 1;
+ break;
+ case mbfl_no_language_turkish:
+ enc_name = "ISO-8859-9";
+ enc_name_len = sizeof("ISO-8859-9") - 1;
+ break;
+ default:
+ enc_name = "ISO-8859-1";
+ enc_name_len = sizeof("ISO-8859-1") - 1;
+ break;
+ }
+ no_encoding = mbfl_name2no_encoding(enc_name);
+ }
+ MBSTRG(internal_encoding) = no_encoding;
+ MBSTRG(current_internal_encoding) = no_encoding;
#if HAVE_MBREGEX
{
- const char *_enc_name = enc_name;
- if (FAILURE == php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC)) {
+ const char *enc_name = new_value;
+ if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
/* falls back to EUC-JP if an unknown encoding name is given */
- _enc_name = "EUC-JP";
- php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC);
+ enc_name = "EUC-JP";
+ php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
}
- php_mb_regex_set_mbctype(_enc_name TSRMLS_CC);
+ php_mb_regex_set_mbctype(new_value TSRMLS_CC);
}
#endif
-#ifdef ZEND_MULTIBYTE
- zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
-#endif /* ZEND_MULTIBYTE */
return SUCCESS;
}
/* }}} */
@@ -1253,28 +1258,22 @@ PHP_INI_BEGIN()
PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
- STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
- PHP_INI_SYSTEM | PHP_INI_PERDIR,
- OnUpdateLong,
- func_overload,
- zend_mbstring_globals, mbstring_globals)
-
+ STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
+ PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
+
STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
PHP_INI_SYSTEM | PHP_INI_PERDIR,
OnUpdate_mbstring_encoding_translation,
- encoding_translation,
- zend_mbstring_globals, mbstring_globals)
-
+ encoding_translation, zend_mbstring_globals, mbstring_globals)
PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
- "^(text/|application/xhtml\\+xml)",
- PHP_INI_ALL,
- OnUpdate_mbstring_http_output_conv_mimetypes)
+ "^(text/|application/xhtml\\+xml)",
+ PHP_INI_ALL,
+ OnUpdate_mbstring_http_output_conv_mimetypes)
STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
PHP_INI_ALL,
OnUpdateLong,
- strict_detection,
- zend_mbstring_globals, mbstring_globals)
+ strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
/* }}} */
@@ -1333,11 +1332,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
if (mbstring_globals->detect_order_list) {
free(mbstring_globals->detect_order_list);
}
-
if (mbstring_globals->http_output_conv_mimetypes) {
_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
}
-
#if HAVE_MBREGEX
php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
#endif
@@ -1450,6 +1447,7 @@ PHP_RINIT_FUNCTION(mbstring)
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
#ifdef ZEND_MULTIBYTE
+ zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
php_mb_set_zend_encoding(TSRMLS_C);
#endif /* ZEND_MULTIBYTE */
@@ -1578,7 +1576,7 @@ PHP_FUNCTION(mb_internal_encoding)
#ifdef ZEND_MULTIBYTE
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
- zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
+ zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
#endif /* ZEND_MULTIBYTE */
RETURN_TRUE;
@@ -3071,18 +3069,13 @@ PHP_FUNCTION(mb_detect_encoding)
/* }}} */
/* {{{ proto mixed mb_list_encodings()
- Returns an array of all supported entity encodings or Returns the entity encoding as a string */
+ Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)
{
const mbfl_encoding **encodings;
const mbfl_encoding *encoding;
int i;
- if (ZEND_NUM_ARGS() != 0) {
- RETVAL_FALSE;
- ZEND_WRONG_PARAM_COUNT();
- }
-
array_init(return_value);
i = 0;
encodings = mbfl_get_supported_encodings();
@@ -3319,8 +3312,8 @@ PHP_FUNCTION(mb_convert_variables)
int n, to_enc_len, argc, stack_level, stack_max, elistsz;
enum mbfl_no_encoding *elist;
char *name, *to_enc;
- void *ptmp;
-
+ void *ptmp;
+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
return;
}
@@ -3485,7 +3478,7 @@ detect_end:
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
if (ret != NULL) {
if (Z_REFCOUNT_PP(hash_entry) > 1) {
- Z_DELREF_P(*hash_entry);
+ Z_DELREF_PP(hash_entry);
MAKE_STD_ZVAL(*hash_entry);
} else {
zval_dtor(*hash_entry);
@@ -3864,7 +3857,7 @@ PHP_FUNCTION(mb_send_mail)
smart_str *s;
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
char *pp, *ee;
-
+
/* initialize */
mbfl_memory_device_init(&device, 0, 0);
mbfl_string_init(&orig_str);
@@ -4501,8 +4494,7 @@ MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
/* }}} */
/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
-MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from
- TSRMLS_DC)
+MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
int i;
mbfl_string string, result, *ret = NULL;
@@ -4722,8 +4714,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
/* }}} */
#ifdef ZEND_MULTIBYTE
-/* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
-MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
+
+/* {{{ php_mb_set_zend_encoding() */
+static int php_mb_set_zend_encoding(TSRMLS_D)
{
/* 'd better use mbfl_memory_device? */
char *name, *list = NULL;
@@ -4763,7 +4756,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
if (MBSTRG(encoding_translation)) {
/* notify internal encoding to Zend Engine */
name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
- zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
+ zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
@@ -4775,7 +4768,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
/* {{{ char *php_mb_encoding_detector()
* Interface for Zend Engine
*/
-char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
+static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
{
mbfl_string string;
const char *ret;
@@ -4798,7 +4791,7 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.val = (char*)arg_string;
+ string.val = (unsigned char *)arg_string;
string.len = arg_length;
ret = mbfl_identify_encoding_name(&string, elist, size, 0);
if (list != NULL) {
@@ -4813,9 +4806,9 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
/* }}} */
/* {{{ int php_mb_encoding_converter() */
-int php_mb_encoding_converter(char **to, int *to_length, const char *from,
- int from_length, const char *encoding_to, const char *encoding_from
- TSRMLS_DC)
+static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
+ const unsigned char *from, size_t from_length,
+ const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
mbfl_string string, result, *ret;
enum mbfl_no_encoding from_encoding, to_encoding;
@@ -4836,7 +4829,7 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
mbfl_string_init(&result);
string.no_encoding = from_encoding;
string.no_language = MBSTRG(language);
- string.val = (char*)from;
+ string.val = (unsigned char*)from;
string.len = from_length;
/* initialize converter */
@@ -4865,14 +4858,14 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
* returns number of odd (e.g. appears only first byte of multibyte
* character) chars
*/
-int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
+static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
{
mbfl_string mb_string;
mbfl_string_init(&mb_string);
mb_string.no_language = MBSTRG(language);
mb_string.no_encoding = mbfl_name2no_encoding(encoding);
- mb_string.val = (char*)string;
+ mb_string.val = (unsigned char *)string;
mb_string.len = length;
if (mb_string.no_encoding == mbfl_no_encoding_invalid) {