summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r--ext/mbstring/mbstring.c1038
1 files changed, 437 insertions, 601 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index d4119dda97..7013ebc0da 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -62,6 +62,7 @@
#include "ext/standard/info.h"
#include "libmbfl/mbfl/mbfl_allocators.h"
+#include "libmbfl/mbfl/mbfilter_pass.h"
#include "php_variables.h"
#include "php_globals.h"
@@ -96,18 +97,15 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring)
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
-static const char* php_mb_internal_encoding_name(TSRMLS_D);
-static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
-static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
-static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
-static int php_mb_set_zend_encoding(TSRMLS_D);
+static void php_mb_populate_current_detect_order_list(TSRMLS_D);
+
/* }}} */
/* {{{ php_mb_default_identify_list */
typedef struct _php_mb_nls_ident_list {
enum mbfl_no_language lang;
- const enum mbfl_no_encoding* list;
- int list_size;
+ const enum mbfl_no_encoding *list;
+ size_t list_size;
} php_mb_nls_ident_list;
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
@@ -650,12 +648,12 @@ static sapi_post_entry mbstr_post_entries[] = {
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
+php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
- int n, l, size, bauto, ret = 1;
+ int size, bauto, ret = SUCCESS;
+ size_t n;
char *p, *p1, *p2, *endp, *tmpstr;
- enum mbfl_no_encoding no_encoding;
- enum mbfl_no_encoding *src, *entry, *list;
+ const mbfl_encoding **entry, **list;
list = NULL;
if (value == NULL || value_length <= 0) {
@@ -665,14 +663,8 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
if (return_size) {
*return_size = 0;
}
- return 0;
+ return FAILURE;
} else {
- enum mbfl_no_encoding *identify_list;
- int identify_list_size;
-
- identify_list = MBSTRG(default_detect_order_list);
- identify_list_size = MBSTRG(default_detect_order_list_size);
-
/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
@@ -681,7 +673,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
else
tmpstr = (char *)estrndup(value, value_length);
if (tmpstr == NULL) {
- return 0;
+ return FAILURE;
}
/* count the number of listed encoding names */
endp = tmpstr + value_length;
@@ -691,9 +683,9 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
p1 = p2 + 1;
n++;
}
- size = n + identify_list_size;
+ size = n + MBSTRG(default_detect_order_list_size);
/* make list */
- list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
+ list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
if (list != NULL) {
entry = list;
n = 0;
@@ -717,19 +709,19 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
/* convert to the encoding number and check encoding */
if (strcasecmp(p1, "auto") == 0) {
if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t i;
bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
+ for (i = 0; i < identify_list_size; i++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
}
}
} else {
- no_encoding = mbfl_name2no_encoding(p1);
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
+ const mbfl_encoding *encoding = mbfl_name2encoding(p1);
+ if (encoding) {
+ *entry++ = encoding;
n++;
} else {
ret = 0;
@@ -769,40 +761,26 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
}
/* }}} */
-/* {{{ MBSTRING_API php_mb_check_encoding_list */
-MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC)
-{
- return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
-}
-/* }}} */
-
/* {{{ static int php_mb_parse_encoding_array()
* Return 0 if input contains any illegal encoding, otherwise 1.
* Even if any illegal encoding is detected the result may contain a list
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
+php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
zval **hash_entry;
HashTable *target_hash;
- int i, n, l, size, bauto,ret = 1;
- enum mbfl_no_encoding no_encoding;
- enum mbfl_no_encoding *src, *list, *entry;
+ int i, n, size, bauto, ret = SUCCESS;
+ const mbfl_encoding **list, **entry;
list = NULL;
if (Z_TYPE_P(array) == IS_ARRAY) {
- enum mbfl_no_encoding *identify_list;
- int identify_list_size;
-
- identify_list = MBSTRG(default_detect_order_list);
- identify_list_size = MBSTRG(default_detect_order_list_size);
-
target_hash = Z_ARRVAL_P(array);
zend_hash_internal_pointer_reset(target_hash);
i = zend_hash_num_elements(target_hash);
- size = i + identify_list_size;
- list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
+ size = i + MBSTRG(default_detect_order_list_size);
+ list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
if (list != NULL) {
entry = list;
bauto = 0;
@@ -814,22 +792,23 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
convert_to_string_ex(hash_entry);
if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t j;
+
bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
+ for (j = 0; j < identify_list_size; j++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
}
}
} else {
- no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
+ const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
+ if (encoding) {
+ *entry++ = encoding;
n++;
} else {
- ret = 0;
+ ret = FAILURE;
}
}
zend_hash_move_forward(target_hash);
@@ -846,7 +825,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
if (return_list) {
*return_list = NULL;
}
- ret = 0;
+ ret = FAILURE;
}
if (return_size) {
*return_size = n;
@@ -858,7 +837,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
if (return_size) {
*return_size = 0;
}
- ret = 0;
+ ret = FAILURE;
}
}
@@ -866,6 +845,118 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
}
/* }}} */
+/* {{{ zend_multibyte interface */
+static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
+{
+ return (const zend_encoding*)mbfl_name2encoding(encoding_name);
+}
+
+static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
+{
+ return ((const mbfl_encoding *)encoding)->name;
+}
+
+static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
+{
+ const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
+ if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+ return 1;
+ }
+ if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
+ return 1;
+ }
+ return 0;
+}
+
+static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
+{
+ mbfl_string string;
+
+ if (!list) {
+ list = (const zend_encoding **)MBSTRG(current_detect_order_list);
+ list_size = MBSTRG(current_detect_order_list_size);
+ }
+
+ mbfl_string_init(&string);
+ string.no_language = MBSTRG(language);
+ string.val = (unsigned char *)arg_string;
+ string.len = arg_length;
+ return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
+}
+
+static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
+{
+ mbfl_string string, result;
+ mbfl_buffer_converter *convd;
+ int status, loc;
+
+ /* new encoding */
+ /* initialize string */
+ mbfl_string_init(&string);
+ mbfl_string_init(&result);
+ string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
+ string.no_language = MBSTRG(language);
+ string.val = (unsigned char*)from;
+ string.len = from_length;
+
+ /* initialize converter */
+ convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
+ if (convd == NULL) {
+ return -1;
+ }
+ mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+
+ /* do it */
+ status = mbfl_buffer_converter_feed2(convd, &string, &loc);
+ if (status) {
+ mbfl_buffer_converter_delete(convd);
+ return (size_t)-1;
+ }
+
+ mbfl_buffer_converter_flush(convd);
+ if (!mbfl_buffer_converter_result(convd, &result)) {
+ mbfl_buffer_converter_delete(convd);
+ return (size_t)-1;
+ }
+
+ *to = result.val;
+ *to_length = result.len;
+
+ mbfl_buffer_converter_delete(convd);
+
+ return loc;
+}
+
+static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
+{
+ return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
+}
+
+static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
+{
+ return (const zend_encoding *)MBSTRG(internal_encoding);
+}
+
+static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
+{
+ MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
+ return SUCCESS;
+}
+
+static zend_multibyte_functions php_mb_zend_multibyte_functions = {
+ "mbstring",
+ php_mb_zend_encoding_fetcher,
+ php_mb_zend_encoding_name_getter,
+ php_mb_zend_encoding_lexer_compatibility_checker,
+ php_mb_zend_encoding_detector,
+ php_mb_zend_encoding_converter,
+ php_mb_zend_encoding_list_parser,
+ php_mb_zend_internal_encoding_getter,
+ php_mb_zend_internal_encoding_setter
+};
+/* }}} */
+
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
@@ -940,7 +1031,7 @@ static void _php_mb_free_regex(void *opaque)
#endif
/* {{{ php_mb_nls_get_default_detect_order_list */
-static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
+static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
{
size_t i;
@@ -1048,23 +1139,27 @@ static PHP_INI_MH(OnUpdate_mbstring_language)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
static PHP_INI_MH(OnUpdate_mbstring_detect_order)
{
- enum mbfl_no_encoding *list;
- int size;
+ const mbfl_encoding **list;
+ size_t size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(detect_order_list)) {
- free(MBSTRG(detect_order_list));
- }
- MBSTRG(detect_order_list) = list;
- MBSTRG(detect_order_list_size) = size;
- } else {
+ if (!new_value) {
if (MBSTRG(detect_order_list)) {
- free(MBSTRG(detect_order_list));
- MBSTRG(detect_order_list) = NULL;
+ pefree(MBSTRG(detect_order_list), 1);
}
+ MBSTRG(detect_order_list) = NULL;
+ MBSTRG(detect_order_list_size) = 0;
+ return SUCCESS;
+ }
+
+ if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
return FAILURE;
}
+ if (MBSTRG(detect_order_list)) {
+ pefree(MBSTRG(detect_order_list), 1);
+ }
+ MBSTRG(detect_order_list) = list;
+ MBSTRG(detect_order_list_size) = size;
return SUCCESS;
}
/* }}} */
@@ -1072,24 +1167,28 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
static PHP_INI_MH(OnUpdate_mbstring_http_input)
{
- enum mbfl_no_encoding *list;
- int size;
+ const mbfl_encoding **list;
+ size_t size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(http_input_list)) {
- free(MBSTRG(http_input_list));
- }
- MBSTRG(http_input_list) = list;
- MBSTRG(http_input_list_size) = size;
- } else {
+ if (!new_value) {
if (MBSTRG(http_input_list)) {
- free(MBSTRG(http_input_list));
- MBSTRG(http_input_list) = NULL;
+ pefree(MBSTRG(http_input_list), 1);
}
+ MBSTRG(http_input_list) = NULL;
MBSTRG(http_input_list_size) = 0;
+ return SUCCESS;
+ }
+
+ if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
return FAILURE;
}
+ if (MBSTRG(http_input_list)) {
+ pefree(MBSTRG(http_input_list), 1);
+ }
+ MBSTRG(http_input_list) = list;
+ MBSTRG(http_input_list_size) = size;
+
return SUCCESS;
}
/* }}} */
@@ -1097,20 +1196,23 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
static PHP_INI_MH(OnUpdate_mbstring_http_output)
{
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
- no_encoding = mbfl_name2no_encoding(new_value);
- if (no_encoding != mbfl_no_encoding_invalid) {
- MBSTRG(http_output_encoding) = no_encoding;
- MBSTRG(current_http_output_encoding) = no_encoding;
- } else {
- MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
- MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
- if (new_value != NULL && new_value_length > 0) {
- return FAILURE;
- }
+ if (new_value == NULL || new_value_length == 0) {
+ MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
+ MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
+ return SUCCESS;
+ }
+
+ encoding = mbfl_name2encoding(new_value);
+ if (!encoding) {
+ MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
+ MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
+ return FAILURE;
}
+ MBSTRG(http_output_encoding) = encoding;
+ MBSTRG(current_http_output_encoding) = encoding;
return SUCCESS;
}
/* }}} */
@@ -1118,46 +1220,44 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
- enum mbfl_no_encoding no_encoding;
-
- if (!new_value
- || !*new_value
- || (no_encoding = mbfl_name2no_encoding(new_value)) == mbfl_no_encoding_invalid) {
+ const mbfl_encoding *encoding;
+
+ if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
switch (MBSTRG(language)) {
case mbfl_no_language_uni:
- no_encoding = mbfl_no_encoding_utf8;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
break;
case mbfl_no_language_japanese:
- no_encoding = mbfl_no_encoding_euc_jp;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
break;
case mbfl_no_language_korean:
- no_encoding = mbfl_no_encoding_euc_kr;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
break;
case mbfl_no_language_simplified_chinese:
- no_encoding = mbfl_no_encoding_euc_cn;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
break;
case mbfl_no_language_traditional_chinese:
- no_encoding = mbfl_no_encoding_euc_tw;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
break;
case mbfl_no_language_russian:
- no_encoding = mbfl_no_encoding_koi8r;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
break;
case mbfl_no_language_german:
- no_encoding = mbfl_no_encoding_8859_15;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
break;
case mbfl_no_language_armenian:
- no_encoding = mbfl_no_encoding_armscii8;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
break;
case mbfl_no_language_turkish:
- no_encoding = mbfl_no_encoding_8859_9;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
break;
default:
- no_encoding = mbfl_no_encoding_8859_1;
+ encoding = NULL;
break;
}
}
- MBSTRG(internal_encoding) = no_encoding;
- MBSTRG(current_internal_encoding) = no_encoding;
+ MBSTRG(internal_encoding) = encoding;
+ MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
{
const char *enc_name = new_value;
@@ -1194,33 +1294,6 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
}
/* }}} */
-/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
-static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
-{
- int *list, size;
-
- if (!CG(multibyte)) {
- return FAILURE;
- }
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(script_encoding_list) != NULL) {
- free(MBSTRG(script_encoding_list));
- }
- MBSTRG(script_encoding_list) = list;
- MBSTRG(script_encoding_list_size) = size;
- } else {
- if (MBSTRG(script_encoding_list) != NULL) {
- free(MBSTRG(script_encoding_list));
- }
- MBSTRG(script_encoding_list) = NULL;
- MBSTRG(script_encoding_list_size) = 0;
- return FAILURE;
- }
-
- return SUCCESS;
-}
-/* }}} */
-
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
{
@@ -1263,7 +1336,7 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
{
if (new_value == NULL) {
- return FAILURE;
+ return FAILURE;
}
OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
@@ -1318,7 +1391,6 @@ PHP_INI_BEGIN()
PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
- PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
@@ -1343,17 +1415,15 @@ PHP_INI_END()
static PHP_GINIT_FUNCTION(mbstring)
{
mbstring_globals->language = mbfl_no_language_uni;
- mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
+ mbstring_globals->internal_encoding = NULL;
mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
- mbstring_globals->script_encoding_list = NULL;
- mbstring_globals->script_encoding_list_size = 0;
- mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
- mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
- mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
+ mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
+ mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
+ mbstring_globals->http_input_identify = NULL;
+ mbstring_globals->http_input_identify_get = NULL;
+ mbstring_globals->http_input_identify_post = NULL;
+ mbstring_globals->http_input_identify_cookie = NULL;
+ mbstring_globals->http_input_identify_string = NULL;
mbstring_globals->http_input_list = NULL;
mbstring_globals->http_input_list_size = 0;
mbstring_globals->detect_order_list = NULL;
@@ -1384,9 +1454,6 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
if (mbstring_globals->http_input_list) {
free(mbstring_globals->http_input_list);
}
- if (mbstring_globals->script_encoding_list) {
- free(mbstring_globals->script_encoding_list);
- }
if (mbstring_globals->detect_order_list) {
free(mbstring_globals->detect_order_list);
}
@@ -1426,12 +1493,9 @@ PHP_MINIT_FUNCTION(mbstring)
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
- zend_multibyte_set_functions(
- php_mb_encoding_detector,
- php_mb_encoding_converter,
- php_mb_oddlen,
- php_mb_check_encoding_list,
- php_mb_internal_encoding_name TSRMLS_CC);
+ if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
+ return FAILURE;
+ }
php_rfc1867_set_multibyte_callbacks(
php_mb_encoding_translation,
@@ -1460,8 +1524,6 @@ PHP_MSHUTDOWN_FUNCTION(mbstring)
/* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)
{
- int n;
- enum mbfl_no_encoding *list=NULL, *entry;
zend_function *func, *orig;
const struct mb_overload_def *p;
@@ -1472,22 +1534,7 @@ PHP_RINIT_FUNCTION(mbstring)
MBSTRG(illegalchars) = 0;
- n = 0;
- if (MBSTRG(detect_order_list)) {
- list = MBSTRG(detect_order_list);
- n = MBSTRG(detect_order_list_size);
- }
- if (n <= 0) {
- list = MBSTRG(default_detect_order_list);
- n = MBSTRG(default_detect_order_list_size);
- }
- entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
- MBSTRG(current_detect_order_list) = entry;
- MBSTRG(current_detect_order_list_size) = n;
- while (n > 0) {
- *entry++ = *list++;
- n--;
- }
+ php_mb_populate_current_detect_order_list(TSRMLS_C);
/* override original function. */
if (MBSTRG(func_overload)){
@@ -1519,10 +1566,7 @@ PHP_RINIT_FUNCTION(mbstring)
#if HAVE_MBREGEX
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
- if (CG(multibyte)) {
- zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
- php_mb_set_zend_encoding(TSRMLS_C);
- }
+ zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
return SUCCESS;
}
@@ -1546,11 +1590,11 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
}
/* clear http input identification. */
- MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
+ MBSTRG(http_input_identify) = NULL;
+ MBSTRG(http_input_identify_post) = NULL;
+ MBSTRG(http_input_identify_get) = NULL;
+ MBSTRG(http_input_identify_cookie) = NULL;
+ MBSTRG(http_input_identify_string) = NULL;
/* clear overloaded function. */
if (MBSTRG(func_overload)){
@@ -1625,31 +1669,27 @@ PHP_FUNCTION(mb_language)
Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)
{
- char *name = NULL;
+ const char *name = NULL;
int name_len;
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
RETURN_FALSE;
}
if (name == NULL) {
- name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+ name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
if (name != NULL) {
RETURN_STRING(name, 1);
} else {
RETURN_FALSE;
}
} else {
- no_encoding = mbfl_name2no_encoding(name);
- if (no_encoding == mbfl_no_encoding_invalid) {
+ encoding = mbfl_name2encoding(name);
+ if (!encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
RETURN_FALSE;
} else {
- MBSTRG(current_internal_encoding) = no_encoding;
- /* TODO: make independent from mbstring.encoding_translation? */
- if (CG(multibyte) && MBSTRG(encoding_translation)) {
- zend_multibyte_set_internal_encoding(name TSRMLS_CC);
- }
+ MBSTRG(current_internal_encoding) = encoding;
RETURN_TRUE;
}
}
@@ -1662,10 +1702,9 @@ PHP_FUNCTION(mb_http_input)
{
char *typ = NULL;
int typ_len;
- int retname, n;
- char *name, *list, *temp;
- enum mbfl_no_encoding *entry;
- enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
+ int retname;
+ char *list, *temp;
+ const mbfl_encoding *result = NULL;
retname = 1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
@@ -1693,40 +1732,38 @@ PHP_FUNCTION(mb_http_input)
break;
case 'I':
case 'i':
- array_init(return_value);
- entry = MBSTRG(http_input_list);
- n = MBSTRG(http_input_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
+ {
+ array_init(return_value);
+ const mbfl_encoding **entry = MBSTRG(http_input_list);
+ const size_t n = MBSTRG(http_input_list_size);
+ size_t i;
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
+ entry++;
}
- entry++;
- n--;
+ retname = 0;
}
- retname = 0;
break;
case 'L':
case 'l':
- entry = MBSTRG(http_input_list);
- n = MBSTRG(http_input_list_size);
- list = NULL;
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
+ {
+ const mbfl_encoding **entry = MBSTRG(http_input_list);
+ const size_t n = MBSTRG(http_input_list_size);
+ size_t i;
+ list = NULL;
+ for (i = 0; i < n; i++) {
if (list) {
temp = list;
- spprintf(&list, 0, "%s,%s", temp, name);
+ spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
efree(temp);
if (!list) {
break;
}
} else {
- list = estrdup(name);
+ list = estrdup((*entry)->name);
}
+ entry++;
}
- entry++;
- n--;
}
if (!list) {
RETURN_FALSE;
@@ -1741,9 +1778,8 @@ PHP_FUNCTION(mb_http_input)
}
if (retname) {
- if (result != mbfl_no_encoding_invalid &&
- (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
- RETVAL_STRING(name, 1);
+ if (result) {
+ RETVAL_STRING(result->name, 1);
} else {
RETVAL_FALSE;
}
@@ -1755,28 +1791,28 @@ PHP_FUNCTION(mb_http_input)
Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)
{
- char *name = NULL;
+ const char *name = NULL;
int name_len;
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
RETURN_FALSE;
}
if (name == NULL) {
- name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
+ name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
if (name != NULL) {
RETURN_STRING(name, 1);
} else {
RETURN_FALSE;
}
} else {
- no_encoding = mbfl_name2no_encoding(name);
- if (no_encoding == mbfl_no_encoding_invalid) {
+ encoding = mbfl_name2encoding(name);
+ if (!encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
RETURN_FALSE;
} else {
- MBSTRG(current_http_output_encoding) = no_encoding;
+ MBSTRG(current_http_output_encoding) = encoding;
RETURN_TRUE;
}
}
@@ -1788,32 +1824,26 @@ PHP_FUNCTION(mb_http_output)
PHP_FUNCTION(mb_detect_order)
{
zval **arg1 = NULL;
- int n, size;
- enum mbfl_no_encoding *list, *entry;
- char *name;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
return;
}
if (!arg1) {
+ size_t i;
+ size_t n = MBSTRG(current_detect_order_list_size);
+ const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
array_init(return_value);
- entry = MBSTRG(current_detect_order_list);
- n = MBSTRG(current_detect_order_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
entry++;
- n--;
}
} else {
- list = NULL;
- size = 0;
+ const mbfl_encoding **list = NULL;
+ size_t size = 0;
switch (Z_TYPE_PP(arg1)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1822,7 +1852,7 @@ PHP_FUNCTION(mb_detect_order)
break;
default:
convert_to_string_ex(arg1);
- if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1942,7 +1972,7 @@ PHP_FUNCTION(mb_parse_str)
char *encstr = NULL;
int encstr_len;
php_mb_encoding_handler_info_t info;
- enum mbfl_no_encoding detected;
+ const mbfl_encoding *detected;
track_vars_array = NULL;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
@@ -1970,7 +2000,7 @@ PHP_FUNCTION(mb_parse_str)
MBSTRG(http_input_identify) = detected;
- RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
+ RETVAL_BOOL(detected);
if (encstr != NULL) efree(encstr);
}
@@ -1986,7 +2016,7 @@ PHP_FUNCTION(mb_output_handler)
mbfl_string string, result;
const char *charset;
char *p;
- enum mbfl_no_encoding encoding;
+ const mbfl_encoding *encoding;
int last_feed, len;
unsigned char send_text_mimetype = 0;
char *s, *mimetype = NULL;
@@ -2005,7 +2035,7 @@ PHP_FUNCTION(mb_output_handler)
mbfl_buffer_converter_delete(MBSTRG(outconv));
MBSTRG(outconv) = NULL;
}
- if (encoding == mbfl_no_encoding_pass) {
+ if (encoding == &mbfl_encoding_pass) {
RETURN_STRINGL(arg_string, arg_string_len, 1);
}
@@ -2027,7 +2057,7 @@ PHP_FUNCTION(mb_output_handler)
/* if content-type is not yet set, set it and activate the converter */
if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
- charset = mbfl_no2preferred_mime_name(encoding);
+ charset = encoding->mime_name;
if (charset) {
len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
if (sapi_add_header(p, len, 0) != FAILURE) {
@@ -2035,7 +2065,7 @@ PHP_FUNCTION(mb_output_handler)
}
}
/* activate the converter */
- MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
+ MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
if (send_text_mimetype){
efree(mimetype);
}
@@ -2056,7 +2086,7 @@ PHP_FUNCTION(mb_output_handler)
/* feed the string */
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
string.val = (unsigned char *)arg_string;
string.len = arg_string_len;
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
@@ -2093,7 +2123,7 @@ PHP_FUNCTION(mb_strlen)
string.no_language = MBSTRG(language);
if (enc_name == NULL) {
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
} else {
string.no_encoding = mbfl_name2no_encoding(enc_name);
if (string.no_encoding == mbfl_no_encoding_invalid) {
@@ -2124,9 +2154,9 @@ PHP_FUNCTION(mb_strpos)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
offset = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
@@ -2191,9 +2221,9 @@ PHP_FUNCTION(mb_strrpos)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2280,7 +2310,7 @@ PHP_FUNCTION(mb_stripos)
int n;
long offset;
mbfl_string haystack, needle;
- char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
int from_encoding_len;
n = -1;
offset = 0;
@@ -2309,7 +2339,7 @@ PHP_FUNCTION(mb_strripos)
int n;
long offset;
mbfl_string haystack, needle;
- const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
int from_encoding_len;
n = -1;
offset = 0;
@@ -2341,9 +2371,9 @@ PHP_FUNCTION(mb_strstr)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2399,9 +2429,9 @@ PHP_FUNCTION(mb_strrchr)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2454,13 +2484,13 @@ PHP_FUNCTION(mb_stristr)
unsigned int from_encoding_len, len, mblen;
int n;
mbfl_string haystack, needle, result, *ret = NULL;
- const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
@@ -2512,13 +2542,13 @@ PHP_FUNCTION(mb_strrichr)
zend_bool part = 0;
int n, from_encoding_len, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
- char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->name;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
@@ -2570,9 +2600,9 @@ PHP_FUNCTION(mb_substr_count)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
return;
@@ -2616,7 +2646,7 @@ PHP_FUNCTION(mb_substr)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (argc == 4) {
string.no_encoding = mbfl_name2no_encoding(encoding);
@@ -2685,7 +2715,7 @@ PHP_FUNCTION(mb_strcut)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
return;
@@ -2748,7 +2778,7 @@ PHP_FUNCTION(mb_strwidth)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
return;
@@ -2787,9 +2817,9 @@ PHP_FUNCTION(mb_strimwidth)
mbfl_string_init(&string);
mbfl_string_init(&marker);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
marker.no_language = MBSTRG(language);
- marker.no_encoding = MBSTRG(current_internal_encoding);
+ marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
marker.val = NULL;
marker.len = 0;
@@ -2833,9 +2863,10 @@ PHP_FUNCTION(mb_strimwidth)
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
{
mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
+ const mbfl_encoding *from_encoding, *to_encoding;
mbfl_buffer_converter *convd;
- int size, *list;
+ size_t size;
+ const mbfl_encoding **list;
char *output=NULL;
if (output_len) {
@@ -2846,8 +2877,8 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* new encoding */
if (_to_encoding && strlen(_to_encoding)) {
- to_encoding = mbfl_name2no_encoding(_to_encoding);
- if (to_encoding == mbfl_no_encoding_invalid) {
+ to_encoding = mbfl_name2encoding(_to_encoding);
+ if (!to_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
return NULL;
}
@@ -2859,7 +2890,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
string.val = (unsigned char *)input;
string.len = length;
@@ -2871,17 +2902,17 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
if (size == 1) {
from_encoding = *list;
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
} else if (size > 1) {
/* auto detect */
- from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
- if (from_encoding != mbfl_no_encoding_invalid) {
- string.no_encoding = from_encoding;
+ from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
+ if (from_encoding) {
+ string.no_encoding = from_encoding->no_encoding;
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
from_encoding = mbfl_no_encoding_pass;
to_encoding = from_encoding;
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
}
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
@@ -2892,7 +2923,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
+ convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
return NULL;
@@ -2993,7 +3024,8 @@ PHP_FUNCTION(mb_convert_encoding)
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
long case_mode = 0;
char *newstr;
@@ -3017,7 +3049,8 @@ PHP_FUNCTION(mb_convert_case)
*/
PHP_FUNCTION(mb_strtoupper)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
char *newstr;
size_t ret_len;
@@ -3040,7 +3073,8 @@ PHP_FUNCTION(mb_strtoupper)
*/
PHP_FUNCTION(mb_strtolower)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
char *newstr;
size_t ret_len;
@@ -3068,9 +3102,9 @@ PHP_FUNCTION(mb_detect_encoding)
zval *encoding_list;
mbfl_string string;
- const char *ret;
- enum mbfl_no_encoding *elist;
- int size, *list;
+ const mbfl_encoding *ret;
+ const mbfl_encoding **elist, **list;
+ size_t size;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
return;
@@ -3082,7 +3116,7 @@ PHP_FUNCTION(mb_detect_encoding)
if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) {
switch (Z_TYPE_P(encoding_list)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
list = NULL;
@@ -3092,7 +3126,7 @@ PHP_FUNCTION(mb_detect_encoding)
break;
default:
convert_to_string(encoding_list);
- if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
list = NULL;
@@ -3121,7 +3155,7 @@ PHP_FUNCTION(mb_detect_encoding)
string.no_language = MBSTRG(language);
string.val = (unsigned char *)str;
string.len = str_len;
- ret = mbfl_identify_encoding_name(&string, elist, size, strict);
+ ret = mbfl_identify_encoding2(&string, elist, size, strict);
if (list != NULL) {
efree((void *)list);
@@ -3131,7 +3165,7 @@ PHP_FUNCTION(mb_detect_encoding)
RETURN_FALSE;
}
- RETVAL_STRING((char *)ret, 1);
+ RETVAL_STRING((char *)ret->name, 1);
}
/* }}} */
@@ -3196,7 +3230,7 @@ PHP_FUNCTION(mb_encode_mimeheader)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
return;
@@ -3245,14 +3279,14 @@ PHP_FUNCTION(mb_decode_mimeheader)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
return;
}
mbfl_string_init(&result);
- ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
+ ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
if (ret != NULL) {
RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
} else {
@@ -3274,7 +3308,7 @@ PHP_FUNCTION(mb_convert_kana)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
return;
@@ -3373,12 +3407,13 @@ PHP_FUNCTION(mb_convert_variables)
zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
HashTable *target_hash;
mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
+ const mbfl_encoding *from_encoding, *to_encoding;
mbfl_encoding_detector *identd;
mbfl_buffer_converter *convd;
- int n, to_enc_len, argc, stack_level, stack_max, elistsz;
- enum mbfl_no_encoding *elist;
- char *name, *to_enc;
+ int n, to_enc_len, argc, stack_level, stack_max;
+ size_t elistsz;
+ const mbfl_encoding **elist;
+ char *to_enc;
void *ptmp;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
@@ -3386,8 +3421,8 @@ PHP_FUNCTION(mb_convert_variables)
}
/* new encoding */
- to_encoding = mbfl_name2no_encoding(to_enc);
- if (to_encoding == mbfl_no_encoding_invalid) {
+ to_encoding = mbfl_name2encoding(to_enc);
+ if (!to_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
efree(args);
RETURN_FALSE;
@@ -3397,7 +3432,7 @@ PHP_FUNCTION(mb_convert_variables)
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
/* pre-conversion encoding */
@@ -3418,11 +3453,11 @@ PHP_FUNCTION(mb_convert_variables)
from_encoding = *elist;
} else {
/* auto detect */
- from_encoding = mbfl_no_encoding_invalid;
+ from_encoding = NULL;
stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
stack_level = 0;
- identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
+ identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
if (identd != NULL) {
n = 0;
while (n < argc || stack_level > 0) {
@@ -3475,12 +3510,12 @@ PHP_FUNCTION(mb_convert_variables)
}
}
detect_end:
- from_encoding = mbfl_encoding_detector_judge(identd);
+ from_encoding = mbfl_encoding_detector_judge2(identd);
mbfl_encoding_detector_delete(identd);
}
efree(stack);
- if (from_encoding == mbfl_no_encoding_invalid) {
+ if (!from_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
from_encoding = mbfl_no_encoding_pass;
}
@@ -3491,7 +3526,7 @@ detect_end:
/* create converter */
convd = NULL;
if (from_encoding != mbfl_no_encoding_pass) {
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+ convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
RETURN_FALSE;
@@ -3573,9 +3608,8 @@ detect_end:
efree(args);
- name = (char *)mbfl_no_encoding2name(from_encoding);
- if (name != NULL) {
- RETURN_STRING(name, 1);
+ if (from_encoding) {
+ RETURN_STRING(from_encoding->name, 1);
} else {
RETURN_FALSE;
}
@@ -3602,7 +3636,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
string.val = (unsigned char *)str;
string.len = str_len;
@@ -4055,10 +4089,10 @@ PHP_FUNCTION(mb_send_mail)
orig_str.no_language = MBSTRG(language);
orig_str.val = (unsigned char *)subject;
orig_str.len = subject_len;
- orig_str.no_encoding = MBSTRG(current_internal_encoding);
- if (orig_str.no_encoding == mbfl_no_encoding_invalid
- || orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
+ if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
+ const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
}
pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
if (pstr != NULL) {
@@ -4074,11 +4108,11 @@ PHP_FUNCTION(mb_send_mail)
orig_str.no_language = MBSTRG(language);
orig_str.val = (unsigned char *)message;
orig_str.len = (unsigned int)message_len;
- orig_str.no_encoding = MBSTRG(current_internal_encoding);
+ orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (orig_str.no_encoding == mbfl_no_encoding_invalid
- || orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
+ const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
}
pstr = NULL;
@@ -4186,13 +4220,13 @@ PHP_FUNCTION(mb_send_mail)
PHP_FUNCTION(mb_get_info)
{
char *typ = NULL;
- int typ_len, n;
+ int typ_len;
+ size_t n;
char *name;
const struct mb_overload_def *over_func;
zval *row1, *row2;
const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
- enum mbfl_no_encoding *entry;
- zval *row3;
+ const mbfl_encoding **entry;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
RETURN_FALSE;
@@ -4200,14 +4234,14 @@ PHP_FUNCTION(mb_get_info)
if (!typ || !strcasecmp("all", typ)) {
array_init(return_value);
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
- add_assoc_string(return_value, "internal_encoding", name, 1);
+ if (MBSTRG(current_internal_encoding)) {
+ add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
}
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
- add_assoc_string(return_value, "http_input", name, 1);
+ if (MBSTRG(http_input_identify)) {
+ add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
}
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
- add_assoc_string(return_value, "http_output", name, 1);
+ if (MBSTRG(current_http_output_encoding)) {
+ add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
}
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
@@ -4249,15 +4283,13 @@ PHP_FUNCTION(mb_get_info)
}
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
- if(n > 0) {
+ if (n > 0) {
+ size_t i;
MAKE_STD_ZVAL(row2);
array_init(row2);
- while (n > 0) {
- if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
- add_next_index_string(row2, name, 1);
- }
+ for (i = 0; i < n; i++) {
+ add_next_index_string(row2, (*entry)->name, 1);
entry++;
- n--;
}
add_assoc_zval(return_value, "detect_order", row2);
}
@@ -4275,33 +4307,17 @@ PHP_FUNCTION(mb_get_info)
} else {
add_assoc_string(return_value, "strict_detection", "Off", 1);
}
- if (CG(multibyte)) {
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- if(n > 0) {
- MAKE_STD_ZVAL(row3);
- array_init(row3);
- while (n > 0) {
- if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
- add_next_index_string(row3, name, 1);
- }
- entry++;
- n--;
- }
- add_assoc_zval(return_value, "script_encoding", row3);
- }
- }
} else if (!strcasecmp("internal_encoding", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(current_internal_encoding)) {
+ RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
}
} else if (!strcasecmp("http_input", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(http_input_identify)) {
+ RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
}
} else if (!strcasecmp("http_output", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(current_http_output_encoding)) {
+ RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
}
} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
@@ -4349,15 +4365,11 @@ PHP_FUNCTION(mb_get_info)
} else if (!strcasecmp("detect_order", typ)) {
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
- if(n > 0) {
+ if (n > 0) {
+ size_t i;
array_init(return_value);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
- entry++;
- n--;
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
}
}
} else if (!strcasecmp("substitute_character", typ)) {
@@ -4377,22 +4389,6 @@ PHP_FUNCTION(mb_get_info)
RETVAL_STRING("Off", 1);
}
} else {
- if (CG(multibyte) && !strcasecmp("script_encoding", typ)) {
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- if(n > 0) {
- array_init(return_value);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
- entry++;
- n--;
- }
- }
- return;
- }
RETURN_FALSE;
}
}
@@ -4407,7 +4403,7 @@ PHP_FUNCTION(mb_check_encoding)
char *enc = NULL;
int enc_len;
mbfl_buffer_converter *convd;
- enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
+ const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
mbfl_string string, result, *ret = NULL;
long illegalchars = 0;
@@ -4420,14 +4416,14 @@ PHP_FUNCTION(mb_check_encoding)
}
if (enc != NULL) {
- no_encoding = mbfl_name2no_encoding(enc);
- if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
+ encoding = mbfl_name2encoding(enc);
+ if (!encoding || encoding == &mbfl_encoding_pass) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
RETURN_FALSE;
}
}
- convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
+ convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
RETURN_FALSE;
@@ -4436,7 +4432,7 @@ PHP_FUNCTION(mb_check_encoding)
mbfl_buffer_converter_illegal_substchar(convd, 0);
/* initialize string */
- mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
+ mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
mbfl_string_init(&result);
string.val = (unsigned char *)var;
@@ -4455,6 +4451,34 @@ PHP_FUNCTION(mb_check_encoding)
}
/* }}} */
+
+/* {{{ php_mb_populate_current_detect_order_list */
+static void php_mb_populate_current_detect_order_list(TSRMLS_D)
+{
+ const mbfl_encoding **entry = 0;
+ size_t nentries;
+
+ if (MBSTRG(current_detect_order_list)) {
+ return;
+ }
+
+ if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
+ nentries = MBSTRG(detect_order_list_size);
+ entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
+ memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
+ } else {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ nentries = MBSTRG(default_detect_order_list_size);
+ entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
+ size_t i;
+ for (i = 0; i < nentries; i++) {
+ entry[i] = mbfl_no2encoding(src[i]);
+ }
+ }
+ MBSTRG(current_detect_order_list) = entry;
+ MBSTRG(current_detect_order_list_size) = nentries;
+}
+
/* {{{ MBSTRING_API int php_mb_encoding_translation() */
MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
{
@@ -4483,8 +4507,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *e
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
{
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
+ return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
/* }}} */
@@ -4532,8 +4555,7 @@ MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t
/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
{
- return php_mb_safe_strrchr_ex(s, c, nbytes,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
+ return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
}
/* }}} */
@@ -4548,12 +4570,10 @@ MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
{
- if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(http_input_identify)));
+ if (MBSTRG(http_input_identify)) {
+ return php_mb_mbchar_bytes_ex(s, MBSTRG(http_input_identify));
} else {
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
+ return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
}
/* }}} */
@@ -4563,13 +4583,13 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co
{
int i;
mbfl_string string, result, *ret = NULL;
- enum mbfl_no_encoding from_encoding, to_encoding;
+ const mbfl_encoding *from_encoding, *to_encoding;
mbfl_buffer_converter *convd;
if (encoding_to) {
/* new encoding */
- to_encoding = mbfl_name2no_encoding(encoding_to);
- if (to_encoding == mbfl_no_encoding_invalid) {
+ to_encoding = mbfl_name2encoding(encoding_to);
+ if (!to_encoding) {
return -1;
}
} else {
@@ -4577,8 +4597,8 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co
}
if (encoding_from) {
/* old encoding */
- from_encoding = mbfl_name2no_encoding(encoding_from);
- if (from_encoding == mbfl_no_encoding_invalid) {
+ from_encoding = mbfl_name2encoding(encoding_from);
+ if (from_encoding) {
return -1;
}
} else {
@@ -4592,7 +4612,7 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co
/* initialize string */
mbfl_string_init(&string);
mbfl_string_init(&result);
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
for (i=0; i<num; i++){
@@ -4600,7 +4620,7 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co
string.len = len[i];
/* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
+ convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
if (convd == NULL) {
return -1;
}
@@ -4628,23 +4648,17 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co
MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
{
mbfl_string string;
- enum mbfl_no_encoding *elist;
- enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
+ const mbfl_encoding **elist;
+ const mbfl_encoding *encoding = NULL;
mbfl_encoding_detector *identd = NULL;
- int size;
- enum mbfl_no_encoding *list;
+ size_t size;
+ const mbfl_encoding **list;
- if (MBSTRG(http_input_list_size) == 1 &&
- MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
- MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
- return SUCCESS;
- }
+ php_mb_populate_current_detect_order_list(TSRMLS_C);
- if (MBSTRG(http_input_list_size) == 1 &&
- MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
- mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
- MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
+ if (MBSTRG(http_input_list_size) == 1 && MBSTRG(http_input_list)[0] == &mbfl_encoding_pass) {
+ MBSTRG(http_input_identify) = &mbfl_encoding_pass;
return SUCCESS;
}
@@ -4659,24 +4673,16 @@ MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length
} else {
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
- if (size <= 0){
- elist = MBSTRG(default_detect_order_list);
- size = MBSTRG(default_detect_order_list_size);
- }
}
} else {
elist = MBSTRG(current_detect_order_list);
size = MBSTRG(current_detect_order_list_size);
- if (size <= 0){
- elist = MBSTRG(default_detect_order_list);
- size = MBSTRG(default_detect_order_list_size);
- }
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
+ identd = mbfl_encoding_detector_new2(elist, size, MBSTRG(strict_detection));
if (identd) {
int n = 0;
@@ -4688,11 +4694,11 @@ MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length
}
n++;
}
- encoding = mbfl_encoding_detector_judge(identd);
+ encoding = mbfl_encoding_detector_judge2(identd);
mbfl_encoding_detector_delete(identd);
}
- if (encoding != mbfl_no_encoding_invalid) {
+ if (encoding) {
MBSTRG(http_input_identify) = encoding;
return SUCCESS;
} else {
@@ -4712,9 +4718,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
do {
size_t len = 0;
@@ -4778,176 +4784,6 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
}
/* }}} */
-/* {{{ php_mb_set_zend_encoding() */
-static int php_mb_set_zend_encoding(TSRMLS_D)
-{
- /* 'd better use mbfl_memory_device? */
- char *name, *list = NULL;
- int n, *entry, list_size = 0;
-
- /* notify script encoding to Zend Engine */
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- list_size += strlen(name) + 1;
- if (!list) {
- list = (char*)emalloc(list_size);
- *list = '\0';
- } else {
- list = (char*)erealloc(list, list_size);
- strcat(list, ",");
- }
- strcat(list, name);
- }
- entry++;
- n--;
- }
- zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
- if (list) {
- efree(list);
- }
-
- /* TODO: make independent from mbstring.encoding_translation? */
- if (MBSTRG(encoding_translation)) {
- /* notify internal encoding to Zend Engine */
- name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
- zend_multibyte_set_internal_encoding(name TSRMLS_CC);
- }
-
- return 0;
-}
-/* }}} */
-
-/* {{{ char *php_mb_encoding_detector()
- * Interface for Zend Engine
- */
-static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
-{
- mbfl_string string;
- const char *ret;
- enum mbfl_no_encoding *elist;
- int size, *list;
-
- /* make encoding list */
- list = NULL;
- size = 0;
- php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
- if (size <= 0) {
- return NULL;
- }
- if (size > 0 && list != NULL) {
- elist = list;
- } else {
- elist = MBSTRG(current_detect_order_list);
- size = MBSTRG(current_detect_order_list_size);
- }
-
- mbfl_string_init(&string);
- string.no_language = MBSTRG(language);
- string.val = (unsigned char *)arg_string;
- string.len = arg_length;
- ret = mbfl_identify_encoding_name(&string, elist, size, 0);
- if (list != NULL) {
- efree((void *)list);
- }
- if (ret != NULL) {
- return estrdup(ret);
- } else {
- return NULL;
- }
-}
-/* }}} */
-
-/* {{{ int php_mb_encoding_converter() */
-static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
-{
- mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
- mbfl_buffer_converter *convd;
-
- /* new encoding */
- to_encoding = mbfl_name2no_encoding(encoding_to);
- if (to_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- /* old encoding */
- from_encoding = mbfl_name2no_encoding(encoding_from);
- if (from_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- /* initialize string */
- mbfl_string_init(&string);
- mbfl_string_init(&result);
- string.no_encoding = from_encoding;
- string.no_language = MBSTRG(language);
- string.val = (unsigned char*)from;
- string.len = from_length;
-
- /* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
- if (convd == NULL) {
- return -1;
- }
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
-
- /* do it */
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- if (ret != NULL) {
- *to = ret->val;
- *to_length = ret->len;
- }
-
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
-
- return ret ? 0 : -1;
-}
-/* }}} */
-
-/* {{{ int php_mb_oddlen()
- * returns number of odd (e.g. appears only first byte of multibyte
- * character) chars
- */
-static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
-{
- mbfl_string mb_string;
-
- mbfl_string_init(&mb_string);
- mb_string.no_language = MBSTRG(language);
- mb_string.no_encoding = mbfl_name2no_encoding(encoding);
- mb_string.val = (unsigned char *)string;
- mb_string.len = length;
-
- if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
- return 0;
- }
- return mbfl_oddlen(&mb_string);
-}
-/* }}} */
-
-/* {{{ const char* php_mb_internal_encoding_name()
- * returns name of internal encoding
- */
-static const char* php_mb_internal_encoding_name(TSRMLS_D)
-{
- const char *name = mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
-
- if (!name ||
- !*name ||
- (strlen(name) == 4 &&
- (!memcmp("pass", name, sizeof("pass") - 1) ||
- !memcmp("auto", name, sizeof("auto") - 1) ||
- !memcmp("none", name, sizeof("none") - 1)))) {
- return NULL;
- }
- return name;
-}
-/* }}} */
-
-
#endif /* HAVE_MBSTRING */
/*