summary | refs | log | tree | commit | diff
path: root/Zend/zend_multibyte.c
diff options
context:
space:
mode:
author: Moriyoshi Koizumi <moriyoshi@php.net> 2010-12-19 16:36:37 +0000
committer: Moriyoshi Koizumi <moriyoshi@php.net> 2010-12-19 16:36:37 +0000
commit: bbf3d43c1ee0ad53b03c3821cd630f0746d5e954 (patch)
tree: fd11ea79a69ee445ffde8310a3760603bf3df821 /Zend/zend_multibyte.c
parent: c28cac404d2d0590ba2811f41331c60d09adbf1e (diff)
download: php-git-bbf3d43c1ee0ad53b03c3821cd630f0746d5e954.tar.gz
* Refactor zend_multibyte facility.
Now mbstring.script_encoding is superseded by zend.script_encoding.
Diffstat (limited to 'Zend/zend_multibyte.c')
-rw-r--r-- Zend/zend_multibyte.c 1252
1 files changed, 121 insertions, 1131 deletions
diff --git a/Zend/zend_multibyte.c b/Zend/zend_multibyte.c
index 3ca5191c00..dec07ecdcd 100644
--- a/Zend/zend_multibyte.c
+++ b/Zend/zend_multibyte.c
@@ -23,1219 +23,209 @@
#include "zend_compile.h"
#include "zend_operators.h"
#include "zend_multibyte.h"
+#include "zend_ini.h"
-static size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_length, const char *to_encoding, const unsigned char *from, size_t from_length, const char *from_encoding TSRMLS_DC);
-size_t sjis_input_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC);
-size_t sjis_output_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC);
-static char* zend_multibyte_assemble_encoding_list(zend_encoding **encoding_list, size_t encoding_list_size);
-static int zend_multibyte_parse_encoding_list(const char *encoding_list,
-size_t encoding_list_size, zend_encoding ***result, size_t *result_size);
-static zend_encoding *zend_multibyte_find_script_encoding(zend_encoding *onetime_encoding TSRMLS_DC);
-static zend_encoding *zend_multibyte_detect_unicode(TSRMLS_D);
-static zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC);
-
-/*
- * encodings
- */
-static const char *ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL};
-static zend_encoding encoding_ucs2 = {
- NULL,
- NULL,
- "UCS-2",
- (const char *(*)[])&ucs2_aliases,
- 0
-};
-
-static zend_encoding encoding_ucs2be = {
- NULL,
- NULL,
- "UCS-2BE",
- NULL,
- 0
-};
-
-static zend_encoding encoding_ucs2le = {
- NULL,
- NULL,
- "UCS-2LE",
- NULL,
- 0
-};
-
-static const char *ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL};
-static zend_encoding encoding_ucs4 = {
- NULL,
- NULL,
- "UCS-4",
- (const char *(*)[])&ucs4_aliases,
- 0
-};
-
-static zend_encoding encoding_ucs4be = {
- NULL,
- NULL,
- "UCS-4BE",
- NULL,
- 0
-};
-
-static zend_encoding encoding_ucs4le = {
- NULL,
- NULL,
- "UCS-4LE",
- NULL,
- 0
-};
-
-static const char *utf32_aliases[] = {"utf32", NULL};
-static zend_encoding encoding_utf32 = {
- NULL,
- NULL,
- "UTF-32",
- (const char *(*)[])&utf32_aliases,
- 0
-};
-
-static zend_encoding encoding_utf32be = {
- NULL,
- NULL,
- "UTF-32BE",
- NULL,
- 0
-};
-
-static zend_encoding encoding_utf32le = {
- NULL,
- NULL,
- "UTF-32LE",
- NULL,
- 0
-};
-
-static const char *utf16_aliases[] = {"utf16", NULL};
-static zend_encoding encoding_utf16 = {
- NULL,
- NULL,
- "UTF-16",
- (const char *(*)[])&utf16_aliases,
- 0
-};
-
-static zend_encoding encoding_utf16be = {
- NULL,
- NULL,
- "UTF-16BE",
- NULL,
- 0
-};
-
-static zend_encoding encoding_utf16le = {
- NULL,
- NULL,
- "UTF-16LE",
- NULL,
- 0
-};
-
-static const char *utf8_aliases[] = {"utf8", NULL};
-static zend_encoding encoding_utf8 = {
- NULL,
- NULL,
- "UTF-8",
- (const char *(*)[])&utf8_aliases,
- 1
-};
-
-static const char *ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "cp367", "csASCII", NULL};
-static zend_encoding encoding_ascii = {
- NULL,
- NULL,
- "ASCII",
- (const char *(*)[])&ascii_aliases,
- 1
-};
-
-static const char *euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL};
-static zend_encoding encoding_euc_jp = {
- NULL,
- NULL,
- "EUC-JP",
- (const char *(*)[])&euc_jp_aliases,
- 1
-};
-
-static const char *sjis_aliases[] = {"x-sjis", "SJIS", "SHIFT-JIS", NULL};
-static zend_encoding encoding_sjis = {
- sjis_input_filter,
- sjis_output_filter,
- "Shift_JIS",
- (const char *(*)[])&sjis_aliases,
- 0
-};
-
-static const char *eucjp_win_aliases[] = {"eucJP-open", NULL};
-static zend_encoding encoding_eucjp_win = {
- NULL,
- NULL,
- "eucJP-win",
- (const char *(*)[])&eucjp_win_aliases,
- 1
-};
-
-static const char *sjis_win_aliases[] = {"SJIS-open", "MS_Kanji", "Windows-31J", "CP932", NULL};
-static zend_encoding encoding_sjis_win = {
- /* sjis-filters does not care about diffs of Shift_JIS and CP932 */
- sjis_input_filter,
- sjis_output_filter,
- "SJIS-win",
- (const char *(*)[])&sjis_win_aliases,
- 0
-};
-
-static const char *jis_aliases[] = {"ISO-2022-JP", NULL};
-static zend_encoding encoding_jis = {
- NULL,
- NULL,
- "JIS",
- (const char *(*)[])&jis_aliases,
- 0
-};
-
-static const char *euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL};
-static zend_encoding encoding_euc_cn = {
- NULL,
- NULL,
- "EUC-CN",
- (const char *(*)[])&euc_cn_aliases,
- 1
-};
-
-static const char *cp936_aliases[] = {"CP-936", NULL};
-static zend_encoding encoding_cp936 = {
- NULL,
- NULL,
- "CP936",
- (const char *(*)[])&cp936_aliases,
- 0
-};
-
-static const char *hz_aliases[] = {"HZ-GB-2312", NULL};
-static zend_encoding encoding_hz = {
- NULL,
- NULL,
- "HZ",
- (const char *(*)[])&hz_aliases,
- 0
-};
-
-static const char *euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
-static zend_encoding encoding_euc_tw = {
- NULL,
- NULL,
- "EUC-TW",
- (const char *(*)[])&euc_tw_aliases,
- 1
-};
-
-static const char *big5_aliases[] = {"BIG5", "CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};
-static zend_encoding encoding_big5 = {
- NULL,
- NULL,
- "BIG-5",
- (const char *(*)[])&big5_aliases,
- 0
-};
-
-static const char *euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
-static zend_encoding encoding_euc_kr = {
- NULL,
- NULL,
- "EUC-KR",
- (const char *(*)[])&euc_kr_aliases,
- 1
-};
-
-static const char *uhc_aliases[] = {"CP949", NULL};
-static zend_encoding encoding_uhc = {
- NULL,
- NULL,
- "UHC",
- (const char *(*)[])&uhc_aliases,
- 1
-};
-
-static zend_encoding encoding_2022kr = {
- NULL,
- NULL,
- "ISO-2022-KR",
- NULL,
- 0
-};
-
-static const char *cp1252_aliases[] = {"cp1252", NULL};
-static zend_encoding encoding_cp1252 = {
- NULL,
- NULL,
- "Windows-1252",
- (const char *(*)[])&cp1252_aliases,
- 1
-};
-
-static const char *iso_8859_1_aliases[] = {"ISO_8859-1", "latin1", NULL};
-static zend_encoding encoding_8859_1 = {
- NULL,
- NULL,
- "ISO-8859-1",
- (const char *(*)[])&iso_8859_1_aliases,
- 1
-};
-
-static const char *iso_8859_2_aliases[] = {"ISO_8859-2", "latin2", NULL};
-static zend_encoding encoding_8859_2 = {
- NULL,
- NULL,
- "ISO-8859-2",
- (const char *(*)[])&iso_8859_2_aliases,
- 1
-};
-
-static const char *iso_8859_3_aliases[] = {"ISO_8859-3", "latin3", NULL};
-static zend_encoding encoding_8859_3 = {
- NULL,
- NULL,
- "ISO-8859-3",
- (const char *(*)[])&iso_8859_3_aliases,
- 1
-};
-
-static const char *iso_8859_4_aliases[] = {"ISO_8859-4", "latin4", NULL};
-static zend_encoding encoding_8859_4 = {
- NULL,
- NULL,
- "ISO-8859-4",
- (const char *(*)[])&iso_8859_4_aliases,
- 1
-};
-
-static const char *iso_8859_5_aliases[] = {"ISO_8859-5", "cyrillic", NULL};
-static zend_encoding encoding_8859_5 = {
- NULL,
- NULL,
- "ISO-8859-5",
- (const char *(*)[])&iso_8859_5_aliases,
- 1
-};
-
-static const char *iso_8859_6_aliases[] = {"ISO_8859-6", "arabic", NULL};
-static zend_encoding encoding_8859_6 = {
- NULL,
- NULL,
- "ISO-8859-6",
- (const char *(*)[])&iso_8859_6_aliases,
- 1
-};
-
-static const char *iso_8859_7_aliases[] = {"ISO_8859-7", "greek", NULL};
-static zend_encoding encoding_8859_7 = {
- NULL,
- NULL,
- "ISO-8859-7",
- (const char *(*)[])&iso_8859_7_aliases,
- 1
-};
-
-static const char *iso_8859_8_aliases[] = {"ISO_8859-8", "hebrew", NULL};
-static zend_encoding encoding_8859_8 = {
- NULL,
- NULL,
- "ISO-8859-8",
- (const char *(*)[])&iso_8859_8_aliases,
- 1
-};
-
-static const char *iso_8859_9_aliases[] = {"ISO_8859-9", "latin5", NULL};
-static zend_encoding encoding_8859_9 = {
- NULL,
- NULL,
- "ISO-8859-9",
- (const char *(*)[])&iso_8859_9_aliases,
- 1
-};
-
-static const char *iso_8859_10_aliases[] = {"ISO_8859-10", "latin6", NULL};
-static zend_encoding encoding_8859_10 = {
- NULL,
- NULL,
- "ISO-8859-10",
- (const char *(*)[])&iso_8859_10_aliases,
- 1
-};
-
-static const char *iso_8859_13_aliases[] = {"ISO_8859-13", NULL};
-static zend_encoding encoding_8859_13 = {
- NULL,
- NULL,
- "ISO-8859-13",
- (const char *(*)[])&iso_8859_13_aliases,
- 1
-};
-
-static const char *iso_8859_14_aliases[] = {"ISO_8859-14", "latin8", NULL};
-static zend_encoding encoding_8859_14 = {
- NULL,
- NULL,
- "ISO-8859-14",
- (const char *(*)[])&iso_8859_14_aliases,
- 1
-};
-
-static const char *iso_8859_15_aliases[] = {"ISO_8859-15", NULL};
-static zend_encoding encoding_8859_15 = {
- NULL,
- NULL,
- "ISO-8859-15",
- (const char *(*)[])&iso_8859_15_aliases,
- 1
-};
-
-static const char *cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
-static zend_encoding encoding_cp1251 = {
- NULL,
- NULL,
- "Windows-1251",
- (const char *(*)[])&cp1251_aliases,
- 1
-};
-
-static const char *cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL};
-static zend_encoding encoding_cp866 = {
- NULL,
- NULL,
- "CP866",
- (const char *(*)[])&cp866_aliases,
- 1
-};
-
-static const char *koi8r_aliases[] = {"KOI8-R", "KOI8R", NULL};
-static zend_encoding encoding_koi8r = {
- NULL,
- NULL,
- "KOI8-R",
- (const char *(*)[])&koi8r_aliases,
- 1
-};
-
-static const char *koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL};
-static zend_encoding encoding_koi8u = {
- NULL,
- NULL,
- "KOI8-U",
- (const char *(*)[])&koi8u_aliases,
- 1
-};
-
-static const char *cp1254_aliases[] = {"cp1254", NULL};
-static zend_encoding encoding_cp1254 = {
- NULL,
- NULL,
- "Windows-1254",
- (const char *(*)[])&cp1254_aliases,
- 1
-};
-
-static const char *armscii8_aliases[] = { "ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL};
-static zend_encoding encoding_armscii8 = {
- NULL,
- NULL,
- "ArmSCII-8",
- (const char *(*)[])&armscii8_aliases,
- 1
-};
-
-static const char *cp850_aliases[] = {"IBM850", NULL};
-static zend_encoding encoding_cp850 = {
- NULL,
- NULL,
- "CP850",
- (const char *(*)[])&cp850_aliases,
- 1
-};
-
-static zend_encoding *zend_encoding_table[] = {
- &encoding_ucs4,
- &encoding_ucs4be,
- &encoding_ucs4le,
- &encoding_ucs2,
- &encoding_ucs2be,
- &encoding_ucs2le,
- &encoding_utf32,
- &encoding_utf32be,
- &encoding_utf32le,
- &encoding_utf16,
- &encoding_utf16be,
- &encoding_utf16le,
- &encoding_utf8,
- &encoding_ascii,
- &encoding_euc_jp,
- &encoding_sjis,
- &encoding_eucjp_win,
- &encoding_sjis_win,
- &encoding_jis,
- &encoding_cp1252,
- &encoding_8859_1,
- &encoding_8859_2,
- &encoding_8859_3,
- &encoding_8859_4,
- &encoding_8859_5,
- &encoding_8859_6,
- &encoding_8859_7,
- &encoding_8859_8,
- &encoding_8859_9,
- &encoding_8859_10,
- &encoding_8859_13,
- &encoding_8859_14,
- &encoding_8859_15,
- &encoding_euc_cn,
- &encoding_cp936,
- &encoding_hz,
- &encoding_euc_tw,
- &encoding_big5,
- &encoding_euc_kr,
- &encoding_uhc,
- &encoding_2022kr,
- &encoding_cp1251,
- &encoding_cp866,
- &encoding_koi8r,
- &encoding_koi8u,
- &encoding_armscii8,
- &encoding_cp1254,
- &encoding_cp850,
- NULL
-};
-
-static char* dummy_encoding_detector(const unsigned char *string, size_t length, char *list TSRMLS_DC)
+static const zend_encoding *dummy_encoding_fetcher(const char *encoding_name TSRMLS_DC)
{
return NULL;
}
-static int dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
+static const char *dummy_encoding_name_getter(const zend_encoding *encoding)
{
- return -1;
+ return NULL;
}
-static size_t dummy_encoding_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
+static int dummy_encoding_lexer_compatibility_checker(const zend_encoding *encoding)
{
return 0;
}
-static int dummy_encoding_list_checker(const char *encoding_list TSRMLS_DC)
+static const zend_encoding *dummy_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
- /* ignore encoding */
- return 1;
+ return NULL;
}
-static const char* dummy_get_internal_encoding(TSRMLS_D)
+static size_t dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
- return NULL;
+ return (size_t)-1;
}
-ZEND_API zend_encoding_detector zend_multibyte_encoding_detector = dummy_encoding_detector;
-ZEND_API zend_encoding_converter zend_multibyte_encoding_converter = dummy_encoding_converter;
-ZEND_API zend_encoding_oddlen zend_multibyte_encoding_oddlen = dummy_encoding_oddlen;
-ZEND_API zend_encoding_list_checker zend_multibyte_check_encoding_list = dummy_encoding_list_checker;
-ZEND_API zend_encoding_name_getter zend_multibyte_get_internal_encoding = dummy_get_internal_encoding;
-
-ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
-size_t encoding_list_size TSRMLS_DC)
+static int dummy_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
- if (CG(script_encoding_list)) {
- efree(CG(script_encoding_list));
- CG(script_encoding_list) = NULL;
- }
- CG(script_encoding_list_size) = 0;
-
- if (!encoding_list) {
- return 0;
- }
-
- zend_multibyte_parse_encoding_list(encoding_list, encoding_list_size, &(CG(script_encoding_list)), &(CG(script_encoding_list_size)));
-
- return 0;
+ return FAILURE;
}
-
-ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC)
+static const zend_encoding *dummy_internal_encoding_getter(TSRMLS_D)
{
- CG(internal_encoding) = zend_multibyte_fetch_encoding(encoding_name);
- return 0;
+ return NULL;
}
-ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC)
+static int dummy_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
{
- zend_multibyte_encoding_detector = encoding_detector;
- zend_multibyte_encoding_converter = encoding_converter;
- zend_multibyte_encoding_oddlen = encoding_oddlen;
- zend_multibyte_check_encoding_list = encoding_list_checker;
- zend_multibyte_get_internal_encoding = get_internal_encoding;
- return 0;
+ return FAILURE;
}
+static zend_multibyte_functions multibyte_functions = {
+ NULL,
+ dummy_encoding_fetcher,
+ dummy_encoding_name_getter,
+ dummy_encoding_lexer_compatibility_checker,
+ dummy_encoding_detector,
+ dummy_encoding_converter,
+ dummy_encoding_list_parser,
+ dummy_internal_encoding_getter,
+ dummy_internal_encoding_setter
+};
-ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC)
-{
- LANG_SCNG(script_encoding) = zend_multibyte_find_script_encoding(onetime_encoding TSRMLS_CC);
- LANG_SCNG(internal_encoding) = CG(internal_encoding);
-
- /* judge input/output filter */
- LANG_SCNG(input_filter) = NULL;
- LANG_SCNG(output_filter) = NULL;
+ZEND_API const zend_encoding *zend_multibyte_encoding_utf32be;
+ZEND_API const zend_encoding *zend_multibyte_encoding_utf32le;
+ZEND_API const zend_encoding *zend_multibyte_encoding_utf16be;
+ZEND_API const zend_encoding *zend_multibyte_encoding_utf16le;
+ZEND_API const zend_encoding *zend_multibyte_encoding_utf8;
- if (!LANG_SCNG(script_encoding)) {
- return 0;
+ZEND_API int zend_multibyte_set_functions(const zend_multibyte_functions *functions TSRMLS_DC)
+{
+ zend_multibyte_encoding_utf32be = functions->encoding_fetcher("UTF-32BE" TSRMLS_CC);
+ if (!zend_multibyte_encoding_utf32be) {
+ return FAILURE;
}
-
- if (!LANG_SCNG(internal_encoding) || LANG_SCNG(script_encoding) == LANG_SCNG(internal_encoding)) {
- /* if encoding specfic filters exist, use them */
- if (LANG_SCNG(script_encoding)->input_filter && LANG_SCNG(script_encoding)->output_filter) {
- LANG_SCNG(input_filter) = LANG_SCNG(script_encoding)->input_filter;
- LANG_SCNG(output_filter) = LANG_SCNG(script_encoding)->output_filter;
- return 0;
- }
-
- if (!LANG_SCNG(script_encoding)->compatible) {
- /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
- LANG_SCNG(internal_encoding) = LANG_SCNG(script_encoding);
- LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
- LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;
- return 0;
- } else {
- /* nothing to do in this case */
- return 0;
- }
+ zend_multibyte_encoding_utf32le = functions->encoding_fetcher("UTF-32LE" TSRMLS_CC);
+ if (!zend_multibyte_encoding_utf32le) {
+ return FAILURE;
}
-
- /* LANG_SCNG(internal_encoding) cannot be NULL here */
- if (LANG_SCNG(internal_encoding)->compatible) {
- LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
- return 0;
- } else if (LANG_SCNG(script_encoding)->compatible) {
- LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;
- return 0;
+ zend_multibyte_encoding_utf16be = functions->encoding_fetcher("UTF-16BE" TSRMLS_CC);
+ if (!zend_multibyte_encoding_utf16be) {
+ return FAILURE;
}
-
- /* both script and internal encodings are incompatible w/ flex */
- LANG_SCNG(input_filter) = zend_multibyte_script_encoding_filter;
- LANG_SCNG(output_filter) = zend_multibyte_internal_encoding_filter;
-
- return 0;
-}
-
-
-ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name)
-{
- int i, j;
- zend_encoding *encoding;
-
- if (!encoding_name) {
- return NULL;
+ zend_multibyte_encoding_utf16le = functions->encoding_fetcher("UTF-16LE" TSRMLS_CC);
+ if (!zend_multibyte_encoding_utf16le) {
+ return FAILURE;
}
-
- for (i = 0; (encoding = zend_encoding_table[i]) != NULL; i++) {
- if (zend_binary_strcasecmp(encoding->name, strlen(encoding->name), encoding_name, strlen(encoding_name)) == 0) {
- return encoding;
- }
+ zend_multibyte_encoding_utf8 = functions->encoding_fetcher("UTF-8" TSRMLS_CC);
+ if (!zend_multibyte_encoding_utf8) {
+ return FAILURE;
}
- for (i = 0; (encoding = zend_encoding_table[i]) != NULL; i++) {
- if (encoding->aliases != NULL) {
- for (j = 0; (*encoding->aliases)[j] != NULL; j++) {
- if (zend_binary_strcasecmp((*encoding->aliases)[j], strlen((*encoding->aliases)[j]), encoding_name, strlen(encoding_name)) == 0) {
- return encoding;
- }
- }
- }
- }
+ multibyte_functions = *functions;
- return NULL;
+ /* As zend_multibyte_set_functions() gets called after ini settings were
+ * populated, we need to reinitialize script_encoding here.
+ */
+ {
+ const char *value = zend_ini_string("zend.script_encoding", sizeof("zend.script_encoding"), 0);
+ zend_multibyte_set_script_encoding_by_string(value, strlen(value) TSRMLS_CC);
+ }
+ return SUCCESS;
}
-
-ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t
-*to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+ZEND_API const zend_multibyte_functions *zend_multibyte_get_functions(TSRMLS_D)
{
- const char *name;
-
- if (LANG_SCNG(internal_encoding) == NULL || LANG_SCNG(internal_encoding)->compatible == 0) {
- name = "UTF-8";
- } else {
- name = LANG_SCNG(internal_encoding)->name;
- }
-
- return zend_multibyte_encoding_filter(to, to_length, name, from, from_length, LANG_SCNG(script_encoding)->name TSRMLS_CC);
+ return multibyte_functions.provider_name ? &multibyte_functions: NULL;
}
-ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+ZEND_API const zend_encoding *zend_multibyte_fetch_encoding(const char *name TSRMLS_DC)
{
- const char *name;
-
- if (LANG_SCNG(script_encoding)->compatible == 0) {
- name = "UTF-8";
- } else {
- name = LANG_SCNG(script_encoding)->name;
- }
-
- return zend_multibyte_encoding_filter(to, to_length, LANG_SCNG(internal_encoding)->name, from, from_length, name TSRMLS_CC);
+ return multibyte_functions.encoding_fetcher(name TSRMLS_CC);
}
-static size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_length, const char *to_encoding, const unsigned char *from, size_t from_length, const char *from_encoding TSRMLS_DC)
+ZEND_API const char *zend_multibyte_get_encoding_name(const zend_encoding *encoding)
{
- size_t oddlen;
-
- if (zend_multibyte_encoding_converter == dummy_encoding_converter) {
- return 0;
- }
-
- oddlen = zend_multibyte_encoding_oddlen(from, from_length, from_encoding TSRMLS_CC);
- if (oddlen > 0) {
- from_length -= oddlen;
- }
-
- if (zend_multibyte_encoding_converter(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) {
- return 0;
- }
-
- return from_length;
+ return multibyte_functions.encoding_name_getter(encoding);
}
-
-/*
- * Shift_JIS Input/Output Filter
- */
-static const unsigned char table_sjis[] = { /* 0x80-0x9f,0xE0-0xEF */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 0, 0, 0
-};
-
-size_t sjis_input_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC)
+ZEND_API int zend_multibyte_check_lexer_compatibility(const zend_encoding *encoding)
{
- const unsigned char *p;
- unsigned char *q;
- unsigned char c1, c2;
-
- *buf = (unsigned char*)emalloc(sjis_length * 3 / 2 + 1);
- if (!*buf)
- return 0;
- *length = 0;
-
- p = sjis;
- q = *buf;
-
- /* convert [SJIS -> EUC-JP] (for lex scan) -- some other better ways? */
- while (*p && (p - sjis) < sjis_length) {
- if (!(*p & 0x80)) {
- *q++ = *p++;
- continue;
- }
-
- /* handling 8 bit code */
- if (table_sjis[*p] == 1) {
- /* 1 byte kana */
- *q++ = 0x8e;
- *q++ = *p++;
- continue;
- }
-
- if (!*(p+1)) {
- *q++ = *p++;
- break;
- }
-
- if (table_sjis[*p] == 2) {
- /* 2 byte kanji code */
- c1 = *p++;
- if (!*p || (p - sjis) >= sjis_length) {
- break;
- }
- c2 = *p++;
- c1 -= (c1 <= 0x9f) ? 0x71 : 0xb1;
- c1 = (c1 << 1) + 1;
- if (c2 >= 0x9e) {
- c2 -= 0x7e;
- c1++;
- } else if (c2 > 0x7f) {
- c2 -= 0x20;
- } else {
- c2 -= 0x1f;
- }
-
- c1 |= 0x80;
- c2 |= 0x80;
-
- *q++ = c1;
- *q++ = c2;
- } else {
- /*
- * for user defined chars (ATTENTION)
- *
- * THESE ARE NOT CODE FOR CONVERSION! :-P
- * (using *ILLEGALLY* 3byte EUC-JP space)
- *
- * we cannot perfectly (== 1 to 1) convert these chars to EUC-JP.
- * so, these code are for perfect RESTORING in sjis_output_filter()
- */
- c1 = *p++;
- if (!*p || (p - sjis) >= sjis_length) {
- break;
- }
- c2 = *p++;
- *q++ = 0x8f;
- /*
- * MAP TO (EUC-JP):
- * type A: 0xeba1 - 0xf4fe
- * type B: 0xf5a1 - 0xfefe
- * type C: 0xa1a1 - 0xa6fe
- */
- c1 -= (c1 > 0xf9) ? (0x79+0x71) : (0x0a+0xb1);
- c1 = (c1 << 1) + 1;
- if (c2 >= 0x9e) {
- c2 -= 0x7e;
- c1++;
- } else if (c2 > 0x7f) {
- c2 -= 0x20;
- } else {
- c2 -= 0x1f;
- }
-
- c1 |= 0x80;
- c2 |= 0x80;
-
- *q++ = c1;
- *q++ = c2;
- }
- }
- *q = '\0';
- *length = q - *buf;
-
- return *length;
+ return multibyte_functions.lexer_compatibility_checker(encoding);
}
-static const unsigned char table_eucjp[] = { /* 0xA1-0xFE */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
-};
-
-size_t sjis_output_filter(unsigned char **sjis, size_t *sjis_length, const unsigned char *buf, size_t length TSRMLS_DC)
+ZEND_API const zend_encoding *zend_multibyte_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size TSRMLS_DC)
{
- unsigned char c1, c2;
- unsigned char *p;
- const unsigned char *q;
-
- if (!sjis || !sjis_length) {
- return 0;
- }
-
- /* always Shift_JIS <= EUC-JP */
- *sjis = (unsigned char*)emalloc(length+1);
- if (!sjis) {
- return 0;
- }
- p = *sjis;
- q = buf;
-
- /* restore converted strings [EUC-JP -> Shift_JIS] */
- while (*q && (q - buf) < length) {
- if (!(*q & 0x80)) {
- *p++ = *q++;
- continue;
- }
-
- /* hankaku kana */
- if (*q == 0x8e) {
- q++;
- if (*q) {
- *p++ = *q++;
- }
- continue;
- }
-
- /* 2 byte kanji code */
- if (table_eucjp[*q] == 2) {
- c1 = (*q++ & ~0x80) & 0xff;
- if (*q) {
- c2 = (*q++ & ~0x80) & 0xff;
- } else {
- q--;
- break;
- }
-
- c2 += (c1 & 0x01) ? 0x1f : 0x7d;
- if (c2 >= 0x7f) {
- c2++;
- }
- c1 = ((c1 - 0x21) >> 1) + 0x81;
- if (c1 > 0x9f) {
- c1 += 0x40;
- }
-
- *p++ = c1;
- *p++ = c2;
- continue;
- }
-
- if (*q == 0x8f) {
- q++;
- if (*q) {
- c1 = (*q++ & ~0x80) & 0xff;
- } else {
- q--;
- break;
- }
- if (*q) {
- c2 = (*q++ & ~0x80) & 0xff;
- } else {
- q -= 2;
- break;
- }
-
- c2 += (c1 & 0x01) ? 0x1f : 0x7d;
- if (c2 >= 0x7f) {
- c2++;
- }
- c1 = ((c1 - 0x21) >> 1) + 0x81;
- if (c1 > 0x9f) {
- c1 += 0x40;
- }
-
- if (c1 >= 0x81 && c1 <= 0x9f) {
- c1 += 0x79;
- } else {
- c1 += 0x0a;
- }
-
- *p++ = c1;
- *p++ = c2;
- continue;
- }
-
- /* some other chars (may not happen) */
- *p++ = *q++;
- }
- *p = '\0';
- *sjis_length = p - *sjis;
-
- return q-buf; /* return length we actually read */
+ return multibyte_functions.encoding_detector(string, length, list, list_size TSRMLS_CC);
}
-
-static char *zend_multibyte_assemble_encoding_list(zend_encoding **encoding_list, size_t encoding_list_size)
+ZEND_API size_t zend_multibyte_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
{
- int i, list_size = 0;
- const char *name;
- char *list = NULL;
-
- if (!encoding_list || !encoding_list_size) {
- return NULL;
- }
-
- for (i = 0; i < encoding_list_size; i++) {
- name = (*(encoding_list+i))->name;
- if (name) {
- list_size += strlen(name) + 1;
- if (!list) {
- list = (char*)emalloc(list_size);
- if (!list) {
- return NULL;
- }
- *list = '\0';
- } else {
- list = (char*)erealloc(list, list_size);
- if (!list) {
- return NULL;
- }
- strcat(list, ",");
- }
- strcat(list, name);
- }
- }
- return list;
+ return multibyte_functions.encoding_converter(to, to_length, from, from_length, encoding_to, encoding_from TSRMLS_CC);
}
-
-static int zend_multibyte_parse_encoding_list(const char *encoding_list,
-size_t encoding_list_size, zend_encoding ***result, size_t *result_size)
+ZEND_API int zend_multibyte_parse_encoding_list(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
- int n, size;
- char *p, *p1, *p2, *endp, *tmpstr;
- zend_encoding **list, **entry, *encoding;
-
- list = NULL;
- if (encoding_list == NULL || encoding_list_size <= 0) {
- return -1;
- } else {
- /* copy the encoding_list string for work */
- tmpstr = (char *)estrndup(encoding_list, encoding_list_size);
- if (tmpstr == NULL) {
- return -1;
- }
- /* count the number of listed encoding names */
- endp = tmpstr + encoding_list_size;
- n = 1;
- p1 = tmpstr;
- while ((p2 = zend_memnstr(p1, ",", 1, endp)) != NULL) {
- p1 = p2 + 1;
- n++;
- }
- size = n;
- /* make list */
- list = (zend_encoding**)ecalloc(size, sizeof(zend_encoding*));
- if (list != NULL) {
- entry = list;
- n = 0;
- p1 = tmpstr;
- do {
- p2 = p = zend_memnstr(p1, ",", 1, endp);
- if (p == NULL) {
- p = endp;
- }
- *p = '\0';
- /* trim spaces */
- while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
- p1++;
- }
- p--;
- while (p > p1 && (*p == ' ' || *p == '\t')) {
- *p = '\0';
- p--;
- }
- /* convert to the encoding number and check encoding */
- encoding = zend_multibyte_fetch_encoding(p1);
- if (encoding)
- {
- *entry++ = encoding;
- n++;
- }
- p1 = p2 + 1;
- } while (n < size && p2 != NULL);
- *result = list;
- *result_size = n;
- }
- efree(tmpstr);
- }
-
- if (list == NULL) {
- return -1;
- }
-
- return 0;
+ return multibyte_functions.encoding_list_parser(encoding_list, encoding_list_len, return_list, return_size, persistent TSRMLS_CC);
}
-
-static zend_encoding* zend_multibyte_find_script_encoding(zend_encoding *onetime_encoding TSRMLS_DC)
+ZEND_API const zend_encoding *zend_multibyte_get_internal_encoding(TSRMLS_D)
{
- zend_encoding *script_encoding;
- char *name, *list;
-
- /* onetime_encoding is prior to everything */
- if (onetime_encoding != NULL) {
- return onetime_encoding;
- }
-
- if (CG(detect_unicode)) {
- /* check out bom(byte order mark) and see if containing wchars */
- script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
- if (script_encoding != NULL) {
- /* bom or wchar detection is prior to 'script_encoding' option */
- return script_encoding;
- }
- }
+ return multibyte_functions.internal_encoding_getter(TSRMLS_C);
+}
- /* if no script_encoding specified, just leave alone */
- if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
- return NULL;
- }
+ZEND_API const zend_encoding *zend_multibyte_get_script_encoding(TSRMLS_D)
+{
+ return LANG_SCNG(script_encoding);
+}
- /* if multiple encodings specified, detect automagically */
- if (CG(script_encoding_list_size) > 1 &&
- zend_multibyte_encoding_detector != dummy_encoding_detector) {
- list = zend_multibyte_assemble_encoding_list(CG(script_encoding_list),
- CG(script_encoding_list_size));
- name = zend_multibyte_encoding_detector(LANG_SCNG(script_org),
- LANG_SCNG(script_org_size), list TSRMLS_CC);
- if (list) {
- efree(list);
- }
- if (name) {
- script_encoding = zend_multibyte_fetch_encoding(name);
- efree(name);
- } else {
- script_encoding = NULL;
- }
- return script_encoding;
+ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_list, size_t encoding_list_size TSRMLS_DC)
+{
+ if (CG(script_encoding_list)) {
+ efree(CG(script_encoding_list));
}
-
- return *(CG(script_encoding_list));
+ CG(script_encoding_list) = encoding_list;
+ CG(script_encoding_list_size) = encoding_list_size;
+ return SUCCESS;
}
+ZEND_API int zend_multibyte_set_internal_encoding(const zend_encoding *encoding TSRMLS_DC)
+{
+ return multibyte_functions.internal_encoding_setter(encoding TSRMLS_CC);
+}
-static zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
+ZEND_API int zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length TSRMLS_DC)
{
- zend_encoding *script_encoding = NULL;
- int bom_size;
- unsigned char *script;
- unsigned char *pos1, *pos2;
+ const zend_encoding **list = 0;
+ size_t size = 0;
- if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
- return NULL;
+ if (!new_value) {
+ zend_multibyte_set_script_encoding(NULL, 0 TSRMLS_CC);
+ return SUCCESS;
}
- /* check out BOM */
- if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
- script_encoding = &encoding_utf32be;
- bom_size = sizeof(BOM_UTF32_BE)-1;
- } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
- script_encoding = &encoding_utf32le;
- bom_size = sizeof(BOM_UTF32_LE)-1;
- } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
- script_encoding = &encoding_utf16be;
- bom_size = sizeof(BOM_UTF16_BE)-1;
- } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
- script_encoding = &encoding_utf16le;
- bom_size = sizeof(BOM_UTF16_LE)-1;
- } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
- script_encoding = &encoding_utf8;
- bom_size = sizeof(BOM_UTF8)-1;
+ if (FAILURE == zend_multibyte_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
+ return FAILURE;
}
- if (script_encoding) {
- /* remove BOM */
- script = (unsigned char*)emalloc(LANG_SCNG(script_org_size)+1-bom_size);
- memcpy(script, LANG_SCNG(script_org)+bom_size, LANG_SCNG(script_org_size)+1-bom_size);
- efree(LANG_SCNG(script_org));
- LANG_SCNG(script_org) = script;
- LANG_SCNG(script_org_size) -= bom_size;
-
- return script_encoding;
+ if (size == 0) {
+ pefree(list, 1);
+ return FAILURE;
}
- /* script contains NULL bytes -> auto-detection */
- if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
- /* check if the NULL byte is after the __HALT_COMPILER(); */
- pos2 = LANG_SCNG(script_org);
-
- while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
- pos2 = memchr(pos2, '_', pos1 - pos2);
- if (!pos2) break;
- pos2++;
- if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
- pos2 += sizeof("_HALT_COMPILER")-1;
- while (*pos2 == ' ' ||
- *pos2 == '\t' ||
- *pos2 == '\r' ||
- *pos2 == '\n') {
- pos2++;
- }
- if (*pos2 == '(') {
- pos2++;
- while (*pos2 == ' ' ||
- *pos2 == '\t' ||
- *pos2 == '\r' ||
- *pos2 == '\n') {
- pos2++;
- }
- if (*pos2 == ')') {
- pos2++;
- while (*pos2 == ' ' ||
- *pos2 == '\t' ||
- *pos2 == '\r' ||
- *pos2 == '\n') {
- pos2++;
- }
- if (*pos2 == ';') {
- return NULL;
- }
- }
- }
- }
- }
- /* make best effort if BOM is missing */
- return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
+ if (FAILURE == zend_multibyte_set_script_encoding(list, size TSRMLS_CC)) {
+ return FAILURE;
}
- return NULL;
+ return SUCCESS;
}
-static zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
+ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t
+*to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
{
- const unsigned char *p;
- int wchar_size = 2;
- int le = 0;
-
- /* utf-16 or utf-32? */
- p = script;
- while ((p-script) < script_size) {
- p = memchr(p, 0, script_size-(p-script)-2);
- if (!p) {
- break;
- }
- if (*(p+1) == '\0' && *(p+2) == '\0') {
- wchar_size = 4;
- break;
- }
-
- /* searching for UTF-32 specific byte orders, so this will do */
- p += 4;
- }
-
- /* BE or LE? */
- p = script;
- while ((p-script) < script_size) {
- if (*p == '\0' && *(p+wchar_size-1) != '\0') {
- /* BE */
- le = 0;
- break;
- } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
- /* LE* */
- le = 1;
- break;
- }
- p += wchar_size;
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ if (!internal_encoding || !zend_multibyte_check_lexer_compatibility(internal_encoding)) {
+ internal_encoding = zend_multibyte_encoding_utf8;
}
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
+}
- if (wchar_size == 2) {
- return le ? &encoding_utf16le : &encoding_utf16be;
- } else {
- return le ? &encoding_utf32le : &encoding_utf32be;
+ZEND_API size_t zend_multibyte_internal_encoding_filter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+{
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ const zend_encoding *script_encoding = LANG_SCNG(script_encoding);
+ if (!internal_encoding || !zend_multibyte_check_lexer_compatibility(internal_encoding)) {
+ internal_encoding = zend_multibyte_encoding_utf8;
+ }
+ if (!zend_multibyte_check_lexer_compatibility(script_encoding)) {
+ script_encoding = zend_multibyte_encoding_utf8;
}
-
- return NULL;
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length,
+script_encoding, internal_encoding TSRMLS_CC);
}
/*