diff options
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- | ext/mbstring/mbstring.c | 829 |
1 files changed, 667 insertions, 162 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e01430fa07..f963ba6d97 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -104,6 +104,9 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); + +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -430,6 +433,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) + ZEND_ARG_INFO(0, cp) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -556,6 +574,9 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_ord, arginfo_mb_ord) + PHP_FE(mb_chr, arginfo_mb_chr) + PHP_FE(mb_scrub, arginfo_mb_scrub) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -710,9 +731,6 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en } else tmpstr = (char *)estrndup(value, value_length); - if (tmpstr == NULL) { - return FAILURE; - } /* count the number of listed encoding names */ endp = tmpstr + value_length; n = 1; @@ -724,74 +742,64 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en size = n + MBSTRG(default_detect_order_list_size); /* make list */ list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); - if (list != NULL) { - entry = list; - n = 0; - bauto = 0; - p1 = tmpstr; - do { - p2 = p = (char*)php_memnstr(p1, ",", 1, endp); - if (p == NULL) { - p = endp; - } + entry = list; + n = 0; + bauto = 0; + p1 = tmpstr; + do { + p2 = p = (char*)php_memnstr(p1, ",", 1, endp); + if (p == NULL) { + p = endp; + } + *p = '\0'; + /* trim spaces */ + while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { + p1++; + } + p--; + while (p > p1 && (*p == ' ' || *p == '\t')) { *p = '\0'; - /* trim spaces */ - while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { - p1++; - } p--; - while (p > p1 && (*p == ' ' || *p == '\t')) { - *p = '\0'; - p--; - } - /* convert to the encoding number and check encoding */ - if (strcasecmp(p1, "auto") == 0) { - if (!bauto) { - const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); - const size_t identify_list_size = MBSTRG(default_detect_order_list_size); - size_t i; - bauto = 1; - for (i = 0; i < identify_list_size; i++) { - *entry++ = mbfl_no2encoding(*src++); - n++; - } - } - } else { - const mbfl_encoding *encoding = mbfl_name2encoding(p1); - if (encoding) { - *entry++ = encoding; + } + /* convert to the encoding number and check encoding */ + if (strcasecmp(p1, "auto") == 0) { + if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t i; + bauto = 1; + for (i = 0; i < identify_list_size; i++) { + *entry++ = mbfl_no2encoding(*src++); n++; - } else { - ret = 0; } } - p1 = p2 + 1; - } while (n < size && p2 != NULL); - if (n > 0) { - if (return_list) { - *return_list = list; + } else { + const mbfl_encoding *encoding = mbfl_name2encoding(p1); + if (encoding) { + *entry++ = encoding; + n++; } else { - pefree(list, persistent); + ret = 0; } + } + p1 = p2 + 1; + } while (n < size && p2 != NULL); + if (n > 0) { + if (return_list) { + *return_list = list; } else { pefree(list, persistent); - if (return_list) { - *return_list = NULL; - } - ret = 0; - } - if (return_size) { - *return_size = n; } } else { + pefree(list, persistent); if (return_list) { *return_list = NULL; } - if (return_size) { - *return_size = 0; - } ret = 0; } + if (return_size) { + *return_size = n; + } efree(tmpstr); } @@ -818,60 +826,50 @@ php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, siz i = zend_hash_num_elements(target_hash); size = i + MBSTRG(default_detect_order_list_size); list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); - if (list != NULL) { - entry = list; - bauto = 0; - n = 0; - ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { - convert_to_string_ex(hash_entry); - if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) { - if (!bauto) { - const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); - const size_t identify_list_size = MBSTRG(default_detect_order_list_size); - size_t j; - - bauto = 1; - for (j = 0; j < identify_list_size; j++) { - *entry++ = mbfl_no2encoding(*src++); - n++; - } - } - } else { - const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry)); - if (encoding) { - *entry++ = encoding; + entry = list; + bauto = 0; + n = 0; + ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { + convert_to_string_ex(hash_entry); + if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) { + if (!bauto) { + const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); + const size_t identify_list_size = MBSTRG(default_detect_order_list_size); + size_t j; + + bauto = 1; + for (j = 0; j < identify_list_size; j++) { + *entry++ = mbfl_no2encoding(*src++); n++; - } else { - ret = FAILURE; } } - i--; - } ZEND_HASH_FOREACH_END(); - if (n > 0) { - if (return_list) { - *return_list = list; + } else { + const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry)); + if (encoding) { + *entry++ = encoding; + n++; } else { - pefree(list, persistent); + ret = FAILURE; } + } + i--; + } ZEND_HASH_FOREACH_END(); + if (n > 0) { + if (return_list) { + *return_list = list; } else { pefree(list, persistent); - if (return_list) { - *return_list = NULL; - } - ret = FAILURE; - } - if (return_size) { - *return_size = n; } } else { + pefree(list, persistent); if (return_list) { *return_list = NULL; } - if (return_size) { - *return_size = 0; - } ret = FAILURE; } + if (return_size) { + *return_size = n; + } } return ret; @@ -937,6 +935,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng if (convd == NULL) { return -1; } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); @@ -1315,7 +1314,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) /* }}} */ /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ -int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length) +int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint32_t new_value_length) { const mbfl_encoding *encoding; @@ -1622,8 +1621,9 @@ PHP_RINIT_FUNCTION(mbstring) /* override original function. */ if (MBSTRG(func_overload)){ - p = &(mb_ovld[0]); + zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated"); + p = &(mb_ovld[0]); CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN; while (p->type > 0) { if ((MBSTRG(func_overload) & p->type) == p->type && @@ -1975,6 +1975,25 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ +static inline int php_mb_check_code_point(long cp) +{ + if (cp <= 0 || cp >= 0x110000) { + /* Out of Unicode range */ + return 0; + } + + if (cp >= 0xd800 && cp <= 0xdfff) { + /* Surrogate code-point. These are never valid on their own and we only allow a single + * substitute character. */ + return 0; + } + + /* As the we do not know the target encoding of the conversion operation that is going to + * use the substitution character, we cannot check whether the codepoint is actually mapped + * in the given encoding at this point. Thus we have to accept everything. */ + return 1; +} + /* {{{ proto mixed mb_substitute_character([mixed substchar]) Sets the current substitute_character or returns the current substitute_character */ PHP_FUNCTION(mb_substitute_character) @@ -2009,22 +2028,22 @@ PHP_FUNCTION(mb_substitute_character) } else { convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { - php_error_docref(NULL, E_WARNING, "Unknown character."); + php_error_docref(NULL, E_WARNING, "Unknown character"); RETURN_FALSE; } } break; default: convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { - php_error_docref(NULL, E_WARNING, "Unknown character."); + php_error_docref(NULL, E_WARNING, "Unknown character"); RETURN_FALSE; } break; @@ -2106,6 +2125,8 @@ PHP_FUNCTION(mb_parse_str) return; } + php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated"); + symbol_table = zend_rebuild_symbol_table(); ZVAL_ARR(&tmp, symbol_table); detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr); @@ -3107,7 +3128,7 @@ PHP_FUNCTION(mb_strimwidth) if (from < 0) { from += swidth; } - + if (from < 0 || (size_t)from > str_len) { php_error_docref(NULL, E_WARNING, "Start position is out of range"); RETURN_FALSE; @@ -3138,8 +3159,26 @@ PHP_FUNCTION(mb_strimwidth) } /* }}} */ + +/* See mbfl_no_encoding definition for list of unsupported encodings */ +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) +{ + return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) + || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) + || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms) + || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222)); +} + + +/* See mbfl_no_encoding definition for list of UTF-8 encodings */ +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) +{ + return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb); +} + + /* {{{ MBSTRING_API char *php_mb_convert_encoding() */ -MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len) +MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len) { mbfl_string string, result, *ret; const mbfl_encoding *from_encoding, *to_encoding; @@ -3207,6 +3246,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter"); return NULL; } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); @@ -3225,12 +3265,83 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* }}} */ +MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings) +{ + HashTable *output, *chash; + zend_long idx; + zend_string *key, *key_tmp; + zval *entry, entry_tmp; + size_t ckey_len, cval_len; + char *ckey, *cval; + + if (!input) { + return NULL; + } + + if (input->u.v.nApplyCount++ > 1) { + input->u.v.nApplyCount--; + php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values"); + return NULL; + } + output = (HashTable *)emalloc(sizeof(HashTable)); + zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0); + ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) { + /* convert key */ + if (key) { + ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len); + key_tmp = zend_string_init(ckey, ckey_len, 0); + } + /* convert value */ + ZEND_ASSERT(entry); + switch(Z_TYPE_P(entry)) { + case IS_STRING: + cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len); + ZVAL_STRINGL(&entry_tmp, cval, cval_len); + efree(cval); + break; + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + case IS_LONG: + case IS_DOUBLE: + ZVAL_COPY(&entry_tmp, entry); + break; + case IS_ARRAY: + chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings); + if (!chash) { + chash = (HashTable *)emalloc(sizeof(HashTable)); + zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0); + } + ZVAL_ARR(&entry_tmp, chash); + break; + case IS_OBJECT: + default: + if (key) { + efree(key_tmp); + } + php_error_docref(NULL, E_WARNING, "Object is not supported"); + continue; + } + if (key) { + zend_hash_add(output, key_tmp, &entry_tmp); + } else { + zend_hash_index_add(output, idx, &entry_tmp); + } + } ZEND_HASH_FOREACH_END(); + input->u.v.nApplyCount--; + + return output; +} +/* }}} */ + + /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding]) Returns converted string in desired encoding */ PHP_FUNCTION(mb_convert_encoding) { - char *arg_str, *arg_new; - size_t str_len, new_len; + zval *input; + char *arg_new; + size_t new_len; zval *arg_old = NULL; size_t size, l, n; char *_from_encodings = NULL, *ret, *s_free = NULL; @@ -3238,10 +3349,14 @@ PHP_FUNCTION(mb_convert_encoding) zval *hash_entry; HashTable *target_hash; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) { return; } + if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) { + convert_to_string(input); + } + if (arg_old) { switch (Z_TYPE_P(arg_old)) { case IS_ARRAY: @@ -3276,19 +3391,26 @@ PHP_FUNCTION(mb_convert_encoding) } } - /* new encoding */ - ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size); - if (ret != NULL) { - // TODO: avoid reallocation ??? - RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */ - efree(ret); + if (Z_TYPE_P(input) == IS_STRING) { + /* new encoding */ + ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size); + if (ret != NULL) { + // TODO: avoid reallocation ??? + RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */ + efree(ret); + } else { + RETVAL_FALSE; + } + if (s_free) { + efree(s_free); + } } else { - RETVAL_FALSE; + HashTable *tmp; + tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings); + RETURN_ARR(tmp); } - if ( s_free) { - efree(s_free); - } + return; } /* }}} */ @@ -3794,7 +3916,7 @@ PHP_FUNCTION(mb_convert_variables) target_hash = HASH_OF(var); if (target_hash != NULL) { while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) { - if (!Z_IMMUTABLE_P(var)) { + if (Z_REFCOUNTED_P(var)) { if (++target_hash->u.v.nApplyCount > 1) { --target_hash->u.v.nApplyCount; recursion_error = 1; @@ -3843,7 +3965,7 @@ detect_end: } if (recursion_error) { while(stack_level-- && (var = &stack[stack_level])) { - if (!Z_IMMUTABLE_P(var)) { + if (Z_REFCOUNTED_P(var)) { if (HASH_OF(var)->u.v.nApplyCount > 1) { HASH_OF(var)->u.v.nApplyCount--; } @@ -3910,7 +4032,7 @@ detect_end: hash_entry = hash_entry_ptr; ZVAL_DEREF(hash_entry); if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) { - if (!Z_IMMUTABLE_P(hash_entry)) { + if (Z_REFCOUNTED_P(hash_entry)) { if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) { --(HASH_OF(hash_entry)->u.v.nApplyCount); recursion_error = 1; @@ -3963,7 +4085,7 @@ conv_end: if (recursion_error) { while(stack_level-- && (var = &stack[stack_level])) { - if (!Z_IMMUTABLE_P(var)) { + if (Z_REFCOUNTED_P(var)) { if (HASH_OF(var)->u.v.nApplyCount > 1) { HASH_OF(var)->u.v.nApplyCount--; } @@ -4266,11 +4388,11 @@ PHP_FUNCTION(mb_send_mail) size_t to_len; char *message = NULL; size_t message_len; - char *headers = NULL; - size_t headers_len; char *subject = NULL; - zend_string *extra_cmd = NULL; size_t subject_len; + zval *headers = NULL; + zend_string *extra_cmd = NULL; + zend_string *str_headers=NULL, *tmp_headers; int i; char *to_r = NULL; char *force_extra_parameters = INI_STR("mail.force_extra_parameters"); @@ -4310,7 +4432,7 @@ PHP_FUNCTION(mb_send_mail) body_enc = lang->mail_body_encoding; } - if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) { return; } @@ -4319,7 +4441,20 @@ PHP_FUNCTION(mb_send_mail) MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); if (headers) { - MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); + switch(Z_TYPE_P(headers)) { + case IS_STRING: + tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0); + MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers)); + str_headers = php_trim(tmp_headers, NULL, 0, 2); + zend_string_release(tmp_headers); + break; + case IS_ARRAY: + str_headers = php_mail_build_headers(headers); + break; + default: + php_error_docref(NULL, E_WARNING, "headers parameter must be string or array"); + RETURN_FALSE; + } } if (extra_cmd) { MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd)); @@ -4327,8 +4462,8 @@ PHP_FUNCTION(mb_send_mail) zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0); - if (headers != NULL) { - _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len); + if (str_headers != NULL) { + _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers)); } if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) { @@ -4471,13 +4606,14 @@ PHP_FUNCTION(mb_send_mail) #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain" #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset=" #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: " - if (headers != NULL) { - p = headers; - n = headers_len; + if (str_headers != NULL) { + p = ZSTR_VAL(str_headers); + n = ZSTR_LEN(str_headers); mbfl_memory_device_strncat(&device, p, n); if (n > 0 && p[n - 1] != '\n') { mbfl_memory_device_strncat(&device, "\n", 1); } + zend_string_release(str_headers); } if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) { @@ -4507,7 +4643,7 @@ PHP_FUNCTION(mb_send_mail) mbfl_memory_device_unput(&device); mbfl_memory_device_output('\0', &device); - headers = (char *)device.buffer; + str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0); if (force_extra_parameters) { extra_cmd = php_escape_shell_cmd(force_extra_parameters); @@ -4515,7 +4651,7 @@ PHP_FUNCTION(mb_send_mail) extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { + if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -4536,6 +4672,9 @@ PHP_FUNCTION(mb_send_mail) } mbfl_memory_device_clear(&device); zend_hash_destroy(&ht_headers); + if (str_headers) { + zend_string_release(str_headers); + } } #undef SKIP_LONG_HEADER_SEP_MBSTRING @@ -4724,13 +4863,51 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) + +static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding) { - const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; + + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); + if (convd == NULL) { + return NULL; + } + mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); + mbfl_buffer_converter_illegal_substchar(convd, 0); + return convd; +} + + +static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { mbfl_string string, result, *ret = NULL; long illegalchars = 0; + /* initialize string */ + mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); + mbfl_string_init(&result); + + string.val = (unsigned char *) input; + string.len = length; + + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + illegalchars = mbfl_buffer_illegalchars(convd); + + if (ret != NULL) { + if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { + mbfl_string_clear(&result); + return 1; + } + mbfl_string_clear(&result); + } + return 0; +} + + +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + if (input == NULL) { return MBSTRG(illegalchars) == 0; } @@ -4743,60 +4920,388 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c } } - convd = mbfl_buffer_converter_new2(encoding, encoding, 0); - + convd = php_mb_init_convd(encoding); if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); return 0; } - mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); - mbfl_buffer_converter_illegal_substchar(convd, 0); + if (php_mb_check_encoding_impl(convd, input, length, encoding)) { + mbfl_buffer_converter_delete(convd); + return 1; + } + mbfl_buffer_converter_delete(convd); + return 0; +} - /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); - mbfl_string_init(&result); - string.val = (unsigned char *) input; - string.len = length; +MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + zend_long idx; + zend_string *key; + zval *entry; + int valid = 1; - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - illegalchars = mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); + (void)(idx); - if (ret != NULL) { - if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - mbfl_string_clear(&result); - return 1; + if (enc != NULL) { + encoding = mbfl_name2encoding(ZSTR_VAL(enc)); + if (!encoding || encoding == &mbfl_encoding_pass) { + php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc)); + return 0; } + } - mbfl_string_clear(&result); + convd = php_mb_init_convd(encoding); + if (convd == NULL) { + php_error_docref(NULL, E_WARNING, "Unable to create converter"); + return 0; } - return 0; + if (vars->u.v.nApplyCount++ > 1) { + vars->u.v.nApplyCount--; + mbfl_buffer_converter_delete(convd); + php_error_docref(NULL, E_WARNING, "Cannot not handle circular references"); + return 0; + } + ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) { + ZVAL_DEREF(entry); + if (key) { + if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { + valid = 0; + break; + } + } + switch (Z_TYPE_P(entry)) { + case IS_STRING: + if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { + valid = 0; + break; + } + break; + case IS_ARRAY: + if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) { + valid = 0; + break; + } + break; + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + break; + default: + /* Other types are error. */ + valid = 0; + break; + } + } ZEND_HASH_FOREACH_END(); + vars->u.v.nApplyCount--; + mbfl_buffer_converter_delete(convd); + return valid; } -/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) + +/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]]) Check if the string is valid for the specified encoding */ PHP_FUNCTION(mb_check_encoding) { - char *var = NULL; - size_t var_len; - char *enc = NULL; - size_t enc_len; + zval *input = NULL; + zend_string *enc = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) { return; - } + } - RETVAL_FALSE; + /* FIXME: Actually check all inputs, except $_FILES file content. */ + if (input == NULL) { + if (MBSTRG(illegalchars) == 0) { + RETURN_TRUE; + } + RETURN_FALSE; + } - if (php_mb_check_encoding(var, var_len, enc)) { - RETVAL_TRUE; + switch(Z_TYPE_P(input)) { + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + RETURN_TRUE; + break; + case IS_STRING: + if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) { + RETURN_FALSE; + } + break; + case IS_ARRAY: + if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) { + RETURN_FALSE; + } + break; + default: + php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array"); + RETURN_FALSE; } + RETURN_TRUE; } /* }}} */ + +static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc) +{ + enum mbfl_no_encoding no_enc; + char* ret; + size_t ret_len; + zend_long cp; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return -1; + } + } + + if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return -1; + } + + if (str_len == 0) { + php_error_docref(NULL, E_WARNING, "Empty string"); + return -1; + } + + { + long orig_illegalchars = MBSTRG(illegalchars); + MBSTRG(illegalchars) = 0; + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + if (MBSTRG(illegalchars) != 0) { + if (ret) { + efree(ret); + } + MBSTRG(illegalchars) = orig_illegalchars; + return -1; + } + + MBSTRG(illegalchars) = orig_illegalchars; + } + + if (ret == NULL) { + return -1; + } + + cp = (unsigned char) ret[0] << 24 | \ + (unsigned char) ret[1] << 16 | \ + (unsigned char) ret[2] << 8 | \ + (unsigned char) ret[3]; + + efree(ret); + + return cp; +} + + +/* {{{ proto int|false mb_ord([string str[, string encoding]]) */ +PHP_FUNCTION(mb_ord) +{ + char* str; + size_t str_len; + char* enc = NULL; + size_t enc_len; + zend_long cp; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + cp = php_mb_ord(str, str_len, enc); + + if (0 > cp) { + RETURN_FALSE; + } + + RETURN_LONG(cp); +} +/* }}} */ + + +static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len) +{ + enum mbfl_no_encoding no_enc; + char* buf; + size_t buf_len; + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return NULL; + } + } + + if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + + if (cp < 0 || cp > 0x10ffff) { + return NULL; + } + + if (php_mb_is_no_encoding_utf8(no_enc)) { + if (cp > 0xd7ff && 0xe000 > cp) { + return NULL; + } + + if (cp < 0x80) { + ret_len = 1; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = cp; + ret[1] = 0; + } else if (cp < 0x800) { + ret_len = 2; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xc0 | (cp >> 6); + ret[1] = 0x80 | (cp & 0x3f); + ret[2] = 0; + } else if (cp < 0x10000) { + ret_len = 3; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xe0 | (cp >> 12); + ret[1] = 0x80 | ((cp >> 6) & 0x3f); + ret[2] = 0x80 | (cp & 0x3f); + ret[3] = 0; + } else { + ret_len = 4; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xf0 | (cp >> 18); + ret[1] = 0x80 | ((cp >> 12) & 0x3f); + ret[2] = 0x80 | ((cp >> 6) & 0x3f); + ret[3] = 0x80 | (cp & 0x3f); + ret[4] = 0; + } + + if (output_len) { + *output_len = ret_len; + } + + return ret; + } + + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = (cp >> 24) & 0xff; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + + { + long orig_illegalchars = MBSTRG(illegalchars); + MBSTRG(illegalchars) = 0; + ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len); + if (MBSTRG(illegalchars) != 0) { + efree(buf); + efree(ret); + MBSTRG(illegalchars) = orig_illegalchars; + return NULL; + } + + MBSTRG(illegalchars) = orig_illegalchars; + } + + efree(buf); + if (output_len) { + *output_len = ret_len; + } + + return ret; +} + + +/* {{{ proto string|false mb_chr([int cp[, string encoding]]) */ +PHP_FUNCTION(mb_chr) +{ + zend_long cp; + char* enc = NULL; + size_t enc_len; + char* ret; + size_t ret_len; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_LONG(cp) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + ret = php_mb_chr(cp, enc, &ret_len); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + +static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) +{ + size_t ret_len; + + return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); +} + + +/* {{{ proto string|false mb_scrub([string str[, string encoding]]) */ +PHP_FUNCTION(mb_scrub) +{ + char* str; + size_t str_len; + char *enc = NULL; + size_t enc_len; + char *ret; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + if (enc == NULL) { + enc = (char *) MBSTRG(current_internal_encoding)->name; + } else if (!mbfl_is_support_encoding(enc)) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + RETURN_FALSE; + } + + ret = php_mb_scrub(str, str_len, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { |