summary refs log tree commit diff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- ext/mbstring/mbstring.c 829
1 files changed, 667 insertions, 162 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index e01430fa07..f963ba6d97 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -104,6 +104,9 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
+static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
+
+static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
/* }}} */
/* {{{ php_mb_default_identify_list */
@@ -430,6 +433,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1) /* mb_scrub(): str is required, encoding is optional */
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) /* mb_ord(): str is required, encoding is optional */
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) /* mb_chr(): cp is required, encoding is optional */
+ ZEND_ARG_INFO(0, cp)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
@@ -556,6 +574,9 @@ const zend_function_entry mbstring_functions[] = {
PHP_FE(mb_send_mail, arginfo_mb_send_mail)
PHP_FE(mb_get_info, arginfo_mb_get_info)
PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
+ PHP_FE(mb_ord, arginfo_mb_ord)
+ PHP_FE(mb_chr, arginfo_mb_chr)
+ PHP_FE(mb_scrub, arginfo_mb_scrub)
#if HAVE_MBREGEX
PHP_MBREGEX_FUNCTION_ENTRIES
#endif
@@ -710,9 +731,6 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en
}
else
tmpstr = (char *)estrndup(value, value_length);
- if (tmpstr == NULL) {
- return FAILURE;
- }
/* count the number of listed encoding names */
endp = tmpstr + value_length;
n = 1;
@@ -724,74 +742,64 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en
size = n + MBSTRG(default_detect_order_list_size);
/* make list */
list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
- if (list != NULL) {
- entry = list;
- n = 0;
- bauto = 0;
- p1 = tmpstr;
- do {
- p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
- if (p == NULL) {
- p = endp;
- }
+ entry = list;
+ n = 0;
+ bauto = 0;
+ p1 = tmpstr;
+ do {
+ p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
+ if (p == NULL) {
+ p = endp;
+ }
+ *p = '\0';
+ /* trim spaces */
+ while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
+ p1++;
+ }
+ p--;
+ while (p > p1 && (*p == ' ' || *p == '\t')) {
*p = '\0';
- /* trim spaces */
- while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
- p1++;
- }
p--;
- while (p > p1 && (*p == ' ' || *p == '\t')) {
- *p = '\0';
- p--;
- }
- /* convert to the encoding number and check encoding */
- if (strcasecmp(p1, "auto") == 0) {
- if (!bauto) {
- const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
- const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
- size_t i;
- bauto = 1;
- for (i = 0; i < identify_list_size; i++) {
- *entry++ = mbfl_no2encoding(*src++);
- n++;
- }
- }
- } else {
- const mbfl_encoding *encoding = mbfl_name2encoding(p1);
- if (encoding) {
- *entry++ = encoding;
+ }
+ /* convert to the encoding number and check encoding */
+ if (strcasecmp(p1, "auto") == 0) {
+ if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t i;
+ bauto = 1;
+ for (i = 0; i < identify_list_size; i++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
- } else {
- ret = 0;
}
}
- p1 = p2 + 1;
- } while (n < size && p2 != NULL);
- if (n > 0) {
- if (return_list) {
- *return_list = list;
+ } else {
+ const mbfl_encoding *encoding = mbfl_name2encoding(p1);
+ if (encoding) {
+ *entry++ = encoding;
+ n++;
} else {
- pefree(list, persistent);
+ ret = 0;
}
+ }
+ p1 = p2 + 1;
+ } while (n < size && p2 != NULL);
+ if (n > 0) {
+ if (return_list) {
+ *return_list = list;
} else {
pefree(list, persistent);
- if (return_list) {
- *return_list = NULL;
- }
- ret = 0;
- }
- if (return_size) {
- *return_size = n;
}
} else {
+ pefree(list, persistent);
if (return_list) {
*return_list = NULL;
}
- if (return_size) {
- *return_size = 0;
- }
ret = 0;
}
+ if (return_size) {
+ *return_size = n;
+ }
efree(tmpstr);
}
@@ -818,60 +826,50 @@ php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, siz
i = zend_hash_num_elements(target_hash);
size = i + MBSTRG(default_detect_order_list_size);
list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
- if (list != NULL) {
- entry = list;
- bauto = 0;
- n = 0;
- ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
- convert_to_string_ex(hash_entry);
- if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
- if (!bauto) {
- const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
- const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
- size_t j;
-
- bauto = 1;
- for (j = 0; j < identify_list_size; j++) {
- *entry++ = mbfl_no2encoding(*src++);
- n++;
- }
- }
- } else {
- const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
- if (encoding) {
- *entry++ = encoding;
+ entry = list;
+ bauto = 0;
+ n = 0;
+ ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
+ convert_to_string_ex(hash_entry);
+ if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
+ if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t j;
+
+ bauto = 1;
+ for (j = 0; j < identify_list_size; j++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
- } else {
- ret = FAILURE;
}
}
- i--;
- } ZEND_HASH_FOREACH_END();
- if (n > 0) {
- if (return_list) {
- *return_list = list;
+ } else {
+ const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
+ if (encoding) {
+ *entry++ = encoding;
+ n++;
} else {
- pefree(list, persistent);
+ ret = FAILURE;
}
+ }
+ i--;
+ } ZEND_HASH_FOREACH_END();
+ if (n > 0) {
+ if (return_list) {
+ *return_list = list;
} else {
pefree(list, persistent);
- if (return_list) {
- *return_list = NULL;
- }
- ret = FAILURE;
- }
- if (return_size) {
- *return_size = n;
}
} else {
+ pefree(list, persistent);
if (return_list) {
*return_list = NULL;
}
- if (return_size) {
- *return_size = 0;
- }
ret = FAILURE;
}
+ if (return_size) {
+ *return_size = n;
+ }
}
return ret;
@@ -937,6 +935,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng
if (convd == NULL) {
return -1;
}
+
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
@@ -1315,7 +1314,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
/* }}} */
/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
-int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length)
+int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint32_t new_value_length)
{
const mbfl_encoding *encoding;
@@ -1622,8 +1621,9 @@ PHP_RINIT_FUNCTION(mbstring)
/* override original function. */
if (MBSTRG(func_overload)){
- p = &(mb_ovld[0]);
+ zend_error(E_DEPRECATED, "The mbstring.func_overload directive is deprecated");
+ p = &(mb_ovld[0]);
CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
while (p->type > 0) {
if ((MBSTRG(func_overload) & p->type) == p->type &&
@@ -1975,6 +1975,25 @@ PHP_FUNCTION(mb_detect_order)
}
/* }}} */
+/* Validates a code point that is about to be used as the substitute character.
+ *
+ * cp: candidate code point, taken by the callers from Z_LVAL_P(). The parameter is
+ *     zend_long rather than long so the value is not narrowed before the range check
+ *     on platforms where long is smaller than zend_long.
+ *
+ * Returns 1 when cp is in 0x1..0x10ffff and is not a surrogate (0xd800..0xdfff),
+ * 0 otherwise. */
+static inline int php_mb_check_code_point(zend_long cp)
+{
+	if (cp <= 0 || cp >= 0x110000) {
+		/* Out of Unicode range */
+		return 0;
+	}
+
+	if (cp >= 0xd800 && cp <= 0xdfff) {
+		/* Surrogate code point. These are never valid on their own and we only allow a single
+		 * substitute character. */
+		return 0;
+	}
+
+	/* As we do not know the target encoding of the conversion operation that is going to
+	 * use the substitution character, we cannot check whether the code point is actually mapped
+	 * in the given encoding at this point. Thus we have to accept everything. */
+	return 1;
+}
+
/* {{{ proto mixed mb_substitute_character([mixed substchar])
Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)
@@ -2009,22 +2028,22 @@ PHP_FUNCTION(mb_substitute_character)
} else {
convert_to_long_ex(arg1);
- if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
+ if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
} else {
- php_error_docref(NULL, E_WARNING, "Unknown character.");
+ php_error_docref(NULL, E_WARNING, "Unknown character");
RETURN_FALSE;
}
}
break;
default:
convert_to_long_ex(arg1);
- if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
+ if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
} else {
- php_error_docref(NULL, E_WARNING, "Unknown character.");
+ php_error_docref(NULL, E_WARNING, "Unknown character");
RETURN_FALSE;
}
break;
@@ -2106,6 +2125,8 @@ PHP_FUNCTION(mb_parse_str)
return;
}
+ php_error_docref(NULL, E_DEPRECATED, "Calling mb_parse_str() without the result argument is deprecated");
+
symbol_table = zend_rebuild_symbol_table();
ZVAL_ARR(&tmp, symbol_table);
detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
@@ -3107,7 +3128,7 @@ PHP_FUNCTION(mb_strimwidth)
if (from < 0) {
from += swidth;
}
-
+
if (from < 0 || (size_t)from > str_len) {
php_error_docref(NULL, E_WARNING, "Start position is out of range");
RETURN_FALSE;
@@ -3138,8 +3159,26 @@ PHP_FUNCTION(mb_strimwidth)
}
/* }}} */
+
+/* Tells whether no_enc falls inside one of the enum ranges that are treated as unsupported.
+ * The comparisons rely on the declaration order of enum mbfl_no_encoding; see its
+ * definition for the encodings covered by each range.
+ * Returns 1 for an unsupported encoding, 0 otherwise. */
+static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
+{
+	if (no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) {
+		return 1;
+	}
+	if (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) {
+		return 1;
+	}
+	if (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms) {
+		return 1;
+	}
+	if (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222) {
+		return 1;
+	}
+	return 0;
+}
+
+
+/* Tells whether no_enc is one of the UTF-8 family members, i.e. lies between
+ * mbfl_no_encoding_utf8 and mbfl_no_encoding_utf8_sb in enum mbfl_no_encoding
+ * (see that definition for the exact list).
+ * Returns 1 for a UTF-8 variant, 0 otherwise. */
+static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
+{
+	if (no_enc < mbfl_no_encoding_utf8) {
+		return 0;
+	}
+	if (no_enc > mbfl_no_encoding_utf8_sb) {
+		return 0;
+	}
+	return 1;
+}
+
+
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
-MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
+MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
{
mbfl_string string, result, *ret;
const mbfl_encoding *from_encoding, *to_encoding;
@@ -3207,6 +3246,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
return NULL;
}
+
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
@@ -3225,12 +3265,83 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* }}} */
+/* Builds a new HashTable whose keys and values are the entries of input converted
+ * from _from_encodings to _to_encoding.
+ *
+ * - String keys and string values go through php_mb_convert_encoding(); when that
+ *   returns NULL the key/value becomes an empty string.
+ * - null, bool, int and float values are copied unchanged.
+ * - Nested arrays are converted recursively; a nested array that cannot be converted
+ *   is replaced by an empty array.
+ * - Any other value type is skipped with an E_WARNING.
+ *
+ * Returns NULL when input is NULL or when the nApplyCount recursion guard trips;
+ * otherwise returns a newly allocated table that the caller owns. */
+MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
+{
+	HashTable *output, *chash;
+	zend_long idx;
+	zend_string *key, *key_tmp;
+	zval *entry, entry_tmp;
+	size_t ckey_len, cval_len;
+	char *ckey, *cval;
+
+	if (!input) {
+		return NULL;
+	}
+
+	if (input->u.v.nApplyCount++ > 1) {
+		input->u.v.nApplyCount--;
+		php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
+		return NULL;
+	}
+	output = (HashTable *)emalloc(sizeof(HashTable));
+	zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
+	ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
+		key_tmp = NULL;
+		/* convert key */
+		if (key) {
+			ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
+			if (ckey) {
+				key_tmp = zend_string_init(ckey, ckey_len, 0);
+				/* zend_string_init() copied the bytes; the conversion buffer is ours to free */
+				efree(ckey);
+			} else {
+				key_tmp = zend_string_init("", 0, 0);
+			}
+		}
+		/* convert value */
+		ZEND_ASSERT(entry);
+		switch(Z_TYPE_P(entry)) {
+			case IS_STRING:
+				cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
+				if (cval) {
+					ZVAL_STRINGL(&entry_tmp, cval, cval_len);
+					efree(cval);
+				} else {
+					ZVAL_STRINGL(&entry_tmp, "", 0);
+				}
+				break;
+			case IS_NULL:
+			case IS_TRUE:
+			case IS_FALSE:
+			case IS_LONG:
+			case IS_DOUBLE:
+				ZVAL_COPY(&entry_tmp, entry);
+				break;
+			case IS_ARRAY:
+				chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
+				if (!chash) {
+					chash = (HashTable *)emalloc(sizeof(HashTable));
+					zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0);
+				}
+				ZVAL_ARR(&entry_tmp, chash);
+				break;
+			case IS_OBJECT:
+			default:
+				if (key_tmp) {
+					zend_string_release(key_tmp);
+				}
+				php_error_docref(NULL, E_WARNING, "Object is not supported");
+				continue;
+		}
+		if (key_tmp) {
+			/* Two source keys may convert to the same key; the later value is dropped */
+			if (zend_hash_add(output, key_tmp, &entry_tmp) == NULL) {
+				zval_ptr_dtor(&entry_tmp);
+			}
+			/* the table holds its own reference to the key now */
+			zend_string_release(key_tmp);
+		} else {
+			zend_hash_index_add(output, idx, &entry_tmp);
+		}
+	} ZEND_HASH_FOREACH_END();
+	input->u.v.nApplyCount--;
+
+	return output;
+}
+/* }}} */
+
+
/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)
{
- char *arg_str, *arg_new;
- size_t str_len, new_len;
+ zval *input;
+ char *arg_new;
+ size_t new_len;
zval *arg_old = NULL;
size_t size, l, n;
char *_from_encodings = NULL, *ret, *s_free = NULL;
@@ -3238,10 +3349,14 @@ PHP_FUNCTION(mb_convert_encoding)
zval *hash_entry;
HashTable *target_hash;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
return;
}
+ if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
+ convert_to_string(input);
+ }
+
if (arg_old) {
switch (Z_TYPE_P(arg_old)) {
case IS_ARRAY:
@@ -3276,19 +3391,26 @@ PHP_FUNCTION(mb_convert_encoding)
}
}
- /* new encoding */
- ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
- if (ret != NULL) {
- // TODO: avoid reallocation ???
- RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
- efree(ret);
+ if (Z_TYPE_P(input) == IS_STRING) {
+ /* new encoding */
+ ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
+ if (ret != NULL) {
+ // TODO: avoid reallocation ???
+ RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
+ efree(ret);
+ } else {
+ RETVAL_FALSE;
+ }
+ if (s_free) {
+ efree(s_free);
+ }
} else {
- RETVAL_FALSE;
+ HashTable *tmp;
+ tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
+ RETURN_ARR(tmp);
}
- if ( s_free) {
- efree(s_free);
- }
+ return;
}
/* }}} */
@@ -3794,7 +3916,7 @@ PHP_FUNCTION(mb_convert_variables)
target_hash = HASH_OF(var);
if (target_hash != NULL) {
while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
- if (!Z_IMMUTABLE_P(var)) {
+ if (Z_REFCOUNTED_P(var)) {
if (++target_hash->u.v.nApplyCount > 1) {
--target_hash->u.v.nApplyCount;
recursion_error = 1;
@@ -3843,7 +3965,7 @@ detect_end:
}
if (recursion_error) {
while(stack_level-- && (var = &stack[stack_level])) {
- if (!Z_IMMUTABLE_P(var)) {
+ if (Z_REFCOUNTED_P(var)) {
if (HASH_OF(var)->u.v.nApplyCount > 1) {
HASH_OF(var)->u.v.nApplyCount--;
}
@@ -3910,7 +4032,7 @@ detect_end:
hash_entry = hash_entry_ptr;
ZVAL_DEREF(hash_entry);
if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
- if (!Z_IMMUTABLE_P(hash_entry)) {
+ if (Z_REFCOUNTED_P(hash_entry)) {
if (++(HASH_OF(hash_entry)->u.v.nApplyCount) > 1) {
--(HASH_OF(hash_entry)->u.v.nApplyCount);
recursion_error = 1;
@@ -3963,7 +4085,7 @@ conv_end:
if (recursion_error) {
while(stack_level-- && (var = &stack[stack_level])) {
- if (!Z_IMMUTABLE_P(var)) {
+ if (Z_REFCOUNTED_P(var)) {
if (HASH_OF(var)->u.v.nApplyCount > 1) {
HASH_OF(var)->u.v.nApplyCount--;
}
@@ -4266,11 +4388,11 @@ PHP_FUNCTION(mb_send_mail)
size_t to_len;
char *message = NULL;
size_t message_len;
- char *headers = NULL;
- size_t headers_len;
char *subject = NULL;
- zend_string *extra_cmd = NULL;
size_t subject_len;
+ zval *headers = NULL;
+ zend_string *extra_cmd = NULL;
+ zend_string *str_headers=NULL, *tmp_headers;
int i;
char *to_r = NULL;
char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
@@ -4310,7 +4432,7 @@ PHP_FUNCTION(mb_send_mail)
body_enc = lang->mail_body_encoding;
}
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
return;
}
@@ -4319,7 +4441,20 @@ PHP_FUNCTION(mb_send_mail)
MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
if (headers) {
- MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
+ switch(Z_TYPE_P(headers)) {
+ case IS_STRING:
+ tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
+ MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
+ str_headers = php_trim(tmp_headers, NULL, 0, 2);
+ zend_string_release(tmp_headers);
+ break;
+ case IS_ARRAY:
+ str_headers = php_mail_build_headers(headers);
+ break;
+ default:
+ php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
+ RETURN_FALSE;
+ }
}
if (extra_cmd) {
MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
@@ -4327,8 +4462,8 @@ PHP_FUNCTION(mb_send_mail)
zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
- if (headers != NULL) {
- _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
+ if (str_headers != NULL) {
+ _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
}
if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
@@ -4471,13 +4606,14 @@ PHP_FUNCTION(mb_send_mail)
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
- if (headers != NULL) {
- p = headers;
- n = headers_len;
+ if (str_headers != NULL) {
+ p = ZSTR_VAL(str_headers);
+ n = ZSTR_LEN(str_headers);
mbfl_memory_device_strncat(&device, p, n);
if (n > 0 && p[n - 1] != '\n') {
mbfl_memory_device_strncat(&device, "\n", 1);
}
+ zend_string_release(str_headers);
}
if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
@@ -4507,7 +4643,7 @@ PHP_FUNCTION(mb_send_mail)
mbfl_memory_device_unput(&device);
mbfl_memory_device_output('\0', &device);
- headers = (char *)device.buffer;
+ str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
if (force_extra_parameters) {
extra_cmd = php_escape_shell_cmd(force_extra_parameters);
@@ -4515,7 +4651,7 @@ PHP_FUNCTION(mb_send_mail)
extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
}
- if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
+ if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
RETVAL_TRUE;
} else {
RETVAL_FALSE;
@@ -4536,6 +4672,9 @@ PHP_FUNCTION(mb_send_mail)
}
mbfl_memory_device_clear(&device);
zend_hash_destroy(&ht_headers);
+ if (str_headers) {
+ zend_string_release(str_headers);
+ }
}
#undef SKIP_LONG_HEADER_SEP_MBSTRING
@@ -4724,13 +4863,51 @@ PHP_FUNCTION(mb_get_info)
}
/* }}} */
-MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+/* Creates a buffer converter from encoding to the same encoding, with illegal-character mode NONE and substitute character 0; returns NULL if the converter cannot be created. */
+static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
{
- const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
mbfl_buffer_converter *convd;
+
+ convd = mbfl_buffer_converter_new2(encoding, encoding, 0); /* source and target encoding are identical */
+ if (convd == NULL) {
+ return NULL;
+ }
+ mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
+ mbfl_buffer_converter_illegal_substchar(convd, 0);
+ return convd; /* NOTE(review): callers in this file release it with mbfl_buffer_converter_delete() */
+}
+
+/* Feeds input (length bytes) through convd and returns 1 only when the converter reports no illegal characters and the output is byte-for-byte equal to the input; returns 0 otherwise. */
+static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
mbfl_string string, result, *ret = NULL;
long illegalchars = 0;
+ /* initialize string */
+ mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
+ mbfl_string_init(&result);
+
+ string.val = (unsigned char *) input; /* input is only wrapped, not copied */
+ string.len = length;
+
+ ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+ illegalchars = mbfl_buffer_illegalchars(convd); /* count is read from convd itself */
+
+ if (ret != NULL) {
+ if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
+ mbfl_string_clear(&result);
+ return 1;
+ }
+ mbfl_string_clear(&result); /* result is released on the mismatch path too */
+ }
+ return 0;
+}
+
+
+MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+{
+ const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+ mbfl_buffer_converter *convd;
+
if (input == NULL) {
return MBSTRG(illegalchars) == 0;
}
@@ -4743,60 +4920,388 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c
}
}
- convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
-
+ convd = php_mb_init_convd(encoding);
if (convd == NULL) {
php_error_docref(NULL, E_WARNING, "Unable to create converter");
return 0;
}
- mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
- mbfl_buffer_converter_illegal_substchar(convd, 0);
+ if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
+ mbfl_buffer_converter_delete(convd);
+ return 1;
+ }
+ mbfl_buffer_converter_delete(convd);
+ return 0;
+}
- /* initialize string */
- mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
- mbfl_string_init(&result);
- string.val = (unsigned char *) input;
- string.len = length;
+/* Checks every string key and every value of vars against one encoding.
+ *
+ * vars: table to inspect; nested arrays are checked recursively.
+ * enc:  encoding name, or NULL to use the current internal encoding. An unknown name
+ *       or the "pass" encoding raises an E_WARNING and yields 0.
+ *
+ * String keys and string values are validated with php_mb_check_encoding_impl();
+ * int, float, null and bool values always pass; any other value type fails.
+ * Returns 1 when everything is valid, 0 otherwise. Checking stops at the first failure. */
+MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
+{
+	const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+	mbfl_buffer_converter *convd;
+	zend_long idx;
+	zend_string *key;
+	zval *entry;
+	int valid = 1;
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- illegalchars = mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
+	/* idx is only needed to satisfy ZEND_HASH_FOREACH_KEY_VAL */
+	(void)(idx);
- if (ret != NULL) {
- if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
- mbfl_string_clear(&result);
- return 1;
+	if (enc != NULL) {
+		encoding = mbfl_name2encoding(ZSTR_VAL(enc));
+		if (!encoding || encoding == &mbfl_encoding_pass) {
+			php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
+			return 0;
}
+	}
- mbfl_string_clear(&result);
+	convd = php_mb_init_convd(encoding);
+	if (convd == NULL) {
+		php_error_docref(NULL, E_WARNING, "Unable to create converter");
+		return 0;
}
- return 0;
+	if (vars->u.v.nApplyCount++ > 1) {
+		vars->u.v.nApplyCount--;
+		mbfl_buffer_converter_delete(convd);
+		php_error_docref(NULL, E_WARNING, "Cannot handle circular references");
+		return 0;
+	}
+	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
+		ZVAL_DEREF(entry);
+		if (key) {
+			if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
+				valid = 0;
+				break;
+			}
+		}
+		switch (Z_TYPE_P(entry)) {
+			case IS_STRING:
+				if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
+					valid = 0;
+				}
+				break;
+			case IS_ARRAY:
+				if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
+					valid = 0;
+				}
+				break;
+			case IS_LONG:
+			case IS_DOUBLE:
+			case IS_NULL:
+			case IS_TRUE:
+			case IS_FALSE:
+				break;
+			default:
+				/* Other types are error. */
+				valid = 0;
+				break;
+		}
+		/* a break inside the switch only leaves the switch, so leave the loop here */
+		if (!valid) {
+			break;
+		}
+	} ZEND_HASH_FOREACH_END();
+	vars->u.v.nApplyCount--;
+	mbfl_buffer_converter_delete(convd);
+	return valid;
}
-/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
+
+/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)
{
- char *var = NULL;
- size_t var_len;
- char *enc = NULL;
- size_t enc_len;
+ zval *input = NULL; /* stays NULL when the function is called without arguments */
+ zend_string *enc = NULL; /* NULL means: use the current internal encoding */
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
return;
- }
+ }
- RETVAL_FALSE;
+ /* FIXME: Actually check all inputs, except $_FILES file content. */
+ if (input == NULL) { /* no value given: answer from the MBSTRG(illegalchars) counter instead */
+ if (MBSTRG(illegalchars) == 0) {
+ RETURN_TRUE;
+ }
+ RETURN_FALSE;
+ }
- if (php_mb_check_encoding(var, var_len, enc)) {
- RETVAL_TRUE;
+ switch(Z_TYPE_P(input)) {
+ case IS_LONG:
+ case IS_DOUBLE:
+ case IS_NULL:
+ case IS_TRUE:
+ case IS_FALSE:
+ RETURN_TRUE; /* int, float, null and bool are reported valid without inspection */
+ break;
+ case IS_STRING:
+ if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
+ RETURN_FALSE;
+ }
+ break;
+ case IS_ARRAY:
+ if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) { /* checks string keys and values, descending into nested arrays */
+ RETURN_FALSE;
+ }
+ break;
+ default:
+ php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array");
+ RETURN_FALSE;
}
+ RETURN_TRUE; /* reached only after a string or array passed its check */
}
/* }}} */
+
+/* Returns the code point of the first character of str, or -1 on failure.
+ *
+ * str, str_len: input bytes; an empty string raises an E_WARNING and fails.
+ * enc:          encoding name, or NULL for the current internal encoding. Unknown or
+ *               unsupported encodings raise an E_WARNING and fail.
+ *
+ * The whole string is converted to UCS-4BE and the first four result bytes are decoded.
+ * MBSTRG(illegalchars) is zeroed around the conversion and restored afterwards; a
+ * non-zero count after the conversion makes the call fail.
+ * NOTE(review): this assumes php_mb_convert_encoding() updates MBSTRG(illegalchars) -
+ * confirm against its definition. */
+static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
+{
+	enum mbfl_no_encoding no_enc;
+	const char *enc_name;
+	char* ret;
+	size_t ret_len = 0;
+	zend_long cp;
+
+	if (enc == NULL) {
+		no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+		enc_name = MBSTRG(current_internal_encoding)->name;
+	} else {
+		no_enc = mbfl_name2no_encoding(enc);
+		enc_name = enc;
+
+		if (no_enc == mbfl_no_encoding_invalid) {
+			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+			return -1;
+		}
+	}
+
+	if (php_mb_is_unsupported_no_encoding(no_enc)) {
+		/* report the effective encoding name; enc itself may be NULL here */
+		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
+		return -1;
+	}
+
+	if (str_len == 0) {
+		php_error_docref(NULL, E_WARNING, "Empty string");
+		return -1;
+	}
+
+	{
+		long orig_illegalchars = MBSTRG(illegalchars);
+		MBSTRG(illegalchars) = 0;
+		ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
+		if (MBSTRG(illegalchars) != 0) {
+			if (ret) {
+				efree(ret);
+			}
+			MBSTRG(illegalchars) = orig_illegalchars;
+			return -1;
+		}
+
+		MBSTRG(illegalchars) = orig_illegalchars;
+	}
+
+	if (ret == NULL) {
+		return -1;
+	}
+
+	/* one UCS-4BE character needs four bytes; never read past a shorter result */
+	if (ret_len < 4) {
+		efree(ret);
+		return -1;
+	}
+
+	cp = (unsigned char) ret[0] << 24 | \
+		 (unsigned char) ret[1] << 16 | \
+		 (unsigned char) ret[2] <<  8 | \
+		 (unsigned char) ret[3];
+
+	efree(ret);
+
+	return cp;
+}
+
+
+/* {{{ proto int|false mb_ord(string str [, string encoding])
+   Returns the code point computed by php_mb_ord() for str, or false when it reports failure */
+PHP_FUNCTION(mb_ord)
+{
+	char *input_str;
+	size_t input_len;
+	char *encoding_name = NULL;
+	size_t encoding_name_len;
+	zend_long code_point;
+
+	ZEND_PARSE_PARAMETERS_START(1, 2)
+		Z_PARAM_STRING(input_str, input_len)
+		Z_PARAM_OPTIONAL
+		Z_PARAM_STRING(encoding_name, encoding_name_len)
+	ZEND_PARSE_PARAMETERS_END();
+
+	code_point = php_mb_ord(input_str, input_len, encoding_name);
+
+	/* php_mb_ord() signals every failure with a negative value */
+	if (code_point >= 0) {
+		RETURN_LONG(code_point);
+	}
+
+	RETURN_FALSE;
+}
+/* }}} */
+
+
+/* Encodes the code point cp in the given encoding.
+ *
+ * cp:         code point; values outside 0..0x10ffff fail.
+ * enc:        encoding name, or NULL for the current internal encoding. Unknown or
+ *             unsupported encodings raise an E_WARNING and fail.
+ * output_len: if non-NULL, receives the byte length of the result on success.
+ *
+ * UTF-8 variants are encoded directly (surrogates 0xd800..0xdfff fail); every other
+ * encoding is produced by converting a 4-byte UCS-4BE buffer. MBSTRG(illegalchars) is
+ * zeroed around that conversion and restored afterwards; a non-zero count fails the call.
+ *
+ * Returns an emalloc'ed buffer the caller must efree(), or NULL on failure. The result
+ * may contain NUL bytes, so callers must use *output_len rather than strlen(). */
+static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
+{
+	enum mbfl_no_encoding no_enc;
+	const char *enc_name;
+	char* buf;
+	size_t buf_len;
+	char* ret;
+	size_t ret_len = 0;
+
+	if (enc == NULL) {
+		no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+		enc_name = MBSTRG(current_internal_encoding)->name;
+	} else {
+		no_enc = mbfl_name2no_encoding(enc);
+		enc_name = enc;
+		if (no_enc == mbfl_no_encoding_invalid) {
+			php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+			return NULL;
+		}
+	}
+
+	if (php_mb_is_unsupported_no_encoding(no_enc)) {
+		/* report the effective encoding name; enc itself may be NULL here */
+		php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
+		return NULL;
+	}
+
+	if (cp < 0 || cp > 0x10ffff) {
+		return NULL;
+	}
+
+	if (php_mb_is_no_encoding_utf8(no_enc)) {
+		if (cp > 0xd7ff && 0xe000 > cp) {
+			return NULL;
+		}
+
+		/* pick the UTF-8 sequence length first, then allocate once */
+		if (cp < 0x80) {
+			ret_len = 1;
+		} else if (cp < 0x800) {
+			ret_len = 2;
+		} else if (cp < 0x10000) {
+			ret_len = 3;
+		} else {
+			ret_len = 4;
+		}
+
+		ret = (char *) safe_emalloc(ret_len, 1, 1);
+		switch (ret_len) {
+			case 1:
+				ret[0] = cp;
+				break;
+			case 2:
+				ret[0] = 0xc0 | (cp >> 6);
+				ret[1] = 0x80 | (cp & 0x3f);
+				break;
+			case 3:
+				ret[0] = 0xe0 | (cp >> 12);
+				ret[1] = 0x80 | ((cp >> 6) & 0x3f);
+				ret[2] = 0x80 | (cp & 0x3f);
+				break;
+			default:
+				ret[0] = 0xf0 | (cp >> 18);
+				ret[1] = 0x80 | ((cp >> 12) & 0x3f);
+				ret[2] = 0x80 | ((cp >> 6) & 0x3f);
+				ret[3] = 0x80 | (cp & 0x3f);
+				break;
+		}
+		ret[ret_len] = 0;
+
+		if (output_len) {
+			*output_len = ret_len;
+		}
+
+		return ret;
+	}
+
+	/* every other encoding: hand the code point to the converter as UCS-4BE */
+	buf_len = 4;
+	buf = (char *) safe_emalloc(buf_len, 1, 1);
+	buf[0] = (cp >> 24) & 0xff;
+	buf[1] = (cp >> 16) & 0xff;
+	buf[2] = (cp >>  8) & 0xff;
+	buf[3] = cp & 0xff;
+	buf[4] = 0;
+
+	{
+		long orig_illegalchars = MBSTRG(illegalchars);
+		MBSTRG(illegalchars) = 0;
+		ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
+		if (MBSTRG(illegalchars) != 0) {
+			efree(buf);
+			if (ret) {
+				efree(ret);
+			}
+			MBSTRG(illegalchars) = orig_illegalchars;
+			return NULL;
+		}
+
+		MBSTRG(illegalchars) = orig_illegalchars;
+	}
+
+	efree(buf);
+	if (output_len) {
+		*output_len = ret_len;
+	}
+
+	return ret;
+}
+
+
+/* {{{ proto string|false mb_chr(int cp [, string encoding])
+   Returns the bytes php_mb_chr() produces for code point cp, or false when it fails */
+PHP_FUNCTION(mb_chr)
+{
+	zend_long cp;
+	char* enc = NULL;
+	size_t enc_len;
+	char* ret;
+	size_t ret_len = 0;
+
+	ZEND_PARSE_PARAMETERS_START(1, 2)
+		Z_PARAM_LONG(cp)
+		Z_PARAM_OPTIONAL
+		Z_PARAM_STRING(enc, enc_len)
+	ZEND_PARSE_PARAMETERS_END();
+
+	ret = php_mb_chr(cp, enc, &ret_len);
+
+	if (ret == NULL) {
+		RETURN_FALSE;
+	}
+
+	/* Use the explicit length: the result can contain NUL bytes (cp 0, or encodings whose
+	 * output includes zero bytes), which a strlen()-based copy would cut off. */
+	RETVAL_STRINGL(ret, ret_len);
+	efree(ret);
+}
+/* }}} */
+
+
+/* Converts str from enc to the same enc via php_mb_convert_encoding() and returns that
+ * function's result unchanged (NULL included). The converted length is not exposed. */
+static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc)
+{
+	size_t discarded_len;
+	char *scrubbed;
+
+	scrubbed = php_mb_convert_encoding(str, str_len, enc, enc, &discarded_len);
+
+	return scrubbed;
+}
+
+
+/* {{{ proto string|false mb_scrub(string str [, string encoding])
+   Returns str converted from encoding to the same encoding, or false on failure.
+   encoding defaults to the current internal encoding; a name rejected by
+   mbfl_is_support_encoding() raises an E_WARNING and returns false */
+PHP_FUNCTION(mb_scrub)
+{
+	char* str;
+	size_t str_len;
+	char *enc = NULL;
+	size_t enc_len;
+	char *ret;
+	size_t ret_len = 0;
+
+	ZEND_PARSE_PARAMETERS_START(1, 2)
+		Z_PARAM_STRING(str, str_len)
+		Z_PARAM_OPTIONAL
+		Z_PARAM_STRING(enc, enc_len)
+	ZEND_PARSE_PARAMETERS_END();
+
+	if (enc == NULL) {
+		enc = (char *) MBSTRG(current_internal_encoding)->name;
+	} else if (!mbfl_is_support_encoding(enc)) {
+		php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+		RETURN_FALSE;
+	}
+
+	/* Convert directly so the result length is available: the scrubbed string may contain
+	 * NUL bytes, and a strlen()-based copy would truncate it at the first one. */
+	ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+
+	if (ret == NULL) {
+		RETURN_FALSE;
+	}
+
+	RETVAL_STRINGL(ret, ret_len);
+	efree(ret);
+}
+/* }}} */
+
+
/* {{{ php_mb_populate_current_detect_order_list */
static void php_mb_populate_current_detect_order_list(void)
{