diff options
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- | ext/mbstring/mbstring.c | 133 |
1 files changed, 47 insertions, 86 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index b6395aecdd..416dc9a6af 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -34,12 +34,12 @@ #include "libmbfl/mbfl/mbfilter_8bit.h" #include "libmbfl/mbfl/mbfilter_pass.h" #include "libmbfl/mbfl/mbfilter_wchar.h" -#include "libmbfl/filters/mbfilter_ascii.h" #include "libmbfl/filters/mbfilter_base64.h" #include "libmbfl/filters/mbfilter_qprint.h" #include "libmbfl/filters/mbfilter_ucs4.h" #include "libmbfl/filters/mbfilter_utf8.h" #include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h" +#include "libmbfl/filters/mbfilter_singlebyte.h" #include "php_variables.h" #include "php_globals.h" @@ -73,9 +73,9 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); -static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); +static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); -static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); +static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -252,14 +252,14 @@ static size_t count_commas(const char *p, const char *end) { */ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num, - zend_bool allow_pass_encoding) + bool allow_pass_encoding) { if (value == NULL || value_length == 0) { *return_list = NULL; *return_size = 0; return SUCCESS; } else { - zend_bool included_auto; + bool included_auto; size_t n, size; char *p1, *endp, *tmpstr; const mbfl_encoding **entry, **list; @@ -347,7 +347,7 @@ static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encodi size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size); const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*)); const mbfl_encoding **entry = list; - zend_bool included_auto = 0; + bool included_auto = 0; size_t n = 0; zval *hash_entry; ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { @@ -403,13 +403,7 @@ static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encodin static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) { const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; - if (encoding->flag & MBFL_ENCTYPE_SBCS) { - return 1; - } - if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { - return 1; - } - return 0; + return !(encoding->flag & MBFL_ENCTYPE_GL_UNSAFE); } static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size) @@ -1421,7 +1415,7 @@ PHP_FUNCTION(mb_substitute_character) { zend_string *substitute_character = NULL; zend_long substitute_codepoint; - zend_bool substitute_is_null = 1; + bool substitute_is_null = 1; ZEND_PARSE_PARAMETERS_START(0, 1) Z_PARAM_OPTIONAL @@ -1497,9 +1491,6 @@ PHP_FUNCTION(mb_preferred_mime_name) } /* }}} */ -#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0) -#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0) - /* {{{ Parses GET/POST/COOKIE data and sets global variables */ PHP_FUNCTION(mb_parse_str) { @@ -1717,10 +1708,10 @@ PHP_FUNCTION(mb_str_split) if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */ mb_len = string.len; chunk_len = (size_t)split_length; /* chunk length in bytes */ - } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */ + } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS2) { /* 2 bytes */ mb_len = string.len / 2; chunk_len = split_length * 2; - } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */ + } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS4) { /* 4 bytes */ mb_len = string.len / 4; chunk_len = split_length * 4; } else if (mbfl_encoding->mblen_table != NULL) { @@ -2003,7 +1994,7 @@ static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int va char *haystack_val, *needle_val; mbfl_string haystack, needle, result, *ret = NULL; zend_string *encoding_name = NULL; - zend_bool part = 0; + bool part = 0; ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack_val, haystack.len) @@ -2127,7 +2118,7 @@ PHP_FUNCTION(mb_substr) zend_long from, len; size_t real_from, real_len; size_t str_len; - zend_bool len_is_null = 1; + bool len_is_null = 1; mbfl_string string, result, *ret; ZEND_PARSE_PARAMETERS_START(2, 4) @@ -2191,7 +2182,7 @@ PHP_FUNCTION(mb_strcut) zend_string *encoding = NULL; char *string_val; zend_long from, len; - zend_bool len_is_null = 1; + bool len_is_null = 1; mbfl_string string, result, *ret; ZEND_PARSE_PARAMETERS_START(2, 4) @@ -2335,7 +2326,7 @@ PHP_FUNCTION(mb_strimwidth) /* See mbfl_no_encoding definition for list of unsupported encodings */ -static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) +static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) { return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) @@ -2345,7 +2336,7 @@ static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding /* See mbfl_no_encoding definition for list of UTF-8 encodings */ -static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) +static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) { return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb); } @@ -2504,7 +2495,7 @@ PHP_FUNCTION(mb_convert_encoding) HashTable *input_ht, *from_encodings_ht = NULL; const mbfl_encoding **from_encodings; size_t num_from_encodings; - zend_bool free_from_encodings; + bool free_from_encodings; ZEND_PARSE_PARAMETERS_START(2, 3) Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str) @@ -2676,13 +2667,13 @@ PHP_FUNCTION(mb_detect_encoding) size_t str_len; zend_string *encoding_str = NULL; HashTable *encoding_ht = NULL; - zend_bool strict = 0; + bool strict = 0; mbfl_string string; const mbfl_encoding *ret; const mbfl_encoding **elist; size_t size; - zend_bool free_elist; + bool free_elist; ZEND_PARSE_PARAMETERS_START(1, 3) Z_PARAM_STRING(str, str_len) @@ -2764,8 +2755,7 @@ PHP_FUNCTION(mb_encoding_aliases) array_init(return_value); if (encoding->aliases != NULL) { - const char **alias; - for (alias = *encoding->aliases; *alias; ++alias) { + for (const char **alias = encoding->aliases; *alias; ++alias) { add_next_index_string(return_value, (char *)*alias); } } @@ -3188,7 +3178,7 @@ PHP_FUNCTION(mb_encode_numericentity) zend_string *encoding = NULL; int mapsize; HashTable *target_hash; - zend_bool is_hex = 0; + bool is_hex = 0; mbfl_string string, result, *ret; ZEND_PARSE_PARAMETERS_START(2, 4) @@ -3649,7 +3639,7 @@ PHP_FUNCTION(mb_send_mail) if (!suppressed_hdrs.cnt_type) { mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1); - p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding); + p = (char *)mbfl_encoding_preferred_mime_name(tran_cs); if (p != NULL) { mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1); mbfl_memory_device_strcat(&device, p); @@ -3658,7 +3648,7 @@ PHP_FUNCTION(mb_send_mail) } if (!suppressed_hdrs.cnt_trans_enc) { mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1); - p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding); + p = (char *)mbfl_encoding_preferred_mime_name(body_enc); if (p == NULL) { p = "7bit"; } @@ -3859,70 +3849,43 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ - -static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding) +static int mbfl_filt_check_errors(int c, void* data) { - mbfl_buffer_converter *convd; - - convd = mbfl_buffer_converter_new(encoding, encoding, 0); - if (convd == NULL) { - return NULL; + if (c & MBFL_WCSGROUP_THROUGH) { + (*((mbfl_convert_filter**)data))->num_illegalchar++; } - mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); - mbfl_buffer_converter_illegal_substchar(convd, 0); - return convd; + return c; } +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding) +{ + mbfl_convert_filter *filter = mbfl_convert_filter_new(encoding, &mbfl_encoding_wchar, mbfl_filt_check_errors, NULL, &filter); -static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { - mbfl_string string, result; - - mbfl_string_init_set(&string, encoding); - mbfl_string_init(&result); - - string.val = (unsigned char *) input; - string.len = length; - - mbfl_string *ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - size_t illegalchars = mbfl_buffer_illegalchars(convd); - - if (ret != NULL) { - if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - mbfl_string_clear(&result); - return 1; + while (length--) { + unsigned char c = *input++; + (filter->filter_function)(c, filter); + if (filter->num_illegalchar) { + mbfl_convert_filter_delete(filter); + return 0; } - mbfl_string_clear(&result); } - return 0; -} - -MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding) -{ - mbfl_buffer_converter *convd = php_mb_init_convd(encoding); - /* If this assertion fails this means some memory allocation failure which is a bug */ - ZEND_ASSERT(convd != NULL); - int result = php_mb_check_encoding_impl(convd, input, length, encoding); - mbfl_buffer_converter_delete(convd); + (filter->filter_flush)(filter); + int result = !filter->num_illegalchar; + mbfl_convert_filter_delete(filter); return result; } static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding) { - mbfl_buffer_converter *convd; zend_long idx; zend_string *key; zval *entry; int valid = 1; - (void)(idx); - - convd = php_mb_init_convd(encoding); - /* If this assertion fails this means some memory allocation failure which is a bug */ - ZEND_ASSERT(convd != NULL); + (void)(idx); /* Suppress spurious compiler warning that `idx` is not used */ if (GC_IS_RECURSIVE(vars)) { - mbfl_buffer_converter_delete(convd); php_error_docref(NULL, E_WARNING, "Cannot not handle circular references"); return 0; } @@ -3930,14 +3893,14 @@ static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) { ZVAL_DEREF(entry); if (key) { - if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { + if (!php_mb_check_encoding(ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { valid = 0; break; } } switch (Z_TYPE_P(entry)) { case IS_STRING: - if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { + if (!php_mb_check_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { valid = 0; break; } @@ -3961,11 +3924,9 @@ static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding } } ZEND_HASH_FOREACH_END(); GC_TRY_UNPROTECT_RECURSION(vars); - mbfl_buffer_converter_delete(convd); return valid; } - /* {{{ Check if the string is valid for the specified encoding */ PHP_FUNCTION(mb_check_encoding) { @@ -4243,14 +4204,14 @@ static int php_mb_encoding_translation(void) /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) { - if (enc != NULL) { - if (enc->flag & MBFL_ENCTYPE_MBCS) { - if (enc->mblen_table != NULL) { - if (s != NULL) return enc->mblen_table[*(unsigned char *)s]; + if (enc) { + if (enc->mblen_table) { + if (s) { + return enc->mblen_table[*(unsigned char *)s]; } - } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + } else if (enc->flag & MBFL_ENCTYPE_WCS2) { return 2; - } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + } else if (enc->flag & MBFL_ENCTYPE_WCS4) { return 4; } } |