summaryrefslogtreecommitdiff
path: root/ext/mbstring
diff options
context:
space:
mode:
author: Rui Hirokawa <hirokawa@php.net> 2003-07-13 07:34:28 +0000
committer: Rui Hirokawa <hirokawa@php.net> 2003-07-13 07:34:28 +0000
commit: 63289837ae939e5c6bd0d004ba0613fd6f8e43ea (patch)
tree: f4341c99af53d698d7400bb11003e6e770019dd6 /ext/mbstring
parent: 439596bce5ce74fcb8d86b50fcd316adb5bf15c9 (diff)
download: php-git-63289837ae939e5c6bd0d004ba0613fd6f8e43ea.tar.gz
Added a strict detection mode to mb_detect_encoding to detect corrupted byte sequences.
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- ext/mbstring/mbfilter.c 10
-rw-r--r-- ext/mbstring/mbfilter.h 4
-rw-r--r-- ext/mbstring/mbstring.c 19
3 files changed, 21 insertions, 12 deletions
diff --git a/ext/mbstring/mbfilter.c b/ext/mbstring/mbfilter.c
index 986077aa0f..427221dee8 100644
--- a/ext/mbstring/mbfilter.c
+++ b/ext/mbstring/mbfilter.c
@@ -7473,7 +7473,7 @@ mbfl_convert_encoding(
* identify encoding
*/
const mbfl_encoding *
-mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC)
+mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC)
{
int i, n, num, bad;
unsigned char *p;
@@ -7517,7 +7517,7 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el
}
i++;
}
- if ((num - 1) <= bad) {
+ if ((num - 1) <= bad && !strict) {
break;
}
p++;
@@ -7556,11 +7556,11 @@ mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int el
}
const char*
-mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC)
+mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC)
{
const mbfl_encoding *encoding;
- encoding = mbfl_identify_encoding(string, elist, eliztsz TSRMLS_CC);
+ encoding = mbfl_identify_encoding(string, elist, eliztsz, strict TSRMLS_CC);
if (encoding != NULL &&
encoding->no_encoding > mbfl_no_encoding_charset_min &&
encoding->no_encoding < mbfl_no_encoding_charset_max) {
@@ -7575,7 +7575,7 @@ mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int
{
const mbfl_encoding *encoding;
- encoding = mbfl_identify_encoding(string, elist, eliztsz TSRMLS_CC);
+ encoding = mbfl_identify_encoding(string, elist, eliztsz, 0 TSRMLS_CC);
if (encoding != NULL &&
encoding->no_encoding > mbfl_no_encoding_charset_min &&
encoding->no_encoding < mbfl_no_encoding_charset_max) {
diff --git a/ext/mbstring/mbfilter.h b/ext/mbstring/mbfilter.h
index f39ed8b2f5..a8a55ae40f 100644
--- a/ext/mbstring/mbfilter.h
+++ b/ext/mbstring/mbfilter.h
@@ -460,10 +460,10 @@ mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_enc
* identify encoding
*/
const mbfl_encoding *
-mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
+mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC);
const char *
-mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
+mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz, int strict TSRMLS_DC);
const enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int eliztsz TSRMLS_DC);
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index d03d6c05db..46cd058647 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -2694,15 +2694,15 @@ PHP_FUNCTION(mb_strtolower)
}
/* }}} */
-/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list])
+/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)
{
- pval **arg_str, **arg_list;
+ pval **arg_str, **arg_list, **arg_strict;
mbfl_string string;
const char *ret;
enum mbfl_no_encoding *elist;
- int size, *list;
+ int size, *list, strict = 0;
if (ZEND_NUM_ARGS() == 1) {
if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
@@ -2712,6 +2712,10 @@ PHP_FUNCTION(mb_detect_encoding)
if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) {
WRONG_PARAM_COUNT;
}
+ } else if (ZEND_NUM_ARGS() == 3) {
+ if (zend_get_parameters_ex(3, &arg_str, &arg_list, &arg_strict) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
} else {
WRONG_PARAM_COUNT;
}
@@ -2719,7 +2723,7 @@ PHP_FUNCTION(mb_detect_encoding)
/* make encoding list */
list = NULL;
size = 0;
- if (ZEND_NUM_ARGS() >= 2) {
+ if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) {
switch (Z_TYPE_PP(arg_list)) {
case IS_ARRAY:
if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0)) {
@@ -2744,6 +2748,11 @@ PHP_FUNCTION(mb_detect_encoding)
}
}
+ if (ZEND_NUM_ARGS() == 3) {
+ convert_to_long_ex(arg_strict);
+ strict = Z_LVAL_PP(arg_strict);
+ }
+
if (size > 0 && list != NULL) {
elist = list;
} else {
@@ -2756,7 +2765,7 @@ PHP_FUNCTION(mb_detect_encoding)
string.no_language = MBSTRG(current_language);
string.val = (unsigned char *)Z_STRVAL_PP(arg_str);
string.len = Z_STRLEN_PP(arg_str);
- ret = mbfl_identify_encoding_name(&string, elist, size TSRMLS_CC);
+ ret = mbfl_identify_encoding_name(&string, elist, size, strict TSRMLS_CC);
if (list != NULL) {
efree((void *)list);
}