summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rui Hirokawa <hirokawa@php.net> 2011-07-18 08:36:17 +0000
committer Rui Hirokawa <hirokawa@php.net> 2011-07-18 08:36:17 +0000
commit b150a6db240b1a955d617b7bca3316ca84bd3453 (patch)
tree c5b7728e346afac4ccd5557c2aec2010448ddb74
parent 6d3ff0eda1996745019f561e445ce4011ad4c746 (diff)
download php-git-b150a6db240b1a955d617b7bca3316ca84bd3453.tar.gz
Added encoding/decoding of numeric entities in hexadecimal format.
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfilter.c 143
-rw-r--r-- ext/mbstring/mbstring.c 14
2 files changed, 151 insertions, 6 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
index 9eec3b4afc..9f929cc3b2 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@@ -2746,7 +2746,9 @@ collector_decode_htmlnumericentity(int c, void *data)
}
break;
case 2:
- if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+ if (c == 0x78) { /* 'x' */
+ pc->status = 4;
+ } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
pc->cache = c - 0x30;
pc->status = 3;
pc->digit = 1;
@@ -2810,6 +2812,89 @@ collector_decode_htmlnumericentity(int c, void *data)
(*pc->decoder->filter_function)(c, pc->decoder);
}
break;
+ case 4:
+ if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+ pc->cache = c - 0x30;
+ pc->status = 5;
+ pc->digit = 1;
+ } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
+ pc->cache = c - 0x41 + 10;
+ pc->status = 5;
+ pc->digit = 1;
+ } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
+ pc->cache = c - 0x61 + 10;
+ pc->status = 5;
+ pc->digit = 1;
+ } else {
+ pc->status = 0;
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
+ case 5:
+ s = 0;
+ f = 0;
+ if ((c >= 0x30 && c <= 0x39) ||
+ (c >= 0x41 && c <= 0x46) ||
+ (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
+ if (pc->digit > 9) {
+ pc->status = 0;
+ s = pc->cache;
+ f = 1;
+ } else {
+ if (c >= 0x30 && c <= 0x39) {
+ s = pc->cache*16 + (c - 0x30);
+ } else if (c >= 0x41 && c <= 0x46) {
+ s = pc->cache*16 + (c - 0x41 + 10);
+ } else {
+ s = pc->cache*16 + (c - 0x61 + 10);
+ }
+ pc->cache = s;
+ pc->digit++;
+ }
+ } else {
+ pc->status = 0;
+ s = pc->cache;
+ f = 1;
+ n = 0;
+ size = pc->mapsize;
+ while (n < size) {
+ mapelm = &(pc->convmap[n*4]);
+ d = s - mapelm[2];
+ if (d >= mapelm[0] && d <= mapelm[1]) {
+ f = 0;
+ (*pc->decoder->filter_function)(d, pc->decoder);
+ if (c != 0x3b) { /* ';' */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
+ }
+ n++;
+ }
+ }
+ if (f) {
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ r = 1;
+ n = pc->digit;
+ while (n > 0) {
+ r *= 16;
+ n--;
+ }
+ s %= r;
+ r /= 16;
+ while (r > 0) {
+ d = s/r;
+ s %= r;
+ r /= 16;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
+ }
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
default:
if (c == 0x26) { /* '&' */
pc->status = 1;
@@ -2822,6 +2907,53 @@ collector_decode_htmlnumericentity(int c, void *data)
return c;
}
+static int
+collector_encode_hex_htmlnumericentity(int c, void *data)
+{ /* Writes c to pc->decoder as a hex numeric entity ("&#x" digits ";") when a convmap entry covers it, otherwise forwards c unchanged; always returns c. */
+ struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
+ int f, n, s, r, d, size, *mapelm;
+
+ size = pc->mapsize; /* number of 4-int entries scanned in pc->convmap */
+ f = 0; /* becomes 1 once an entity has been written for c */
+ n = 0;
+ while (n < size) {
+ mapelm = &(pc->convmap[n*4]); /* entry layout as used below: [0] low bound, [1] high bound, [2] added to c, [3] ANDed with the sum */
+ if (c >= mapelm[0] && c <= mapelm[1]) {
+ s = (c + mapelm[2]) & mapelm[3];
+ if (s >= 0) { /* a negative result is not emitted; scanning continues with the next entry */
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ r = 0x1000000; /* 16^6 */
+ s %= r; /* keep at most six hex digits */
+ while (r > 0) { /* walk the hex places from highest to lowest */
+ d = s/r;
+ if (d || f) { /* suppress leading zeros until the first non-zero digit is written */
+ f = 1;
+ s %= r;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); /* presumably maps 0-15 to a hex digit character; table is defined outside this view */
+ }
+ r /= 16;
+ }
+ if (!f) { /* value was 0, so the loop wrote nothing: emit a single digit */
+ f = 1;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
+ }
+ (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
+ }
+ }
+ if (f) { /* an entity was written; stop scanning the map */
+ break;
+ }
+ n++;
+ }
+ if (!f) { /* no map entry produced an entity: pass c through as-is */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+
+ return c;
+}
+
mbfl_string *
mbfl_html_numeric_entity(
mbfl_string *string,
@@ -2850,12 +2982,17 @@ mbfl_html_numeric_entity(
string->no_encoding,
mbfl_memory_device_output, 0, &device);
/* wchar filter */
- if (type == 0) {
+ if (type == 0) { /* decimal output */
encoder = mbfl_convert_filter_new(
string->no_encoding,
mbfl_no_encoding_wchar,
collector_encode_htmlnumericentity, 0, &pc);
- } else {
+ } else if (type == 2) { /* hex output */
+ encoder = mbfl_convert_filter_new(
+ string->no_encoding,
+ mbfl_no_encoding_wchar,
+ collector_encode_hex_htmlnumericentity, 0, &pc);
+ } else { /* type == 1: decimal/hex input */
encoder = mbfl_convert_filter_new(
string->no_encoding,
mbfl_no_encoding_wchar,
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 34644a553b..f87b51abf7 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -412,6 +412,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, convmap)
ZEND_ARG_INFO(0, encoding)
+ ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
@@ -3682,10 +3683,11 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
HashTable *target_hash;
size_t argc = ZEND_NUM_ARGS();
int i, *convmap, *mapelm, mapsize=0;
+ zend_bool is_hex = 0;
mbfl_string string, result, *ret;
enum mbfl_no_encoding no_encoding;
- if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
return;
}
@@ -3696,7 +3698,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
string.len = str_len;
/* encoding */
- if (argc == 3) {
+ if ((argc == 3 || argc == 4) && encoding_len > 0) {
no_encoding = mbfl_name2no_encoding(encoding);
if (no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3706,6 +3708,12 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
}
}
+ if (argc == 4) {
+ if (type == 0 && is_hex) {
+ type = 2; /* output in hex format */
+ }
+ }
+
/* conversion map */
convmap = NULL;
if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
@@ -3743,7 +3751,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
}
/* }}} */
-/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
+/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{