diff options
author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-12 02:51:11 +0000 |
---|---|---|
committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-12 02:51:11 +0000 |
commit | 99b613cbc8f862de0606c226272f7538b63a612a (patch) | |
tree | c8171db9b950d6ec29029c2946e86347fe445157 /ext/standard/html.c | |
parent | 40c3aefafbf7553ca8480fa88dad97b2b8b7b552 (diff) | |
download | php-git-99b613cbc8f862de0606c226272f7538b63a612a.tar.gz |
- Added a 3rd parameter to get_html_translation_table. It now takes a charset
hint, like htmlentities et al.
- Fixed bug #49407 (get_html_translation_table doesn't handle UTF-8).
- Fixed bug #25927 (get_html_translation_table calls the ' &#39; instead of
&#039;).
- Fixed tests for get_html_translation_table and unified the Windows and
non-Windows versions of the tests.
Diffstat (limited to 'ext/standard/html.c')
-rw-r--r-- | ext/standard/html.c | 90 |
1 files changed, 61 insertions, 29 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index 3edcaa6dac..9cc17836a1 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -867,7 +867,7 @@ det_charset: /* }}} */ /* {{{ php_utf32_utf8 */ -size_t php_utf32_utf8(unsigned char *buf, int k) +size_t php_utf32_utf8(unsigned char *buf, unsigned k) { size_t retval = 0; @@ -1408,54 +1408,86 @@ PHP_FUNCTION(htmlentities) } /* }}} */ -/* {{{ proto array get_html_translation_table([int table [, int quote_style]]) +/* {{{ proto array get_html_translation_table([int table [, int quote_style [, string charset_hint]]]) Returns the internal translation table used by htmlspecialchars and htmlentities */ PHP_FUNCTION(get_html_translation_table) { long which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT; - int i, j; - char ind[2]; - enum entity_charset charset = determine_charset(NULL TSRMLS_CC); + unsigned int i; + int j; + unsigned char ind[5]; /* max # of 8-bit code units (4; for UTF-8) + 1 for \0 */ + void *dummy; + char *charset_hint = NULL; + int charset_hint_len; + enum entity_charset charset; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ll", &which, &quote_style) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|lls", + &which, &quote_style, &charset_hint, &charset_hint_len) == FAILURE) { return; } - array_init(return_value); + charset = determine_charset(charset_hint TSRMLS_CC); - ind[1] = 0; + array_init(return_value); switch (which) { - case HTML_ENTITIES: - for (j=0; entity_map[j].charset != cs_terminator; j++) { - if (entity_map[j].charset != charset) + case HTML_ENTITIES: + for (j = 0; entity_map[j].charset != cs_terminator; j++) { + if (entity_map[j].charset != charset) + continue; + for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) { + char buffer[16]; + unsigned k; + size_t written; + + if (entity_map[j].table[i] == NULL) continue; - for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) { - char buffer[16]; + + k = i + 
entity_map[j].basechar; - if (entity_map[j].table[i] == NULL) - continue; - /* what about wide chars here ?? */ - ind[0] = i + entity_map[j].basechar; - snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]); - add_assoc_string(return_value, ind, buffer, 1); + switch (charset) { + case cs_utf_8: + written = php_utf32_utf8(ind, k); + ind[written] = '\0'; + break; + case cs_big5: + case cs_gb2312: + case cs_big5hkscs: + case cs_sjis: + /* we have no mappings for these, but if we had... */ + /* break through */ + default: /* one byte */ + written = 1; + ind[0] = (unsigned char)k; + ind[1] = '\0'; + break; + } + snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]); + if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, written+1, &dummy) == FAILURE) { + /* in case of the single quote, which is repeated, the first one wins, + * so don't replace the existint mapping */ + add_assoc_string(return_value, (const char*)ind, buffer, 1); } } - /* break thru */ - - case HTML_SPECIALCHARS: - for (j = 0; basic_entities[j].charcode != 0; j++) { + } + /* break thru */ - if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0) - continue; + case HTML_SPECIALCHARS: + add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1); + for (j = 0; basic_entities[j].charcode != 0; j++) { + if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0) + continue; - ind[0] = (unsigned char)basic_entities[j].charcode; - add_assoc_stringl(return_value, ind, basic_entities[j].entity, basic_entities[j].entitylen, 1); + ind[0] = (unsigned char)basic_entities[j].charcode; + ind[1] = '\0'; + if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, 2, &dummy) == FAILURE) { + add_assoc_stringl(return_value, ind, basic_entities[j].entity, + basic_entities[j].entitylen, 1); } - add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1); + } - break; + break; } } /* }}} */ |