diff options
author | Jakub Zelenka <bukka@php.net> | 2017-07-16 12:35:24 +0100 |
---|---|---|
committer | Jakub Zelenka <bukka@php.net> | 2017-07-16 12:35:24 +0100 |
commit | 0d0f7cde8c321ae841ba0f60f4377b72f98db637 (patch) | |
tree | 9382ab16cf35c723187e4894b2c0bb38d490abb4 /ext/json/json_encoder.c | |
parent | f6a44916623f9d8dc24d35d52f2998700f0567a6 (diff) | |
download | php-git-0d0f7cde8c321ae841ba0f60f4377b72f98db637.tar.gz |
Add JSON_INVALID_UTF8_SUBSTITUTE and JSON_INVALID_UTF8_IGNORE
It implements request #65082 and adds options for either replacing or
ignoring invalid UTF-8 sequences in json_encode and json_decode.
Diffstat (limited to 'ext/json/json_encoder.c')
-rw-r--r-- | ext/json/json_encoder.c | 30 |
1 file changed, 22 insertions, 8 deletions
diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 9d480bcc90..50dcc02052 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -285,20 +285,30 @@ static int php_json_escape_string( do { us = (unsigned char)s[pos]; if (us >= 0x80) { + int utf8_sub = 0; size_t prev_pos = pos; us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); /* check whether UTF8 character is correct */ if (status != SUCCESS) { - if (buf->s) { - ZSTR_LEN(buf->s) = checkpoint; - } - encoder->error_code = PHP_JSON_ERROR_UTF8; - if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { - smart_str_appendl(buf, "null", 4); + if (options & PHP_JSON_INVALID_UTF8_IGNORE) { + /* ignore invalid UTF8 character */ + continue; + } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { + /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */ + us = 0xfffd; + utf8_sub = 1; + } else { + if (buf->s) { + ZSTR_LEN(buf->s) = checkpoint; + } + encoder->error_code = PHP_JSON_ERROR_UTF8; + if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { + smart_str_appendl(buf, "null", 4); + } + return FAILURE; } - return FAILURE; } /* Escape U+2028/U+2029 line terminators, UNLESS both @@ -307,7 +317,11 @@ static int php_json_escape_string( if ((options & PHP_JSON_UNESCAPED_UNICODE) && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) || us < 0x2028 || us > 0x2029)) { - smart_str_appendl(buf, s + prev_pos, pos - prev_pos); + if (utf8_sub) { + smart_str_appendl(buf, "\xef\xbf\xbd", 3); + } else { + smart_str_appendl(buf, s + prev_pos, pos - prev_pos); + } continue; } /* From http://en.wikipedia.org/wiki/UTF16 */ |