diff options
author | Bram Moolenaar <Bram@vim.org> | 2016-02-27 18:41:27 +0100 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2016-02-27 18:41:27 +0100 |
commit | b6ff81188d27fae774d9ad2dfb498f596d697d4b (patch) | |
tree | 4a1ac0474ae713f4b141df74812532914d76dd68 /src/json.c | |
parent | 85b11769ab507c7df93f319fd964fa579701b76b (diff) | |
download | vim-git-b6ff81188d27fae774d9ad2dfb498f596d697d4b.tar.gz |
patch 7.4.1434v7.4.1434
Problem: JSON encoding doesn't hanel surrogate pair.
Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
Diffstat (limited to 'src/json.c')
-rw-r--r-- | src/json.c | 72 |
1 files changed, 67 insertions, 5 deletions
diff --git a/src/json.c b/src/json.c index a42faec0d..da585e306 100644 --- a/src/json.c +++ b/src/json.c @@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str) ga_concat(gap, (char_u *)"null"); else { +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; + + convert_setup(&conv, p_enc, (char_u*)"utf-8"); + if (conv.vc_type != CONV_NONE) + converted = res = string_convert(&conv, res, NULL); + convert_setup(&conv, NULL, NULL); +#endif + ga_append(gap, '"'); while (*res != NUL) { - int c = PTR2CHAR(res); + int c; +#ifdef FEAT_MBYTE + /* always use utf-8 encoding, ignore 'encoding' */ + c = utf_ptr2char(res); +#else + c = (int)*(p); +#endif switch (c) { @@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str) if (c >= 0x20) { #ifdef FEAT_MBYTE - numbuf[mb_char2bytes(c, numbuf)] = NUL; + numbuf[utf_char2bytes(c, numbuf)] = NUL; #else numbuf[0] = c; numbuf[1] = NUL; @@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str) ga_concat(gap, numbuf); } } - mb_cptr_adv(res); +#ifdef FEAT_MBYTE + res += utf_ptr2len(res); +#else + ++p; +#endif } ga_append(gap, '"'); +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + vim_free(converted); +#endif } } @@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, typval_T *res) int c; long nr; char_u buf[NUMBUFLEN]; +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; +#endif if (res != NULL) ga_init2(&ga, 1, 200); p = reader->js_buf + reader->js_used + 1; /* skip over " */ +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + convert_setup(&conv, (char_u*)"utf-8", p_enc); + if (conv.vc_type != CONV_NONE) + converted = p = string_convert(&conv, p, NULL); + convert_setup(&conv, NULL, NULL); +#endif while (*p != '"') { if (*p == NUL || p[1] == NUL @@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, typval_T *res) + STRLEN(reader->js_buf); } } + nr = 0; + len = 0; vim_str2nr(p + 2, NULL, &len, STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4); p += len + 2; + if (0xd800 <= nr && nr <= 0xdfff + && (int)(reader->js_end - p) >= 6 + && *p == '\\' && *(p+1) == 'u') + { + long nr2 = 0; + + /* decode surrogate pair: \ud812\u3456 */ + len = 0; + vim_str2nr(p + 2, NULL, &len, + STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4); + if (0xdc00 <= nr2 && nr2 <= 0xdfff) + { + p += len + 2; + nr = (((nr - 0xd800) << 10) | + ((nr2 - 0xdc00) & 0x3ff)) + 0x10000; + } + } if (res != NULL) { #ifdef FEAT_MBYTE - buf[(*mb_char2bytes)((int)nr, buf)] = NUL; + buf[utf_char2bytes((int)nr, buf)] = NUL; ga_concat(&ga, buf); #else ga_append(&ga, nr); @@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, typval_T *res) } else { - len = MB_PTR2LEN(p); +#ifdef FEAT_MBYTE + len = utf_ptr2len(p); +#else + len = 1; +#endif if (res != NULL) { if (ga_grow(&ga, len) == FAIL) { ga_clear(&ga); +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + vim_free(converted); +#endif return FAIL; } mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len); @@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, typval_T *res) p += len; } } +#if defined(FEAT_MBYTE) && defined(USE_ICONV) + vim_free(converted); +#endif reader->js_used = (int)(p - reader->js_buf); if (*p == '"') |