summaryrefslogtreecommitdiff
path: root/src/json.c
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2016-02-27 18:41:27 +0100
committerBram Moolenaar <Bram@vim.org>2016-02-27 18:41:27 +0100
commitb6ff81188d27fae774d9ad2dfb498f596d697d4b (patch)
tree4a1ac0474ae713f4b141df74812532914d76dd68 /src/json.c
parent85b11769ab507c7df93f319fd964fa579701b76b (diff)
downloadvim-git-b6ff81188d27fae774d9ad2dfb498f596d697d4b.tar.gz
patch 7.4.1434v7.4.1434
Problem: JSON encoding doesn't hanel surrogate pair. Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
Diffstat (limited to 'src/json.c')
-rw-r--r--src/json.c72
1 files changed, 67 insertions, 5 deletions
diff --git a/src/json.c b/src/json.c
index a42faec0d..da585e306 100644
--- a/src/json.c
+++ b/src/json.c
@@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str)
ga_concat(gap, (char_u *)"null");
else
{
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ vimconv_T conv;
+ char_u *converted = NULL;
+
+ convert_setup(&conv, p_enc, (char_u*)"utf-8");
+ if (conv.vc_type != CONV_NONE)
+ converted = res = string_convert(&conv, res, NULL);
+ convert_setup(&conv, NULL, NULL);
+#endif
+
ga_append(gap, '"');
while (*res != NUL)
{
- int c = PTR2CHAR(res);
+ int c;
+#ifdef FEAT_MBYTE
+ /* always use utf-8 encoding, ignore 'encoding' */
+ c = utf_ptr2char(res);
+#else
+ c = (int)*(p);
+#endif
switch (c)
{
@@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str)
if (c >= 0x20)
{
#ifdef FEAT_MBYTE
- numbuf[mb_char2bytes(c, numbuf)] = NUL;
+ numbuf[utf_char2bytes(c, numbuf)] = NUL;
#else
numbuf[0] = c;
numbuf[1] = NUL;
@@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str)
ga_concat(gap, numbuf);
}
}
- mb_cptr_adv(res);
+#ifdef FEAT_MBYTE
+ res += utf_ptr2len(res);
+#else
+ ++p;
+#endif
}
ga_append(gap, '"');
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ vim_free(converted);
+#endif
}
}
@@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, typval_T *res)
int c;
long nr;
char_u buf[NUMBUFLEN];
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ vimconv_T conv;
+ char_u *converted = NULL;
+#endif
if (res != NULL)
ga_init2(&ga, 1, 200);
p = reader->js_buf + reader->js_used + 1; /* skip over " */
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ convert_setup(&conv, (char_u*)"utf-8", p_enc);
+ if (conv.vc_type != CONV_NONE)
+ converted = p = string_convert(&conv, p, NULL);
+ convert_setup(&conv, NULL, NULL);
+#endif
while (*p != '"')
{
if (*p == NUL || p[1] == NUL
@@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, typval_T *res)
+ STRLEN(reader->js_buf);
}
}
+ nr = 0;
+ len = 0;
vim_str2nr(p + 2, NULL, &len,
STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
p += len + 2;
+ if (0xd800 <= nr && nr <= 0xdfff
+ && (int)(reader->js_end - p) >= 6
+ && *p == '\\' && *(p+1) == 'u')
+ {
+ long nr2 = 0;
+
+ /* decode surrogate pair: \ud812\u3456 */
+ len = 0;
+ vim_str2nr(p + 2, NULL, &len,
+ STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
+ if (0xdc00 <= nr2 && nr2 <= 0xdfff)
+ {
+ p += len + 2;
+ nr = (((nr - 0xd800) << 10) |
+ ((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
+ }
+ }
if (res != NULL)
{
#ifdef FEAT_MBYTE
- buf[(*mb_char2bytes)((int)nr, buf)] = NUL;
+ buf[utf_char2bytes((int)nr, buf)] = NUL;
ga_concat(&ga, buf);
#else
ga_append(&ga, nr);
@@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, typval_T *res)
}
else
{
- len = MB_PTR2LEN(p);
+#ifdef FEAT_MBYTE
+ len = utf_ptr2len(p);
+#else
+ len = 1;
+#endif
if (res != NULL)
{
if (ga_grow(&ga, len) == FAIL)
{
ga_clear(&ga);
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ vim_free(converted);
+#endif
return FAIL;
}
mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
@@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, typval_T *res)
p += len;
}
}
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+ vim_free(converted);
+#endif
reader->js_used = (int)(p - reader->js_buf);
if (*p == '"')