diff options
author | Bram Moolenaar <Bram@vim.org> | 2016-03-21 22:09:44 +0100 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2016-03-21 22:09:44 +0100 |
commit | b86f10ee10bdf932df02bdaf601dffa671518a47 (patch) | |
tree | 11d4b17ab5ee67f37e6fad384072dc09110e1c55 | |
parent | 3f3fbd3fdb73bdfbfeab22a9dfc7a25e38bdf5f6 (diff) | |
download | vim-git-b86f10ee10bdf932df02bdaf601dffa671518a47.tar.gz |
patch 7.4.1629 (tag: v7.4.1629)
Problem: Handling emoji characters as full width has problems with
backwards compatibility.
Solution: Remove ambiguous and double width characters from the emoji table.
Use a separate table for the character class.
(partly by Yasuhiro Matsumoto)
-rw-r--r-- | runtime/tools/unicode.vim | 196 | ||||
-rw-r--r-- | src/mbyte.c | 347 | ||||
-rw-r--r-- | src/version.c | 2 |
3 files changed, 335 insertions, 210 deletions
diff --git a/runtime/tools/unicode.vim b/runtime/tools/unicode.vim index dfe9cef41..e0627b644 100644 --- a/runtime/tools/unicode.vim +++ b/runtime/tools/unicode.vim @@ -32,8 +32,8 @@ func! ParseFoldProps() if line !~ '^#' && line !~ '^\s*$' let l = split(line, '\s*;\s*', 1) if len(l) != 4 - echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4' - return + echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4' + return endif call add(s:foldprops, l) endif @@ -50,8 +50,8 @@ func! ParseWidthProps() if line !~ '^#' && line !~ '^\s*$' let l = split(line, '\s*;\s*', 1) if len(l) != 2 - echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2' - return + echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2' + return endif call add(s:widthprops, l) endif @@ -72,18 +72,18 @@ func! BuildCaseTable(name, index) let n = ('0x' . p[0]) + 0 let nl = ('0x' . p[a:index]) + 0 if start >= 0 && add == nl - n && (step == 0 || n - end == step) - " continue with same range. - let step = n - end - let end = n + " continue with same range. + let step = n - end + let end = n else - if start >= 0 - " produce previous range - call Range(ranges, start, end, step, add) - endif - let start = n - let end = n - let step = 0 - let add = nl - n + if start >= 0 + " produce previous range + call Range(ranges, start, end, step, add) + endif + let start = n + let end = n + let step = 0 + let add = nl - n endif endif endfor @@ -115,18 +115,18 @@ func! BuildFoldTable() let n = ('0x' . p[0]) + 0 let nl = ('0x' . p[2]) + 0 if start >= 0 && add == nl - n && (step == 0 || n - end == step) - " continue with same range. - let step = n - end - let end = n + " continue with same range. 
+ let step = n - end + let end = n else - if start >= 0 - " produce previous range - call Range(ranges, start, end, step, add) - endif - let start = n - let end = n - let step = 0 - let add = nl - n + if start >= 0 + " produce previous range + call Range(ranges, start, end, step, add) + endif + let start = n + let end = n + let step = 0 + let add = nl - n endif endif endfor @@ -160,15 +160,15 @@ func! BuildCombiningTable() if p[2] == 'Mn' || p[2] == 'Mc' || p[2] == 'Me' let n = ('0x' . p[0]) + 0 if start >= 0 && end + 1 == n - " continue with same range. - let end = n + " continue with same range. + let end = n else - if start >= 0 - " produce previous range - call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) - endif - let start = n - let end = n + if start >= 0 + " produce previous range + call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + endif + let start = n + let end = n endif endif endfor @@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName) for p in s:widthprops if p[1][0] =~ a:pattern if p[0] =~ '\.\.' - " It is a range. we don't check for composing char then. - let rng = split(p[0], '\.\.') - if len(rng) != 2 - echoerr "Cannot parse range: '" . p[0] . "' in width table" - endif - let n = ('0x' . rng[0]) + 0 - let n_last = ('0x' . rng[1]) + 0 + " It is a range. we don't check for composing char then. + let rng = split(p[0], '\.\.') + if len(rng) != 2 + echoerr "Cannot parse range: '" . p[0] . "' in width table" + endif + let n = ('0x' . rng[0]) + 0 + let n_last = ('0x' . rng[1]) + 0 else - let n = ('0x' . p[0]) + 0 - let n_last = n + let n = ('0x' . p[0]) + 0 + let n_last = n endif " Find this char in the data table. while 1 - let dn = ('0x' . s:dataprops[dataidx][0]) + 0 - if dn >= n - break - endif - let dataidx += 1 + let dn = ('0x' . s:dataprops[dataidx][0]) + 0 + if dn >= n + break + endif + let dataidx += 1 endwhile if dn != n && n_last == n - echoerr "Cannot find character " . n . 
" in data table" + echoerr "Cannot find character " . n . " in data table" endif " Only use the char when it's not a composing char. " But use all chars from a range. let dp = s:dataprops[dataidx] if n_last > n || (dp[2] != 'Mn' && dp[2] != 'Mc' && dp[2] != 'Me') - if start >= 0 && end + 1 == n - " continue with same range. - else - if start >= 0 - " produce previous range - call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) - endif - let start = n - endif - let end = n_last + if start >= 0 && end + 1 == n + " continue with same range. + else + if start >= 0 + " produce previous range + call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + if a:pattern == 'A' + call add(s:ambitable, [start, end]) + else + call add(s:doubletable, [start, end]) + endif + endif + let start = n + endif + let end = n_last endif endif endfor if start >= 0 call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + if a:pattern == 'A' + call add(s:ambitable, [start, end]) + else + call add(s:doubletable, [start, end]) + endif endif " New buffer to put the result in. @@ -253,21 +263,72 @@ endfunc " Build the amoji width table in a new buffer. func! BuildEmojiTable(pattern, tableName) - let ranges = [] - for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")') + let alltokens = [] + let widthtokens = [] + let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")') + for n in range(len(lines)) + let line = lines[n] let token = split(line, '\.\.') + let first = ('0x' . token[0]) + 0 if len(token) == 1 - call add(token, token[0]) + let last = first + else + let last = ('0x' . 
token[1]) + 0 + endif + + let token = [first, last] + if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1]) + let alltokens[-1][1] = token[1] + else + call add(alltokens, token) + endif + + " exclude characters that are in the "ambiguous" or "doublewidth" table + for ambi in s:ambitable + if first >= ambi[0] && first <= ambi[1] + let first = ambi[1] + 1 + endif + if last >= ambi[0] && last <= ambi[1] + let last = ambi[0] - 1 + endif + endfor + for double in s:doubletable + if first >= double[0] && first <= double[1] + let first = double[1] + 1 + endif + if last >= double[0] && last <= double[1] + let last = double[0] - 1 + endif + endfor + + if first <= last + let token = [first, last] + if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1]) + let widthtokens[-1][1] = token[1] + else + call add(widthtokens, token) + endif endif - call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1])) endfor + let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') + let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') " New buffer to put the result in. new - exe "file " . a:tableName - call setline(1, " static struct interval " . a:tableName . "[] =") + exe "file " . a:tableName . '_all' + call setline(1, " static struct interval " . a:tableName . "_all[] =") call setline(2, " {") - call append('$', ranges) + call append('$', allranges) + call setline('$', getline('$')[:-2]) " remove last comma + call setline(line('$') + 1, " };") + wincmd p + + " New buffer to put the result in. + new + exe "file " . a:tableName . '_width' + call setline(1, " static struct interval " . a:tableName . 
"_width[] =") + call setline(2, " {") + call append('$', widthranges) call setline('$', getline('$')[:-2]) " remove last comma call setline(line('$') + 1, " };") wincmd p @@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt call ParseWidthProps() " Build the double width table. +let s:doubletable = [] call BuildWidthTable('[WF]', 'doublewidth') " Build the ambiguous width table. +let s:ambitable = [] call BuildWidthTable('A', 'ambiguous') " Edit the emoji text file. Requires the netrw plugin. edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt " Build the emoji table. Ver. 1.0 - 6.0 +" Must come after the "ambiguous" table call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji') diff --git a/src/mbyte.c b/src/mbyte.c index c670a0669..01b13c861 100644 --- a/src/mbyte.c +++ b/src/mbyte.c @@ -1210,148 +1210,6 @@ intable(struct interval *table, size_t size, int c) return FALSE; } -/* Sorted list of non-overlapping intervals of Emoji characters, - * based on http://unicode.org/emoji/charts/emoji-list.html */ -static struct interval emoji_tab[] = -{ - {0x203c, 0x203c}, - {0x2049, 0x2049}, - {0x2122, 0x2122}, - {0x2139, 0x2139}, - {0x2194, 0x2199}, - {0x21a9, 0x21aa}, - {0x231a, 0x231b}, - {0x2328, 0x2328}, - {0x23cf, 0x23cf}, - {0x23e9, 0x23f3}, - {0x24c2, 0x24c2}, - {0x25aa, 0x25ab}, - {0x25b6, 0x25b6}, - {0x25c0, 0x25c0}, - {0x25fb, 0x25fe}, - {0x2600, 0x2604}, - {0x260e, 0x260e}, - {0x2611, 0x2611}, - {0x2614, 0x2615}, - {0x2618, 0x2618}, - {0x261d, 0x261d}, - {0x2620, 0x2620}, - {0x2622, 0x2623}, - {0x2626, 0x2626}, - {0x262a, 0x262a}, - {0x262e, 0x262f}, - {0x2638, 0x263a}, - {0x2648, 0x2653}, - {0x2660, 0x2660}, - {0x2663, 0x2663}, - {0x2665, 0x2666}, - {0x2668, 0x2668}, - {0x267b, 0x267b}, - {0x267f, 0x267f}, - {0x2692, 0x2694}, - {0x2696, 0x2697}, - {0x2699, 0x2699}, - {0x269b, 0x269c}, - {0x26a0, 0x26a1}, - {0x26aa, 0x26ab}, - {0x26b0, 0x26b1}, - {0x26bd, 0x26be}, - {0x26c4, 0x26c5}, - {0x26c8, 0x26c8}, - {0x26ce, 
0x26ce}, - {0x26cf, 0x26cf}, - {0x26d1, 0x26d1}, - {0x26d3, 0x26d4}, - {0x26e9, 0x26ea}, - {0x26f0, 0x26f5}, - {0x26f7, 0x26fa}, - {0x26fd, 0x26fd}, - {0x2702, 0x2702}, - {0x2705, 0x2705}, - {0x2708, 0x2709}, - {0x270a, 0x270b}, - {0x270c, 0x270d}, - {0x270f, 0x270f}, - {0x2712, 0x2712}, - {0x2714, 0x2714}, - {0x2716, 0x2716}, - {0x271d, 0x271d}, - {0x2721, 0x2721}, - {0x2728, 0x2728}, - {0x2733, 0x2734}, - {0x2744, 0x2744}, - {0x2747, 0x2747}, - {0x274c, 0x274c}, - {0x274e, 0x274e}, - {0x2753, 0x2755}, - {0x2757, 0x2757}, - {0x2763, 0x2764}, - {0x2795, 0x2797}, - {0x27a1, 0x27a1}, - {0x27b0, 0x27b0}, - {0x27bf, 0x27bf}, - {0x2934, 0x2935}, - {0x2b05, 0x2b07}, - {0x2b1b, 0x2b1c}, - {0x2b50, 0x2b50}, - {0x2b55, 0x2b55}, - {0x3030, 0x3030}, - {0x303d, 0x303d}, - {0x3297, 0x3297}, - {0x3299, 0x3299}, - {0x1f004, 0x1f004}, - {0x1f0cf, 0x1f0cf}, - {0x1f170, 0x1f171}, - {0x1f17e, 0x1f17e}, - {0x1f17f, 0x1f17f}, - {0x1f18e, 0x1f18e}, - {0x1f191, 0x1f19a}, - {0x1f1e6, 0x1f1ff}, - {0x1f201, 0x1f202}, - {0x1f21a, 0x1f21a}, - {0x1f22f, 0x1f22f}, - {0x1f232, 0x1f23a}, - {0x1f250, 0x1f251}, - {0x1f300, 0x1f320}, - {0x1f330, 0x1f335}, - {0x1f337, 0x1f37c}, - {0x1f380, 0x1f393}, - {0x1f3a0, 0x1f3c4}, - {0x1f3c6, 0x1f3ca}, - {0x1f3e0, 0x1f3f0}, - {0x1f400, 0x1f43e}, - {0x1f440, 0x1f440}, - {0x1f442, 0x1f4f7}, - {0x1f4f9, 0x1f4fc}, - {0x1f500, 0x1f53d}, - {0x1f550, 0x1f567}, - {0x1f5fb, 0x1f5ff}, - {0x1f600, 0x1f600}, - {0x1f601, 0x1f610}, - {0x1f611, 0x1f611}, - {0x1f612, 0x1f614}, - {0x1f615, 0x1f615}, - {0x1f616, 0x1f616}, - {0x1f617, 0x1f617}, - {0x1f618, 0x1f618}, - {0x1f619, 0x1f619}, - {0x1f61a, 0x1f61a}, - {0x1f61b, 0x1f61b}, - {0x1f61c, 0x1f61e}, - {0x1f61f, 0x1f61f}, - {0x1f620, 0x1f625}, - {0x1f626, 0x1f627}, - {0x1f628, 0x1f62b}, - {0x1f62c, 0x1f62c}, - {0x1f62d, 0x1f62d}, - {0x1f62e, 0x1f62f}, - {0x1f630, 0x1f633}, - {0x1f634, 0x1f634}, - {0x1f635, 0x1f640}, - {0x1f645, 0x1f64f}, - {0x1f680, 0x1f6c5} -}; - /* * For UTF-8 character "c" return 2 for a double-width 
character, 1 for others. * Returns 4 or 6 for an unprintable character. @@ -1577,6 +1435,90 @@ utf_char2cells(int c) {0x100000, 0x10fffd} }; + /* Sorted list of non-overlapping intervals of Emoji characters that don't + * have ambiguous or double width, + * based on http://unicode.org/emoji/charts/emoji-list.html */ + static struct interval emoji_width[] = + { + {0x203c, 0x203c}, + {0x2049, 0x2049}, + {0x2139, 0x2139}, + {0x21a9, 0x21aa}, + {0x231a, 0x231b}, + {0x2328, 0x2328}, + {0x23cf, 0x23cf}, + {0x23e9, 0x23f3}, + {0x25aa, 0x25ab}, + {0x25fb, 0x25fe}, + {0x2600, 0x2604}, + {0x2611, 0x2611}, + {0x2618, 0x2618}, + {0x261d, 0x261d}, + {0x2620, 0x2620}, + {0x2622, 0x2623}, + {0x2626, 0x2626}, + {0x262a, 0x262a}, + {0x262e, 0x262f}, + {0x2638, 0x263a}, + {0x2648, 0x2653}, + {0x2666, 0x2666}, + {0x267b, 0x267b}, + {0x267f, 0x267f}, + {0x2692, 0x2694}, + {0x2696, 0x2697}, + {0x2699, 0x2699}, + {0x269b, 0x269c}, + {0x26a0, 0x26a1}, + {0x26aa, 0x26ab}, + {0x26b0, 0x26b1}, + {0x26bd, 0x26bd}, + {0x26ce, 0x26ce}, + {0x2702, 0x2702}, + {0x2705, 0x2705}, + {0x2708, 0x270d}, + {0x270f, 0x270f}, + {0x2712, 0x2712}, + {0x2714, 0x2714}, + {0x2716, 0x2716}, + {0x271d, 0x271d}, + {0x2721, 0x2721}, + {0x2728, 0x2728}, + {0x2733, 0x2734}, + {0x2744, 0x2744}, + {0x2747, 0x2747}, + {0x274c, 0x274c}, + {0x274e, 0x274e}, + {0x2753, 0x2755}, + {0x2763, 0x2764}, + {0x2795, 0x2797}, + {0x27a1, 0x27a1}, + {0x27b0, 0x27b0}, + {0x27bf, 0x27bf}, + {0x2934, 0x2935}, + {0x2b05, 0x2b07}, + {0x2b1b, 0x2b1c}, + {0x2b50, 0x2b50}, + {0x1f004, 0x1f004}, + {0x1f0cf, 0x1f0cf}, + {0x1f1e6, 0x1f1ff}, + {0x1f300, 0x1f320}, + {0x1f330, 0x1f335}, + {0x1f337, 0x1f37c}, + {0x1f380, 0x1f393}, + {0x1f3a0, 0x1f3c4}, + {0x1f3c6, 0x1f3ca}, + {0x1f3e0, 0x1f3f0}, + {0x1f400, 0x1f43e}, + {0x1f440, 0x1f440}, + {0x1f442, 0x1f4f7}, + {0x1f4f9, 0x1f4fc}, + {0x1f500, 0x1f53d}, + {0x1f550, 0x1f567}, + {0x1f5fb, 0x1f640}, + {0x1f645, 0x1f64f}, + {0x1f680, 0x1f6c5} + }; + if (c >= 0x100) { #ifdef USE_WCHAR_FUNCTIONS @@ 
-1596,7 +1538,7 @@ utf_char2cells(int c) if (intable(doublewidth, sizeof(doublewidth), c)) return 2; #endif - if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c)) + if (p_emoji && intable(emoji_width, sizeof(emoji_width), c)) return 2; } @@ -2674,6 +2616,123 @@ utf_class(int c) {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ }; + + /* Sorted list of non-overlapping intervals of all Emoji characters, + * based on http://unicode.org/emoji/charts/emoji-list.html */ + static struct interval emoji_all[] = + { + {0x203c, 0x203c}, + {0x2049, 0x2049}, + {0x2122, 0x2122}, + {0x2139, 0x2139}, + {0x2194, 0x2199}, + {0x21a9, 0x21aa}, + {0x231a, 0x231b}, + {0x2328, 0x2328}, + {0x23cf, 0x23cf}, + {0x23e9, 0x23f3}, + {0x24c2, 0x24c2}, + {0x25aa, 0x25ab}, + {0x25b6, 0x25b6}, + {0x25c0, 0x25c0}, + {0x25fb, 0x25fe}, + {0x2600, 0x2604}, + {0x260e, 0x260e}, + {0x2611, 0x2611}, + {0x2614, 0x2615}, + {0x2618, 0x2618}, + {0x261d, 0x261d}, + {0x2620, 0x2620}, + {0x2622, 0x2623}, + {0x2626, 0x2626}, + {0x262a, 0x262a}, + {0x262e, 0x262f}, + {0x2638, 0x263a}, + {0x2648, 0x2653}, + {0x2660, 0x2660}, + {0x2663, 0x2663}, + {0x2665, 0x2666}, + {0x2668, 0x2668}, + {0x267b, 0x267b}, + {0x267f, 0x267f}, + {0x2692, 0x2694}, + {0x2696, 0x2697}, + {0x2699, 0x2699}, + {0x269b, 0x269c}, + {0x26a0, 0x26a1}, + {0x26aa, 0x26ab}, + {0x26b0, 0x26b1}, + {0x26bd, 0x26be}, + {0x26c4, 0x26c5}, + {0x26c8, 0x26c8}, + {0x26ce, 0x26cf}, + {0x26d1, 0x26d1}, + {0x26d3, 0x26d4}, + {0x26e9, 0x26ea}, + {0x26f0, 0x26f5}, + {0x26f7, 0x26fa}, + {0x26fd, 0x26fd}, + {0x2702, 0x2702}, + {0x2705, 0x2705}, + {0x2708, 0x270d}, + {0x270f, 0x270f}, + {0x2712, 0x2712}, + {0x2714, 0x2714}, + {0x2716, 0x2716}, + {0x271d, 0x271d}, + {0x2721, 0x2721}, + {0x2728, 0x2728}, + {0x2733, 0x2734}, + {0x2744, 0x2744}, + {0x2747, 0x2747}, + {0x274c, 0x274c}, + {0x274e, 0x274e}, + {0x2753, 0x2755}, + {0x2757, 0x2757}, + {0x2763, 0x2764}, + {0x2795, 0x2797}, + {0x27a1, 0x27a1}, + {0x27b0, 
0x27b0}, + {0x27bf, 0x27bf}, + {0x2934, 0x2935}, + {0x2b05, 0x2b07}, + {0x2b1b, 0x2b1c}, + {0x2b50, 0x2b50}, + {0x2b55, 0x2b55}, + {0x3030, 0x3030}, + {0x303d, 0x303d}, + {0x3297, 0x3297}, + {0x3299, 0x3299}, + {0x1f004, 0x1f004}, + {0x1f0cf, 0x1f0cf}, + {0x1f170, 0x1f171}, + {0x1f17e, 0x1f17f}, + {0x1f18e, 0x1f18e}, + {0x1f191, 0x1f19a}, + {0x1f1e6, 0x1f1ff}, + {0x1f201, 0x1f202}, + {0x1f21a, 0x1f21a}, + {0x1f22f, 0x1f22f}, + {0x1f232, 0x1f23a}, + {0x1f250, 0x1f251}, + {0x1f300, 0x1f320}, + {0x1f330, 0x1f335}, + {0x1f337, 0x1f37c}, + {0x1f380, 0x1f393}, + {0x1f3a0, 0x1f3c4}, + {0x1f3c6, 0x1f3ca}, + {0x1f3e0, 0x1f3f0}, + {0x1f400, 0x1f43e}, + {0x1f440, 0x1f440}, + {0x1f442, 0x1f4f7}, + {0x1f4f9, 0x1f4fc}, + {0x1f500, 0x1f53d}, + {0x1f550, 0x1f567}, + {0x1f5fb, 0x1f640}, + {0x1f645, 0x1f64f}, + {0x1f680, 0x1f6c5} + }; + int bot = 0; int top = sizeof(classes) / sizeof(struct clinterval) - 1; int mid; @@ -2701,7 +2760,7 @@ utf_class(int c) } /* emoji */ - if (intable(emoji_tab, sizeof(emoji_tab), c)) + if (intable(emoji_all, sizeof(emoji_all), c)) return 3; /* most other characters are "word" characters */ diff --git a/src/version.c b/src/version.c index 7852ea0de..c5504176a 100644 --- a/src/version.c +++ b/src/version.c @@ -749,6 +749,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1629, +/**/ 1628, /**/ 1627, |