summaryrefslogtreecommitdiff
path: root/runtime/tools
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2016-03-21 22:09:44 +0100
committerBram Moolenaar <Bram@vim.org>2016-03-21 22:09:44 +0100
commitb86f10ee10bdf932df02bdaf601dffa671518a47 (patch)
tree11d4b17ab5ee67f37e6fad384072dc09110e1c55 /runtime/tools
parent3f3fbd3fdb73bdfbfeab22a9dfc7a25e38bdf5f6 (diff)
downloadvim-git-b86f10ee10bdf932df02bdaf601dffa671518a47.tar.gz
patch 7.4.1629v7.4.1629
Problem: Handling emoji characters as full width has problems with backwards compatibility. Solution: Remove ambiguous and double width characters from the emoji table. Use a separate table for the character class. (partly by Yasuhiro Matsumoto)
Diffstat (limited to 'runtime/tools')
-rw-r--r--runtime/tools/unicode.vim196
1 files changed, 130 insertions, 66 deletions
diff --git a/runtime/tools/unicode.vim b/runtime/tools/unicode.vim
index dfe9cef41..e0627b644 100644
--- a/runtime/tools/unicode.vim
+++ b/runtime/tools/unicode.vim
@@ -32,8 +32,8 @@ func! ParseFoldProps()
if line !~ '^#' && line !~ '^\s*$'
let l = split(line, '\s*;\s*', 1)
if len(l) != 4
- echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
- return
+ echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
+ return
endif
call add(s:foldprops, l)
endif
@@ -50,8 +50,8 @@ func! ParseWidthProps()
if line !~ '^#' && line !~ '^\s*$'
let l = split(line, '\s*;\s*', 1)
if len(l) != 2
- echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
- return
+ echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
+ return
endif
call add(s:widthprops, l)
endif
@@ -72,18 +72,18 @@ func! BuildCaseTable(name, index)
let n = ('0x' . p[0]) + 0
let nl = ('0x' . p[a:index]) + 0
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
- " continue with same range.
- let step = n - end
- let end = n
+ " continue with same range.
+ let step = n - end
+ let end = n
else
- if start >= 0
- " produce previous range
- call Range(ranges, start, end, step, add)
- endif
- let start = n
- let end = n
- let step = 0
- let add = nl - n
+ if start >= 0
+ " produce previous range
+ call Range(ranges, start, end, step, add)
+ endif
+ let start = n
+ let end = n
+ let step = 0
+ let add = nl - n
endif
endif
endfor
@@ -115,18 +115,18 @@ func! BuildFoldTable()
let n = ('0x' . p[0]) + 0
let nl = ('0x' . p[2]) + 0
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
- " continue with same range.
- let step = n - end
- let end = n
+ " continue with same range.
+ let step = n - end
+ let end = n
else
- if start >= 0
- " produce previous range
- call Range(ranges, start, end, step, add)
- endif
- let start = n
- let end = n
- let step = 0
- let add = nl - n
+ if start >= 0
+ " produce previous range
+ call Range(ranges, start, end, step, add)
+ endif
+ let start = n
+ let end = n
+ let step = 0
+ let add = nl - n
endif
endif
endfor
@@ -160,15 +160,15 @@ func! BuildCombiningTable()
if p[2] == 'Mn' || p[2] == 'Mc' || p[2] == 'Me'
let n = ('0x' . p[0]) + 0
if start >= 0 && end + 1 == n
- " continue with same range.
- let end = n
+ " continue with same range.
+ let end = n
else
- if start >= 0
- " produce previous range
- call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
- endif
- let start = n
- let end = n
+ if start >= 0
+ " produce previous range
+ call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
+ endif
+ let start = n
+ let end = n
endif
endif
endfor
@@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName)
for p in s:widthprops
if p[1][0] =~ a:pattern
if p[0] =~ '\.\.'
- " It is a range. we don't check for composing char then.
- let rng = split(p[0], '\.\.')
- if len(rng) != 2
- echoerr "Cannot parse range: '" . p[0] . "' in width table"
- endif
- let n = ('0x' . rng[0]) + 0
- let n_last = ('0x' . rng[1]) + 0
+ " It is a range. we don't check for composing char then.
+ let rng = split(p[0], '\.\.')
+ if len(rng) != 2
+ echoerr "Cannot parse range: '" . p[0] . "' in width table"
+ endif
+ let n = ('0x' . rng[0]) + 0
+ let n_last = ('0x' . rng[1]) + 0
else
- let n = ('0x' . p[0]) + 0
- let n_last = n
+ let n = ('0x' . p[0]) + 0
+ let n_last = n
endif
" Find this char in the data table.
while 1
- let dn = ('0x' . s:dataprops[dataidx][0]) + 0
- if dn >= n
- break
- endif
- let dataidx += 1
+ let dn = ('0x' . s:dataprops[dataidx][0]) + 0
+ if dn >= n
+ break
+ endif
+ let dataidx += 1
endwhile
if dn != n && n_last == n
- echoerr "Cannot find character " . n . " in data table"
+ echoerr "Cannot find character " . n . " in data table"
endif
" Only use the char when it's not a composing char.
" But use all chars from a range.
let dp = s:dataprops[dataidx]
if n_last > n || (dp[2] != 'Mn' && dp[2] != 'Mc' && dp[2] != 'Me')
- if start >= 0 && end + 1 == n
- " continue with same range.
- else
- if start >= 0
- " produce previous range
- call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
- endif
- let start = n
- endif
- let end = n_last
+ if start >= 0 && end + 1 == n
+ " continue with same range.
+ else
+ if start >= 0
+ " produce previous range
+ call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
+ if a:pattern == 'A'
+ call add(s:ambitable, [start, end])
+ else
+ call add(s:doubletable, [start, end])
+ endif
+ endif
+ let start = n
+ endif
+ let end = n_last
endif
endif
endfor
if start >= 0
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
+ if a:pattern == 'A'
+ call add(s:ambitable, [start, end])
+ else
+ call add(s:doubletable, [start, end])
+ endif
endif
" New buffer to put the result in.
@@ -253,21 +263,72 @@ endfunc
" Build the emoji width table in a new buffer.
func! BuildEmojiTable(pattern, tableName)
- let ranges = []
- for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
+ let alltokens = []
+ let widthtokens = []
+ let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
+ for n in range(len(lines))
+ let line = lines[n]
let token = split(line, '\.\.')
+ let first = ('0x' . token[0]) + 0
if len(token) == 1
- call add(token, token[0])
+ let last = first
+ else
+ let last = ('0x' . token[1]) + 0
+ endif
+
+ let token = [first, last]
+ if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1])
+ let alltokens[-1][1] = token[1]
+ else
+ call add(alltokens, token)
+ endif
+
+ " exclude characters that are in the "ambiguous" or "doublewidth" table
+ for ambi in s:ambitable
+ if first >= ambi[0] && first <= ambi[1]
+ let first = ambi[1] + 1
+ endif
+ if last >= ambi[0] && last <= ambi[1]
+ let last = ambi[0] - 1
+ endif
+ endfor
+ for double in s:doubletable
+ if first >= double[0] && first <= double[1]
+ let first = double[1] + 1
+ endif
+ if last >= double[0] && last <= double[1]
+ let last = double[0] - 1
+ endif
+ endfor
+
+ if first <= last
+ let token = [first, last]
+ if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1])
+ let widthtokens[-1][1] = token[1]
+ else
+ call add(widthtokens, token)
+ endif
endif
- call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1]))
endfor
+ let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
+ let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
" New buffer to put the result in.
new
- exe "file " . a:tableName
- call setline(1, " static struct interval " . a:tableName . "[] =")
+ exe "file " . a:tableName . '_all'
+ call setline(1, " static struct interval " . a:tableName . "_all[] =")
call setline(2, " {")
- call append('$', ranges)
+ call append('$', allranges)
+ call setline('$', getline('$')[:-2]) " remove last comma
+ call setline(line('$') + 1, " };")
+ wincmd p
+
+ " New buffer to put the result in.
+ new
+ exe "file " . a:tableName . '_width'
+ call setline(1, " static struct interval " . a:tableName . "_width[] =")
+ call setline(2, " {")
+ call append('$', widthranges)
call setline('$', getline('$')[:-2]) " remove last comma
call setline(line('$') + 1, " };")
wincmd p
@@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
call ParseWidthProps()
" Build the double width table.
+let s:doubletable = []
call BuildWidthTable('[WF]', 'doublewidth')
" Build the ambiguous width table.
+let s:ambitable = []
call BuildWidthTable('A', 'ambiguous')
" Edit the emoji text file. Requires the netrw plugin.
edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
" Build the emoji table. Ver. 1.0 - 6.0
+" Must come after the "ambiguous" table
call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji')