diff options
author | Bram Moolenaar <Bram@vim.org> | 2010-01-12 19:52:03 +0100 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2010-01-12 19:52:03 +0100 |
commit | 3e8cb58725fa717caef1730a963dfcb12d8d9763 (patch) | |
tree | 2d50bbc4b7b896d0296e20091f73bc8608c45764 /runtime/tools | |
parent | 5890b2cf9eb203ebfdb3823f21437f7757329b26 (diff) | |
download | vim-git-3e8cb58725fa717caef1730a963dfcb12d8d9763.tar.gz |
updated for version 7.2.330 (tag: v7.2.330)
Problem: Tables for Unicode case operators are outdated.
Solution: Add a Vim script for generating the tables. Include tables for
Unicode 5.2.
Diffstat (limited to 'runtime/tools')
-rw-r--r-- | runtime/tools/README.txt | 2 | ||||
-rw-r--r-- | runtime/tools/unicode.vim | 280 |
2 files changed, 282 insertions, 0 deletions
diff --git a/runtime/tools/README.txt b/runtime/tools/README.txt
index f5274df22..fa176c776 100644
--- a/runtime/tools/README.txt
+++ b/runtime/tools/README.txt
@@ -32,4 +32,6 @@ vim_vs_net.cmd: MS-Windows command file to use Vim with MS Visual Studio 7 and
 xcmdsrv_client.c: Example for a client program that communicates with a Vim
 server through the X-Windows interface.
 
+unicode.vim Vim script to generate tables for src/mbyte.c.
+
 [xxd (and tee for OS/2) can be found in the src directory]
diff --git a/runtime/tools/unicode.vim b/runtime/tools/unicode.vim
new file mode 100644
index 000000000..f33df4206
--- /dev/null
+++ b/runtime/tools/unicode.vim
@@ -0,0 +1,253 @@
+" Script to extract tables from Unicode .txt files, to be used in src/mbyte.c.
+" The format of the UnicodeData.txt file is explained here:
+" http://www.unicode.org/Public/5.1.0/ucd/UCD.html
+" For the other files see the header.
+"
+" Usage: vim -S <this-file>
+"
+" Author: Bram Moolenaar
+" Last Update: 2010 Jan 12
+
+" Convert a string that starts with hex digits (no "0x" prefix needed) to a
+" Number.  Text after the hex digits is ignored, an empty string gives zero.
+func! s:HexToNr(hex)
+  return str2nr(a:hex, 16)
+endfunc
+
+" Return non-zero when "category" is the General Category of a composing
+" character: Mn, Mc or Me.  The match is case sensitive, also when
+" 'ignorecase' is set.
+func! s:IsComposing(category)
+  return a:category ==# 'Mn' || a:category ==# 'Mc' || a:category ==# 'Me'
+endfunc
+
+" Split the lines of the current buffer at ";" and return a list with one
+" list of fields for each line.
+" When "skip" is non-zero empty lines and lines starting with "#" are ignored.
+" At the first line that does not have "nfields" fields an error is given and
+" what was parsed so far is returned.
+func! s:ParseLines(nfields, skip)
+  let props = []
+  for lnum in range(1, line('$'))
+    let line = getline(lnum)
+    if a:skip && (line =~ '^#' || line =~ '^\s*$')
+      continue
+    endif
+    let l = split(line, '\s*;\s*', 1)
+    if len(l) != a:nfields
+      echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected ' . a:nfields
+      break
+    endif
+    call add(props, l)
+  endfor
+  return props
+endfunc
+
+" Parse lines of UnicodeData.txt.  Creates a list of lists in s:dataprops.
+func! ParseDataToProps()
+  let s:dataprops = s:ParseLines(15, 0)
+endfunc
+
+" Parse lines of CaseFolding.txt.  Creates a list of lists in s:foldprops.
+func! ParseFoldProps()
+  let s:foldprops = s:ParseLines(4, 1)
+endfunc
+
+" Parse lines of EastAsianWidth.txt.  Creates a list of lists in s:widthprops.
+func! ParseWidthProps()
+  let s:widthprops = s:ParseLines(2, 1)
+endfunc
+
+" Put a C table in a new buffer with the name "bufname".  The buffer gets the
+" lines "decl" and "open", the items of "entries" with the comma after the
+" last item removed, and the line "close".  Afterwards goes back to the
+" previous window.
+func! s:PutTable(bufname, decl, open, entries, close)
+  new
+  exe 'file ' . a:bufname
+  call setline(1, a:decl)
+  call setline(2, a:open)
+  call append('$', a:entries)
+  if !empty(a:entries)
+    " Remove the last comma.  Without entries the last line is "open", which
+    " must be left alone.
+    call setline('$', getline('$')[:-2])
+  endif
+  call setline(line('$') + 1, a:close)
+  wincmd p
+endfunc
+
+" Format one convertStruct entry and add it to the list "ranges".
+" A "step" of zero means the range has one character, it is stored as -1.
+func! Range(ranges, start, end, step, add)
+  let s = printf("\t{0x%x,0x%x,%d,%d},", a:start, a:end, a:step == 0 ? -1 : a:step, a:add)
+  call add(a:ranges, s)
+endfunc
+
+" Turn a list of [from, to] code point pairs into a list of convertStruct
+" entries.  Successive pairs are put in one entry when they have the same
+" offset "to - from" and the same distance between the "from" values.
+func! s:ConvertRanges(pairs)
+  let start = -1
+  let end = -1
+  let step = 0
+  let add = -1
+  let ranges = []
+  for [n, nl] in a:pairs
+    if start >= 0 && add == nl - n && (step == 0 || n - end == step)
+      " continue with same range.
+      let step = n - end
+      let end = n
+    else
+      if start >= 0
+        " produce previous range
+        call Range(ranges, start, end, step, add)
+      endif
+      let start = n
+      let end = n
+      let step = 0
+      let add = nl - n
+    endif
+  endfor
+  if start >= 0
+    call Range(ranges, start, end, step, add)
+  endif
+  return ranges
+endfunc
+
+" Turn a list of [first, last] code point intervals into a list of "struct
+" interval" entries.  An interval that starts right after the previous one
+" is merged with it.
+func! s:IntervalRanges(intervals)
+  let start = -1
+  let end = -1
+  let ranges = []
+  for [first, last] in a:intervals
+    if start >= 0 && end + 1 == first
+      " continue with same range, up to the last char of this interval.
+      let end = last
+    else
+      if start >= 0
+        " produce previous range
+        call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
+      endif
+      let start = first
+      let end = last
+    endif
+  endfor
+  if start >= 0
+    call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
+  endif
+  return ranges
+endfunc
+
+" Build the toLower or toUpper table in a new buffer.
+" "name" is "Lower" or "Upper", "index" is the index of the field with the
+" mapped character in the lines of UnicodeData.txt.
+" Uses s:dataprops.
+func! BuildCaseTable(name, index)
+  let pairs = []
+  for p in s:dataprops
+    if p[a:index] !=# ''
+      call add(pairs, [s:HexToNr(p[0]), s:HexToNr(p[a:index])])
+    endif
+  endfor
+  let decl = 'static convertStruct to' . a:name . '[] ='
+  call s:PutTable('to' . a:name, decl, '{', s:ConvertRanges(pairs), '};')
+endfunc
+
+" Build the foldCase table in a new buffer.
+" Only lines with status C or S are used.
+" Uses s:foldprops.
+func! BuildFoldTable()
+  let pairs = []
+  for p in s:foldprops
+    if p[1] ==# 'C' || p[1] ==# 'S'
+      call add(pairs, [s:HexToNr(p[0]), s:HexToNr(p[2])])
+    endif
+  endfor
+  let decl = 'static convertStruct foldCase[] ='
+  call s:PutTable('foldCase', decl, '{', s:ConvertRanges(pairs), '};')
+endfunc
+
+" Build the combining table in a new buffer.
+" Uses s:dataprops.
+func! BuildCombiningTable()
+  let intervals = []
+  for p in s:dataprops
+    if s:IsComposing(p[2])
+      let n = s:HexToNr(p[0])
+      call add(intervals, [n, n])
+    endif
+  endfor
+  let decl = ' static struct interval combining[] ='
+  call s:PutTable('combining', decl, ' {', s:IntervalRanges(intervals), ' };')
+endfunc
+
+" Build the ambiguous table in a new buffer.
+" Uses s:widthprops and s:dataprops.
+func! BuildAmbiguousTable()
+  let intervals = []
+  let dataidx = 0
+  let datalen = len(s:dataprops)
+  for p in s:widthprops
+    if p[1][0] ==# 'A'
+      " The first field is one code point or a range "first..last".
+      let bounds = split(p[0], '\.\.', 1)
+      let n = s:HexToNr(bounds[0])
+      let n_last = s:HexToNr(bounds[-1])
+      " Find this char in the data table.  The search goes on where the
+      " previous one stopped, this relies on both tables being sorted.
+      while dataidx < datalen && s:HexToNr(s:dataprops[dataidx][0]) < n
+        let dataidx += 1
+      endwhile
+      if dataidx >= datalen || s:HexToNr(s:dataprops[dataidx][0]) != n
+        " Without the data entry it is unknown if this is a composing char,
+        " leave it out instead of using the entry of another character.
+        echoerr "Cannot find character " . n . " in data table"
+        continue
+      endif
+      " Only use the char when it's not a composing char.  For a range only
+      " the first char is checked.
+      if !s:IsComposing(s:dataprops[dataidx][2])
+        call add(intervals, [n, n_last])
+      endif
+    endif
+  endfor
+  let decl = ' static struct interval ambiguous[] ='
+  call s:PutTable('ambiguous', decl, ' {', s:IntervalRanges(intervals), ' };')
+endfunc
+
+
+" Edit the Unicode text file.  Requires the netrw plugin.
+edit http://unicode.org/Public/UNIDATA/UnicodeData.txt
+
+" Parse each line, create a list of lists.
+call ParseDataToProps()
+
+" Build the toLower table.
+call BuildCaseTable("Lower", 13)
+
+" Build the toUpper table.
+call BuildCaseTable("Upper", 12)
+
+" Build the ranges of composing chars.
+call BuildCombiningTable()
+
+" Edit the case folding text file.  Requires the netrw plugin.
+edit http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
+
+" Parse each line, create a list of lists.
+call ParseFoldProps()
+
+" Build the foldCase table.
+call BuildFoldTable()
+
+" Edit the width text file.  Requires the netrw plugin.
+edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+
+" Parse each line, create a list of lists.
+call ParseWidthProps()
+
+" Build the ambiguous table.
+call BuildAmbiguousTable()