diff options
author | Kenichi Handa <handa@m17n.org> | 2002-05-30 00:21:40 +0000 |
---|---|---|
committer | Kenichi Handa <handa@m17n.org> | 2002-05-30 00:21:40 +0000 |
commit | 70db2f06002287e37f3c8187ba766890e8b44c78 (patch) | |
tree | fd2530fcb3b4854fcd33443ae760944b9e1d26da /etc/charsets/gb18030-2.awk | |
parent | b491fe8920cea5880b846ed24fff34f7146b9498 (diff) | |
download | emacs-70db2f06002287e37f3c8187ba766890e8b44c78.tar.gz |
New file.
Diffstat (limited to 'etc/charsets/gb18030-2.awk')
-rw-r--r-- | etc/charsets/gb18030-2.awk | 85 |
1 files changed, 85 insertions, 0 deletions
# gb18030-2.awk  (etc/charsets/gb18030-2.awk)
#
# Input: lines that start with "<UXXXX>" (four upper-case hex digits) and
# whose second field is exactly two escaped bytes, "\xHH\xHH".
# NOTE(review): this looks like a glibc-style charmap for the two-byte part
# of GB18030 -- confirm against the caller that feeds this script.
#
# Output: one line per run of consecutive two-byte codes that map to
# consecutive Unicode values, in one of these forms:
#     0xGGGG 0xUUUU            (run of a single code)
#     0xGGGG-0xGGGG 0xUUUU     (first code, last code, first Unicode value)
#
# All function temporaries are declared as awk locals (the extra parameters
# after the wide gap), so calling a helper never disturbs a caller's
# variables such as the loop counter `i' in the END rule.

BEGIN {
    # Values of the hexadecimal letter digits, in both cases.
    tohex["A"] = 10;  tohex["a"] = 10;
    tohex["B"] = 11;  tohex["b"] = 11;
    tohex["C"] = 12;  tohex["c"] = 12;
    tohex["D"] = 13;  tohex["d"] = 13;
    tohex["E"] = 14;  tohex["e"] = 14;
    tohex["F"] = 15;  tohex["f"] = 15;
}

# Return the numeric value of the hexadecimal string STR.
# A character that is neither a decimal digit nor a key of `tohex'
# contributes 0 (an unset array element is 0 in arithmetic).
function decode_hex(str,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = 1; i <= len; i++) {
        c = substr(str, i, 1);
        if (c >= "0" && c <= "9")
            n = n * 16 + (c - "0");
        else
            n = n * 16 + tohex[c];
    }
    return n;
}

# Map a two-byte code GB (lead * 256 + trail) to a dense zero-based index.
# Each lead byte (counted from 0x81) owns 190 slots; trail bytes are counted
# from 0x40, and trail bytes >= 0x80 are shifted down by one so that the
# slot for 0x7F is skipped.
function gb_to_index(gb,    b0, b1, idx) {
    b0 = int(gb / 256);
    b1 = gb % 256;
    idx = (b0 - 129) * 190 + b1 - 64;
    if (b1 >= 128)
        idx--;
    return idx;
}

# Inverse of gb_to_index: turn a dense index IDX back into lead * 256 + trail.
function index_to_gb(idx,    b0, b1) {
    b0 = int(idx / 190) + 129;
    b1 = (idx % 190) + 64;
    if (b1 >= 127)      # step over 0x7F, mirroring gb_to_index
        b1++;
    return b0 * 256 + b1;
}

# Decode a string of the form "\xHH\xHH" into lead * 256 + trail.
# The hex digits sit at character positions 3-4 and 7-8.
function decode_gb(str,    b0, b1) {
    b0 = decode_hex(substr(str, 3, 2));
    b1 = decode_hex(substr(str, 7, 2));
    return b0 * 256 + b1;
}

# Print one output line for the run of indices FIRST..LAST whose first
# element maps to UNICODE_START.  A run of one element uses the short form.
function emit_range(first, last, unicode_start) {
    if (first == last)
        printf("0x%04X 0x%04X\n", index_to_gb(first), unicode_start);
    else
        printf("0x%04X-0x%04X 0x%04X\n",
               index_to_gb(first), index_to_gb(last), unicode_start);
}

# Record every "<UXXXX>  \xHH\xHH" mapping in gb_table, keyed by dense index.
/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
    if ($2 ~ /^\\x[0-9A-F][0-9A-F]\\x[0-9A-F][0-9A-F]$/) {
        # $1 is "<UXXXX>...": the four hex digits start at position 3.
        unicode = decode_hex(substr($1, 3, 4));
        idx = gb_to_index(decode_gb($2));
        gb_table[idx] = unicode;
    }
}

END {
    last_idx = gb_to_index(decode_hex("FEFE"));
    from_idx = 0;
    from_unicode = gb_table[0];
    for (i = 1; i <= last_idx; i++) {
        # An index with no recorded mapping reads as 0 in the arithmetic below.
        unicode = gb_table[i];
        # The run breaks as soon as the Unicode offset stops tracking the
        # index offset.
        if (i - from_idx != unicode - from_unicode) {
            emit_range(from_idx, i - 1, from_unicode);
            from_idx = i;
            from_unicode = unicode;
        }
    }
    # After the loop there is always exactly one run left to flush.
    emit_range(from_idx, last_idx, from_unicode);
}