summaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
author: fujiwarat <takao.fujiwara1@gmail.com> 2013-08-27 23:17:47 +0900
committer: fujiwarat <takao.fujiwara1@gmail.com> 2013-08-29 12:57:40 +0900
commit: 17d8871211a531c755cd4397a75f4892222412ff (patch)
tree: 4a845f7c2b1084f0a0e8d3db90e41c5258c98135 /data
parent: e1a94ef87577a38b3402a0ebaa1f0e5088f99c2a (diff)
download: ibus-anthy-17d8871211a531c755cd4397a75f4892222412ff.tar.gz
Imported anthy zipcode.t into ibus-anthy.
Diffstat (limited to 'data')
-rw-r--r-- data/Makefile.am 10
-rwxr-xr-x data/zipcode-textdic.py 54
2 files changed, 64 insertions, 0 deletions
diff --git a/data/Makefile.am b/data/Makefile.am
index 3aa240a..f187c93 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -27,8 +27,18 @@ dicts_DATA = \
dictsdir = $(pkgdatadir)/dicts
+if ENABLE_ZIPCODE
+dicts_DATA += zipcode.t
+
+zipcode.t: $(ANTHY_ZIPCODE_FILE)
+ $(PYTHON) zipcode-textdic.py $<
+
+CLEANFILES = zipcode.t
+endif
+
EXTRA_DIST = \
era.t \
oldchar.t \
symbol.t \
+ zipcode-textdic.py \
$(NULL)
diff --git a/data/zipcode-textdic.py b/data/zipcode-textdic.py
new file mode 100755
index 0000000..2864f85
--- /dev/null
+++ b/data/zipcode-textdic.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+# Convert anthy's EUC-JP zipcode.t into a UTF-8 'zipcode.t' for ibus-anthy.
+# The dictionary is imported because grepping for digits without a hyphen
+# in the engine could cause a timeout. If digits without a hyphen were
+# sent to anthy, digits with a hyphen would have to be sent as well, and
+# the lookup could then include too many unnecessary candidates.
+# Installing the file under the name 'zipcode.t' also simplifies the engine.
+
+import sys
+
+if len(sys.argv) < 2:
+ print >> sys.stderr, 'usage: %s /usr/share/anthy/zipcode.t' % sys.argv[0]
+ exit(-1)
+
+anthy_zipfile = sys.argv[1]
+
+try:
+ contents = unicode(open(anthy_zipfile).read(), 'euc_jp').encode('utf-8')
+except UnicodeDecodeError, e:
+ print >> sys.stderr, 'Your file is not eucJP? %s' % anthy_zipfile
+ contents = open(anthy_zipfile).read()  # fall back to the raw, undecoded bytes
+
+output_zipfile = open('zipcode.t', 'w')
+output_zipfile.write('# copied %s with UTF-8.\n#\n' % anthy_zipfile)
+
+for line in contents.split('\n'):
+ if len(line) == 0 or line[0] == '#':
+ output_zipfile.write('%s\n' % line)
+ continue
+
+ words = line.split()
+ if len(words) < 3:
+ continue
+
+ if len(words[0]) < 1 or ord(unicode(words[0], 'utf-8')[0]) > 0xff:
+ mbcs_addr = words[0]
+ else:
+ uni_addr = ''
+ i = 0
+ for word in words[0]:
+ # Map each ASCII character to its fullwidth form (code point + 0xfee0).
+ uni_addr += unichr(0xfee0 + ord(word))
+ if i == 2:
+ # After the third character, insert U+30FC as the wide hyphen.
+ uni_addr += unichr(0x30fc)
+ i += 1
+ mbcs_addr = uni_addr.encode('utf-8')
+
+ output_zipfile.write('%s %s %s\n' % \
+ (mbcs_addr, '#T35*500', words[2]))
+
+output_zipfile.flush()
+output_zipfile.close()