summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfujiwarat <takao.fujiwara1@gmail.com>2013-08-27 23:17:47 +0900
committerfujiwarat <takao.fujiwara1@gmail.com>2013-08-29 12:57:40 +0900
commit17d8871211a531c755cd4397a75f4892222412ff (patch)
tree4a845f7c2b1084f0a0e8d3db90e41c5258c98135
parente1a94ef87577a38b3402a0ebaa1f0e5088f99c2a (diff)
downloadibus-anthy-17d8871211a531c755cd4397a75f4892222412ff.tar.gz
Imported anthy zipcode.t into ibus-anthy.
-rw-r--r--configure.ac1
-rw-r--r--data/Makefile.am10
-rwxr-xr-xdata/zipcode-textdic.py54
-rw-r--r--engine/engine.py38
-rw-r--r--setup/anthyprefs.py.in7
5 files changed, 68 insertions, 42 deletions
diff --git a/configure.ac b/configure.ac
index 60b86a5..ff5fbef 100644
--- a/configure.ac
+++ b/configure.ac
@@ -249,6 +249,7 @@ AC_ARG_WITH(anthy-zipcode,
[ANTHY_ZIPCODE_FILE="$withval"],
[ANTHY_ZIPCODE_FILE="$datadir/anthy/zipcode.t"])
AC_SUBST(ANTHY_ZIPCODE_FILE)
+AM_CONDITIONAL([ENABLE_ZIPCODE], [test -f "$ANTHY_ZIPCODE_FILE" ])
dnl - check file path of kasumi bin
AC_ARG_WITH(kasumi-exec,
diff --git a/data/Makefile.am b/data/Makefile.am
index 3aa240a..f187c93 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -27,8 +27,18 @@ dicts_DATA = \
dictsdir = $(pkgdatadir)/dicts
+if ENABLE_ZIPCODE
+dicts_DATA += zipcode.t
+
+zipcode.t: $(ANTHY_ZIPCODE_FILE)
+ $(PYTHON) zipcode-textdic.py $<
+
+CLEANFILES = zipcode.t
+endif
+
EXTRA_DIST = \
era.t \
oldchar.t \
symbol.t \
+ zipcode-textdic.py \
$(NULL)
diff --git a/data/zipcode-textdic.py b/data/zipcode-textdic.py
new file mode 100755
index 0000000..2864f85
--- /dev/null
+++ b/data/zipcode-textdic.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+# The anthy zipcode.t is imported into ibus-anthy as UTF-8 because
+# grepping the file for digits without a hyphen in the engine could cause
+# a timeout. If digits without a hyphen were sent to anthy, digits with a
+# hyphen would also have to be sent, so the lookup could include too many
+# unnecessary candidates.
+# The output is installed under the name 'zipcode.t' to simplify the engine.
+
+import sys
+
+if len(sys.argv) < 2:  # the path of the source dictionary is required
+ print >> sys.stderr, 'usage: %s /usr/share/anthy/zipcode.t' % sys.argv[0]
+ exit(-1)
+
+anthy_zipfile = sys.argv[1]
+
+try:  # re-encode the EUC-JP source as UTF-8
+ contents = unicode(open(anthy_zipfile).read(), 'euc_jp').encode('utf-8')
+except UnicodeDecodeError, e:
+ print >> sys.stderr, 'Your file is not eucJP? %s' % anthy_zipfile
+ contents = open(anthy_zipfile).read()  # fall back to the bytes as read
+
+output_zipfile = open('zipcode.t', 'w')  # relative path: current directory
+output_zipfile.write('# copied %s with UTF-8.\n#\n' % anthy_zipfile)
+
+for line in contents.split('\n'):
+ if len(line) == 0 or line[0] == '#':  # copy blank and '#' lines through
+ output_zipfile.write('%s\n' % line)
+ continue
+
+ words = line.split()
+ if len(words) < 3:  # drop entries with fewer than three fields
+ continue
+
+ if len(words[0]) < 1 or ord(unicode(words[0], 'utf-8')[0]) > 0xff:
+ mbcs_addr = words[0]  # first char is above U+00FF: keep the key as is
+ else:
+ uni_addr = ''
+ i = 0
+ for word in words[0]:
+ # Convert each ASCII char (expected to be a digit) to its wide form.
+ uni_addr += unichr(0xfee0 + ord(word))
+ if i == 2:
+ # Insert U+30FC after the third character as the wide hyphen.
+ uni_addr += unichr(0x30fc)
+ i += 1
+ mbcs_addr = uni_addr.encode('utf-8')
+
+ output_zipfile.write('%s %s %s\n' % \
+ (mbcs_addr, '#T35*500', words[2]))  # words[1] is replaced by '#T35*500'
+
+output_zipfile.flush()
+output_zipfile.close()
diff --git a/engine/engine.py b/engine/engine.py
index a31c58b..ef160d4 100644
--- a/engine/engine.py
+++ b/engine/engine.py
@@ -1062,43 +1062,6 @@ class Engine(IBus.EngineSimple):
candidate = candidate.replace(key, value)
self.__lookup_table.append_candidate(IBus.Text.new_from_string(candidate))
- def __fill_anthy_zipcode_strip(self, dict_file, id):
- import re
- text = self.__preedit_ja_string.get_latin()[0]
- if text.find('-') < 0:
- return
- text = text.replace('-', '')
- section = 'dict/file/' + id
- if 'encoding' not in self.__prefs.keys(section):
- section = 'dict/file/default'
- encoding = self.__prefs.get_value(section, 'encoding')
- contents = unicode(open(dict_file).read(), encoding)
- expression = re.compile('^' + text + '[ \t]')
-
- found = False
- dict_dest = None
- for line in contents.split('\n'):
- matched = expression.search(line)
- if matched:
- found = True
- dict_dest = UN(matched.string).split(' ')[2]
- break
- if found:
- self.__lookup_table.append_candidate(IBus.Text.new_from_string(dict_dest))
-
- def __fill_lookup_table_dict_mode(self):
- if Engine.__dict_mode <= 0:
- return
- single_files = self.__get_single_dict_files()
- file = single_files[Engine.__dict_mode - 1]
- if file == None:
- return
- id = self.__get_dict_id_from_file(file)
- if id == None:
- return
- if id == 'zipcode':
- self.__fill_anthy_zipcode_strip(file, id)
-
def __fill_lookup_table(self):
if self.__convert_mode == CONV_MODE_PREDICTION:
nr_predictions = self.__context.get_nr_predictions()
@@ -1122,7 +1085,6 @@ class Engine(IBus.EngineSimple):
candidate = UN(buf)
self.__lookup_table.append_candidate(IBus.Text.new_from_string(candidate))
self.__candidate_cb(candidate)
- self.__fill_lookup_table_dict_mode()
def __invalidate(self):
diff --git a/setup/anthyprefs.py.in b/setup/anthyprefs.py.in
index f2a3b64..a685da1 100644
--- a/setup/anthyprefs.py.in
+++ b/setup/anthyprefs.py.in
@@ -1070,12 +1070,12 @@ _config = {
},
'dict': {
- 'zipcode': ['@ANTHY_ZIPCODE_FILE@'],
+ 'zipcode': ['@pkgdatadir@/dicts/zipcode.t'],
'symbol': ['@pkgdatadir@/dicts/symbol.t'],
'oldchar': ['@pkgdatadir@/dicts/oldchar.t'],
'era': ['@pkgdatadir@/dicts/era.t'],
'files': [
- '@ANTHY_ZIPCODE_FILE@',
+ '@pkgdatadir@/dicts/zipcode.t',
'@pkgdatadir@/dicts/symbol.t',
'@pkgdatadir@/dicts/oldchar.t',
'@pkgdatadir@/dicts/era.t',
@@ -1112,9 +1112,8 @@ _config = {
'short_label': '〒',
'long_label': N_("Zip Code Conversion"),
'preview_lines': 30,
- 'reverse': True,
+ 'reverse': False,
'is_system': True,
- 'encoding': 'euc_jp',
},
'dict/file/symbol': {