summaryrefslogtreecommitdiff
path: root/scripts/pyutil.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/pyutil.py')
-rw-r--r--scripts/pyutil.py148
1 files changed, 0 insertions, 148 deletions
diff --git a/scripts/pyutil.py b/scripts/pyutil.py
deleted file mode 100644
index 4ed0e4b..0000000
--- a/scripts/pyutil.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim:set et sts=4 sw=4:
-#
-# ibus-pinyin - The PinYin engine for IBus
-#
-# Copyright (c) 2007-2008 Peng Huang <shawn.p.huang@gmail.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
-
-from pydict import *
-
-class PinYinWord:
- correct_dict = {"nve" : "nue", "lve" : "lue"}
- def __init__ (self, pinyin):
- if pinyin in self.correct_dict:
- pinyin = self.correct_dict [pinyin]
-
- self._pinyin = pinyin
- self._is_completed = self.is_valid_pinyin ()
- if self._is_completed:
- sheng_mu, yun_mu = self.split ()
- self._pinyin_id = PINYIN_DICT [self._pinyin]
- self._sheng_mu_id = SHENGMU_DICT [sheng_mu]
- else:
- self._sheng_mu_id = SHENGMU_DICT [self._pinyin]
-
- def is_valid_pinyin (self):
- return PINYIN_DICT.has_key (self._pinyin)
-
- def get_sheng_mu_id (self):
- return self._sheng_mu_id
-
- def get_shengmu (self):
- return ID_SHENGMU_DICT[self._sheng_mu_id]
-
- def get_pinyin_id (self):
- return self._pinyin_id
-
- def get_pinyin (self):
- return self._pinyin
-
- def get_pattern (self, mohu = False):
- if mohu == False:
- if self.is_valid_pinyin ():
- return self._pinyin
- else:
- return self._pinyin + "%"
- else:
- if not self.is_valid_pinyin ():
- if self._pinyin in ("zh", "ch", "sh"):
- return self._pinyin[0] + "%"
- return self._pinyin + "%"
- else:
- shengmu = self.get_shengmu ()
- yunmu = self._pinyin [len (shengmu):]
- if shengmu in ("zh", "ch", "sh", "z", "c", "s"):
- shengmu = shengmu[0] + "%"
- if yunmu in ("ing", "in", "en", "eng", "an", "ang"):
- yunmu = yunmu[0:2] + "%"
- return shengmu + yunmu
-
- def split (self):
- if not self.is_valid_pinyin ():
- raise Exception ("Pinyin '%s' is not a valid pinyin!" % py)
- if self._pinyin[:2] in SHENGMU_DICT.keys ():
- return self._pinyin[:2], self._pinyin[2:]
- elif self._pinyin[:1] in SHENGMU_DICT.keys ():
- return self._pinyin[:1], self._pinyin[1:]
- else:
- return "", self._pinyin[:]
-
- def __str__ (self):
- return self._pinyin
-
-class PinYinString:
- def __init__ (self, string):
- pass
-
-def load_pinyin_table (_file):
-
- def pinyin_table_parser (f):
- for l in f:
- a = unicode (l, "utf-8").strip ().split ()
- hanzi, pinyin, freq = a
- yield (hanzi, pinyin, int (freq))
- # db.add_phrases (pinyin_table_parser (bzf))
-
- hanzi_dic = {}
- for hanzi, pinyin, freq in pinyin_table_parser (_file):
- if not hanzi_dic.has_key (hanzi):
- hanzi_dic[hanzi] = {}
-
- if hanzi_dic[hanzi].has_key (pinyin):
- hanzi_dic[hanzi][pinyin] += freq
- else:
- hanzi_dic[hanzi][pinyin] = freq
-
- return hanzi_dic
-
-def load_phrase_pinyin_freq (_file):
- def phrase_pinyin_parser (f):
- for l in f:
- phrase, pinyin, freq = unicode (l, "utf-8").strip ().split ()
- pinyin = pinyin.replace (u"u:", u"v")
- yield (phrase, pinyin, int (freq))
- phrases_dic = {}
- for phrase, pinyin, freq in phrase_pinyin_parser (_file):
- if not phrases_dic.has_key (phrase):
- phrases_dic[phrase] = []
- phrases_dic[phrase].append ((phrase, pinyin, freq))
-
- return phrases_dic
-
-def load_phrase_pinyin (_file):
- def phrase_pinyin_parser (f):
- for l in f:
- phrase, pinyin = unicode (l, "utf-8").strip ().split ()
- pinyin = pinyin.replace (u"u:", u"v")
- yield (phrase, pinyin, 0)
- phrases_dic = {}
- for phrase, pinyin, freq in phrase_pinyin_parser (_file):
- if not phrases_dic.has_key (phrase):
- phrases_dic[phrase] = []
- phrases_dic[phrase].append ((phrase, pinyin, freq))
-
- return phrases_dic
-
-def load_sogou_phrases (_file):
- import re
- dic = {}
- for l in _file:
- w = unicode (l, "utf8")
- w = re.split (ur"\t+", w)
- dic [w[0]] = (w[0], int (w[1]))
- return dic
-