diff options
Diffstat (limited to 'src/PYPinyinParser.cc')
-rw-r--r-- | src/PYPinyinParser.cc | 352 |
1 files changed, 0 insertions, 352 deletions
diff --git a/src/PYPinyinParser.cc b/src/PYPinyinParser.cc deleted file mode 100644 index 4598ff8..0000000 --- a/src/PYPinyinParser.cc +++ /dev/null @@ -1,352 +0,0 @@ -/* vim:set et ts=4 sts=4: - * - * ibus-pinyin - The Chinese PinYin engine for IBus - * - * Copyright (c) 2008-2010 Peng Huang <shawn.p.huang@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include <cstring> -#include <cstdlib> -#include "PYPinyinParser.h" - -namespace PY { - -#include "PYBopomofo.h" -#include "PYPinyinParserTable.h" - -static gboolean -check_flags (const Pinyin *pinyin, guint option) -{ - if (pinyin == NULL) - return FALSE; - - if (pinyin->flags != 0) { - guint flags; - flags = pinyin->flags & option; - if (flags == 0) - return FALSE; - if ((flags != pinyin->flags) && ((pinyin->flags & PINYIN_CORRECT_ALL) != 0)) - return FALSE; - } - return TRUE; -} - -static int -py_cmp (const void *p1, const void *p2) -{ - const gchar *str = (const gchar *) p1; - const Pinyin *py = (const Pinyin *) p2; - - return std::strcmp (str, py->text); -} - -static const Pinyin * -is_pinyin (const gchar *p, - const gchar *end, - gint len, - guint option) -{ - gchar buf[8]; - const Pinyin *result; - - if (G_UNLIKELY (len > 6)) - return NULL; - - if (G_UNLIKELY (len > end - p)) - return NULL; - - if (G_LIKELY (len > 0)) { - std::strncpy (buf, p, len); - buf[len] = 0; - result = (const Pinyin *) std::bsearch (buf, pinyin_table, G_N_ELEMENTS (pinyin_table), - sizeof (Pinyin), py_cmp); - if (check_flags (result, option)) - return result; - return NULL; - } - - /* len < 0 */ - len = MIN (6, end - p); - std::strncpy (buf, p, len); - - for (; len > 0; len --) { - buf[len] = 0; - result = (const Pinyin *) std::bsearch (buf, pinyin_table, G_N_ELEMENTS (pinyin_table), - sizeof (Pinyin), py_cmp); - if (G_UNLIKELY (check_flags (result, option))) { - return result; - } - } - - return NULL; -} - -static int -sp_cmp (const void *p1, - const void *p2) -{ - const Pinyin **pys = (const Pinyin **) p1; - const Pinyin **e = (const Pinyin **) p2; - - int retval = pys[0] - e[0]; - - if (retval != 0) - return retval; - return pys[1] - e[1]; -} - -static const Pinyin ** -need_resplit(const Pinyin *p1, - const Pinyin *p2) -{ - const Pinyin * pys[] = {p1, p2}; - - return (const Pinyin **) std::bsearch (pys, special_table, G_N_ELEMENTS (special_table), - sizeof (special_table[0]), sp_cmp); -} - -guint -PinyinParser::parse (const String &pinyin, - gint len, - guint option, - PinyinArray &result, - guint max) -{ - - const gchar *p; - const gchar *end; - const Pinyin *py; - const Pinyin *prev_py; - gchar prev_c; - - result.clear (); - - if (G_UNLIKELY (len < 0)) - len = pinyin.size (); - - p = pinyin; - end = p + len; - - prev_py = NULL; - - prev_c = 0; - for (; p < end && result.size () < max; ) { - if (G_UNLIKELY (*p == '\'')) { - prev_c = '\''; - p++; - continue; - } - switch (prev_c) { - case 'r': - case 'n': - case 'g': - case 'e': - switch (*p) { - case 'i': - case 'u': - case 'v': - case 'a': - case 'e': - case 'o': - case 'r': - { - const Pinyin **pp; - const Pinyin *new_py1; - const Pinyin *new_py2; - - py = is_pinyin (p, end, -1, option); - - if ((new_py1 = is_pinyin (prev_py->text, - prev_py->text + prev_py->len, - prev_py->len - 1, - option)) != NULL) { - new_py2 = is_pinyin (p -1, end, -1, option); - - if (((new_py2 != NULL) && (new_py2->len > 1 )) && - (py == NULL || new_py2->len > py->len + 1)) { - PinyinSegment & segment = result[result.size () - 1]; - segment.pinyin = new_py1; - segment.len = new_py1->len; - py = new_py2; - p --; - break; - } - } - - if ( py == NULL) - break; - - pp = need_resplit (prev_py, py); - if (pp != NULL) { - PinyinSegment & segment = result[result.size () - 1]; - segment.pinyin = pp[2]; - segment.len = pp[2]->len; - py = pp[3]; - p --; - break; - } - } - default: - py = is_pinyin (p, end, -1, option); - break; - } - break; - default: - py = is_pinyin (p, end, -1, option); - break; - } - - if (G_UNLIKELY (py == NULL)) - break; - - result.append (py, p - (const gchar *) pinyin, py->len); - p += py->len; - prev_c = py->text[py->len - 1]; - prev_py = py; - } - - if (G_UNLIKELY (p == (const gchar *)pinyin)) - return 0; -#if 0 - if (G_UNLIKELY (*(p - 1) == '\'')) - p --; -#endif - return p - (const gchar *)pinyin; -} - -static const gchar * const -id_map[] = { - "", "b", "c", "ch", - "d", "f", "g", "h", - "j", "k", "l", "m", - "n", "p", "q", "r", - "s", "sh", "t", "w", - "x", "y", "z", "zh", - "a", "ai", "an", "ang", "ao", - "e", "ei", "en", "eng", "er", - "i", "ia", "ian", "iang", "iao", - "ie", "in", "ing", "iong", "iu", - "o", "ong", "ou", - "u", "ua", "uai", "uan", "uang", - 0, /* it should be ue or ve */ - "ui", "un", "uo", "v" -}; - -const Pinyin * -PinyinParser::isPinyin (gint sheng, gint yun, guint option) -{ - const Pinyin *result; - gchar buf[16]; - - std::strcpy (buf, id_map[sheng]); - - if (yun == PINYIN_ID_UE) { - /* append ue or ve base on sheng */ - switch (sheng) { - case PINYIN_ID_J: - case PINYIN_ID_Q: - case PINYIN_ID_X: - case PINYIN_ID_Y: - std::strcat (buf, "ue"); - break; - default: - std::strcat (buf, "ve"); - break; - } - } - else { - std::strcat (buf, id_map[yun]); - } - - result = (const Pinyin *) bsearch (buf, pinyin_table, G_N_ELEMENTS (pinyin_table), - sizeof (Pinyin), py_cmp); - if (check_flags (result, option)) - return result; - return NULL; -} - -static int -bopomofo_cmp (const void *p1, const void *p2) -{ - const wchar_t *s1 = (wchar_t *) p1; - const Pinyin *s2 = *(const Pinyin **) p2; - - return std::wcscmp (s1, s2->bopomofo); -} - -gboolean -PinyinParser::isBopomofoToneChar (const wchar_t ch) -{ - return ch == bopomofo_char[BOPOMOFO_TONE_2] - || ch == bopomofo_char[BOPOMOFO_TONE_3] - || ch == bopomofo_char[BOPOMOFO_TONE_4] - || ch == bopomofo_char[BOPOMOFO_TONE_5]; -} - -guint -PinyinParser::parseBopomofo (const std::wstring &bopomofo, - gint len, - guint option, - PinyinArray &result, - guint max) -{ - std::wstring::const_iterator bpmf = bopomofo.begin(); - const std::wstring::const_iterator end = bpmf + len; - const Pinyin **bs_res = NULL; - wchar_t buf[MAX_BOPOMOFO_LEN + 1]; - guint i, j; - - result.clear (); - - if (G_UNLIKELY (len < 0)) - len = bopomofo.length (); - - for (; bpmf < end && result.size () < max;) { - for (i = MAX_BOPOMOFO_LEN; i > 0; i--){ - if (bpmf + i > end) - continue; - - for (j = 0; j < i; j++){ - wchar_t key = *(bpmf + j); - - if (j == i - 1 && isBopomofoToneChar (key)) { - break; /* ignore tone */ - } - - buf[j] = key; - } - - buf[j] = '\0'; - bs_res = (const Pinyin **) std::bsearch (buf, - bopomofo_table, - G_N_ELEMENTS (bopomofo_table), - sizeof (bopomofo_table[0]), - bopomofo_cmp); - if (bs_res != NULL && check_flags (*bs_res, option)) - break; - } - if (!(bs_res != NULL && check_flags (*bs_res, option))) - break; - - result.append(*bs_res, bpmf - bopomofo.begin (), i); - bpmf += i; - } - - return bpmf - bopomofo.begin (); -}; - -}; |