summary | refs | log | tree | commit | diff
path: root/src-diclib/xchar.c
diff options
context:
space:
mode:
Diffstat (limited to 'src-diclib/xchar.c')
-rw-r--r-- src-diclib/xchar.c 433
1 files changed, 433 insertions, 0 deletions
diff --git a/src-diclib/xchar.c b/src-diclib/xchar.c
new file mode 100644
index 0000000..6c67e42
--- /dev/null
+++ b/src-diclib/xchar.c
@@ -0,0 +1,433 @@
+/*
+ * Handles character (xchar) types and the like
+ *
+ * Copyright (C) 2001-2006 TABATA Yusuke
+ */
+#include <string.h>
+#include "config.h"
+
+#include <anthy/xstr.h>
+#include <anthy/xchar.h>
+
+#include "diclib_inner.h"
+
+#define PAGE_SIZE 128 /* slots per page: search() splits a code into v / PAGE_SIZE and v % PAGE_SIZE */
+#define NR_PAGES 512 /* search() treats any page index >= NR_PAGES as unmapped */
+#include "e2u.h"
+#include "u2e.h"
+
+/* Character type table keyed by UCS4 code; find_xchar_type() scans it up to the xc == 0 row. */
+static struct xchar_ent {
+  const xchar xc;          /* character code */
+  const int type;          /* XCT_* value reported for xc */
+  struct xchar_ent *next;  /* hash chain; 0 in every initializer below */
+} xchar_tab[] =
+{
+  {0xFF40, XCT_OPEN , 0}, /* ` fullwidth grave accent (added) */
+  {0x2032, XCT_CLOSE, 0}, /* ′ prime (added) */
+  {0x2018, XCT_OPEN , 0}, /* ‘ left single quotation mark (added) */
+  {0x2019, XCT_CLOSE, 0}, /* ’ right single quotation mark (added) */
+  {0x201C, XCT_OPEN , 0}, /* “ left double quotation mark (added) */
+  {0x201D, XCT_CLOSE, 0}, /* ” right double quotation mark; corrected because the earlier code looked wrong */
+  {0xff08, XCT_OPEN, 0}, /* ( fullwidth left parenthesis */
+  {0xff09, XCT_CLOSE, 0}, /* ) fullwidth right parenthesis */
+  {0x3014, XCT_OPEN, 0}, /* 〔 left tortoise shell bracket */
+  {0x3015, XCT_CLOSE, 0}, /* 〕 right tortoise shell bracket */
+  {0xff3b, XCT_OPEN, 0}, /* [ fullwidth left square bracket */
+  {0xff3d, XCT_CLOSE, 0}, /* ] fullwidth right square bracket */
+  {0xff5b, XCT_OPEN, 0}, /* { fullwidth left curly bracket */
+  {0xff5d, XCT_CLOSE, 0}, /* } fullwidth right curly bracket */
+  {0x3008, XCT_OPEN, 0}, /* 〈 left angle bracket */
+  {0x3009, XCT_CLOSE, 0}, /* 〉 right angle bracket */
+  {0x300a, XCT_OPEN, 0}, /* 《 left double angle bracket */
+  {0x300b, XCT_CLOSE, 0}, /* 》 right double angle bracket */
+  {0x300c, XCT_OPEN, 0}, /* 「 left corner bracket */
+  {0x300d, XCT_CLOSE, 0}, /* 」 right corner bracket */
+  {0x300e, XCT_OPEN, 0}, /* 『 left white corner bracket */
+  {0x300f, XCT_CLOSE, 0}, /* 』 right white corner bracket */
+  {0x3010, XCT_OPEN, 0}, /* 【 left black lenticular bracket */
+  {0x3011, XCT_CLOSE, 0}, /* 】 right black lenticular bracket */
+  {0x3001, XCT_PUNCTUATION, 0}, /* 、 ideographic comma */
+  {0x3002, XCT_PUNCTUATION, 0}, /* 。 ideographic full stop */
+  {0xff0c, XCT_PUNCTUATION, 0}, /* , fullwidth comma */
+  {0xff0e, XCT_PUNCTUATION, 0}, /* . fullwidth full stop */
+  {0xff1f, XCT_PUNCTUATION, 0}, /* ? fullwidth question mark */
+  {0xff01, XCT_PUNCTUATION, 0}, /* ! fullwidth exclamation mark */
+  /* ASCII brackets. These rows used decimal 28, 133, 29, 135 (apparently 0x28/0x29 and octal 0133/0135), which never matched. */
+  {'(', XCT_OPEN, 0}, /* ( */
+  {'[', XCT_OPEN, 0}, /* [ */
+  {')', XCT_CLOSE, 0}, /* ) */
+  {']', XCT_CLOSE, 0}, /* ] */
+  {HK_TO, XCT_DEP, 0}, /* と (to) */
+  {HK_HA, XCT_DEP, 0}, /* は (ha) */
+  {HK_NO, XCT_DEP, 0}, /* の (no) */
+  {HK_NI, XCT_DEP, 0}, /* に (ni) */
+  {HK_GA, XCT_DEP, 0}, /* が (ga) */
+  {HK_WO, XCT_DEP, 0}, /* を (wo) */
+  {WIDE_0, XCT_WIDENUM, 0},
+  {WIDE_1, XCT_WIDENUM, 0},
+  {WIDE_2, XCT_WIDENUM, 0},
+  {WIDE_3, XCT_WIDENUM, 0},
+  {WIDE_4, XCT_WIDENUM, 0},
+  {WIDE_5, XCT_WIDENUM, 0},
+  {WIDE_6, XCT_WIDENUM, 0},
+  {WIDE_7, XCT_WIDENUM, 0},
+  {WIDE_8, XCT_WIDENUM, 0},
+  {WIDE_9, XCT_WIDENUM, 0},
+  {HK_DDOT, XCT_PART, 0},
+  {HK_XA, XCT_PART, 0},
+  {HK_XI, XCT_PART, 0},
+  {HK_XU, XCT_PART, 0},
+  {HK_XE, XCT_PART, 0},
+  {HK_XO, XCT_PART, 0},
+  {HK_XYA, XCT_PART, 0},
+  {HK_XYU, XCT_PART, 0},
+  {HK_XYO, XCT_PART, 0},
+  {HK_TT, XCT_PART, 0},
+  {0, 0, 0}, /* terminator: xc == 0 ends the scan */
+};
+
+#define DDOT 0x8ede /* third column of the HK_GA..HK_BO rows; also the dst of the HK_DDOT row */
+#define CIRCLE 0x8edf /* third column of the HK_PA..HK_PO rows */
+/* Table scanned by anthy_find_half_kana(): column 1 is the key (src), column 2 is dst, column 3 is 0, DDOT or CIRCLE. */
+static const struct half_kana_table half_kana_tab[] = { /* NOTE(review): dst values look like EUC-JP half-width kana (0x8e prefix) - confirm */
+ {HK_A,0x8eb1,0},
+ {HK_I,0x8eb2,0},
+ {HK_U,0x8eb3,0},
+ {HK_E,0x8eb4,0},
+ {HK_O,0x8eb5,0},
+ {HK_KA,0x8eb6,0},
+ {HK_KI,0x8eb7,0},
+ {HK_KU,0x8eb8,0},
+ {HK_KE,0x8eb9,0},
+ {HK_KO,0x8eba,0},
+ {HK_SA,0x8ebb,0},
+ {HK_SI,0x8ebc,0},
+ {HK_SU,0x8ebd,0},
+ {HK_SE,0x8ebe,0},
+ {HK_SO,0x8ebf,0},
+ {HK_TA,0x8ec0,0},
+ {HK_TI,0x8ec1,0},
+ {HK_TU,0x8ec2,0},
+ {HK_TE,0x8ec3,0},
+ {HK_TO,0x8ec4,0},
+ {HK_NA,0x8ec5,0},
+ {HK_NI,0x8ec6,0},
+ {HK_NU,0x8ec7,0},
+ {HK_NE,0x8ec8,0},
+ {HK_NO,0x8ec9,0},
+ {HK_HA,0x8eca,0},
+ {HK_HI,0x8ecb,0},
+ {HK_HU,0x8ecc,0},
+ {HK_HE,0x8ecd,0},
+ {HK_HO,0x8ece,0},
+ {HK_MA,0x8ecf,0},
+ {HK_MI,0x8ed0,0},
+ {HK_MU,0x8ed1,0},
+ {HK_ME,0x8ed2,0},
+ {HK_MO,0x8ed3,0},
+ {HK_YA,0x8ed4,0},
+ {HK_YU,0x8ed5,0},
+ {HK_YO,0x8ed6,0},
+ {HK_RA,0x8ed7,0},
+ {HK_RI,0x8ed8,0},
+ {HK_RU,0x8ed9,0},
+ {HK_RE,0x8eda,0},
+ {HK_RO,0x8edb,0},
+ {HK_WA,0x8edc,0},
+ {HK_WI,0,0}, /* dst == 0: anthy_find_half_kana() never returns this row */
+ {HK_WE,0,0}, /* dst == 0: anthy_find_half_kana() never returns this row */
+ {HK_WO,0x8ea6,0},
+ {HK_N,0x8edd,0},
+ {HK_TT,0x8eaf,0},
+ {HK_XA,0x8ea7,0},
+ {HK_XI,0x8ea8,0},
+ {HK_XU,0x8ea9,0},
+ {HK_XE,0x8eaa,0},
+ {HK_XO,0x8eab,0},
+ {HK_GA,0x8eb6,DDOT}, /* from here to HK_BO: same dst as the HK_KA/SA/TA/HA rows above, plus DDOT */
+ {HK_GI,0x8eb7,DDOT},
+ {HK_GU,0x8eb8,DDOT},
+ {HK_GE,0x8eb9,DDOT},
+ {HK_GO,0x8eba,DDOT},
+ {HK_ZA,0x8ebb,DDOT},
+ {HK_ZI,0x8ebc,DDOT},
+ {HK_ZU,0x8ebd,DDOT},
+ {HK_ZE,0x8ebe,DDOT},
+ {HK_ZO,0x8ebf,DDOT},
+ {HK_DA,0x8ec0,DDOT},
+ {HK_DI,0x8ec1,DDOT},
+ {HK_DU,0x8ec2,DDOT},
+ {HK_DE,0x8ec3,DDOT},
+ {HK_DO,0x8ec4,DDOT},
+ {HK_BA,0x8eca,DDOT},
+ {HK_BI,0x8ecb,DDOT},
+ {HK_BU,0x8ecc,DDOT},
+ {HK_BE,0x8ecd,DDOT},
+ {HK_BO,0x8ece,DDOT},
+ {HK_PA,0x8eca,CIRCLE}, /* HK_PA..HK_PO: same dst as the HK_HA..HK_HO rows, plus CIRCLE */
+ {HK_PI,0x8ecb,CIRCLE},
+ {HK_PU,0x8ecc,CIRCLE},
+ {HK_PE,0x8ecd,CIRCLE},
+ {HK_PO,0x8ece,CIRCLE},
+ {HK_XYA,0x8eac,0},
+ {HK_XYU,0x8ead,0},
+ {HK_XYO,0x8eae,0},
+ {HK_XWA,0,0}, /* dst == 0: anthy_find_half_kana() never returns this row */
+ {HK_DDOT,DDOT,0},
+ {HK_BAR,0x8eb0,0},
+ {0,0,0} /* terminator: src == 0 ends the scan */
+};
+
+static const struct half_wide_ent { /* pairs searched in both directions by anthy_lookup_half_wide() */
+ const xchar half; /* ASCII symbol; 0 marks the end of the table */
+ const xchar wide; /* wide counterpart of `half` */
+} half_wide_tab[] = {
+ {'!', 0xff01},
+ {'\"', 0x201d}, /* U+201D; unlike most rows, not 0xff00 + (half - 0x20) */
+ {'#', 0xff03},
+ {'$', 0xff04},
+ {'%', 0xff05},
+ {'&', 0xff06},
+ {'\'', 0x2019}, /* U+2019; unlike most rows, not 0xff00 + (half - 0x20) */
+ {'(', 0xff08},
+ {')', 0xff09},
+ {'*', 0xff0a},
+ {'+', 0xff0b},
+ {',', 0xff0c},
+ {'-', 0xff0d},
+ {'.', 0xff0e},
+ {'/', 0xff0f},
+ {':', 0xff1a},
+ {';', 0xff1b},
+ {'<', 0xff1c},
+ {'=', 0xff1d},
+ {'>', 0xff1e},
+ {'?', 0xff1f},
+ {'@', 0xff20},
+ {'[', 0xff3b},
+ {'\\', 0xff3c},
+ {']', 0xff3d},
+ {'^', 0xff3e},
+ {'_', 0xff3f},
+ {'`', 0xff40},
+ {'{', 0xff5b},
+ {'|', 0xff5c},
+ {'}', 0xff5d},
+ {'~', 0xff5e},
+ {0, 0} /* terminator */
+};
+
+/* Translate xc through half_wide_tab in either direction: a `half` value
+ * yields its `wide` partner and vice versa.  Returns 0 if xc is in neither. */
+xchar
+anthy_lookup_half_wide(xchar xc)
+{
+  int i;
+  for (i = 0; half_wide_tab[i].half != 0; i++) {
+    const struct half_wide_ent *ent = &half_wide_tab[i];
+    if (xc == ent->half || xc == ent->wide) {
+      return (xc == ent->half) ? ent->wide : ent->half;
+    }
+  }
+  return 0;
+}
+
+/* Return the half_kana_tab row whose src equals xc and whose dst is
+ * nonzero, or NULL when the scan reaches the src == 0 terminator first. */
+const struct half_kana_table *
+anthy_find_half_kana(xchar xc)
+{
+  const struct half_kana_table *row = half_kana_tab;
+  while (row->src != 0 && !(row->src == xc && row->dst != 0)) {
+    row++;
+  }
+  return (row->src != 0) ? row : NULL;
+}
+
+/* Return the type stored in xchar_tab for xc, or XCT_NONE when the
+ * linear scan reaches the xc == 0 terminator without finding it. */
+static int
+find_xchar_type(xchar xc)
+{
+  const struct xchar_ent *ent = xchar_tab;
+
+  while (ent->xc != 0 && ent->xc != xc) {
+    ent++;
+  }
+  /* ent is now either the matching row or the terminator. */
+  return (ent->xc != 0) ? ent->type : XCT_NONE;
+}
+
+/*
+ * Return 1 when xc is treated as hiragana, 0 otherwise.  HK_DDOT and HK_BAR
+ * are accepted outright; any other character qualifies when its
+ * anthy_ucs_to_euc() conversion has 0xa4 in bits 8-15.
+ */
+static int
+is_hira(xchar xc)
+{
+  xchar euc;
+  if (xc == HK_DDOT || xc == HK_BAR) {
+    return 1;
+  }
+  euc = anthy_ucs_to_euc(xc);
+  return (euc & 0xff00) == 0xa400;
+}
+
+/* Return 1 when xc is treated as katakana, 0 otherwise.  HK_BAR is accepted
+ * outright; any other character qualifies when its anthy_ucs_to_euc()
+ * conversion has 0xa5 in bits 8-15. */
+static int
+is_kata(xchar xc)
+{
+  if (xc == HK_BAR) {
+    return 1;
+  }
+
+  return (anthy_ucs_to_euc(xc) & 0xff00) == 0xa500;
+}
+
+/*
+ * Return 1 when xc is a symbol, 0 otherwise.  UCS_GETA itself is a symbol;
+ * any other character that anthy_ucs_to_euc() turns into EUC_GETA is not.
+ * The rest are symbols when bits 8-15 of the converted value are 0xa1 or 0xa2.
+ */
+static int
+is_symbol(xchar xc)
+{
+  xchar euc;
+  if (xc == UCS_GETA) {
+    return 1;
+  }
+  euc = anthy_ucs_to_euc(xc);
+  if (euc == EUC_GETA) {
+    return 0;
+  }
+  return (euc & 0xff00) == 0xa100 || (euc & 0xff00) == 0xa200;
+}
+
+/* Return 1 if xc lies in the CJK Unified Ideographs range U+4E00..U+9FFF,
+ * otherwise 0.  The lower bound is inclusive so that U+4E00 itself counts;
+ * the previous test (xc > 0x4e00) left it out. */
+static int
+is_kanji(xchar xc)
+{
+  return (xc >= 0x4e00 && xc < 0xa000) ? 1 : 0;
+}
+
+/* Look up code v in a paged table: page tab[v / PAGE_SIZE], slot v % PAGE_SIZE.
+ * Returns geta when v is negative, when the page index is >= NR_PAGES, when
+ * the page pointer is NULL, or when the slot is 0 (unless v itself is 0). */
+static int
+search(const int *tab[], int v, int geta)
+{
+  const int *t;
+
+  /* Reject v < 0 first: it would index before the start of tab or the page. */
+  if (v < 0 || v / PAGE_SIZE >= NR_PAGES) {
+    return geta;
+  }
+  t = tab[v / PAGE_SIZE];
+  if (!t || (!t[v % PAGE_SIZE] && v)) {
+    return geta;
+  }
+  return t[v % PAGE_SIZE];
+}
+
+/* Convert EUC code ec to UCS through e2u_index; search() yields UCS_GETA when there is no mapping. */
+int anthy_euc_to_ucs(int ec)
+{
+  return search(e2u_index, ec, UCS_GETA);
+}
+
+/* Convert UCS code uc to EUC through u2e_index.  EUC_GETA is returned when
+ * search() finds no mapping or when the mapped value is greater than 65536. */
+int
+anthy_ucs_to_euc(int uc)
+{
+  const int euc = search(u2e_index, uc, EUC_GETA);
+
+  return (euc > 65536) ? EUC_GETA : euc;
+}
+
+/*
+ * Build the XCT_* flag set for one character.  Starts from the xchar_tab
+ * entry (find_xchar_type), then ORs in XCT_NUM for '0'..'9', XCT_ASCII for
+ * codes below 128, and XCT_HIRA / XCT_KATA / XCT_KANJI from the is_*()
+ * helpers.  XCT_SYMBOL is added only if neither XCT_OPEN nor XCT_CLOSE is set.
+ */
+int
+anthy_get_xchar_type(const xchar xc)
+{
+  int flags = find_xchar_type(xc);
+
+  if (xc >= '0' && xc <= '9') {
+    flags |= XCT_NUM;
+  }
+  if (xc < 128) {
+    flags |= XCT_ASCII;
+  }
+  flags |= is_hira(xc) ? XCT_HIRA : 0;
+  flags |= is_kata(xc) ? XCT_KATA : 0;
+  if (is_symbol(xc) && !(flags & (XCT_OPEN | XCT_CLOSE))) {
+    flags |= XCT_SYMBOL;
+  }
+  flags |= is_kanji(xc) ? XCT_KANJI : 0;
+
+  return flags;
+}
+
+/* AND together anthy_get_xchar_type() of each character of xs, starting from XCT_ALL (returned as is for len 0). */
+int anthy_get_xstr_type(const xstr *xs)
+{
+  int common = XCT_ALL, pos = 0;
+  while (pos < xs->len) {
+    common &= anthy_get_xchar_type(xs->str[pos++]);
+  }
+  return common;
+}
+
+/* Return the value 0..9 of a digit character, accepting WIDE_0..WIDE_9 as
+ * well as ASCII '0'..'9'; return -1 for anything else. */
+int
+anthy_xchar_to_num(xchar xc)
+{
+  static const xchar wide_digits[] = {
+    WIDE_0, WIDE_1, WIDE_2, WIDE_3, WIDE_4,
+    WIDE_5, WIDE_6, WIDE_7, WIDE_8, WIDE_9
+  };
+  int value;
+  for (value = 0; value < 10; value++) {
+    if (xc == wide_digits[value]) {
+      return value;
+    }
+  }
+  if (xc >= '0' && xc <= '9') {
+    return xc - (int)'0';
+  }
+  return -1;
+}
+
+/* Map WIDE_0..WIDE_9 to the ASCII digits '0'..'9'; every other character
+ * is returned unchanged. */
+xchar
+anthy_xchar_wide_num_to_num(xchar c)
+{
+  static const xchar wide_digits[] = {
+    WIDE_0, WIDE_1, WIDE_2, WIDE_3, WIDE_4,
+    WIDE_5, WIDE_6, WIDE_7, WIDE_8, WIDE_9
+  };
+  int d;
+  for (d = 0; d < 10; d++) {
+    if (c == wide_digits[d]) {
+      return '0' + d;
+    }
+  }
+  return c;
+}
+
+void
+anthy_init_xchar_tab(void)
+{ /* Empty body: nothing is initialised here; xchar_tab is filled by its static initializer. */
+}