summaryrefslogtreecommitdiff
path: root/anthy/wtype.h
diff options
context:
space:
mode:
Diffstat (limited to 'anthy/wtype.h')
-rw-r--r--anthy/wtype.h232
1 files changed, 232 insertions, 0 deletions
diff --git a/anthy/wtype.h b/anthy/wtype.h
new file mode 100644
index 0000000..fe7eb5c
--- /dev/null
+++ b/anthy/wtype.h
@@ -0,0 +1,232 @@
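+/*
+ * Handles wtype_t, the part-of-speech type.
+ *
+ * Describes the part of speech of a word.
+ * A word has the following elements:
+ * *part of speech
+ * *part-of-speech subtype
+ * *part-of-speech sub-subtype
+ * *conjugation class (CC)
+ * *conjugated form
+ * *flag telling whether it is an independent word
+ * wtype_t contains all of the information above.
+ *
+ * See doc/POS for the details of each element.
+ */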
+#ifndef _wtype_h_included_
+#define _wtype_h_included_
+
+/* Part Of Speech (POS) values; each must fit in POS_BITS bits */
+#define POS_NONE 0
+#define POS_NOUN 1
+#define POS_PRT 2
+#define POS_XV 3
+#define POS_V 4
+#define POS_A 5
+#define POS_AJV 6
+#define POS_AV 7
+#define POS_ME 8
+#define POS_CONJ 9
+#define POS_IJ 10
+#define POS_PRE 11
+#define POS_SUC 12
+#define POS_TANKANJI 13
+#define POS_N2T 14
+#define POS_D2KY 15
+#define POS_NUMBER 16
+#define POS_INVAL 17
+#define POS_OPEN 18
+#define POS_CLOSE 19
+
+/* Conjugated form (Conjugate Type, CT) values; each must fit in CT_BITS bits */
+#define CT_NONE 0
+#define CT_SYUSI 1
+#define CT_MIZEN 2
+#define CT_RENYOU 3
+#define CT_RENTAI 4
+#define CT_KATEI 5
+#define CT_MEIREI 6
+#define CT_HEAD 7
+#define CT_MEISIKA 8
+
+/* Conjugation class (Conjugate Class, CC) values; each must fit in CC_BITS bits */
+#define CC_NONE 0
+#define CC_K5 1
+#define CC_C5 2
+#define CC_G5 3
+#define CC_S5 4
+#define CC_T5 5
+#define CC_N5 6
+#define CC_M5 7
+#define CC_B5 8
+#define CC_R5 9
+#define CC_L5 10
+#define CC_W5 11
+#define CC_U5 12
+#define CC_KS1 13
+#define CC_RV 14
+#define CC_KV 15
+#define CC_SV 16
+#define CC_ZV 17
+#define CC_A 18
+#define CC_A_U 19
+#define CC_AJV 20
+#define CC_SRV 21
+
+/* Part-of-speech subtype (Class Of Speech, COS) values; each must fit in COS_BITS bits */
+#define COS_NONE 0
+/* place name */
+#define COS_CN 1
+/* numeral */
+#define COS_NN 2
+/* personal name */
+#define COS_JN 3
+/* organization name */
+#define COS_KK 4
+/* general prefix / suffix */
+#define COS_SUFFIX 5
+/* suffix of a sa-hen (suru-type irregular) verb */
+#define COS_SVSUFFIX 6
+/**/
+
+/* Part-of-speech sub-subtype (Sub Class Of Speech, SCOS) values; each must fit in SCOS_BITS bits */
+#define SCOS_NONE 0
+#define SCOS_FAMNAME 1
+#define SCOS_FSTNAME 2
+#define SCOS_T0 10
+#define SCOS_T2 12
+#define SCOS_T3 13
+#define SCOS_T4 14
+#define SCOS_T5 15
+#define SCOS_T7 17
+#define SCOS_T8 18
+#define SCOS_T9 19
+#define SCOS_T10 20
+#define SCOS_T12 22
+#define SCOS_T13 23
+#define SCOS_T14 24
+#define SCOS_T15 25
+#define SCOS_T17 27
+#define SCOS_T18 28
+#define SCOS_T19 29
+#define SCOS_T20 30
+#define SCOS_T22 32
+#define SCOS_T23 33
+#define SCOS_T24 34
+#define SCOS_T25 35
+#define SCOS_T27 37
+#define SCOS_T28 38
+#define SCOS_T29 39
+#define SCOS_T30 40
+#define SCOS_T32 42
+#define SCOS_T33 43
+#define SCOS_T34 44
+#define SCOS_T35 45
+#define SCOS_T37 47
+#define SCOS_T38 48
+#define SCOS_T39 49
+#define SCOS_T40 50
+#define SCOS_F0 60
+#define SCOS_F1 61
+#define SCOS_F2 62
+#define SCOS_F3 63
+#define SCOS_F4 64
+#define SCOS_F5 65
+#define SCOS_F6 66
+#define SCOS_F7 67
+#define SCOS_F8 68
+#define SCOS_F9 69
+#define SCOS_F10 70
+#define SCOS_F11 71
+#define SCOS_F12 72
+#define SCOS_F13 73
+#define SCOS_F14 74
+#define SCOS_A0 80
+#define SCOS_A1 81
+#define SCOS_N1 90
+#define SCOS_N10 91
+#define SCOS_N100 92
+#define SCOS_N1000 93
+#define SCOS_N10000 94
+
+/* FLAGS: single-bit values that can be OR-ed together; the set must fit in WF_BITS bits */
+#define WF_NONE 0
+/* for this verb, the adnominal (rentaishi) is nominalized */
+#define WF_MEISI 1
+/* sa-hen noun (noun used with the suru-type irregular verb) */
+#define WF_SV 2
+/* independent word; forms the core of a phrase (bunsetsu) */
+#define WF_INDEP 4
+/* adjectival verb (keiyou-doushi) */
+#define WF_AJV 8
+
+/* Offsets within wtype_t (same order as the fields of struct wtype) */
+#define WT_POS 0
+#define WT_COS 1
+#define WT_SCOS 2
+#define WT_CC 3
+#define WT_CT 4
+#define WT_FLAGS 5
+
+/* Width of each bit field of struct wtype */
+#define POS_BITS 5
+#define COS_BITS 4
+#define SCOS_BITS 7
+#define CC_BITS 5
+#define CT_BITS 4
+#define WF_BITS 4
+/* 29 bits in total (5 + 4 + 7 + 5 + 4 + 4) */
+
+/** Part of speech: every word-type element packed into bit fields */
+struct wtype{
+ unsigned int pos : POS_BITS; /* part of speech */
+ unsigned int cos : COS_BITS; /* class of speech (subtype) */
+ unsigned int scos : SCOS_BITS; /* sub class of speech (sub-subtype) */
+ unsigned int cc : CC_BITS; /* conjugate class */
+ unsigned int ct : CT_BITS; /* conjugate type (conjugated form) */
+ unsigned int wf : WF_BITS; /* flags */
+};
+
+typedef struct wtype wtype_t;
+
+/** anthy_wtype_include(noun, personal name) is true; the reverse is false */
+int anthy_wtype_include(wtype_t haystack, wtype_t needle);
+
+/* Checks whether two parts of speech match exactly */
+int anthy_wtype_equal(wtype_t lhs, wtype_t rhs);
+
+void anthy_print_wtype(wtype_t w);
+/* Gets the part of speech from a name used in the dictionary file (the function name is poor) */
+const char *anthy_type_to_wtype(const char *name, wtype_t *w);
+/* Gets the part of speech from the name of a part of speech */
+wtype_t anthy_init_wtype_by_name(const char *str);
+
+int anthy_wtype_get_pos(wtype_t w);
+int anthy_wtype_get_cc(wtype_t w);
+int anthy_wtype_get_ct(wtype_t w);
+int anthy_wtype_get_cos(wtype_t w);
+int anthy_wtype_get_scos(wtype_t w);
+int anthy_wtype_get_wf(wtype_t w);
+
+/* Flag getters (presumably test the individual WF_* bits -- confirm in the implementation) */
+int anthy_wtype_get_indep(wtype_t w);
+int anthy_wtype_get_sv(wtype_t w);
+int anthy_wtype_get_meisi(wtype_t w);
+int anthy_wtype_get_ajv(wtype_t w);
+
+wtype_t anthy_get_wtype(int pos, int cos, int scos, int cc, int ct, int wf); /* returns a wtype_t by value from the six elements */
+wtype_t anthy_get_wtype_with_ct(wtype_t base, int ct); /* NOTE(review): presumably a copy of base with ct replaced -- confirm */
+
+void anthy_wtype_set_pos(wtype_t *w, int pos); /* setters take the wtype_t by pointer */
+void anthy_wtype_set_cc(wtype_t *w, int cc);
+void anthy_wtype_set_ct(wtype_t *w, int ct);
+void anthy_wtype_set_cos(wtype_t *w, int cs);
+void anthy_wtype_set_scos(wtype_t *w, int scos);
+void anthy_wtype_set_dep(wtype_t *w, int isDep); /* NOTE(review): presumably affects the WF_INDEP flag -- confirm polarity */
+
+void anthy_init_wtypes(void);
+
+extern wtype_t anthy_wt_all;/* independent word that matches everything */
+extern wtype_t anthy_wt_none;/* no part of speech, POS_INVAL */
+
+#endif