summary refs log tree commit diff
path: root/src-splitter/segclass.c
diff options
context:
space:
mode:
Diffstat (limited to 'src-splitter/segclass.c')
-rw-r--r-- src-splitter/segclass.c 130
1 files changed, 130 insertions, 0 deletions
diff --git a/src-splitter/segclass.c b/src-splitter/segclass.c
new file mode 100644
index 0000000..6b440a4
--- /dev/null
+++ b/src-splitter/segclass.c
@@ -0,0 +1,130 @@
+#include <string.h>
+
+#include <anthy/splitter.h>
+#include <anthy/wtype.h>
+#include <anthy/segclass.h>
+#include "wordborder.h"
+
+/*
+ * Table describing each segment class by a display name and a short symbol.
+ *
+ * anthy_seg_class_name() and anthy_seg_class_sym() index this table directly
+ * with an enum seg_class value, so entry i describes the class whose
+ * numeric value is i. The trailing {NULL, NULL} entry terminates the scan
+ * in anthy_seg_class_by_name().
+ *
+ * NOTE(review): the name strings are non-ASCII bytes (presumably Japanese
+ * text in a legacy encoding such as EUC-JP -- confirm). They are compared
+ * byte-wise with strcmp(), so the file must not be re-encoded.
+ */
+static struct {
+ /* Display name; the key matched by anthy_seg_class_by_name(). */
+ const char *name;
+ /* Short symbol returned by anthy_seg_class_sym(). */
+ const char *sym;
+} seg_class_tab[] = {
+ {"ʸƬ", "H"}, {"ʸËö", "T"}, {"ʸÀá", "B"},
+ {"Àܳ¸ì", "C"}, {"̾»ì+³Ê½õ»ì", "Nk"}, {"̾»ì+½ªÃ¼", "Ne"},
+ {"Æ°»ì+ÉÕ°¸ì", "Vf"}, {"Æ°»ì+½ªÃ¼", "Ve"}, {"·ÁÍÆ»ì", "A"},
+ {"·ÁÍÆÆ°»ì", "AJV"},
+ {"Ï¢Íѽ¤¾þ", "YM"}, {"Ï¢Âν¤¾þ", "TM"},
+ {"̾»ì", "N"}, {"̾»ì+ÉÕ°¸ì", "Nf"}, {"̾»ì+Ï¢ÍÑ", "Ny"},
+ {"Æ°»ì+Ï¢ÍÑ", "Vy"},
+ {"Æ°»ì+Ï¢ÂÎ", "Vt"},
+ /* Sentinel: marks the end of the table. */
+ {NULL, NULL}
+};
+
+/*
+ * Segment class for a head whose part of speech is POS_NOUN, POS_NUMBER or
+ * POS_N2T, selected by the dependent-word class dc.
+ */
+static enum seg_class
+seg_class_for_noun(enum dep_class dc)
+{
+  if (dc == DEP_RAW) {
+    return SEG_MEISHI;
+  }
+  if (dc == DEP_END) {
+    return SEG_MEISHI_SHUTAN;
+  }
+  if (dc == DEP_RENYOU) {
+    return SEG_MEISHI_RENYOU;
+  }
+  if (dc == DEP_KAKUJOSHI) {
+    return SEG_MEISHI_KAKUJOSHI;
+  }
+  return SEG_MEISHI_FUZOKUGO;
+}
+
+/*
+ * Segment class for a head whose part of speech is POS_V, selected by the
+ * dependent-word class dc.
+ */
+static enum seg_class
+seg_class_for_verb(enum dep_class dc)
+{
+  if (dc == DEP_RAW) {
+    return SEG_BUNSETSU;
+  }
+  if (dc == DEP_END) {
+    return SEG_DOUSHI_SHUTAN;
+  }
+  if (dc == DEP_RENYOU) {
+    return SEG_DOUSHI_RENYOU;
+  }
+  if (dc == DEP_RENTAI) {
+    return SEG_DOUSHI_RENTAI;
+  }
+  return SEG_DOUSHI_FUZOKUGO;
+}
+
+/*
+ * Segment class for POS_D2KY / POS_A / POS_AJV heads: DEP_RENYOU and
+ * DEP_RENTAI override the class, anything else yields `plain`.
+ */
+static enum seg_class
+seg_class_for_adjective(enum dep_class dc, enum seg_class plain)
+{
+  if (dc == DEP_RENYOU) {
+    return SEG_RENYOU_SHUSHOKU;
+  }
+  if (dc == DEP_RENTAI) {
+    return SEG_RENTAI_SHUSHOKU;
+  }
+  return plain;
+}
+
+/*
+ * Compute and store wl->seg_class.
+ *
+ * The class is derived from wl->head_pos and from the dep class of the
+ * PART_DEPWORD part. A word list whose PART_CORE part has length 0 is
+ * always classified as SEG_BUNSETSU. A NULL wl is ignored.
+ */
+void
+anthy_set_seg_class(struct word_list* wl)
+{
+  enum dep_class dep;
+  enum seg_class result;
+
+  if (!wl) {
+    return;
+  }
+
+  /* No core word: nothing to classify by part of speech. */
+  if (wl->part[PART_CORE].len == 0) {
+    wl->seg_class = SEG_BUNSETSU;
+    return;
+  }
+
+  dep = wl->part[PART_DEPWORD].dc;
+
+  switch (wl->head_pos) {
+  case POS_NOUN:
+  case POS_NUMBER:
+  case POS_N2T:
+    result = seg_class_for_noun(dep);
+    break;
+  case POS_V:
+    result = seg_class_for_verb(dep);
+    break;
+  case POS_D2KY:
+    /* fall through */
+  case POS_A:
+    result = seg_class_for_adjective(dep, SEG_KEIYOUSHI);
+    break;
+  case POS_AJV:
+    result = seg_class_for_adjective(dep, SEG_KEIYOUDOUSHI);
+    break;
+  case POS_AV:
+    result = SEG_RENYOU_SHUSHOKU;
+    break;
+  case POS_ME:
+    result = SEG_RENTAI_SHUSHOKU;
+    break;
+  case POS_CONJ:
+    result = SEG_SETSUZOKUGO;
+    break;
+  case POS_OPEN:
+  case POS_CLOSE:
+    result = SEG_BUNSETSU;
+    break;
+  default:
+    /* Any other part of speech is classified as SEG_MEISHI. */
+    result = SEG_MEISHI;
+    break;
+  }
+
+  wl->seg_class = result;
+}
+
+/*
+ * Return the display name stored in seg_class_tab for segment class sc.
+ *
+ * Returns NULL when sc does not index a real table entry (negative, past
+ * the end, or the terminating sentinel) instead of reading out of bounds.
+ */
+const char* anthy_seg_class_name(enum seg_class sc)
+{
+  /* Number of real entries, excluding the {NULL, NULL} sentinel. */
+  const size_t nr_classes = sizeof(seg_class_tab) / sizeof(seg_class_tab[0]) - 1;
+
+  /* The cast maps a negative sc to a huge value, so one compare suffices. */
+  if ((size_t)sc >= nr_classes) {
+    return NULL;
+  }
+  return seg_class_tab[sc].name;
+}
+
+/*
+ * Return the short symbol stored in seg_class_tab for segment class sc.
+ *
+ * Returns NULL when sc does not index a real table entry (negative, past
+ * the end, or the terminating sentinel) instead of reading out of bounds.
+ */
+const char* anthy_seg_class_sym(enum seg_class sc)
+{
+  /* Number of real entries, excluding the {NULL, NULL} sentinel. */
+  const size_t nr_classes = sizeof(seg_class_tab) / sizeof(seg_class_tab[0]) - 1;
+
+  /* The cast maps a negative sc to a huge value, so one compare suffices. */
+  if ((size_t)sc >= nr_classes) {
+    return NULL;
+  }
+  return seg_class_tab[sc].sym;
+}
+
+/*
+ * Look up a segment class by its display name.
+ *
+ * Scans seg_class_tab up to its NULL sentinel and returns the index of the
+ * first entry whose name equals `name` byte-for-byte. Returns SEG_BUNSETSU
+ * when no entry matches or when `name` is NULL.
+ */
+enum seg_class
+anthy_seg_class_by_name(const char *name)
+{
+  int i;
+
+  /* strcmp() on a null pointer is undefined; treat it as "not found". */
+  if (!name) {
+    return SEG_BUNSETSU;
+  }
+
+  for (i = 0; seg_class_tab[i].name; i++) {
+    if (strcmp(seg_class_tab[i].name, name) == 0) {
+      /* The table index is the numeric value of the segment class. */
+      return (enum seg_class)i;
+    }
+  }
+  return SEG_BUNSETSU;
+}