diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2009-02-07 16:32:56 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2009-02-07 16:32:56 +0000 |
commit | a7a06a7ccfe0af1e134357678b8fa6cf87dff3b0 (patch) | |
tree | a966aeee62e69ae3ad13275d07ddb15049b14e0e /mkworddic/mkudic.c | |
download | anthy-a7a06a7ccfe0af1e134357678b8fa6cf87dff3b0.tar.gz |
anthy-9100hHEADanthy-9100hmaster
Diffstat (limited to 'mkworddic/mkudic.c')
-rw-r--r-- | mkworddic/mkudic.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/mkworddic/mkudic.c b/mkworddic/mkudic.c new file mode 100644 index 0000000..72a63c5 --- /dev/null +++ b/mkworddic/mkudic.c @@ -0,0 +1,152 @@ +/* + * 用例辞書を作る + * + * Copyright (C) 2003-2005 TABATA Yusuke + */ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include <anthy/matrix.h> +#include "mkdic.h" + +#define LINE_LEN 256 + +/* 用例 */ +struct use_case { + int id[2]; + struct use_case *next; +}; + +/* 用例辞書 */ +struct uc_dict { + /* 用例リスト */ + struct use_case uc_head; + int nr_ucs; +}; + +/* 用例定義の行から単語のidを求める + */ +static int +get_id_from_word_line(char *buf) +{ + char yomi[LINE_LEN]; + char okuri[LINE_LEN]; + char wt[LINE_LEN]; + char kanji[LINE_LEN]; + int res, id; + xstr *xs; + + res = sscanf(buf, "%s %s %s %s", yomi, okuri, wt, kanji); + if (res != 4) { + return -1; + } + xs = anthy_cstr_to_xstr(kanji, 0); + id = anthy_xstr_hash(xs); + anthy_free_xstr(xs); + return id; +} + +static void +commit_uc(struct uc_dict *dict, int x, int y) +{ + struct use_case *uc; + if (x < 0 || y < 0) { + return ; + } + uc = malloc(sizeof(struct use_case)); + uc->id[0] = x; + uc->id[1] = y; + /**/ + uc->next = dict->uc_head.next; + dict->uc_head.next = uc; + dict->nr_ucs ++; +} + +/* 用例データベースを作る */ +struct uc_dict * +create_uc_dict(void) +{ + struct uc_dict *dict = malloc(sizeof(struct uc_dict)); + + dict->uc_head.next = NULL; + dict->nr_ucs = 0; + + return dict; +} + +/* 用例ファイルを読み込む */ +void +read_uc_file(struct uc_dict *dict, const char *fn) +{ + char buf[LINE_LEN]; + FILE *uc_file; + int off, base = 0, cur; + int line_number = 0; + + uc_file = fopen(fn, "r"); + if (!uc_file) { + return ; + } + + /* off=0 : 最初の単語 + * off=1,2..n : それと関係ある単語 + */ + off = 0; + while (fgets(buf, LINE_LEN, uc_file)) { + /**/ + line_number ++; + /**/ + if (buf[0] == '#') { + /* コメント */ + continue; + } + if (buf[0] == '-') { + /* 区切り記号 */ + off = 0; + continue; + } + cur = get_id_from_word_line(buf); + if (cur == -1) { + fprintf(stderr, "Invalid line(%d):%s\n", line_number, buf); + } + /**/ + if (off == 0) { + /* 一つめの項目 */ + base = cur; + } else { + /* 二つめ以降の項目 */ + commit_uc(dict, cur, base); + } + off ++; + } +} + +/* 用例辞書をファイルに書き出す */ +void +make_ucdict(FILE *uc_out, struct uc_dict *dict) +{ + struct use_case *uc; + struct sparse_matrix *sm; + struct matrix_image *mi; + int i; + /* 疎行列に詰め込む */ + sm = anthy_sparse_matrix_new(); + if (dict) { + for (uc = dict->uc_head.next; uc; uc = uc->next) { + anthy_sparse_matrix_set(sm, uc->id[0], uc->id[1], 1, NULL); + } + } + anthy_sparse_matrix_make_matrix(sm); + /* 疎行列のイメージを作成してファイルに書き出す */ + mi = anthy_matrix_image_new(sm); + for (i = 0; i < mi->size; i++) { + write_nl(uc_out, mi->image[i]); + } + if (dict) { + printf("udic: %d use examples.\n", dict->nr_ucs); + } else { + printf("udic: no use examples.\n"); + } + +} |