diff options
Diffstat (limited to 'src-worddic/priv_dic.c')
-rw-r--r-- | src-worddic/priv_dic.c | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/src-worddic/priv_dic.c b/src-worddic/priv_dic.c new file mode 100644 index 0000000..04e1f19 --- /dev/null +++ b/src-worddic/priv_dic.c @@ -0,0 +1,425 @@ +/* + * 個人辞書を扱うためのコード + * + * ユーザが明示的に登録した単語だけでなく、 + * 未知語を自動的に学習して管理するAPIも持つ。 + * + * Copyright (C) 2000-2007 TABATA Yusuke + */ +/* + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <dirent.h> +#include <fcntl.h> +#include <unistd.h> + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include <anthy/anthy.h> +#include <anthy/alloc.h> +#include <anthy/dic.h> +#include <anthy/record.h> +#include <anthy/dicutil.h> +#include <anthy/conf.h> +#include <anthy/logger.h> +#include <anthy/texttrie.h> +#include <anthy/textdict.h> +#include <anthy/word_dic.h> +#include "dic_main.h" +#include "dic_ent.h" + +/* 個人辞書 */ +struct text_trie *anthy_private_tt_dic; +struct textdict *anthy_private_text_dic; +static struct textdict *anthy_imported_text_dic; +static char *imported_dic_dir; +/* ロック用の変数 */ +static char *lock_fn; +static int lock_depth; +static int lock_fd; + +#define MAX_DICT_SIZE 100000000 + +/* 個人辞書のディレクトリの有無を確認する */ +void +anthy_check_user_dir(void) +{ + const char *hd; + char *dn; + struct stat st; + hd = anthy_conf_get_str("HOME"); + dn = alloca(strlen(hd) + 10); + sprintf(dn, "%s/.anthy", hd); + if (stat(dn, &st) || !S_ISDIR(st.st_mode)) { + int r; + /*fprintf(stderr, "Anthy: Failed to open anthy directory(%s).\n", dn);*/ + r = mkdir(dn, S_IRWXU); + if (r == -1){ + anthy_log(0, "Failed to create profile directory\n"); + return ; + } + /*fprintf(stderr, "Anthy: Created\n");*/ + r = chmod(dn, S_IRUSR | S_IWUSR | S_IXUSR); + if (r == -1) { + anthy_log(0, "But failed to change permission.\n"); + } + } +} + +static void +init_lock_fn(const char *home, const char *id) +{ + lock_fn = malloc(strlen(home) + strlen(id) + 40); + sprintf(lock_fn, "%s/.anthy/lock-file_%s", home, id); +} + +static struct text_trie * +open_tt_dic(const char *home, const char *id) +{ + struct text_trie *tt; + char *buf = malloc(strlen(home) + strlen(id) + 40); + sprintf(buf, "%s/.anthy/private_dict_%s.tt", home, id); + tt = anthy_trie_open(buf, 0); + free(buf); + return tt; +} + +static struct textdict * +open_textdic(const char *home, const char *name, const char *id) +{ + char *fn = malloc(strlen(home) + strlen(name) + strlen(id) + 10); + struct textdict *td; + sprintf(fn, "%s/.anthy/%s%s", home, name, id); + td = anthy_textdict_open(fn, 0); + free(fn); + return td; +} + +void +anthy_priv_dic_lock(void) +{ + struct flock lck; + lock_depth ++; + if (lock_depth > 1) { + return ; + } + if (!lock_fn) { + /* 初期化をミスってる */ + lock_fd = -1; + return ; + } + + /* ファイルロックの方法は多数あるが、この方法はcygwinでも動くので採用した */ + lock_fd = open(lock_fn, O_CREAT|O_RDWR, S_IREAD|S_IWRITE); + if (lock_fd == -1) { + return ; + } + + lck.l_type = F_WRLCK; + lck.l_whence = (short) 0; + lck.l_start = (off_t) 0; + lck.l_len = (off_t) 1; + if (fcntl(lock_fd, F_SETLKW, &lck) == -1) { + close(lock_fd); + lock_fd = -1; + } +} + +void +anthy_priv_dic_unlock(void) +{ + lock_depth --; + if (lock_depth > 0) { + return ; + } + + if (lock_fd != -1) { + close(lock_fd); + lock_fd = -1; + } +} + +void +anthy_priv_dic_update(void) +{ + if (!anthy_private_tt_dic) { + return ; + } + + anthy_trie_update_mapping(anthy_private_tt_dic); +} + +/* seq_entに追加する */ +static void +add_to_seq_ent(const char *line, int encoding, struct seq_ent *seq) +{ + struct word_line wl; + wtype_t wt; + xstr *xs; + /* */ + if (anthy_parse_word_line(line, &wl)) { + return ; + } + xs = anthy_cstr_to_xstr(wl.word, encoding); + anthy_type_to_wtype(wl.wt, &wt); + anthy_mem_dic_push_back_dic_ent(seq, 0, xs, wt, + NULL, wl.freq, 0); + anthy_free_xstr(xs); +} + +/* texttrieに登録されているかをチェックし、 + * 登録されていればseq_entに追加する + */ +static void +copy_words_from_tt(struct seq_ent *seq, xstr *xs, + int encoding, const char *prefix) +{ + char *key, *v; + int key_len; + char *key_buf; + int prefix_len = strlen(prefix); + /**/ + if (!anthy_private_tt_dic) { + return ; + } + key = anthy_xstr_to_cstr(xs, encoding); + key_len = strlen(key); + key_buf = malloc(key_len + 12); + /* 辞書中には各単語が「見出し XXXX」(XXXXはランダムな文字列)を + * キーとして保存されているので列挙する + */ + sprintf(key_buf, "%s%s ", prefix, key); + do { + if (strncmp(&key_buf[2], key, key_len) || + strncmp(&key_buf[0], prefix, prefix_len) || + key_buf[key_len+2] != ' ') { + /* 「見出し 」で始まっていないので対象外 */ + break; + } + /* 単語を読み出して登録 */ + v = anthy_trie_find(anthy_private_tt_dic, key_buf); + if (v) { + add_to_seq_ent(v, encoding, seq); + } + free(v); + /**/ + } while (anthy_trie_find_next_key(anthy_private_tt_dic, + key_buf, key_len + 8)); + free(key); + free(key_buf); +} + +void +anthy_copy_words_from_private_dic(struct seq_ent *seq, + xstr *xs, int is_reverse) +{ + if (is_reverse) { + return ; + } + /* 個人辞書から取ってくる */ + copy_words_from_tt(seq, xs, ANTHY_EUC_JP_ENCODING, " "); + copy_words_from_tt(seq, xs, ANTHY_UTF8_ENCODING, " p"); + /**/ + if (!anthy_select_section("UNKNOWN_WORD", 0) && + !anthy_select_row(xs, 0)) { + wtype_t wt; + xstr *word_xs; + anthy_type_to_wtype("#T35", &wt); + word_xs = anthy_get_nth_xstr(0); + anthy_mem_dic_push_back_dic_ent(seq, 0, word_xs, wt, NULL, 10, 0); + } +} + +int +anthy_parse_word_line(const char *line, struct word_line *res) +{ + int i; + const char *buf = line; + /* default values */ + res->wt[0] = 0; + res->freq = 1; + res->word = NULL; + /* 品詞と頻度をparse */ + for (i = 0; i < 9 && *buf && *buf != '*' && *buf != ' '; buf++, i++) { + res->wt[i] = *buf; + } + res->wt[i] = 0; + if (*buf == '*') { + buf ++; + sscanf(buf, "%d", &res->freq); + buf = strchr(buf, ' '); + } else { + res->freq = 1; + } + if (!buf || !(*buf)) { + res->word = ""; + return -1; + } + buf++; + /* 単語 */ + res->word = buf; + return 0; +} + +void +anthy_ask_scan(void (*request_scan)(struct textdict *, void *), + void *arg) +{ + DIR *dir; + struct dirent *de; + int size = 0; + request_scan(anthy_private_text_dic, arg); + request_scan(anthy_imported_text_dic, arg); + dir = opendir(imported_dic_dir); + if (!dir) { + return ; + } + while ((de = readdir(dir))) { + struct stat st_buf; + struct textdict *td; + char *fn = malloc(strlen(imported_dic_dir) + + strlen(de->d_name) + 3); + if (!fn) { + break; + } + sprintf(fn, "%s/%s", imported_dic_dir, de->d_name); + if (stat(fn, &st_buf)) { + free(fn); + continue; + } + if (!S_ISREG(st_buf.st_mode)) { + free(fn); + continue; + } + size += st_buf.st_size; + if (size > MAX_DICT_SIZE) { + free(fn); + break; + } + td = anthy_textdict_open(fn, 0); + request_scan(td, arg); + anthy_textdict_close(td); + free(fn); + } + closedir(dir); +} + +static void +add_unknown_word(xstr *yomi, xstr *word) +{ + /* recordに追加 */ + if (anthy_select_section("UNKNOWN_WORD", 1)) { + return ; + } + if (!anthy_select_row(yomi, 0)) { + anthy_mark_row_used(); + } + if (anthy_select_row(yomi, 1)) { + return ; + } + anthy_set_nth_xstr(0, word); +} + +void +anthy_add_unknown_word(xstr *yomi, xstr *word) +{ + if (!(anthy_get_xstr_type(word) & XCT_KATA) && + !(anthy_get_xstr_type(word) & XCT_HIRA)) { + return ; + } + if (yomi->len < 4 || yomi->len > 30) { + return ; + } + /**/ + add_unknown_word(yomi, word); +} + +void +anthy_forget_unused_unknown_word(xstr *xs) +{ + char key_buf[128]; + char *v; + + if (!anthy_private_tt_dic) { + return ; + } + + v = anthy_xstr_to_cstr(xs, ANTHY_UTF8_ENCODING); + sprintf(key_buf, " U%s 0", v); + free(v); + anthy_trie_delete(anthy_private_tt_dic, key_buf); + + /* recordに記録された物を消す */ + if (anthy_select_section("UNKNOWN_WORD", 0)) { + return ; + } + if (!anthy_select_row(xs, 0)) { + anthy_release_row(); + } +} + +void +anthy_init_private_dic(const char *id) +{ + const char *home = anthy_conf_get_str("HOME"); + if (anthy_private_tt_dic) { + anthy_trie_close(anthy_private_tt_dic); + } + /**/ + anthy_textdict_close(anthy_private_text_dic); + anthy_textdict_close(anthy_imported_text_dic); + /**/ + if (lock_fn) { + free(lock_fn); + } + init_lock_fn(home, id); + anthy_private_tt_dic = open_tt_dic(home, id); + /**/ + anthy_private_text_dic = open_textdic(home, "private_words_", id); + anthy_imported_text_dic = open_textdic(home, "imported_words_", id); + imported_dic_dir = malloc(strlen(home) + strlen(id) + 30); + sprintf(imported_dic_dir, "%s/.anthy/imported_words_%s.d/", home, id); +} + +void +anthy_release_private_dic(void) +{ + if (anthy_private_tt_dic) { + anthy_trie_close(anthy_private_tt_dic); + anthy_private_tt_dic = NULL; + } + /**/ + anthy_textdict_close(anthy_private_text_dic); + anthy_textdict_close(anthy_imported_text_dic); + free(imported_dic_dir); + anthy_private_text_dic = NULL; + anthy_imported_text_dic = NULL; + imported_dic_dir = NULL; + /**/ + if (lock_depth > 0) { + /* not sane situation */ + lock_depth = 0; + if (lock_fn) { + unlink(lock_fn); + } + } + /**/ + free(lock_fn); + lock_fn = NULL; +} |