diff options
Diffstat (limited to 'src-ordering/infosort.c')
-rw-r--r-- | src-ordering/infosort.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/src-ordering/infosort.c b/src-ordering/infosort.c new file mode 100644 index 0000000..b20f731 --- /dev/null +++ b/src-ordering/infosort.c @@ -0,0 +1,148 @@ +/* + * 文節の構造metawordをソートする + * + * 文節に対する複数の構造の候補をソートする + * + * Copyright (C) 2000-2007 TABATA Yusuke + * + */ +/* + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <stdlib.h> +#include <math.h> + +#include <anthy/segment.h> +#include <anthy/ordering.h> +#include <anthy/feature_set.h> +#include <anthy/splitter.h> +#include <anthy/diclib.h> +#include "sorter.h" + +static void *cand_info_array; + +static double +calc_probability(struct feature_list *fl) +{ + struct feature_freq *res, arg; + res = anthy_find_feature_freq(cand_info_array, + fl, &arg); + if (res) { + double pos = (double)res->f[15]; + double neg = (double)res->f[14]; + double prob = pos / (pos + neg); + prob = prob * prob; + /**/ + return prob; + } + return 0; +} + +static void +mw_eval(struct seg_ent *prev_seg, struct seg_ent *seg, + struct meta_word *mw) +{ + int pc; + struct feature_list fl; + double prob; + (void)seg; + anthy_feature_list_init(&fl); + /**/ + anthy_feature_list_set_cur_class(&fl, mw->seg_class); + anthy_feature_list_set_dep_word(&fl, mw->dep_word_hash); + anthy_feature_list_set_dep_class(&fl, mw->dep_class); + anthy_feature_list_set_mw_features(&fl, mw->mw_features); + /* 前の文節の素性 */ + if (prev_seg) { + pc = prev_seg->best_seg_class; + } else { + pc = SEG_HEAD; + } + anthy_feature_list_set_class_trans(&fl, pc, mw->seg_class); + anthy_feature_list_sort(&fl); + /* 計算する */ + prob = 0.1 + calc_probability(&fl); + if (prob < 0) { + prob = (double)1 / (double)1000; + } + anthy_feature_list_free(&fl); + mw->struct_score = RATIO_BASE * RATIO_BASE; + mw->struct_score *= prob; + /* + anthy_feature_list_print(&fl); + printf(" prob=%f, struct_score=%d\n", prob, mw->struct_score); + */ + + /**/ + if (mw->mw_features & MW_FEATURE_SUFFIX) { + mw->struct_score /= 2; + } + if (mw->mw_features & MW_FEATURE_WEAK_CONN) { + mw->struct_score /= 10; + } +} + +static void +seg_eval(struct seg_ent *prev_seg, + struct seg_ent *seg) +{ + int i; + for (i = 0; i < seg->nr_metaword; i++) { + mw_eval(prev_seg, seg, seg->mw_array[i]); + } +} + +static void +sl_eval(struct segment_list *seg_list) +{ + int i; + struct seg_ent *prev_seg = NULL; + for (i = 0; i < seg_list->nr_segments; i++) { + struct seg_ent *seg; + seg = anthy_get_nth_segment(seg_list, i); + seg_eval(prev_seg, seg); + prev_seg = seg; + } +} + +static int +metaword_compare_func(const void *p1, const void *p2) +{ + const struct meta_word * const *s1 = p1; + const struct meta_word * const *s2 = p2; + return (*s2)->struct_score - (*s1)->struct_score; +} + +void +anthy_sort_metaword(struct segment_list *seg_list) +{ + int i; + /**/ + sl_eval(seg_list); + /**/ + for (i = 0; i < seg_list->nr_segments; i++) { + struct seg_ent *seg = anthy_get_nth_segment(seg_list, i); + if (seg->mw_array) { /* 不正なメモリアクセスを行うバグの修正 */ + qsort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *), + metaword_compare_func); + } + } +} + +void +anthy_infosort_init(void) +{ + cand_info_array = anthy_file_dic_get_section("cand_info"); +} |