1 files changed, 695 insertions, 0 deletions
diff --git a/src-main/context.c b/src-main/context.c
new file mode 100644
index 0000000..7b426db
--- /dev/null
+++ b/src-main/context.c
@@ -0,0 +1,695 @@
+/*
+ * 変換や文節の伸縮などの操作が進行中の文字列や候補などを
+ * まとめて変換コンテキストと呼ぶ。
+ * Anthyのコンテキストに対する操作は全てここから呼ばれる。
+ * 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。
+ *
+ * personalityの管理もする。
+ *
+ * Funded by IPA未踏ソフトウェア創造事業 2001 10/29
+ * Copyright (C) 2000-2007 TABATA Yusuke
+ *
+ * $Id: context.c,v 1.26 2002/11/17 14:45:47 yusuke Exp $
+ */
+/*
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <anthy/anthy.h>
+#include <anthy/alloc.h>
+#include <anthy/record.h>
+#include <anthy/ordering.h>
+#include <anthy/splitter.h>
+#include <anthy/xstr.h>
+#include "main.h"
+
+/** Allocator from which every struct anthy_context is obtained. */
+static allocator context_ator;
+
+/** The current personality.
+ * Not set yet: NULL
+ * Conversion was started without setting one: "default"
+ * Anonymous: ""
+ */
+static char *current_personality;
+
+/** anthy_save_history() stops appending once the file exceeds this size. */
+#define HISTORY_FILE_LIMIT 100000
+
+/** Destructor handed to the context allocator: resets the context at p. */
+static void context_dtor(void *p)
+{
+  anthy_do_reset_context((struct anthy_context *)p);
+}
+
+/** Returns the current personality, selecting "default" on first use.
+ *  Returns NULL (and leaves the dictionary untouched) if strdup() fails. */
+static char *get_personality(void)
+{
+  if (!current_personality &&
+      (current_personality = strdup("default")) != NULL) {
+    anthy_dic_set_personality(current_personality);
+  }
+  return current_personality;
+}
+
+/**
+ * Frees a segment and what it owns: its candidates and both arrays.
+ */
+static void
+release_segment(struct seg_ent *s)
+{
+  if (s->cands != NULL) {
+    int idx = 0;
+    while (idx < s->nr_cands) {
+      anthy_release_cand_ent(s->cands[idx++]);
+    }
+    free(s->cands);
+  }
+  free(s->mw_array); /* free(NULL) is a no-op */
+  free(s);
+}
+
+/** Unlinks and frees the last segment of the list; no-op when empty. */
+static void
+pop_back_seg_ent(struct anthy_context *c)
+{
+  struct seg_ent *head = &c->seg_list.list_head;
+  struct seg_ent *last = head->prev;
+
+  if (last != head) {
+    last->next->prev = last->prev;
+    last->prev->next = last->next;
+    release_segment(last);
+    c->seg_list.nr_segments--;
+  }
+}
+
+
+/** Returns the index of the first character of the n-th segment, or -1. */
+static int
+get_nth_segment_index(struct anthy_context *c, int n)
+{
+  int pos, seen = 0;
+  for (pos = 0; pos < c->str.len; pos++) {
+    if (!c->split_info.ce[pos].seg_border) {
+      continue;
+    }
+    if (seen++ == n) {
+      return pos;
+    }
+  }
+  return -1;
+}
+
+/** Returns the length of the sindex-th segment, or -1 if it does not exist.
+ * Works from the seg_border marks alone, so the segment list need not
+ * have been built yet.  The scan also stops at the end of the string. */
+static int
+get_nth_segment_len(struct anthy_context *c, int sindex)
+{
+  int start, end;
+  start = get_nth_segment_index(c, sindex);
+  if (start == -1) {
+    return -1;
+  }
+  end = start + 1;
+  while (end < c->str.len && !c->split_info.ce[end].seg_border) {
+    end++;
+  }
+  return end - start;
+}
+
+/** Collects the metawords for the longest usable prefix of the segment
+ *  into se->mw_array; leaves it NULL with a zero count when none exist. */
+static void
+make_metaword_array(struct anthy_context *ac, struct seg_ent *se)
+{
+  int i, j;
+  se->mw_array = NULL;
+  se->nr_metaword = 0; /* was left unset when the loop body never ran */
+  for (i = se->len; i > 0; i--) {
+    /* A trailing mark (XCT_PART) is dropped with the character before it. */
+    if (i < se->len && anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
+      /* FIXME: handle such marks appearing in an impossible order */
+      i--;
+      continue;
+    }
+    se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i);
+    if (!se->nr_metaword) {
+      continue;
+    }
+    se->mw_array = malloc(sizeof(*se->mw_array) * se->nr_metaword);
+    if (!se->mw_array) {
+      se->nr_metaword = 0; /* allocation failed: keep the count consistent */
+    }
+    for (j = 0; j < se->nr_metaword; j++) {
+      se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j);
+    }
+    return;
+  }
+}
+
+/** Creates the segment for len characters at from; NULL if malloc() fails. */
+static struct seg_ent *create_segment(struct anthy_context *ac, int from,
+                                      int len, struct meta_word *best_mw)
+{
+  struct seg_ent *s = malloc(sizeof(*s));
+  if (!s) {
+    return NULL;
+  }
+  s->str.str = &ac->str.str[from];
+  s->str.len = len;
+  s->from = from;
+  s->len = len;
+  s->nr_cands = 0;
+  s->cands = NULL;
+  s->best_seg_class = ac->split_info.ce[from].best_seg_class;
+  s->best_mw = best_mw;
+  make_metaword_array(ac, s);
+  return s;
+}
+
+/** Appends a segment to the context's segment list, marked uncommitted. */
+static void push_back_segment(struct anthy_context *ac, struct seg_ent *se)
+{
+  se->committed = -1;
+  se->next = &ac->seg_list.list_head;
+  se->prev = ac->seg_list.list_head.prev;
+  ac->seg_list.list_head.prev->next = se;
+  ac->seg_list.list_head.prev = se;
+  ac->seg_list.nr_segments++;
+}
+
+/** Builds the segment list for [from, to) out of the segment-border marks
+ *  left in ce[] by the splitter; stops early if a segment is unavailable. */
+static void create_segment_list(struct anthy_context *ac, int from, int to)
+{
+  int i = 0, n = 0, len;
+
+  /* Count the segments that lie before from.  A missing segment (-1) ends
+   * the count instead of sending i backwards forever. */
+  while (i < from && (len = get_nth_segment_len(ac, n)) > 0) {
+    i += len;
+    n++;
+  }
+  for (i = from; i < to; i++) {
+    if (ac->split_info.ce[i].seg_border) {
+      struct seg_ent *s = create_segment(ac, i, get_nth_segment_len(ac, n),
+                                         ac->split_info.ce[i].best_mw);
+      if (!s) {
+        return; /* out of memory: keep the segments built so far */
+      }
+      push_back_segment(ac, s);
+      n++;
+    }
+  }
+}
+
+/** Creates an empty context for the given encoding; NULL on failure. */
+struct anthy_context *
+anthy_do_create_context(int encoding)
+{
+  struct anthy_context *ac;
+  /* Selects the "default" personality if none has been set so far. */
+  if (!get_personality()) {
+    return NULL;
+  }
+  ac = (struct anthy_context *)anthy_smalloc(context_ator);
+  if (!ac) {
+    return NULL;
+  }
+  ac->str.str = NULL;
+  ac->str.len = 0;
+  ac->seg_list.nr_segments = 0;
+  ac->seg_list.list_head.prev = &ac->seg_list.list_head;
+  ac->seg_list.list_head.next = &ac->seg_list.list_head;
+  ac->split_info.word_split_info = NULL;
+  ac->split_info.ce = NULL;
+  ac->ordering_info.oc = NULL;
+  ac->dic_session = NULL;
+  ac->prediction.str.str = NULL;
+  ac->prediction.str.len = 0;
+  ac->prediction.nr_prediction = 0;
+  ac->prediction.predictions = NULL;
+  ac->encoding = encoding;
+  ac->reconversion_mode = ANTHY_RECONVERT_AUTO;
+  return ac;
+}
+
+/** Sets up context_ator, the allocator for struct anthy_context objects. */
+void
+anthy_init_contexts(void)
+{
+  context_ator =
+    anthy_create_allocator(sizeof(struct anthy_context), context_dtor);
+}
+
+/** Frees context_ator, the allocator set up by anthy_init_contexts(). */
+void anthy_quit_contexts(void)
+{
+  anthy_free_allocator(context_ator);
+}
+
+/** Frees what the prediction cache holds and leaves it empty for reuse. */
+static void release_prediction(struct prediction_cache *pc)
+{
+  int i;
+  free(pc->str.str); /* free(NULL) is a no-op */
+  pc->str.str = NULL;
+  pc->str.len = 0;
+  if (pc->predictions) {
+    for (i = 0; i < pc->nr_prediction; ++i) {
+      anthy_free_xstr(pc->predictions[i].src_str);
+      anthy_free_xstr(pc->predictions[i].str);
+    }
+    free(pc->predictions);
+    pc->predictions = NULL;
+  }
+  pc->nr_prediction = 0; /* no stale count may outlive the array */
+}
+
+/** Pops nr_segments entries off the segment list and zeroes the count. */
+void anthy_release_segment_list(struct anthy_context *ac)
+{
+  int remaining = ac->seg_list.nr_segments;
+
+  while (remaining-- > 0) {
+    pop_back_seg_ent(ac);
+  }
+  ac->seg_list.nr_segments = 0;
+}
+
+/* Reset frees everything the context holds: dictionary session, cached
+ * predictions and, once a string was set, that string and its segments. */
+void
+anthy_do_reset_context(struct anthy_context *ac)
+{
+  /* Release the dictionary session first. */
+  if (ac->dic_session) {
+    anthy_dic_release_session(ac->dic_session);
+    ac->dic_session = NULL;
+  }
+  /* Set by anthy_do_set_prediction_str() even when ac->str is unset. */
+  release_prediction(&ac->prediction);
+  if (!ac->str.str) {
+    /* No string was set, so there is nothing else to release. */
+    return;
+  }
+  free(ac->str.str);
+  ac->str.str = NULL;
+  anthy_release_split_context(&ac->split_info);
+  anthy_release_segment_list(ac);
+}
+
+/** Hands the context back to context_ator via anthy_sfree(). */
+void anthy_do_release_context(struct anthy_context *ac)
+{
+  anthy_sfree(context_ator, ac);
+}
+
+/** Marks the segment borders from index from to the end of the string
+ * (none may fall between from and from2), builds the segment list and
+ * then generates and sorts the candidates of every segment. */
+static void
+make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse)
+{
+  struct segment_list *segs = &ac->seg_list;
+  int len = ac->str.len;
+  int nth = 0;
+
+  anthy_mark_border(&ac->split_info, from, from2, len);
+  create_segment_list(ac, from, len);
+  anthy_sort_metaword(segs);
+
+  /* Enumerate the candidates segment by segment, then sort them. */
+  while (nth < segs->nr_segments) {
+    struct seg_ent *se = anthy_get_nth_segment(segs, nth++);
+    anthy_do_make_candidates(&ac->split_info, se, is_reverse);
+  }
+  anthy_sort_candidate(segs, 0);
+}
+
+/** Copies s into the context, runs the splitter on it and builds the
+ *  segments and candidates.  Returns 0, or -1 if the copy cannot be made. */
+int
+anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse)
+{
+  int i;
+  /* Copy the string, with one spare element that is set to 0. */
+  ac->str.str = malloc(sizeof(*ac->str.str) * (s->len + 1));
+  if (!ac->str.str) {
+    return -1;
+  }
+  anthy_xstrcpy(&ac->str, s);
+  ac->str.str[s->len] = 0;
+  /* Initialise the splitter. */
+  anthy_init_split_context(&ac->str, &ac->split_info, is_reverse);
+  /* Build the candidate solutions. */
+  make_candidates(ac, 0, 0, is_reverse);
+  /* Remember the segment boundaries that were chosen initially. */
+  for (i = 0; i < ac->seg_list.nr_segments; i++) {
+    struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, i);
+    ac->split_info.ce[seg->from].initial_seg_len = seg->len;
+  }
+  return 0;
+}
+
+/** Grows (resize > 0) or shrinks (resize < 0) the nth segment by |resize|
+ *  characters and re-converts from that segment on; no-op if impossible. */
+void
+anthy_do_resize_segment(struct anthy_context *ac, int nth, int resize)
+{
+  int i, index, len, sc;
+
+  /* Check that the resize is possible; a negative nth is rejected too. */
+  if (nth < 0 || nth >= ac->seg_list.nr_segments) {
+    return;
+  }
+  index = get_nth_segment_index(ac, nth);
+  len = get_nth_segment_len(ac, nth);
+  if (index < 0 || index + len + resize > ac->str.len) {
+    return;
+  }
+  if (len + resize < 1) {
+    return;
+  }
+
+  /* Free the seg_ents from nth onwards. */
+  sc = ac->seg_list.nr_segments;
+  for (i = nth; i < sc; i++) {
+    pop_back_seg_ent(ac);
+  }
+
+  /* Mark the resized seg_border: */
+  /* erase the current marks and set the new ones. */
+  ac->split_info.ce[index+len].seg_border = 0;
+  ac->split_info.ce[ac->str.len].seg_border = 1;
+  for (i = index+len+resize+1; i < ac->str.len; i++) {
+    ac->split_info.ce[i].seg_border = 0;
+  }
+  ac->split_info.ce[index+len+resize].seg_border = 1;
+  for (i = index; i < ac->str.len; i++) {
+    ac->split_info.ce[i].best_mw = NULL;
+  }
+
+  /* Build the candidate solutions again. */
+  make_candidates(ac, index, index+len+resize, 0);
+}
+
+/* Returns the n-th segment of the list, or NULL if n is out of range. */
+struct seg_ent *
+anthy_get_nth_segment(struct segment_list *sl, int n)
+{
+  struct seg_ent *cur;
+
+  if (n < 0 || n >= sl->nr_segments) {
+    return NULL;
+  }
+  cur = sl->list_head.next;
+  while (n-- > 0) {
+    cur = cur->next;
+  }
+  return cur;
+}
+
+/** Rebuilds ac->prediction for xs; returns 0, or -1 if a resource is missing. */
+int
+anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs)
+{
+  struct prediction_cache *pc = &ac->prediction;
+  int nr_prediction;
+
+  /* Start over with a fresh dictionary session and an empty cache. */
+  if (ac->dic_session) {
+    anthy_dic_release_session(ac->dic_session);
+  }
+  release_prediction(pc);
+  pc->nr_prediction = 0;
+  ac->dic_session = anthy_dic_create_session();
+  if (!ac->dic_session) {
+    return -1;
+  }
+  /* One xchar per character plus the terminating 0 (was sizeof(xchar *)). */
+  pc->str.str = malloc(sizeof(*pc->str.str) * (xs->len + 1));
+  if (!pc->str.str) {
+    return -1;
+  }
+  anthy_xstrcpy(&pc->str, xs);
+  pc->str.str[xs->len] = 0;
+  /* The call with a NULL array yields the count; the second fills the array. */
+  nr_prediction = anthy_traverse_record_for_prediction(xs, NULL);
+  if (nr_prediction) {
+    pc->predictions = malloc(sizeof(*pc->predictions) * nr_prediction);
+    if (!pc->predictions) {
+      return -1;
+    }
+    anthy_traverse_record_for_prediction(xs, pc->predictions);
+  }
+  pc->nr_prediction = nr_prediction;
+  return 0;
+}
+
+/** Describes how the result differs from the initial conversion: "S" if
+ * a segment's length is not its initial length, "C" if a segment has
+ * committed > 0, "SC" if both hold and "-" if neither does. */
+static const char *
+get_change_state(struct anthy_context *ac)
+{
+  int resized = 0;
+  int cand_changed = 0;
+  int nth;
+
+  for (nth = 0; nth < ac->seg_list.nr_segments; nth++) {
+    struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, nth);
+    if (ac->split_info.ce[seg->from].initial_seg_len != seg->len) {
+      resized = 1;
+    }
+    if (seg->committed > 0) {
+      cand_changed = 1;
+    }
+  }
+
+  /* Map the two flags onto the state string. */
+  if (resized) {
+    return cand_changed ? "SC" : "S";
+  }
+  return cand_changed ? "C" : "-";
+}
+
+/** Writes "|" plus each segment's reading and "|", then " |" plus each
+ *  segment's committed candidate (or "?") and "|", to fp in EUC-JP. */
+static void
+write_history(FILE *fp, struct anthy_context *ac)
+{
+  int nth;
+  /* Readings. */
+  fputs("|", fp);
+  for (nth = 0; nth < ac->seg_list.nr_segments; nth++) {
+    struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, nth);
+    char *text = anthy_xstr_to_cstr(&seg->str, ANTHY_EUC_JP_ENCODING);
+    fprintf(fp, "%s|", text);
+    free(text);
+  }
+  fputs(" |", fp);
+  /* Conversion results. */
+  for (nth = 0; nth < ac->seg_list.nr_segments; nth++) {
+    struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, nth);
+    if (seg->committed >= 0) {
+      char *text = anthy_xstr_to_cstr(&seg->cands[seg->committed]->str,
+                                      ANTHY_EUC_JP_ENCODING);
+      fprintf(fp, "%s|", text);
+      free(text);
+    } else {
+      fputs("?|", fp);
+    }
+  }
+}
+
+/** Appends one line about the conversion held in ac to the file fn.
+ * Nothing is written if fn is NULL, cannot be opened for appending or is
+ * already larger than HISTORY_FILE_LIMIT bytes. */
+void
+anthy_save_history(const char *fn, struct anthy_context *ac)
+{
+  struct stat st;
+  FILE *fp = fn ? fopen(fn, "a") : NULL;
+
+  if (!fp) {
+    return;
+  }
+  /* Query the open descriptor rather than the name, so that the size
+   * check and the mode change hit the very file being appended to. */
+  if (fstat(fileno(fp), &st) || st.st_size > HISTORY_FILE_LIMIT) {
+    fclose(fp);
+    return;
+  }
+  /* Owner-only access, set before any history text reaches the file. */
+  fchmod(fileno(fp), S_IRUSR | S_IWUSR);
+  fprintf(fp, "anthy-%s ", anthy_get_version_string());
+  fprintf(fp, "%s ", get_change_state(ac));
+  write_history(fp, ac);
+  fprintf(fp, "\n");
+  fclose(fp);
+}
+
+/** Prints a candidate to stdout: its string, then in parentheses the
+ * flag letters, the metaword score and the metaword's segment class and
+ * struct_score ("-" when there is no metaword), and finally the score,
+ * with a "," before its last three digits from 1000 on. */
+void
+anthy_print_candidate(struct cand_ent *ce)
+{
+  /* Flag bits and the letter printed for each, in output order. */
+  static const struct {
+    int mask;
+    char letter;
+  } flag_letters[] = {
+    { CEF_OCHAIRE, 'o' },
+    { CEF_SINGLEWORD, '1' },
+    { CEF_GUESS, 'g' },
+    { CEF_KATAKANA | CEF_HIRAGANA, 'N' },
+    { CEF_USEDICT, 'U' },
+    { CEF_CONTEXT, 'C' },
+  };
+  const int mw_score = ce->mw ? ce->mw->score : 0;
+  const int remainder = ce->score % 1000;
+  size_t k;
+
+  anthy_putxstr(&ce->str);
+  printf(":(");
+
+  for (k = 0; k < sizeof(flag_letters) / sizeof(flag_letters[0]); k++) {
+    if (ce->flag & flag_letters[k].mask) {
+      putchar(flag_letters[k].letter);
+    }
+  }
+  printf(",%d,", mw_score);
+
+  if (ce->mw) {
+    printf("%s,%d", anthy_seg_class_sym(ce->mw->seg_class),
+           ce->mw->struct_score);
+  } else {
+    putchar('-');
+  }
+  printf(")");
+
+  if (ce->score < 1000) {
+    printf("%d ", ce->score);
+    return;
+  }
+  /* Thousands, a comma, then the remainder zero-padded to three digits. */
+  printf("%d,", ce->score / 1000);
+  if (remainder < 100) {
+    printf("0");
+  }
+  if (remainder < 10) {
+    printf("0");
+  }
+  printf("%d ", remainder);
+}
+
+/** Prints a segment to stdout: its string, then its candidates in "()". */
+static void
+print_segment(struct seg_ent *e)
+{
+  int nth = 0;
+
+  anthy_putxstr(&e->str);
+  printf("(");
+  while (nth < e->nr_cands) {
+    anthy_print_candidate(e->cands[nth++]);
+    printf(",");
+  }
+  printf(")");
+  printf(":\n");
+}
+
+/** Prints the context to stdout in the given encoding: the string with a
+ *  "|" before each segment border, then every segment on its own line. */
+void
+anthy_do_print_context(struct anthy_context *ac, int encoding)
+{
+  struct char_ent *ce;
+  int pos;
+  anthy_xstr_set_print_encoding(encoding);
+  ce = ac->split_info.ce;
+  if (ce == NULL) {
+    printf("(invalid)\n");
+    return;
+  }
+  /* The characters, with the segment borders marked. */
+  for (pos = 0; pos < ac->str.len; pos++) {
+    if (ce[pos].seg_border) {
+      printf("|");
+    }
+    anthy_putxchar(*ce[pos].c);
+  }
+  printf("\n");
+  /* The segments. */
+  for (pos = 0; pos < ac->seg_list.nr_segments; pos++) {
+    print_segment(anthy_get_nth_segment(&ac->seg_list, pos));
+  }
+  printf("\n");
+}
+
+/** Frees a candidate together with its element array and the buffer of
+ *  its string. */
+void
+anthy_release_cand_ent(struct cand_ent *ce)
+{
+  free(ce->elm); /* free(NULL) is a no-op, so no guard is needed */
+  /* &ce->str is the address of a member and never NULL, so the old
+   * "if (&ce->str)" test was always true; call unconditionally. */
+  anthy_free_xstr_str(&ce->str);
+  free(ce);
+}
+
+/** Selects the personality id.  Returns 0 on success, or -1 if one is
+ *  already set, id is NULL or contains '/', or strdup() fails. */
+int anthy_do_set_personality(const char *id)
+{
+  if (current_personality || !id || strchr(id, '/')) {
+    return -1;
+  }
+  current_personality = strdup(id);
+  if (!current_personality) {
+    return -1; /* out of memory: the dictionary is left untouched */
+  }
+  anthy_dic_set_personality(current_personality);
+  return 0;
+}
+
+/** Starts out with no personality selected (current_personality NULL). */
+void anthy_init_personality(void)
+{
+  current_personality = NULL;
+}
+
+/** Frees the personality string and returns to the unset state. */
+void
+anthy_quit_personality(void)
+{
+  /* free(NULL) is harmless, so no test is required. */
+  free(current_personality);
+  current_personality = NULL;
+}