summaryrefslogtreecommitdiff
path: root/src-util/morph-main.c
diff options
context:
space:
mode:
Diffstat (limited to 'src-util/morph-main.c')
-rw-r--r--src-util/morph-main.c154
1 files changed, 154 insertions, 0 deletions
diff --git a/src-util/morph-main.c b/src-util/morph-main.c
new file mode 100644
index 0000000..17b28d0
--- /dev/null
+++ b/src-util/morph-main.c
@@ -0,0 +1,154 @@
+/* コーパスから遷移行列を作るためのコード
+ *
+ * Copyright (C) 2005-2006 TABATA Yusuke
+ * Copyright (C) 2005-2006 YOSHIDA Yuichi
+ *
+ */
+/*
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <anthy/anthy.h>
+/* for print_context_info() */
+#include <anthy/convdb.h>
+
+struct test_context {
+ anthy_context_t ac;
+};
+
+static void read_file(struct test_context *tc, const char *fn);
+extern void anthy_reload_record(void);
+
+int verbose;
+int use_utf8;
+
+/**/
+static void
+init_test_context(struct test_context *tc)
+{
+ tc->ac = anthy_create_context();
+ anthy_set_reconversion_mode(tc->ac, ANTHY_RECONVERT_ALWAYS);
+ if (use_utf8) {
+ anthy_context_set_encoding(tc->ac, ANTHY_UTF8_ENCODING);
+ }
+ anthy_reload_record();
+}
+
+static void
+conv_sentence(struct test_context *tc, const char *str)
+{
+ anthy_set_string(tc->ac, str);
+ if (verbose) {
+ anthy_print_context(tc->ac);
+ }
+ /**/
+ print_context_info(tc->ac, NULL);
+}
+
+/* 行末の改行を削除 */
+static void
+chomp(char *buf)
+{
+ int len = strlen(buf);
+ while (len > 0) {
+ char c = buf[len - 1];
+ if (c == '\n' || c == '\r' || c == ' ' || c == '\t') {
+ buf[len - 1] = 0;
+ len --;
+ } else {
+ return ;
+ }
+ }
+}
+
+static void
+read_fp(struct test_context *tc, FILE *fp)
+{
+ char buf[1024];
+ while (fgets(buf, 1024, fp)) {
+ if (buf[0] == '#') {
+ continue;
+ }
+
+ if (!strncmp(buf, "\\include ", 9)) {
+ read_file(tc, &buf[9]);
+ continue;
+ }
+ chomp(buf);
+ conv_sentence(tc, buf);
+ }
+}
+
+static void
+read_file(struct test_context *tc, const char *fn)
+{
+ FILE *fp;
+ fp = fopen(fn, "r");
+ if (!fp) {
+ printf("failed to open (%s)\n", fn);
+ return ;
+ }
+ read_fp(tc, fp);
+ fclose(fp);
+}
+
+static void
+print_usage(void)
+{
+ printf("morphological analyzer\n");
+ printf(" $ ./morphological analyzer < [text-file]\n or");
+ printf(" $ ./morphological analyzer [text-file]\n");
+ exit(0);
+}
+
+static void
+parse_args(int argc, char **argv)
+{
+ int i;
+ for (i = 1; i < argc; i++) {
+ char *arg = argv[i];
+ if (!strcmp(arg, "--utf8")) {
+ use_utf8 = 1;
+ } else if (arg[i] == '-') {
+ print_usage();
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct test_context tc;
+ int i, nr;
+ anthy_init();
+ anthy_set_personality("");
+ init_test_context(&tc);
+
+ /*read_file(&tc, "index.txt");*/
+ parse_args(argc, argv);
+
+ nr = 0;
+ for (i = 1; i < argc; i++) {
+ read_file(&tc, argv[i]);
+ nr ++;
+ }
+ if (nr == 0) {
+ read_fp(&tc, stdin);
+ }
+
+ return 0;
+}