diff options
Diffstat (limited to 'src-util/morph-main.c')
-rw-r--r-- | src-util/morph-main.c | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/src-util/morph-main.c b/src-util/morph-main.c new file mode 100644 index 0000000..17b28d0 --- /dev/null +++ b/src-util/morph-main.c @@ -0,0 +1,154 @@ +/* コーパスから遷移行列を作るためのコード + * + * Copyright (C) 2005-2006 TABATA Yusuke + * Copyright (C) 2005-2006 YOSHIDA Yuichi + * + */ +/* + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <anthy/anthy.h> +/* for print_context_info() */ +#include <anthy/convdb.h> + +struct test_context { + anthy_context_t ac; +}; + +static void read_file(struct test_context *tc, const char *fn); +extern void anthy_reload_record(void); + +int verbose; +int use_utf8; + +/**/ +static void +init_test_context(struct test_context *tc) +{ + tc->ac = anthy_create_context(); + anthy_set_reconversion_mode(tc->ac, ANTHY_RECONVERT_ALWAYS); + if (use_utf8) { + anthy_context_set_encoding(tc->ac, ANTHY_UTF8_ENCODING); + } + anthy_reload_record(); +} + +static void +conv_sentence(struct test_context *tc, const char *str) +{ + anthy_set_string(tc->ac, str); + if (verbose) { + anthy_print_context(tc->ac); + } + /**/ + print_context_info(tc->ac, NULL); +} + +/* 行末の改行を削除 */ +static void +chomp(char *buf) +{ + int len = strlen(buf); + while (len > 0) { + char c = buf[len - 1]; + if (c == '\n' || c == '\r' || c == ' ' || c == '\t') { + buf[len - 1] = 0; + len --; + } else { + return ; + } + } +} + +static void +read_fp(struct test_context *tc, FILE *fp) +{ + char buf[1024]; + while (fgets(buf, 1024, fp)) { + if (buf[0] == '#') { + continue; + } + + if (!strncmp(buf, "\\include ", 9)) { + read_file(tc, &buf[9]); + continue; + } + chomp(buf); + conv_sentence(tc, buf); + } +} + +static void +read_file(struct test_context *tc, const char *fn) +{ + FILE *fp; + fp = fopen(fn, "r"); + if (!fp) { + printf("failed to open (%s)\n", fn); + return ; + } + read_fp(tc, fp); + fclose(fp); +} + +static void +print_usage(void) +{ + printf("morphological analyzer\n"); + printf(" $ ./morphological analyzer < [text-file]\n or"); + printf(" $ ./morphological analyzer [text-file]\n"); + exit(0); +} + +static void +parse_args(int argc, char **argv) +{ + int i; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + if (!strcmp(arg, "--utf8")) { + use_utf8 = 1; + } else if (arg[i] == '-') { + print_usage(); + } + } +} + +int +main(int argc, char **argv) +{ + struct test_context tc; + int i, nr; + anthy_init(); + anthy_set_personality(""); + init_test_context(&tc); + + /*read_file(&tc, "index.txt");*/ + parse_args(argc, argv); + + nr = 0; + for (i = 1; i < argc; i++) { + read_file(&tc, argv[i]); + nr ++; + } + if (nr == 0) { + read_fp(&tc, stdin); + } + + return 0; +} |