summaryrefslogtreecommitdiff
path: root/src-util/rkconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src-util/rkconv.c')
-rw-r--r--src-util/rkconv.c791
1 files changed, 791 insertions, 0 deletions
diff --git a/src-util/rkconv.c b/src-util/rkconv.c
new file mode 100644
index 0000000..cd627bc
--- /dev/null
+++ b/src-util/rkconv.c
@@ -0,0 +1,791 @@
+/*
+ * roma kana converter
+ *
+ * 理解するためには,構文解析について書かれたテキストで
+ * SLR(1)について調べるよろし.
+ *
+ * $Id: rkconv.c,v 1.16 2002/11/16 03:35:21 yusuke Exp $
+ *
+ * Copyright (C) 2001-2002 UGAWA Tomoharu
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "rkconv.h"
+
+#define MAX_CONV_CHARS 1024
+#define MAX_MAP_PALETTE 10
+#define SPECIAL_CHAR '\xff'
+#define TERMINATE_CHAR '\n'
+
+/* break_into_roman */
+struct break_roman {
+ int decided_length;
+ int pending_size;
+ char* pending;
+};
+
+struct rk_rule_set
+{
+ struct rk_rule* rules;
+ int nr_rules;
+};
+
+struct next_array
+{
+ struct rk_slr_closure* array[128];
+};
+
+struct rk_slr_closure
+{
+ char* prefix;
+ struct rk_rule* r;
+ int is_reduction_only;
+ struct next_array *next;
+};
+
+struct rk_map
+{
+ struct rk_rule_set* rs;
+ struct rk_slr_closure* root_cl;
+ int refcount;
+};
+
+struct rk_conv_context
+{
+ struct rk_map* map;
+ int map_no;
+ int old_map_no;
+ struct rk_slr_closure* cur_state;
+ char cur_str[MAX_CONV_CHARS + 1];
+ int cur_str_len;
+ struct rk_map* map_palette[MAX_MAP_PALETTE];
+ struct break_roman *brk_roman;
+};
+
+static void
+rk_rule_set_free(struct rk_rule_set* rs)
+{
+ int i;
+
+ for (i = 0; i < rs->nr_rules; i++) {
+ free((void *) rs->rules[i].lhs);
+ free((void *) rs->rules[i].rhs);
+ free((void *) rs->rules[i].follow);
+ }
+ free(rs->rules);
+ free(rs);
+}
+
+static int
+rk_rule_copy_to(const struct rk_rule* from, struct rk_rule* to)
+{
+ char *lhs, *rhs;
+
+ if ((lhs = strdup(from->lhs))) {
+ if ((rhs = strdup(from->rhs))) {
+ if (!(to->follow = from->follow)
+ || (to->follow = strdup(from->follow))) {
+ to->lhs = lhs;
+ to->rhs = rhs;
+ return 0;
+ }
+ free(rhs);
+ }
+ free(lhs);
+ }
+ to->lhs = NULL;
+ to->rhs = NULL;
+ return -1;
+}
+
+static struct rk_rule_set*
+rk_rule_set_create(const struct rk_rule* rules)
+{
+ int i;
+ struct rk_rule_set* rs;
+
+ rs = (struct rk_rule_set*) malloc(sizeof(struct rk_rule_set));
+ if (rs == NULL) {
+ return NULL;
+ }
+
+ for (i = 0; rules[i].lhs != NULL; i++);
+ rs->nr_rules = i;
+ rs->rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * i);
+ if (rs->rules == NULL) {
+ free(rs);
+ return NULL;
+ }
+ for (i = 0; i < rs->nr_rules; i++) {
+ if (rk_rule_copy_to(rules + i, rs->rules + i) != 0) {
+ rs->nr_rules = i;
+ rk_rule_set_free(rs);
+ return NULL;
+ }
+ }
+ return rs;
+}
+
+
+static void
+rk_slr_closure_free(struct rk_slr_closure* cl)
+{
+ int i;
+ free(cl->prefix);
+ if (cl->next) {
+ for (i = 0; i < 128; i++) {
+ if (cl->next->array[i]) {
+ rk_slr_closure_free(cl->next->array[i]);
+ }
+ }
+ free(cl->next);
+ }
+ free(cl);
+}
+
+static struct next_array *
+alloc_next_array(void)
+{
+ int i;
+ struct next_array *na = malloc(sizeof(struct next_array));
+ for (i = 0; i < 128; i++) {
+ na->array[i] = NULL;
+ }
+ return na;
+}
+
+static struct rk_slr_closure*
+rk_slr_closure_create(struct rk_rule_set* rs,
+ const char* prefix, int pflen)
+{
+ struct rk_slr_closure* cl;
+ int i;
+
+ cl = (struct rk_slr_closure*) malloc(sizeof(struct rk_slr_closure));
+ if (cl == NULL) {
+ return NULL;
+ }
+
+ if (prefix != NULL) {
+ cl->prefix = (char*) malloc(pflen + 1);
+ if (cl->prefix == NULL) {
+ free(cl);
+ return NULL;
+ }
+ memcpy(cl->prefix, prefix, pflen);
+ cl->prefix[pflen] = '\0';
+ } else {
+ cl->prefix = strdup("");
+ if (cl->prefix == NULL) {
+ free(cl);
+ return NULL;
+ }
+ }
+
+ cl->r = NULL;
+ cl->is_reduction_only = 1;
+ cl->next = NULL;
+
+ for (i = 0; i < rs->nr_rules; i++) {
+ struct rk_rule* r;
+ int c;
+ r = rs->rules + i;
+ if (pflen > 0 && strncmp(prefix, r->lhs, pflen) != 0)
+ continue;
+
+ c = r->lhs[pflen] & 0x7f;
+ if (c == '\0') { /* reduce */
+ cl->r = r;
+ if (r->follow != NULL)
+ cl->is_reduction_only = 0;
+ } else {
+ cl->is_reduction_only = 0;
+ if (cl->next == NULL) {
+ cl->next = alloc_next_array();
+ }
+ if (cl->next->array[c] == NULL) {
+ cl->next->array[c] = rk_slr_closure_create(rs, r->lhs,
+ pflen + 1);
+ if (cl->next->array[c] == NULL) {
+ rk_slr_closure_free(cl);
+ return NULL;
+ }
+ }
+ }
+ }
+
+ return cl;
+}
+
+struct rk_map*
+rk_map_create(const struct rk_rule* rules)
+{
+ struct rk_map* map;
+
+ map = (struct rk_map*) malloc(sizeof(struct rk_map));
+ if (map == NULL) {
+ return NULL;
+ }
+
+ map->rs = rk_rule_set_create(rules);
+ if (map->rs == NULL) {
+ free(map);
+ return NULL;
+ }
+
+ map->root_cl = rk_slr_closure_create(map->rs, NULL, 0);
+ if (map->root_cl == NULL) {
+ rk_rule_set_free(map->rs);
+ free(map);
+ return NULL;
+ }
+
+ map->refcount = 0;
+
+ return map;
+}
+
+int
+rk_map_free(struct rk_map* map)
+{
+ if (map->refcount > 0) {
+ return -1;
+ }
+ rk_rule_set_free(map->rs);
+ rk_slr_closure_free(map->root_cl);
+ free(map);
+ return 0;
+}
+
+
+static int
+rk_reduce(struct rk_conv_context* cc,
+ struct rk_slr_closure* cur_state, char* buf, int size)
+{
+ struct rk_rule* r;
+ const char* p;
+ char* q, * end;
+
+ r = cur_state->r;
+ if (r == NULL || size <= 0)
+ return 0;
+
+ if (r->rhs[0] == SPECIAL_CHAR) {
+ if (r->rhs[1] == 'o')
+ rk_select_registered_map(cc, cc->old_map_no);
+ else {
+ int mapn = r->rhs[1] - '0';
+ rk_select_registered_map(cc, mapn);
+ }
+ return 0;
+ }
+
+ p = r->rhs;
+ q = buf;
+ end = buf + size - 1;
+ while (*p && q < end)
+ *q ++ = *p++;
+ *q = '\0';
+
+ return q - buf;
+}
+
+static void
+rk_convert_iterative(struct rk_conv_context* cc, int c,
+ char* buf, int size)
+{
+ struct rk_slr_closure* cur_state = cc->cur_state;
+
+ if (cc->map == NULL)
+ return;
+ if (size > 0)
+ *buf = '\0';
+ AGAIN:
+
+ if (cur_state->next && cur_state->next->array[c]) {
+ struct rk_slr_closure* next_state = cur_state->next->array[c];
+
+ if (next_state->is_reduction_only) {
+ rk_reduce(cc, next_state, buf, size);
+ if (cc->map == NULL) {
+ cc->cur_state = NULL;
+ return;
+ }
+ cur_state = cc->map->root_cl;
+ } else
+ cur_state = next_state;
+ } else if (cur_state->r != NULL &&
+ (cur_state->r->follow == NULL ||
+ strchr(cur_state->r->follow, c))) {
+ int len;
+
+ len = rk_reduce(cc, cur_state, buf, size);
+ if (cc->map == NULL) {
+ cc->cur_state = NULL;
+ return;
+ }
+ cur_state = cc->map->root_cl;
+ buf += len;
+ size -= len;
+ goto AGAIN;
+ } else if (cur_state != cc->map->root_cl) {
+ cur_state = cc->map->root_cl;
+ goto AGAIN;
+ }
+ cc->cur_state = cur_state;
+}
+
+static void
+brk_roman_init(struct rk_conv_context *rkctx)
+{
+ rkctx->brk_roman= (struct break_roman *)malloc(sizeof(struct break_roman));
+ rkctx->brk_roman->pending=NULL;
+ rkctx->brk_roman->pending_size=0;
+}
+
+static void
+brk_roman_free(struct rk_conv_context *rkctx)
+{
+ struct break_roman *br=rkctx->brk_roman;
+
+ if(!br)
+ return;
+
+ if (br->pending) {
+ free(br->pending);
+ }
+ free(br);
+}
+
+
+static void
+brk_roman_save_pending(struct rk_conv_context *rkctx)
+{
+ struct break_roman *br=rkctx->brk_roman;
+ int len;
+
+ if(!br)
+ return;
+
+ len = rk_get_pending_str(rkctx,NULL,0);
+
+ if(br->pending_size < len){
+ br->pending_size=len;
+ if(br->pending)
+ free(br->pending);
+ br->pending=(char *)malloc(len);
+ }
+
+ rk_get_pending_str(rkctx,br->pending,len);
+}
+
+
+static void
+brk_roman_set_decided_len(struct rk_conv_context *rkctx,int len)
+{
+ struct break_roman *br=rkctx->brk_roman;
+
+ if(!br)
+ return;
+
+ br->decided_length=len;
+}
+
+static void
+brk_roman_flush(struct rk_conv_context *rkctx)
+{
+ struct break_roman *br=rkctx->brk_roman;
+
+ if(!br)
+ return;
+
+ if(br->pending)
+ br->pending[0]='\0';
+ br->decided_length=0;
+}
+
+struct rk_conv_context*
+rk_context_create(int brk)
+{
+ struct rk_conv_context* cc;
+
+ cc = (struct rk_conv_context*) malloc(sizeof(struct rk_conv_context));
+ if (cc == NULL) {
+ return NULL;
+ }
+
+ cc->map = NULL;
+ memset(&cc->map_palette, 0, sizeof(struct rk_map*) * MAX_MAP_PALETTE);
+ cc->map_no = -1;
+ cc->old_map_no = -1;
+ cc->brk_roman = NULL;
+ if (brk) {
+ brk_roman_init(cc);
+ }
+ rk_flush(cc);
+
+ return cc;
+}
+
+void
+rk_context_free(struct rk_conv_context* cc)
+{
+ int i;
+
+ brk_roman_free(cc);
+ rk_select_map(cc, NULL);
+ for (i = 0; i < MAX_MAP_PALETTE; i++) {
+ rk_register_map(cc, i, NULL);
+ }
+ free(cc);
+}
+
+int
+rk_push_key(struct rk_conv_context* cc, int c)
+{
+ int increased_length;
+ c &= 0x7f;
+ if (cc->cur_state == NULL)
+ return -1;
+
+ brk_roman_save_pending(cc);
+ rk_convert_iterative(cc, c,
+ cc->cur_str + cc->cur_str_len,
+ MAX_CONV_CHARS + 1 - cc->cur_str_len);
+ increased_length = strlen(cc->cur_str + cc->cur_str_len);
+ brk_roman_set_decided_len(cc,increased_length);
+ cc->cur_str_len += increased_length;
+
+ return 0;
+}
+
+void
+rk_terminate(struct rk_conv_context* cc)
+{
+ rk_push_key(cc, TERMINATE_CHAR);
+}
+
+void
+rk_flush(struct rk_conv_context* cc)
+{
+ brk_roman_flush(cc);
+ cc->cur_state = (cc->map == NULL) ? NULL : cc->map->root_cl;
+ cc->cur_str[0] = '\0';
+ cc->cur_str_len = 0;
+}
+
+int
+rk_partial_result(struct rk_conv_context* cc, char* buf, int size)
+{
+ int nr_rules = cc->map->rs->nr_rules;
+ int i, pending_len;
+ char *pending_buf;
+ struct rk_rule *rule = cc->map->rs->rules;
+
+ pending_len = rk_get_pending_str(cc, NULL, 0);
+ if (pending_len == 0) {
+ return 0;
+ }
+ pending_buf = alloca(pending_len);
+ rk_get_pending_str(cc, pending_buf, pending_len);
+
+ for (i = 0; i < nr_rules; i++) {
+ if (!strcmp(rule[i].lhs, pending_buf)) {
+ const char *res = rule[i].rhs;
+ if (size <= 0) {
+ return strlen(res) + 1;
+ }
+ return snprintf(buf, size, "%s", res);
+ }
+ }
+ return 0;
+}
+
+int
+rk_result(struct rk_conv_context* cc, char* buf, int size)
+{
+ int copy_len;
+
+ if (size <= 0)
+ return cc->cur_str_len;
+ copy_len = (size - 1 < cc->cur_str_len) ? size - 1 : cc->cur_str_len;
+ memcpy(buf, cc->cur_str, copy_len);
+ buf[copy_len] = '\0';
+ if (copy_len < cc->cur_str_len)
+ memmove(cc->cur_str, cc->cur_str + copy_len,
+ cc->cur_str_len - copy_len + 1);
+ cc->cur_str_len -= copy_len;
+
+ return cc->cur_str_len;
+}
+
+struct rk_map*
+rk_select_map(struct rk_conv_context* cc, struct rk_map* map)
+{
+ struct rk_map* old_map;
+
+ cc->old_map_no = cc->map_no;
+ old_map = cc->map;
+ if (old_map) {
+ old_map->refcount--;
+ }
+
+ cc->map = map;
+ if (cc->map == NULL) {
+ cc->cur_state = NULL;
+ } else {
+ map->refcount++;
+ cc->cur_state = map->root_cl;
+ rk_flush(cc);
+ }
+ cc->map_no = -1;
+
+ return old_map;
+}
+
+
+int
+rk_get_pending_str(struct rk_conv_context* cc, char* buf, int size)
+{
+ const char* p, *end;
+ char *q;
+
+ p = (cc->cur_state == NULL) ? "" : cc->cur_state->prefix;
+
+ if (size <= 0)
+ return strlen(p) + 1;
+
+ q = buf;
+ end = buf + size - 1;
+ while (*p && q < end)
+ *q++ = *p++;
+ *q = '\0';
+ return strlen(p);
+}
+
+struct rk_map*
+rk_register_map(struct rk_conv_context* cc, int mapn, struct rk_map* map)
+{
+ struct rk_map* old_map;
+
+ if (mapn < 0 || MAX_MAP_PALETTE <= mapn)
+ return NULL;
+
+ old_map = cc->map_palette[mapn];
+ if (old_map)
+ old_map->refcount--;
+
+ cc->map_palette[mapn] = map;
+ if (map)
+ map->refcount++;
+
+ return old_map;
+}
+
+void
+rk_select_registered_map(struct rk_conv_context* cc, int mapn)
+{
+ if (0 <= mapn && mapn < 0 + MAX_MAP_PALETTE) {
+ rk_select_map(cc, cc->map_palette[mapn]);
+ cc->map_no = mapn;
+ } else {
+ rk_select_map(cc, NULL);
+ cc->map_no = -1;
+ }
+}
+
+int
+rk_selected_map(struct rk_conv_context* cc)
+{
+ return cc->map_no;
+}
+
+/* some utitlity functions to merge rk_rule */
+static int
+rk_rule_length(const struct rk_rule* rules)
+{
+ int i;
+ for (i = 0; rules[i].lhs != NULL; i++);
+ return i;
+}
+
+static int
+rk_my_strcmp(const char *s1, const char *s2)
+{
+ while (*s1 == *s2) {
+ if (!(*s1)) {
+ return 0;
+ }
+ s1++;
+ s2++;
+ }
+ return (*s1) - (*s2);
+}
+
+static int
+rk_rule_compare_func(const void *p, const void *q)
+{
+ const struct rk_rule *r1, *r2;
+ r1 = p;
+ r2 = q;
+ return rk_my_strcmp(r1->lhs, r2->lhs);
+}
+
+/*
+ * ソートされたrk_ruleを作って返す
+ */
+static struct rk_rule *
+rk_sort_rule(const struct rk_rule *src)
+{
+ struct rk_rule* rules;
+ int size = rk_rule_length(src);
+ int i, ret;
+
+ rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * (size + 1));
+ if (!rules) {
+ return NULL;
+ }
+ for (i = 0; i < size; i++) {
+ ret = rk_rule_copy_to (&src[i], &rules[i]);
+ if (ret == -1) {
+ goto ERROR;
+ }
+ }
+ qsort(rules, size, sizeof(struct rk_rule),
+ rk_rule_compare_func);
+
+ rules[i].lhs = NULL;
+ return rules;
+
+ ERROR:
+ rules[i].lhs = NULL;
+ rk_rules_free(rules);
+ free(rules);
+ return NULL;
+}
+
+/* 一つ目のルールが優先される */
+static struct rk_rule*
+rk_do_merge_rules(const struct rk_rule* r1,
+ const struct rk_rule* r2)
+{
+ int size;
+ int ret;
+ struct rk_rule* rules;
+ struct rk_rule* p, *q;
+ struct rk_rule* r;
+ struct rk_rule* tmp;
+ int i;
+
+ size = rk_rule_length(r1) + rk_rule_length(r2);
+ rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * (size + 1));
+ if (rules == NULL) {
+ return NULL;
+ }
+
+ r = rules;
+ p = (struct rk_rule *)r1;
+ q = (struct rk_rule *)r2;
+ /* ソート済の列に対してマージソートをする */
+ for (i = 0; i < size; i++) {
+ if (p->lhs && q->lhs) {
+ /* p,qを比較してどちらから取り出すかを選ぶ */
+ ret = rk_my_strcmp(p->lhs, q->lhs);
+ if (ret > 0) {
+ tmp = q;
+ q++;
+ } else if (ret < 0) {
+ tmp = p;
+ p++;
+ } else {
+ /* キーが両方同じなのでqの方を優先する */
+ tmp = q;
+ p++;q++;
+ }
+ } else if (p->lhs) {
+ tmp = p;
+ p++;
+ } else if (q->lhs) {
+ tmp = q;
+ q++;
+ } else {
+ continue;
+ }
+ /* ここまでに選択したものをcopyする */
+ ret = rk_rule_copy_to(tmp, r);
+ if (ret == -1) {
+ r->lhs = NULL;
+ goto ERROR;
+ }
+ r++;
+ }
+ r->lhs = NULL;
+
+ return rules;
+
+ ERROR:
+ rk_rules_free (rules);
+ return NULL;
+}
+
+struct rk_rule*
+rk_merge_rules(const struct rk_rule* r1,
+ const struct rk_rule* r2)
+{
+ struct rk_rule *t1, *t2;
+ struct rk_rule* rules;
+
+ t1 = rk_sort_rule(r1);
+ if (!t1) {
+ return NULL;
+ }
+ t2 = rk_sort_rule(r2);
+ if (!t2) {
+ rk_rules_free(t1);
+ return NULL;
+ }
+ rules = rk_do_merge_rules(t1, t2);
+ rk_rules_free(t1);
+ rk_rules_free(t2);
+ return rules;
+
+}
+
+void
+rk_rules_free(struct rk_rule* rules)
+{
+ struct rk_rule* p;
+
+ for (p = rules; p->lhs != NULL; p++) {
+ free((void *) p->lhs);
+ free((void *) p->rhs);
+ free((void *) p->follow);
+ }
+
+ free(rules);
+}
+
+const char *
+brk_roman_get_previous_pending(struct rk_conv_context *rkctx)
+{
+ struct break_roman *br=rkctx->brk_roman;
+
+ if(!br)
+ return NULL;
+
+ return br->pending[0] ? br->pending : NULL;
+}
+
+int
+brk_roman_get_decided_len(struct rk_conv_context *rkctx)
+{
+ struct break_roman *br=rkctx->brk_roman;
+
+ if(!br)
+ return 0;
+
+ return br->decided_length;
+}