summary refs log tree commit diff
path: root/linguistics.c
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics.c')
-rw-r--r-- linguistics.c 153
1 files changed, 153 insertions, 0 deletions
diff --git a/linguistics.c b/linguistics.c
new file mode 100644
index 00000000..800ac5fd
--- /dev/null
+++ b/linguistics.c
@@ -0,0 +1,153 @@
+#include <string.h>
+#include <stdio.h>
+#include <glib.h>
+#include "debug.h"
+#include "linguistics.h"
+
+/*
+ * Transliteration table for accented and otherwise special letters.
+ *
+ * Each row holds up to three UTF-8 strings:
+ *   [0] the special character to look for,
+ *   [1] its single-letter replacement (used with mode 1),
+ *   [2] an optional expanded replacement (used with mode 2); rows that give
+ *       only two strings leave this slot NULL.
+ * linguistics_expand_special() writes the replacement over a copy of the
+ * input, so no replacement may be longer (in bytes) than its key.
+ */
+static const char *special[][3]={
+/* Capital Diacritics */
+/* ¨ Diaeresis */
+{"Ä","A","AE"},
+{"Ö","O","OE"},
+{"Ü","U","UE"},
+/* ˝ Double Acute Accent */
+{"Ő","O"},
+{"Ű","U"},
+/* ´ Acute Accent */
+{"Á","A"},
+{"Ć","C"},
+{"É","E"},
+{"Í","I"},
+{"Ń","N"},
+{"Ó","O"},
+{"Ś","S"},
+{"Ú","U"},
+{"Ý","Y"},
+{"Ź","Z"},
+/* ˛ Ogonek */
+{"Ą","A"},
+{"Ę","E"},
+/* ˙ Dot */
+{"Ż","Z"},
+/* – Stroke */
+{"Ł","L"},
+/* ˚ Ring */
+{"Å","A","AA"},
+{"Ů","U"},
+/* ˇ Caron */
+{"Č","C"},
+{"Ď","D"},
+{"Ě","E"},
+{"Ň","N"},
+{"Ř","R"},
+{"Š","S"},
+{"Ť","T"},
+{"Ž","Z"},
+/* / Slash */
+{"Ø","O","OE"},
+/* ligatures */
+{"Æ","A","AE"},
+/* Small Diacritics */
+/* ¨ Diaeresis */
+{"ä","a","ae"},
+{"ö","o","oe"},
+{"ü","u","ue"},
+/* ˝ Double Acute Accent */
+{"ő","o"},
+{"ű","u"},
+/* ´ Acute Accent */
+{"á","a"},
+{"ć","c"},
+{"é","e"},
+{"í","i"},
+{"ń","n"},
+{"ó","o"},
+{"ś","s"},
+{"ú","u"},
+{"ý","y"},
+{"ź","z"},
+/* ˛ Ogonek */
+{"ą","a"},
+{"ę","e"},
+/* ˙ Dot */
+{"ż","z"},
+/* – Stroke */
+{"ł","l"},
+/* ˚ Ring */
+{"ů","u"},
+{"å","a","aa"},
+/* ˇ Caron */
+{"č","c"},
+{"ď","d"},
+{"ě","e"},
+/* lower-case n with caron; this row used to repeat the capital entry */
+{"ň","n"},
+{"ř","r"},
+{"š","s"},
+{"ť","t"},
+{"ž","z"},
+/* / Slash */
+{"ø","o","oe"},
+/* ligatures */
+{"æ","a","ae"},
+{"ß","s","ss"},
+};
+
+/**
+ * Replace special (accented) characters of a UTF-8 string by the ASCII
+ * substitutes listed in the special[] table.
+ *
+ * @param str  NUL-terminated UTF-8 input string; it is not modified.
+ * @param mode Selects the replacement column of special[]: 0 performs no
+ *             replacement, 1 uses the single-letter form (e.g. "ä" -> "a"),
+ *             2 uses the expanded form for rows that have one
+ *             (e.g. "ä" -> "ae"); rows without it are left untouched.
+ * @return A newly allocated string to be released with g_free(). For mode 0
+ *         this is a plain copy of str. For the other modes it is the
+ *         converted string, or NULL if no character was replaced. NULL is
+ *         also returned for a NULL str or a mode that has no table column.
+ */
+char *
+linguistics_expand_special(char *str, int mode)
+{
+	const int columns=(int)(sizeof(special[0])/sizeof(special[0][0]));
+	char *in=str;
+	char *out,*ret;
+	int found=0;
+
+	if (!str)
+		return NULL;
+	if (!mode)
+		return g_strdup(str);
+	/* special[i][mode] below is only valid for existing columns; any other
+	 * value would read outside the table row. */
+	if (mode < 0 || mode >= columns)
+		return NULL;
+	out=ret=g_strdup(str);
+	while (*in) {
+		char *next=g_utf8_find_next_char(in, NULL);
+		int len=next-in;
+		int match=0;
+		/* Single-byte characters are never looked up in the table. */
+		if (len > 1) {
+			size_t i;
+			for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++) {
+				const char *search=special[i][0];
+				const char *replace=special[i][mode];
+				/* The key has to end right after len bytes, so a
+				 * longer key cannot match on its prefix alone. */
+				if (strncmp(in,search,len) || search[len])
+					continue;
+				/* No replacement in this column: keep scanning. */
+				if (!replace)
+					continue;
+				{
+					int replace_len=strlen(replace);
+					/* ret is only as large as the input, so the
+					 * replacement must fit into the bytes it
+					 * replaces. */
+					dbg_assert(replace_len <= len);
+					dbg(1,"found %s %s %d %s %d\n",in,search,len,replace,replace_len);
+					memcpy(out, replace, replace_len);
+					out+=replace_len;
+					match=1;
+				}
+				break;
+			}
+		}
+		if (match) {
+			found=1;
+			in=next;
+		} else {
+			/* Copy the unmodified character byte by byte. */
+			while (len-- > 0)
+				*out++=*in++;
+		}
+	}
+	*out='\0';
+	if (!found) {
+		/* Nothing was replaced: signal that with NULL, not a copy. */
+		g_free(ret);
+		ret=NULL;
+	}
+	return ret;
+}
+
+/**
+ * Find the text that follows the first word separator in a string.
+ *
+ * A space or a hyphen counts as separator.
+ *
+ * @param str NUL-terminated string to scan; it is not modified.
+ * @return Pointer into str just behind the first ' ' or '-', or NULL if str
+ *         contains no separator or the separator is its last character.
+ */
+char *
+linguistics_next_word(char *str)
+{
+	/* strpbrk avoids squeezing the size_t result of strcspn into an int. */
+	char *sep=strpbrk(str, " -");
+	if (!sep || !sep[1])
+		return NULL;
+	return sep+1;
+}
+
+/**
+ * Initialise the linguistics module. The body is currently empty, so calling
+ * this function has no effect.
+ */
+void
+linguistics_init(void)
+{
+}