From 9cd2f8ef4485490d97e06fbd35cb14d638350a1e Mon Sep 17 00:00:00 2001 From: Eric Albright Date: Sun, 2 Mar 2008 00:04:13 +0000 Subject: Fix handling of Unicode NFD forms git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@22986 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6 --- src/aspell/aspell_provider.c | 14 +++++++++++--- src/ispell/ispell_checker.cpp | 24 ++++++++++++++++-------- src/myspell/myspell_checker.cpp | 20 ++++++++++++++------ 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/aspell/aspell_provider.c b/src/aspell/aspell_provider.c index 4005cf1..ebe4205 100644 --- a/src/aspell/aspell_provider.c +++ b/src/aspell/aspell_provider.c @@ -84,10 +84,14 @@ aspell_dict_check (EnchantDict * me, const char *const word, size_t len) { PspellManager *manager; int val; + char *normalizedWord; manager = (PspellManager *) me->user_data; - - val = pspell_manager_check (manager, word, len); + + normalizedWord = g_utf8_normalize (word, len, G_NORMALIZE_NFC); + val = pspell_manager_check (manager, normalizedWord, strlen(normalizedWord)); + g_free(normalizedWord); + if (val == 0) return 1; else if (val > 0) @@ -106,6 +110,7 @@ aspell_dict_suggest (EnchantDict * me, const char *const word, const PspellWordList *word_list; PspellStringEmulation *suggestions; + char *normalizedWord; char **sugg_arr = NULL; size_t n_suggestions, i; @@ -113,7 +118,10 @@ aspell_dict_suggest (EnchantDict * me, const char *const word, manager = (PspellManager *) me->user_data; - word_list = pspell_manager_suggest (manager, word, len); + normalizedWord = g_utf8_normalize (word, len, G_NORMALIZE_NFC); + word_list = pspell_manager_suggest (manager, normalizedWord, strlen(normalizedWord)); + g_free(normalizedWord); + if (word_list) { suggestions = pspell_word_list_elements (word_list); diff --git a/src/ispell/ispell_checker.cpp b/src/ispell/ispell_checker.cpp index 9bd6dd9..0758cad 100644 --- a/src/ispell/ispell_checker.cpp +++ b/src/ispell/ispell_checker.cpp @@ -190,13 +190,17 @@ ISpellChecker::checkWord(const char * const utf8Word, size_t length) else { /* convert to 8bit string and null terminate */ - size_t len_in, len_out; - char *In = (char *)(utf8Word); + size_t len_in, len_out, result; + // the 8bit encodings use precomposed forms + char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC); + char *In = normalizedWord; char *Out = szWord; - len_in = length * sizeof(char); + len_in = strlen(In); len_out = sizeof( szWord ) - 1; - if ((size_t)-1 == g_iconv(m_translate_in, &In, &len_in, &Out, &len_out)) + result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out); + g_free(normalizedWord); + if ((size_t)-1 == result) return false; *Out = '\0'; } @@ -234,12 +238,16 @@ ISpellChecker::suggestWord(const char * const utf8Word, size_t length, { /* convert to 8bit string and null terminate */ - size_t len_in, len_out; - char *In = (char *)(utf8Word); + size_t len_in, len_out, result; + // the 8bit encodings use precomposed forms + char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC); + char *In = normalizedWord; char *Out = word8; - len_in = length; + len_in = strlen(In); len_out = sizeof( word8 ) - 1; - if ((size_t)-1 == g_iconv(m_translate_in, &In, &len_in, &Out, &len_out)) + result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out); + g_free(normalizedWord); + if ((size_t)-1 == result) return NULL; *Out = '\0'; } diff --git a/src/myspell/myspell_checker.cpp b/src/myspell/myspell_checker.cpp index fe160fd..fdd9617 100644 --- a/src/myspell/myspell_checker.cpp +++ b/src/myspell/myspell_checker.cpp @@ -171,12 +171,16 @@ MySpellChecker::checkWord(const char *utf8Word, size_t len) if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in)) return false; - char *in = (char*) utf8Word; + // the 8bit encodings use precomposed forms + char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC); + char *in = normalizedWord; char word8[MAXWORDLEN + 1]; char *out = word8; - size_t len_in = len * sizeof(char); + size_t len_in = strlen(in); size_t len_out = sizeof( word8 ) - 1; - if ((size_t)-1 == g_iconv(m_translate_in, &in, &len_in, &out, &len_out)) + size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out); + g_free(normalizedWord); + if ((size_t)-1 == result) return false; *out = '\0'; if (myspell->spell(word8)) @@ -193,12 +197,16 @@ MySpellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug || !g_iconv_is_valid(m_translate_out)) return 0; - char *in = (char*) utf8Word; + // the 8bit encodings use precomposed forms + char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC); + char *in = normalizedWord; char word8[MAXWORDLEN + 1]; char *out = word8; - size_t len_in = len; + size_t len_in = strlen(in); size_t len_out = sizeof(word8) - 1; - if ((size_t)-1 == g_iconv(m_translate_in, &in, &len_in, &out, &len_out)) + size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out); + g_free(normalizedWord); + if ((size_t)-1 == result) return NULL; *out = '\0'; -- cgit v1.2.1