summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Albright <eric_albright@sil.org> 2008-03-02 00:04:13 +0000
committer Eric Albright <eric_albright@sil.org> 2008-03-02 00:04:13 +0000
commit 9cd2f8ef4485490d97e06fbd35cb14d638350a1e (patch)
tree 12cacae8f36645e8ec9d166615af926250d06cbb
parent fcde2906a43990a91977c25a6da6369bdc2d6c97 (diff)
download enchant-9cd2f8ef4485490d97e06fbd35cb14d638350a1e.tar.gz
Fix handling of Unicode NFD forms
git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@22986 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
-rw-r--r-- src/aspell/aspell_provider.c 14
-rw-r--r-- src/ispell/ispell_checker.cpp 24
-rw-r--r-- src/myspell/myspell_checker.cpp 20
3 files changed, 41 insertions, 17 deletions
diff --git a/src/aspell/aspell_provider.c b/src/aspell/aspell_provider.c
index 4005cf1..ebe4205 100644
--- a/src/aspell/aspell_provider.c
+++ b/src/aspell/aspell_provider.c
@@ -84,10 +84,14 @@ aspell_dict_check (EnchantDict * me, const char *const word, size_t len)
{
PspellManager *manager;
int val;
+ char *normalizedWord;
manager = (PspellManager *) me->user_data;
-
- val = pspell_manager_check (manager, word, len);
+
+ normalizedWord = g_utf8_normalize (word, len, G_NORMALIZE_NFC);
+ val = pspell_manager_check (manager, normalizedWord, strlen(normalizedWord));
+ g_free(normalizedWord);
+
if (val == 0)
return 1;
else if (val > 0)
@@ -106,6 +110,7 @@ aspell_dict_suggest (EnchantDict * me, const char *const word,
const PspellWordList *word_list;
PspellStringEmulation *suggestions;
+ char *normalizedWord;
char **sugg_arr = NULL;
size_t n_suggestions, i;
@@ -113,7 +118,10 @@ aspell_dict_suggest (EnchantDict * me, const char *const word,
manager = (PspellManager *) me->user_data;
- word_list = pspell_manager_suggest (manager, word, len);
+ normalizedWord = g_utf8_normalize (word, len, G_NORMALIZE_NFC);
+ word_list = pspell_manager_suggest (manager, normalizedWord, strlen(normalizedWord));
+ g_free(normalizedWord);
+
if (word_list)
{
suggestions = pspell_word_list_elements (word_list);
diff --git a/src/ispell/ispell_checker.cpp b/src/ispell/ispell_checker.cpp
index 9bd6dd9..0758cad 100644
--- a/src/ispell/ispell_checker.cpp
+++ b/src/ispell/ispell_checker.cpp
@@ -190,13 +190,17 @@ ISpellChecker::checkWord(const char * const utf8Word, size_t length)
else
{
/* convert to 8bit string and null terminate */
- size_t len_in, len_out;
- char *In = (char *)(utf8Word);
+ size_t len_in, len_out, result;
+ // the 8bit encodings use precomposed forms
+ char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);
+ char *In = normalizedWord;
char *Out = szWord;
- len_in = length * sizeof(char);
+ len_in = strlen(In);
len_out = sizeof( szWord ) - 1;
- if ((size_t)-1 == g_iconv(m_translate_in, &In, &len_in, &Out, &len_out))
+ result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
+ g_free(normalizedWord);
+ if ((size_t)-1 == result)
return false;
*Out = '\0';
}
@@ -234,12 +238,16 @@ ISpellChecker::suggestWord(const char * const utf8Word, size_t length,
{
/* convert to 8bit string and null terminate */
- size_t len_in, len_out;
- char *In = (char *)(utf8Word);
+ size_t len_in, len_out, result;
+ // the 8bit encodings use precomposed forms
+ char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);
+ char *In = normalizedWord;
char *Out = word8;
- len_in = length;
+ len_in = strlen(In);
len_out = sizeof( word8 ) - 1;
- if ((size_t)-1 == g_iconv(m_translate_in, &In, &len_in, &Out, &len_out))
+ result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
+ g_free(normalizedWord);
+ if ((size_t)-1 == result)
return NULL;
*Out = '\0';
}
diff --git a/src/myspell/myspell_checker.cpp b/src/myspell/myspell_checker.cpp
index fe160fd..fdd9617 100644
--- a/src/myspell/myspell_checker.cpp
+++ b/src/myspell/myspell_checker.cpp
@@ -171,12 +171,16 @@ MySpellChecker::checkWord(const char *utf8Word, size_t len)
if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in))
return false;
- char *in = (char*) utf8Word;
+ // the 8bit encodings use precomposed forms
+ char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
+ char *in = normalizedWord;
char word8[MAXWORDLEN + 1];
char *out = word8;
- size_t len_in = len * sizeof(char);
+ size_t len_in = strlen(in);
size_t len_out = sizeof( word8 ) - 1;
- if ((size_t)-1 == g_iconv(m_translate_in, &in, &len_in, &out, &len_out))
+ size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
+ g_free(normalizedWord);
+ if ((size_t)-1 == result)
return false;
*out = '\0';
if (myspell->spell(word8))
@@ -193,12 +197,16 @@ MySpellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug
|| !g_iconv_is_valid(m_translate_out))
return 0;
- char *in = (char*) utf8Word;
+ // the 8bit encodings use precomposed forms
+ char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
+ char *in = normalizedWord;
char word8[MAXWORDLEN + 1];
char *out = word8;
- size_t len_in = len;
+ size_t len_in = strlen(in);
size_t len_out = sizeof(word8) - 1;
- if ((size_t)-1 == g_iconv(m_translate_in, &in, &len_in, &out, &len_out))
+ size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
+ g_free(normalizedWord);
+ if ((size_t)-1 == result)
return NULL;
*out = '\0';