diff options
author | <urmaslt@svn.abisource.com> | 2011-03-07 22:27:10 +0000 |
---|---|---|
committer | <urmaslt@svn.abisource.com> | 2011-03-07 22:27:10 +0000 |
commit | f66e35232891817161156c65d356c3b96b43b5ff (patch) | |
tree | 342d46dab6dd05f24b4334f17c70cf8469e4b0a0 /src/myspell/hashmgr.cxx | |
parent | 71fe6e42189bbc47a5366bb180cdd68209ef6318 (diff) | |
download | enchant-f66e35232891817161156c65d356c3b96b43b5ff.tar.gz |
* Update hunspell to 2.15
* Enable UTF-8 dict paths on Windows
git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@29648 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
Diffstat (limited to 'src/myspell/hashmgr.cxx')
-rw-r--r-- | src/myspell/hashmgr.cxx | 185 |
1 files changed, 97 insertions, 88 deletions
diff --git a/src/myspell/hashmgr.cxx b/src/myspell/hashmgr.cxx index ed11dd2..ea93b87 100644 --- a/src/myspell/hashmgr.cxx +++ b/src/myspell/hashmgr.cxx @@ -1,32 +1,15 @@ #include "license.hunspell" #include "license.myspell" -#ifndef MOZILLA_CLIENT -#include <cstdlib> -#include <cstring> -#include <cstdio> -#include <cctype> -#else #include <stdlib.h> #include <string.h> #include <stdio.h> #include <ctype.h> -#endif #include "hashmgr.hxx" #include "csutil.hxx" #include "atypes.hxx" -#ifdef MOZILLA_CLIENT -#ifdef __SUNPRO_CC // for SunONE Studio compiler -using namespace std; -#endif -#else -#ifndef WIN32 -using namespace std; -#endif -#endif - // build a hash table from a munched word list HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) @@ -107,6 +90,10 @@ HashMgr::~HashMgr() if (ignorechars) free(ignorechars); if (ignorechars_utf16) free(ignorechars_utf16); + +#ifdef MOZILLA_CLIENT + delete [] csconv; +#endif } // lookup a root word in the hashtable @@ -118,7 +105,7 @@ struct hentry * HashMgr::lookup(const char *word) const dp = tableptr[hash(word)]; if (!dp) return NULL; for ( ; dp != NULL; dp = dp->next) { - if (strcmp(word,&(dp->word)) == 0) return dp; + if (strcmp(word, dp->word) == 0) return dp; } } return NULL; @@ -134,7 +121,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); if (!hp) return 1; - char * hpw = &(hp->word); + char * hpw = hp->word; strcpy(hpw, word); if (ignorechars != NULL) { if (utf8) { @@ -161,7 +148,6 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, hp->var = H_OPT; if (aliasm) { hp->var += H_OPT_ALIASM; -// *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc)); store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); } else { strcpy(hpw + wbl + 1, desc); @@ -179,7 +165,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, return 0; } while (dp->next != NULL) { - if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { // remove hidden onlyupcase homonym if (!onlyupcase) { if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { @@ -197,7 +183,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, } dp=dp->next; } - if (strcmp(&(hp->word), &(dp->word)) == 0) { + if (strcmp(hp->word, dp->word) == 0) { // remove hidden onlyupcase homonym if (!onlyupcase) { if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { @@ -274,7 +260,7 @@ int HashMgr::remove(const char * word) while (dp) { if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { unsigned short * flags = - (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1)); + (unsigned short *) malloc(sizeof(short) * (dp->alen + 1)); if (!flags) return 1; for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i]; flags[dp->alen] = forbiddenword; @@ -296,7 +282,7 @@ int HashMgr::remove_forbidden_flag(const char * word) { if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic. else { unsigned short * flags2 = - (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1)); + (unsigned short *) malloc(sizeof(short) * (dp->alen - 1)); if (!flags2) return 1; int i, j = 0; for (i = 0; i < dp->alen; i++) { @@ -312,17 +298,10 @@ int HashMgr::remove_forbidden_flag(const char * word) { } // add a custom dic. word to the hash table (public) -int HashMgr::add(const char * word, char * aff) +int HashMgr::add(const char * word) { - unsigned short * flags; + unsigned short * flags = NULL; int al = 0; - if (aff) { - al = decode_flags(&flags, aff); - flag_qsort(flags, 0, al); - } else { - flags = NULL; - } - if (remove_forbidden_flag(word)) { int captype; int wbl = strlen(word); @@ -375,6 +354,7 @@ int HashMgr::load_tables(const char * tpath, const char * key) int al; char * ap; char * dp; + char * dp2; unsigned short * flags; char * ts; @@ -389,16 +369,16 @@ int HashMgr::load_tables(const char * tpath, const char * key) return 2; } mychomp(ts); - + /* remove byte order mark */ if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) { memmove(ts, ts+3, strlen(ts+3)+1); - HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n"); + // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions } - - if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n"); + tablesize = atoi(ts); - if (!tablesize) { + if (tablesize == 0) { + HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); delete dict; return 4; } @@ -419,15 +399,26 @@ int HashMgr::load_tables(const char * tpath, const char * key) while ((ts = dict->getline())) { mychomp(ts); // split each line into word and morphological description - dp = strchr(ts,'\t'); - char * dp2 = strchr(ts,' '); - if (dp2 && (!dp || (dp2 < dp))) dp = dp2; + dp = ts; + while ((dp = strchr(dp, ':'))) { + if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { + for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); + if (dp < ts) { // missing word + dp = NULL; + } else { + *(dp + 1) = '\0'; + dp = dp + 2; + } + break; + } + dp++; + } - if (dp) { - *dp = '\0'; - dp++; - } else { - dp = NULL; + // tabulator is the old morphological field separator + dp2 = strchr(ts, '\t'); + if (dp2 && (!dp || dp2 < dp)) { + *dp2 = '\0'; + dp = dp2 + 1; } // split each line into word and affix char strings @@ -448,13 +439,18 @@ int HashMgr::load_tables(const char * tpath, const char * key) *ap = '\0'; if (aliasf) { int index = atoi(ap + 1); - al = get_aliasf(index, &flags); + al = get_aliasf(index, &flags, dict); if (!al) { - HUNSPELL_WARNING(stderr, "error - bad flag vector alias: %s\n", ts); + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum()); *ap = '\0'; } } else { - al = decode_flags(&flags, ap + 1); + al = decode_flags(&flags, ap + 1, dict); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } flag_qsort(flags, 0, al); } } else { @@ -493,12 +489,16 @@ int HashMgr::hash(const char * word) const return (unsigned long) hv % tablesize; } -int HashMgr::decode_flags(unsigned short ** result, char * flags) { +int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) { int len; + if (*flags == '\0') { + *result = NULL; + return 0; + } switch (flag_mode) { case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) len = strlen(flags); - if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: length of FLAG_LONG flagvector is odd: %s\n", flags); + if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum()); len /= 2; *result = (unsigned short *) malloc(len * sizeof(short)); if (!*result) return -1; @@ -508,6 +508,7 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) { break; } case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) + int i; len = 1; char * src = flags; unsigned short * dest; @@ -520,14 +521,20 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) { dest = *result; for (p = flags; *p; p++) { if (*p == ',') { - *dest = (unsigned short) atoi(src); - if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + i = atoi(src); + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short) i; + if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); src = p + 1; dest++; } } - *dest = (unsigned short) atoi(src); - if (*dest == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + i = atoi(src); + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short) i; + if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); break; } case FLAG_UNI: { // UTF-8 characters @@ -549,18 +556,21 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) { dest++; } } - } + } return len; } unsigned short HashMgr::decode_flag(const char * f) { unsigned short s = 0; + int i; switch (flag_mode) { case FLAG_LONG: s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; break; case FLAG_NUM: - s = (unsigned short) atoi(f); + i = atoi(f); + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1); + s = (unsigned short) i; break; case FLAG_UNI: u8_u16((w_char *) &s, 1, f); @@ -568,7 +578,7 @@ unsigned short HashMgr::decode_flag(const char * f) { default: s = (unsigned short) *((unsigned char *)f); } - if (!s) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); return s; } @@ -618,18 +628,18 @@ int HashMgr::load_config(const char * affpath, const char * key) /* parse in the try string */ if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { if (flag_mode != FLAG_CHAR) { - HUNSPELL_WARNING(stderr, "error: duplicate FLAG parameter\n"); + HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum()); } if (strstr(line, "long")) flag_mode = FLAG_LONG; if (strstr(line, "num")) flag_mode = FLAG_NUM; if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; if (flag_mode == FLAG_CHAR) { - HUNSPELL_WARNING(stderr, "error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line); + HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum()); } } if (strncmp(line,"FORBIDDENWORD",13) == 0) { char * st = NULL; - if (parse_string(line, &st, "FORBIDDENWORD")) { + if (parse_string(line, &st, afflst->getlinenum())) { delete afflst; return 1; } @@ -637,7 +647,7 @@ int HashMgr::load_config(const char * affpath, const char * key) free(st); } if (strncmp(line, "SET", 3) == 0) { - if (parse_string(line, &enc, "SET")) { + if (parse_string(line, &enc, afflst->getlinenum())) { delete afflst; return 1; } @@ -650,8 +660,8 @@ int HashMgr::load_config(const char * affpath, const char * key) #endif } else csconv = get_current_cs(enc); } - if (strncmp(line, "LANG", 4) == 0) { - if (parse_string(line, &lang, "LANG")) { + if (strncmp(line, "LANG", 4) == 0) { + if (parse_string(line, &lang, afflst->getlinenum())) { delete afflst; return 1; } @@ -660,7 +670,8 @@ int HashMgr::load_config(const char * affpath, const char * key) /* parse in the ignored characters (for example, Arabic optional diacritics characters */ if (strncmp(line,"IGNORE",6) == 0) { - if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) { + if (parse_array(line, &ignorechars, &ignorechars_utf16, + &ignorechars_utf16_len, utf8, afflst->getlinenum())) { delete afflst; return 1; } @@ -683,7 +694,7 @@ int HashMgr::load_config(const char * affpath, const char * key) if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break; } - if (csconv == NULL) csconv = get_current_cs("ISO8859-1"); + if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING); delete afflst; return 0; } @@ -692,7 +703,7 @@ int HashMgr::load_config(const char * affpath, const char * key) int HashMgr::parse_aliasf(char * line, FileMgr * af) { if (numaliasf != 0) { - HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n"); + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); return 1; } char * tp = line; @@ -710,8 +721,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) numaliasf = 0; aliasf = NULL; aliasflen = NULL; - HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n"); - // free(piece); + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); return 1; } aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *)); @@ -731,7 +741,6 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) } i++; } - // free(piece); piece = mystrsep(&tp, 0); } if (np != 2) { @@ -740,7 +749,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) free(aliasflen); aliasf = NULL; aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: missing AF table information\n"); + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); return 1; } @@ -764,14 +773,13 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) free(aliasflen); aliasf = NULL; aliasflen = NULL; - HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); - // free(piece); + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); return 1; } break; } case 1: { - aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece); + aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af); flag_qsort(aliasf[j], 0, aliasflen[j]); break; } @@ -779,7 +787,6 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) } i++; } - // free(piece); piece = mystrsep(&tp, 0); } if (!aliasf[j]) { @@ -788,7 +795,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) aliasf = NULL; aliasflen = NULL; numaliasf = 0; - HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n"); + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); return 1; } } @@ -799,12 +806,12 @@ int HashMgr::is_aliasf() { return (aliasf != NULL); } -int HashMgr::get_aliasf(int index, unsigned short ** fvec) { +int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) { if ((index > 0) && (index <= numaliasf)) { *fvec = aliasf[index - 1]; return aliasflen[index - 1]; } - HUNSPELL_WARNING(stderr, "error: bad flag alias index: %d\n", index); + HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index); *fvec = NULL; return 0; } @@ -813,7 +820,7 @@ int HashMgr::get_aliasf(int index, unsigned short ** fvec) { int HashMgr::parse_aliasm(char * line, FileMgr * af) { if (numaliasm != 0) { - HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n"); + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); return 1; } char * tp = line; @@ -828,8 +835,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) case 1: { numaliasm = atoi(piece); if (numaliasm < 1) { - HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n"); - // free(piece); + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); return 1; } aliasm = (char **) malloc(numaliasm * sizeof(char *)); @@ -844,14 +850,13 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) } i++; } - // free(piece); piece = mystrsep(&tp, 0); } if (np != 2) { numaliasm = 0; free(aliasm); aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: missing AM alias information\n"); + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); return 1; } @@ -869,8 +874,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) switch(i) { case 0: { if (strncmp(piece,"AM",2) != 0) { - HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n"); - // free(piece); + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); numaliasm = 0; free(aliasm); aliasm = NULL; @@ -879,7 +883,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) break; } case 1: { - // add the remaining of the line + // add the remaining of the line if (*tp) { *(tp - 1) = ' '; tp = tp + strlen(tp); @@ -889,19 +893,24 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) else reverseword(piece); } aliasm[j] = mystrdup(piece); + if (!aliasm[j]) { + numaliasm = 0; + free(aliasm); + aliasm = NULL; + return 1; + } break; } default: break; } i++; } - // free(piece); piece = mystrsep(&tp, ' '); } if (!aliasm[j]) { numaliasm = 0; free(aliasm); aliasm = NULL; - HUNSPELL_WARNING(stderr, "error: map table is corrupt\n"); + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); return 1; } } |