summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dom Lachowicz <domlachowicz@gmail.com> 2008-04-17 23:57:14 +0000
committer: Dom Lachowicz <domlachowicz@gmail.com> 2008-04-17 23:57:14 +0000
commit: 09d8c09c368410e30128c0288f4ffc38749f6a56 (patch)
tree: 1f47e5b94d710900cfa961f3d12919f2f83176cb
parent: 1e9c930b0a14f2b9efec50c6377b2dc0e8e3563e (diff)
download: enchant-09d8c09c368410e30128c0288f4ffc38749f6a56.tar.gz
Bug 11550: upgrade to hunspell 1.2.2 and pick up the system-wide version of the header files. This fixes an undefined symbol error. Shame on them for changing the API/ABI and for not exposing enough in the stable C API to be useful.
git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@23527 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
-rw-r--r-- src/myspell/Makefile.am 31
-rw-r--r-- src/myspell/affentry.cxx 115
-rw-r--r-- src/myspell/affixmgr.cxx 227
-rw-r--r-- src/myspell/affixmgr.hxx 28
-rw-r--r-- src/myspell/atypes.hxx 14
-rw-r--r-- src/myspell/csutil.cxx 99
-rw-r--r-- src/myspell/csutil.hxx 24
-rw-r--r-- src/myspell/filemgr.cxx 38
-rw-r--r-- src/myspell/filemgr.hxx 19
-rw-r--r-- src/myspell/hashmgr.cxx 157
-rw-r--r-- src/myspell/hashmgr.hxx 12
-rw-r--r-- src/myspell/htypes.hxx 6
-rw-r--r-- src/myspell/hunspell.cxx 98
-rw-r--r-- src/myspell/hunspell.h 38
-rw-r--r-- src/myspell/hunspell.hxx 18
-rw-r--r-- src/myspell/hunzip.cxx 196
-rw-r--r-- src/myspell/hunzip.hxx 41
-rw-r--r-- src/myspell/myspell_checker.cpp 7
-rw-r--r-- src/myspell/phonet.cxx 2
-rw-r--r-- src/myspell/phonet.hxx 4
-rw-r--r-- src/myspell/suggestmgr.cxx 16
-rw-r--r-- src/myspell/suggestmgr.hxx 2
-rw-r--r-- src/myspell/w_char.hxx 19
23 files changed, 823 insertions, 388 deletions
diff --git a/src/myspell/Makefile.am b/src/myspell/Makefile.am
index f2ea5dc..0e529ca 100644
--- a/src/myspell/Makefile.am
+++ b/src/myspell/Makefile.am
@@ -4,38 +4,45 @@ else
target_lib =
endif
-# copied from hunspell 1.2.1
+# copied from hunspell 1.2.2
COPIED_MYSPELL_FILES= \
+ affentry.cxx
affentry.hxx \
+ affixmgr.cxx \
affixmgr.hxx \
atypes.hxx \
baseaffix.hxx \
+ csutil.cxx \
csutil.hxx \
+ dictmgr.cxx \
dictmgr.hxx \
+ filemgr.cxx \
+ filemgr.hxx \
+ hashmgr.cxx \
hashmgr.hxx \
htypes.hxx \
+ hunspell.cxx \
hunspell.h \
hunspell.hxx \
+ hunzip.cxx \
+ hunzip.hxx \
langnum.hxx \
- suggestmgr.hxx \
- affentry.cxx \
- affixmgr.cxx \
- csutil.cxx \
- dictmgr.cxx \
- hashmgr.cxx \
- hunspell.cxx \
- suggestmgr.cxx \
phonet.cxx \
- phonet.hxx
+ phonet.hxx \
+ suggestmgr.cxx \
+ suggestmgr.hxx \
+ utf_info.cxx \
+ w_char.hxx
if WITH_SYSTEM_MYSPELL
EXTRA_MYSPELL_FILES=
+EXTRA_MYSPELL_CFLAGS=-DWITH_SYSTEM_MYSPELL
else
EXTRA_MYSPELL_FILES=$(COPIED_MYSPELL_FILES)
+EXTRA_MYSPELL_CFLAGS=
endif
-
-INCLUDES=-I$(top_srcdir)/src $(ENCHANT_CFLAGS) $(MYSPELL_CFLAGS) $(CXX_WARN_CFLAGS) -D_ENCHANT_BUILD=1
+INCLUDES=-I$(top_srcdir)/src $(ENCHANT_CFLAGS) $(MYSPELL_CFLAGS) $(CXX_WARN_CFLAGS) $(EXTRA_MYSPELL_CFLAGS) -D_ENCHANT_BUILD=1
myspell_LTLIBRARIES = $(target_lib)
myspelldir= $(libdir)/enchant
diff --git a/src/myspell/affentry.cxx b/src/myspell/affentry.cxx
index 0ffe557..2436fbb 100644
--- a/src/myspell/affentry.cxx
+++ b/src/myspell/affentry.cxx
@@ -7,9 +7,9 @@
#include <cctype>
#include <cstdio>
#else
-#include <stdlib.h>
+#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
+#include <stdio.h>
#include <ctype.h>
#endif
@@ -17,7 +17,7 @@
#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
@@ -29,8 +29,8 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
pmyMgr = pmgr;
// set up its intial values
-
- aflag = dp->aflag; // flag
+
+ aflag = dp->aflag; // flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
@@ -70,7 +70,7 @@ char * PfxEntry::add(const char * word, int len)
char tword[MAXWORDUTF8LEN + 4];
if ((len > stripl) && (len >= numconds) && test_condition(word) &&
- (!stripl || (strncmp(word, strip, stripl) == 0)) &&
+ (!stripl || (strncmp(word, strip, stripl) == 0)) &&
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add prefix */
char * pp = tword;
@@ -81,7 +81,7 @@ char * PfxEntry::add(const char * word, int len)
strcpy(pp, (word + stripl));
return mystrdup(tword);
}
- return NULL;
+ return NULL;
}
inline char * PfxEntry::nextchar(char * p) {
@@ -149,7 +149,7 @@ inline int PfxEntry::test_condition(const char * st)
}
}
-// check if this prefix entry matches
+// check if this prefix entry matches
struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound, const FLAG needflag)
{
int tmpl; // length of tmpword
@@ -171,7 +171,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
if (stripl) strcpy (tmpword, strip);
strcpy ((tmpword + stripl), (word + appndl));
-
+
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
// this file for more info on exactly what is being
@@ -194,14 +194,14 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
he = he->next_homonym; // check homonyms
} while (he);
}
-
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
//if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
+ he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
0, NULL, FLAG_NULL, needflag, in_compound);
if (he) return he;
}
@@ -210,7 +210,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
return NULL;
}
-// check if this prefix entry matches
+// check if this prefix entry matches
struct hentry * PfxEntry::check_twosfx(const char * word, int len,
char in_compound, const FLAG needflag)
{
@@ -245,8 +245,8 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
if (test_condition(tmpword)) {
tmpl += stripl;
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
@@ -258,7 +258,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
return NULL;
}
-// check if this prefix entry matches
+// check if this prefix entry matches
char * PfxEntry::check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag)
{
@@ -292,8 +292,8 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
if (test_condition(tmpword)) {
tmpl += stripl;
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
@@ -305,7 +305,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
return NULL;
}
-// check if this prefix entry matches
+// check if this prefix entry matches
char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
{
int tmpl; // length of tmpword
@@ -313,7 +313,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
char tmpword[MAXWORDUTF8LEN + 4];
char result[MAXLNLEN];
char * st;
-
+
*result = '\0';
// on entry prefix is 0 length or already matches the beginning of the word.
@@ -349,7 +349,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
// needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
- (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+ (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
if (morphcode) {
strcat(result, " ");
strcat(result, morphcode);
@@ -364,6 +364,13 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
if (HENTRY_DATA(he)) {
strcat(result, " ");
strcat(result,HENTRY_DATA(he));
+ } else {
+ // return with debug information
+ char * flag = pmyMgr->encode_flag(getFlag());
+ strcat(result, " ");
+ strcat(result, MORPH_FLAG);
+ strcat(result, flag);
+ free(flag);
}
strcat(result, "\n");
}
@@ -371,12 +378,12 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
} while (he);
}
- // prefix matched but no root word was found
- // if aeXPRODUCT is allowed, try again but now
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
+ st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
FLAG_NULL, needflag);
if (st) {
strcat(result, st);
@@ -385,7 +392,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
}
}
}
-
+
if (*result) return mystrdup(result);
return NULL;
}
@@ -396,7 +403,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
pmyMgr = pmgr;
// set up its intial values
- aflag = dp->aflag; // char flag
+ aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
@@ -425,7 +432,7 @@ SfxEntry::~SfxEntry()
if (strip) free(strip);
pmyMgr = NULL;
appnd = NULL;
- strip = NULL;
+ strip = NULL;
if (opts & aeLONGCOND) free(c.l.conds2);
if (morphcode && !(opts & aeALIASM)) free(morphcode);
if (contclass && !(opts & aeALIASF)) free(contclass);
@@ -470,14 +477,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
if (numconds == 0) return 1;
char * p = c.conds;
st--;
- int c = 1;
+ int i = 1;
while (1) {
switch (*p) {
case '\0': return 1;
case '[': { p = nextchar(p); pos = st; break; }
case '^': { p = nextchar(p); neg = true; break; }
case ']': { if (!neg && !ingroup) return 0;
- c++;
+ i++;
pos = NULL;
neg = false;
ingroup = false;
@@ -515,17 +522,17 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
}
if (pos && st != pos) {
if (neg) return 0;
- else if (c == numconds) return 1;
+ else if (i == numconds) return 1;
ingroup = true;
}
if (p && *p != '\0') p = nextchar(p);
} else if (pos) {
if (neg) return 0;
- else if (c == numconds) return 1;
+ else if (i == numconds) return 1;
ingroup = true;
}
if (!pos) {
- c++;
+ i++;
st--;
if (st < beg && p && *p != '\0') return 0; // word <= condition
}
@@ -538,12 +545,12 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
}
}
-// see if this suffix is present in the word
+// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
const FLAG badflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
@@ -563,7 +570,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
tmpl = len - appndl;
// the second condition is not enough for UTF-8 strings
// it checked in test_condition()
-
+
if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
// if (tmpl > 0) {
@@ -596,21 +603,21 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
do {
// check conditional suffix (enabled by prefix)
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
- TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- (((optflags & aeXPRODUCT) == 0) ||
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ (((optflags & aeXPRODUCT) == 0) ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
// enabled by prefix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
) &&
// handle cont. class
- ((!cclass) ||
+ ((!cclass) ||
((contclass) && TESTAFF(contclass, cclass, contclasslen))
) &&
// check only in compound homonyms (bad flags)
(!badflag || !TESTAFF(he->astr, badflag, he->alen)
- ) &&
+ ) &&
// handle required flag
- ((!needflag) ||
+ ((!needflag) ||
(TESTAFF(he->astr, needflag, he->alen) ||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))
)
@@ -618,12 +625,12 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
he = he->next_homonym; // check homonyms
} while (he);
- // obsolote stemming code (used only by the
+ // obsolote stemming code (used only by the
// experimental SuffixMgr:suggest_pos_stems)
// store resulting root in wlst
} else if (wlst && (*ns < maxSug)) {
int cwrd = 1;
- for (int k=0; k < *ns; k++)
+ for (int k=0; k < *ns; k++)
if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
if (cwrd) {
wlst[*ns] = mystrdup(tmpword);
@@ -640,11 +647,11 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
return NULL;
}
-// see if two-level suffix is present in the word
+// see if two-level suffix is present in the word
struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
@@ -689,7 +696,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
if (test_condition((char *) cp, (char *) tmpword)) {
if (ppfx) {
// handle conditional suffix
- if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
else
he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
@@ -702,18 +709,18 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
return NULL;
}
-// see if two-level suffix is present in the word
+// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
{
- int tmpl; // length of tmpword
+ int tmpl; // length of tmpword
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
char * st;
char result[MAXLNLEN];
-
+
*result = '\0';
// if this suffix is being cross checked with a prefix
@@ -788,7 +795,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
}
// get next homonym with same affix
-struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
const FLAG cclass, const FLAG needflag)
{
PfxEntry* ep = (PfxEntry *) ppfx;
@@ -796,18 +803,18 @@ struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, Aff
while (he->next_homonym) {
he = he->next_homonym;
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- ((optflags & aeXPRODUCT) == 0 ||
+ if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ ((optflags & aeXPRODUCT) == 0 ||
TESTAFF(he->astr, eFlag, he->alen) ||
// handle conditional suffix
((contclass) && TESTAFF(contclass, eFlag, contclasslen))
) &&
// handle cont. class
- ((!cclass) ||
+ ((!cclass) ||
((contclass) && TESTAFF(contclass, cclass, contclasslen))
) &&
// handle required flag
- ((!needflag) ||
+ ((!needflag) ||
(TESTAFF(he->astr, needflag, he->alen) ||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))
)
diff --git a/src/myspell/affixmgr.cxx b/src/myspell/affixmgr.cxx
index d3e36be..1bcec78 100644
--- a/src/myspell/affixmgr.cxx
+++ b/src/myspell/affixmgr.cxx
@@ -20,15 +20,17 @@
#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
-AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
+AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
{
// register hash manager and load affix data from aff file
- pHMgr = ptr;
+ pHMgr = ptr[0];
+ alldic = ptr;
+ maxdic = md;
keystring = NULL;
trystring = NULL;
encoding=NULL;
@@ -107,7 +109,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
contclasses[j] = 0;
}
- if (parse_file(affpath)) {
+ if (parse_file(affpath, key)) {
HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
}
@@ -244,14 +246,10 @@ AffixMgr::~AffixMgr()
// read in aff file and build up prefix and suffix entry objects
-int AffixMgr::parse_file(const char * affpath)
+int AffixMgr::parse_file(const char * affpath, const char * key)
{
-
- // io buffers
- char line[MAXLNLEN+1];
-
- // affix type
- char ft;
+ char * line; // io buffers
+ char ft; // affix type
// checking flag duplication
char dupflags[CONTSIZE];
@@ -261,8 +259,7 @@ int AffixMgr::parse_file(const char * affpath)
int firstline = 1;
// open the affix file
- FILE * afflst;
- afflst = fopen(affpath,"r");
+ FileMgr * afflst = new FileMgr(affpath, key);
if (!afflst) {
HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
return 1;
@@ -271,10 +268,9 @@ int AffixMgr::parse_file(const char * affpath)
// step one is to parse the affix file building up the internal
// affix data structures
-
// read in each line ignoring any that do not
// start with a known line type indicator
- while (fgets(line,MAXLNLEN,afflst)) {
+ while ((line = afflst->getline())) {
mychomp(line);
/* remove byte order mark */
@@ -289,7 +285,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the keyboard string */
if (strncmp(line,"KEY",3) == 0) {
if (parse_string(line, &keystring, "KEY")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -297,7 +293,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the try string */
if (strncmp(line,"TRY",3) == 0) {
if (parse_string(line, &trystring, "TRY")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -305,7 +301,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the name of the character set used by the .dict and .aff */
if (strncmp(line,"SET",3) == 0) {
if (parse_string(line, &encoding, "SET")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (strcmp(encoding, "UTF-8") == 0) {
@@ -325,7 +321,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by the controlled compound words */
if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
if (parse_flag(line, &compoundflag, "COMPOUNDFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -334,12 +330,12 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
if (complexprefixes) {
if (parse_flag(line, &compoundend, "COMPOUNDBEGIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
} else {
if (parse_flag(line, &compoundbegin, "COMPOUNDBEGIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -348,7 +344,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound words */
if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
if (parse_flag(line, &compoundmiddle, "COMPOUNDMIDDLE")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -356,12 +352,12 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"COMPOUNDEND",11) == 0) {
if (complexprefixes) {
if (parse_flag(line, &compoundbegin, "COMPOUNDEND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
} else {
if (parse_flag(line, &compoundend, "COMPOUNDEND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -370,7 +366,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the data used by compound_check() method */
if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
if (parse_num(line, &cpdwordmax, "COMPOUNDWORDMAX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -378,7 +374,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag sign compounds in dictionary */
if (strncmp(line,"COMPOUNDROOT",12) == 0) {
if (parse_flag(line, &compoundroot, "COMPOUNDROOT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -386,7 +382,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
if (parse_flag(line, &compoundpermitflag, "COMPOUNDPERMITFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -394,7 +390,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
if (parse_flag(line, &compoundforbidflag, "COMPOUNDFORBIDFLAG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -417,7 +413,7 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"NOSUGGEST",9) == 0) {
if (parse_flag(line, &nosuggest, "NOSUGGEST")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -425,7 +421,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -433,7 +429,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
if (parse_flag(line, &lemma_present, "LEMMA_PRESENT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -441,7 +437,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by circumfixes */
if (strncmp(line,"CIRCUMFIX",9) == 0) {
if (parse_flag(line, &circumfix, "CIRCUMFIX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -449,7 +445,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by fogemorphemes */
if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
if (parse_flag(line, &onlyincompound, "ONLYINCOMPOUND")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -457,7 +453,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by `needaffixs' */
if (strncmp(line,"PSEUDOROOT",10) == 0) {
if (parse_flag(line, &needaffix, "PSEUDOROOT")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -465,7 +461,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by `needaffixs' */
if (strncmp(line,"NEEDAFFIX",9) == 0) {
if (parse_flag(line, &needaffix, "NEEDAFFIX")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -473,7 +469,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the minimal length for words in compounds */
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
if (parse_num(line, &cpdmin, "COMPOUNDMIN")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (cpdmin < 1) cpdmin = 1;
@@ -482,7 +478,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the max. words and syllables in compounds */
if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
if (parse_cpdsyllable(line)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -490,7 +486,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by compound_check() method */
if (strncmp(line,"SYLLABLENUM",11) == 0) {
if (parse_string(line, &cpdsyllablenum, "SYLLABLENUM")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -503,7 +499,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the extra word characters */
if (strncmp(line,"WORDCHARS",9) == 0) {
if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, "WORDCHARS", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -511,7 +507,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the ignored characters (for example, Arabic optional diacretics charachters */
if (strncmp(line,"IGNORE",6) == 0) {
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -519,7 +515,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the typical fault correcting table */
if (strncmp(line,"REP",3) == 0) {
if (parse_reptable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -527,7 +523,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the phonetic translation table */
if (strncmp(line,"PHONE",5) == 0) {
if (parse_phonetable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -535,7 +531,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the checkcompoundpattern table */
if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
if (parse_checkcpdtable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -543,7 +539,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the defcompound table */
if (strncmp(line,"COMPOUNDRULE",12) == 0) {
if (parse_defcpdtable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -551,7 +547,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the related character map table */
if (strncmp(line,"MAP",3) == 0) {
if (parse_maptable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -559,7 +555,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the word breakpoints table */
if (strncmp(line,"BREAK",5) == 0) {
if (parse_breaktable(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -567,7 +563,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the language for language specific codes */
if (strncmp(line,"LANG",4) == 0) {
if (parse_string(line, &lang, "LANG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
langnum = get_lang_num(lang);
@@ -575,14 +571,14 @@ int AffixMgr::parse_file(const char * affpath)
if (strncmp(line,"VERSION",7) == 0) {
if (parse_string(line, &version, "VERSION")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -598,7 +594,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by forbidden words */
if (strncmp(line,"KEEPCASE",8) == 0) {
if (parse_flag(line, &keepcase, "KEEPCASE")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -606,7 +602,7 @@ int AffixMgr::parse_file(const char * affpath)
/* parse in the flag used by the affix generator */
if (strncmp(line,"SUBSTANDARD",11) == 0) {
if (parse_flag(line, &substandard, "SUBSTANDARD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -625,7 +621,7 @@ int AffixMgr::parse_file(const char * affpath)
dupflags_ini = 0;
}
if (parse_affix(line, ft, afflst, dupflags)) {
- fclose(afflst);
+ delete afflst;
process_pfx_tree_to_list();
process_sfx_tree_to_list();
return 1;
@@ -633,7 +629,7 @@ int AffixMgr::parse_file(const char * affpath)
}
}
- fclose(afflst);
+ delete afflst;
// convert affix trees to sorted list
process_pfx_tree_to_list();
@@ -671,6 +667,23 @@ int AffixMgr::parse_file(const char * affpath)
free(enc);
enc = NULL;
+#ifdef WINSHELL
+ char expw[MAXLNLEN];
+ if (wordchars) {
+ strcpy(expw, wordchars);
+ free(wordchars);
+ } else *expw = '\0';
+
+ for (int i = 0; i <= 255; i++) {
+ if ( (csconv[i].cupper != csconv[i].clower) &&
+ (! strchr(expw, (char) i))) {
+ *(expw + strlen(expw) + 1) = '\0';
+ *(expw + strlen(expw)) = (char) i;
+ }
+ }
+
+ wordchars = mystrdup(expw);
+#endif
// temporary BREAK definition for German dash handling (OOo issue 64400)
if ((langnum == LANG_de) && (!breaktable)) {
breaktable = (char **) malloc(sizeof(char *));
@@ -950,6 +963,15 @@ int AffixMgr::process_sfx_order()
return 0;
}
+// add flags to the result for dictionary debugging
+void AffixMgr::debugflag(char * result, unsigned short flag) {
+ char * st = encode_flag(flag);
+ strcat(result, " ");
+ strcat(result, MORPH_FLAG);
+ strcat(result, st);
+ free(st);
+}
+
// calculate the character length of the condition
int AffixMgr::condlen(char * st)
{
@@ -969,7 +991,7 @@ int AffixMgr::condlen(char * st)
int AffixMgr::encodeit(struct affentry * ptr, char * cs)
{
if (strcmp(cs,".") != 0) {
- ptr->numconds = condlen(cs);
+ ptr->numconds = (char) condlen(cs);
strncpy(ptr->c.conds, cs, MAXCONDLEN);
// long condition (end of conds padded by strncpy)
if (ptr->c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
@@ -1313,7 +1335,7 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry **
ok = 1;
btnum[bt - 1]--;
pp = btpp[bt - 1];
- wp = btwp[bt - 1] + btnum[bt - 1];
+ wp = btwp[bt - 1] + (signed short) btnum[bt - 1];
} while ((btnum[bt - 1] < 0) && --bt);
} while (bt);
@@ -2415,14 +2437,14 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
if (((PfxEntry *) ppfx)->getMorph()) {
strcat(result, ((PfxEntry *) ppfx)->getMorph());
strcat(result, " ");
- }
+ } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
}
strcat(result, st);
free(st);
if (se->getMorph()) {
strcat(result, " ");
strcat(result, se->getMorph());
- }
+ } else debugflag(result, se->getFlag());
strcat(result, "\n");
}
}
@@ -2457,7 +2479,7 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
if (sptr->getMorph()) {
strcat(result3, " ");
strcat(result3, sptr->getMorph());
- }
+ } else debugflag(result3, sptr->getFlag());
strlinecat(result2, result3);
strcat(result2, "\n");
strcat(result, result2);
@@ -2517,7 +2539,7 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
if (((PfxEntry *) ppfx)->getMorph()) {
strcat(result, ((PfxEntry *) ppfx)->getMorph());
strcat(result, " ");
- }
+ } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv)) strcat(result, HENTRY_DATA(rv));
if (! HENTRY_FIND(rv, MORPH_STEM)) {
@@ -2529,13 +2551,13 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
if (!complexprefixes && HENTRY_DATA(rv)) {
- strcat(result, " ");
- strcat(result, HENTRY_DATA(rv));
+ strcat(result, " ");
+ strcat(result, HENTRY_DATA(rv));
}
if (se->getMorph()) {
strcat(result, " ");
strcat(result, se->getMorph());
- }
+ } else debugflag(result, se->getFlag());
strcat(result, "\n");
rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
@@ -2575,7 +2597,7 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
if (((PfxEntry *) ppfx)->getMorph()) {
strcat(result, ((PfxEntry *) ppfx)->getMorph());
strcat(result, " ");
- }
+ } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv)) strcat(result, HENTRY_DATA(rv));
if (! HENTRY_FIND(rv, MORPH_STEM)) {
@@ -2603,7 +2625,7 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
if (sptr->getMorph()) {
strcat(result, " ");
strcat(result, sptr->getMorph());
- }
+ } else debugflag(result, sptr->getFlag());
strcat(result, "\n");
rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
@@ -2731,7 +2753,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
if (cmp == 0) {
char * newword = sptr->add(ts, wl);
if (newword) {
- hentry * check = pHMgr->lookup(newword);
+ hentry * check = pHMgr->lookup(newword); // XXX extra dic
if (!check || !check->astr ||
!TESTAFF(check->astr, forbiddenword, check->alen)) {
return newword;
@@ -2767,7 +2789,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
- char * phone)
+ char * phon)
{
int nh=0;
// first add root word to list
@@ -2778,8 +2800,8 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
wlst[nh].orig = NULL;
nh++;
// add special phonetic version
- if (phone && (nh < maxn)) {
- wlst[nh].word = mystrdup(phone);
+ if (phon && (nh < maxn)) {
+ wlst[nh].word = mystrdup(phon);
wlst[nh].allow = (1 == 0);
wlst[nh].orig = mystrdup(ts);
nh++;
@@ -2809,11 +2831,11 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
wlst[nh].orig = NULL;
nh++;
// add special phonetic version
- if (phone && (nh < maxn)) {
+ if (phon && (nh < maxn)) {
char st[MAXWORDUTF8LEN];
- strcpy(st, phone);
+ strcpy(st, phon);
strcat(st, sptr->getKey());
- reverseword(st + strlen(phone));
+ reverseword(st + strlen(phon));
wlst[nh].word = mystrdup(st);
wlst[nh].allow = (1 == 0);
wlst[nh].orig = mystrdup(newword);
@@ -2969,6 +2991,12 @@ int AffixMgr::get_checksharps()
return checksharps;
}
+// Return the string form of affix flag `aflag` by delegating to
+// pHMgr->encode_flag().
+// NOTE(review): pHMgr is dereferenced without a NULL check — confirm the
+// constructor always sets it.
+char * AffixMgr::encode_flag(unsigned short aflag)
+{
+ return pHMgr->encode_flag(aflag);
+}
+
+
// return the preferred ignore string for suggestions
char * AffixMgr::get_ignore()
{
@@ -3097,8 +3125,12 @@ FLAG AffixMgr::get_lemma_present()
// utility method to look up root words in hash table
struct hentry * AffixMgr::lookup(const char * word)
{
- if (! pHMgr) return NULL;
- return pHMgr->lookup(word);
+ // Ask each of the *maxdic hash managers in alldic, in index order, and
+ // stop at the first one that returns an entry; NULL when none does.
+ int i;
+ struct hentry * he = NULL;
+ for (i = 0; i < *maxdic && !he; i++) {
+ he = (alldic[i])->lookup(word);
+ }
+ return he;
}
// return the value of suffix
@@ -3203,7 +3235,7 @@ int AffixMgr::parse_cpdsyllable(char * line)
}
/* parse in the typical fault correcting table */
-int AffixMgr::parse_reptable(char * line, FILE * af)
+int AffixMgr::parse_reptable(char * line, FileMgr * af)
{
if (numrep != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate REP tables used\n");
@@ -3243,9 +3275,9 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
/* now parse the numrep lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numrep; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3283,7 +3315,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
/* parse in the typical fault correcting table */
-int AffixMgr::parse_phonetable(char * line, FILE * af)
+int AffixMgr::parse_phonetable(char * line, FileMgr * af)
{
if (phone) {
HUNSPELL_WARNING(stderr, "error: duplicate PHONE tables used\n");
@@ -3302,7 +3334,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
phone = (phonetable *) malloc(sizeof(struct phonetable));
phone->num = atoi(piece);
phone->rules = NULL;
- phone->utf8 = utf8;
+ phone->utf8 = (char) utf8;
if (!phone) return 1;
if (phone->num < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in phonelacement table\n");
@@ -3327,9 +3359,9 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
/* now parse the phone->num lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < phone->num; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3370,7 +3402,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
/* parse in the checkcompoundpattern table */
-int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
+int AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
{
if (numcheckcpd != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate compound pattern tables used\n");
@@ -3410,9 +3442,9 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
/* now parse the numcheckcpd lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numcheckcpd; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3450,7 +3482,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
/* parse in the compound rule table */
-int AffixMgr::parse_defcpdtable(char * line, FILE * af)
+int AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
{
if (numdefcpd != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate compound rule tables used\n");
@@ -3490,9 +3522,9 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
}
/* now parse the numdefcpd lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numdefcpd; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3533,7 +3565,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
/* parse in the character map table */
-int AffixMgr::parse_maptable(char * line, FILE * af)
+int AffixMgr::parse_maptable(char * line, FileMgr * af)
{
if (nummap != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate MAP tables used\n");
@@ -3573,9 +3605,9 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
/* now parse the nummap lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < nummap; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3630,7 +3662,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
/* parse in the word breakpoint table */
-int AffixMgr::parse_breaktable(char * line, FILE * af)
+int AffixMgr::parse_breaktable(char * line, FileMgr * af)
{
if (numbreak != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate word breakpoint tables used\n");
@@ -3670,9 +3702,9 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
}
/* now parse the numbreak lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numbreak; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3734,7 +3766,7 @@ void AffixMgr::reverse_condition(char * piece) {
}
}
-int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags)
+int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
{
int numents = 0; // number of affentry structures to parse
@@ -3745,7 +3777,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
struct affentry * nptr= NULL;
char * tp = line;
- char * nl = line;
+ char * nl = NULL;
char * piece;
int i = 0;
@@ -3773,7 +3805,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
HUNSPELL_WARNING(stderr, "error: duplicate affix flag %s in line %s\n", piece, nl);
// return 1; XXX permissive mode for bad dictionaries
}
- dupflags[aflag] += ((at == 'S') ? dupSFX : dupPFX);
+ dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
break;
}
// piece 3 - is cross product indicator
@@ -3820,7 +3852,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
// now parse numents affentries for this affix
for (int j=0; j < numents; j++) {
- if (!fgets(nl,MAXLNLEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -3835,7 +3867,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
case 0: {
np++;
if (nptr != ptr) nptr->opts = ptr->opts &
- (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
+ (char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
break;
}
@@ -3898,6 +3930,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
if (pHMgr->is_aliasf()) {
int index = atoi(dash + 1);
nptr->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(nptr->contclass));
+ if (!nptr->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
} else {
nptr->contclasslen = (unsigned short) pHMgr->decode_flags(&(nptr->contclass), dash + 1);
flag_qsort(nptr->contclass, 0, nptr->contclasslen);
diff --git a/src/myspell/affixmgr.hxx b/src/myspell/affixmgr.hxx
index 644d2c9..f045b87 100644
--- a/src/myspell/affixmgr.hxx
+++ b/src/myspell/affixmgr.hxx
@@ -27,6 +27,8 @@ class AffixMgr
AffEntry * pFlag[CONTSIZE];
AffEntry * sFlag[CONTSIZE];
HashMgr * pHMgr;
+ HashMgr ** alldic;
+ int * maxdic;
char * keystring;
char * trystring;
char * encoding;
@@ -96,8 +98,9 @@ class AffixMgr
flag flag_mode;
public:
-
- AffixMgr(const char * affpath, HashMgr * ptr);
+
+ AffixMgr(const char * affpath, HashMgr** ptr, int * md,
+ const char * key = NULL);
~AffixMgr();
struct hentry * affix_check(const char * word, int len,
const unsigned short needflag = (unsigned short) 0,
@@ -150,7 +153,7 @@ public:
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char ** result, char * partresult);
- struct hentry * lookup(const char * word);
+ struct hentry * lookup(const char * word);
int get_numrep();
struct replentry * get_reptable();
struct phonetable * get_phonetable();
@@ -171,7 +174,6 @@ public:
FLAG get_compoundbegin();
FLAG get_forbiddenword();
FLAG get_nosuggest();
-// FLAG get_circumfix();
FLAG get_needaffix();
FLAG get_onlyincompound();
FLAG get_compoundroot();
@@ -191,21 +193,23 @@ public:
int get_sugswithdots(void);
FLAG get_keepcase(void);
int get_checksharps(void);
+ char * encode_flag(unsigned short aflag);
private:
- int parse_file(const char * affpath);
+ int parse_file(const char * affpath, const char * key);
int parse_flag(char * line, unsigned short * out, const char * name);
int parse_num(char * line, int * out, const char * name);
int parse_cpdsyllable(char * line);
- int parse_reptable(char * line, FILE * af);
- int parse_phonetable(char * line, FILE * af);
- int parse_maptable(char * line, FILE * af);
- int parse_breaktable(char * line, FILE * af);
- int parse_checkcpdtable(char * line, FILE * af);
- int parse_defcpdtable(char * line, FILE * af);
- int parse_affix(char * line, const char at, FILE * af, char * dupflags);
+ int parse_reptable(char * line, FileMgr * af);
+ int parse_phonetable(char * line, FileMgr * af);
+ int parse_maptable(char * line, FileMgr * af);
+ int parse_breaktable(char * line, FileMgr * af);
+ int parse_checkcpdtable(char * line, FileMgr * af);
+ int parse_defcpdtable(char * line, FileMgr * af);
+ int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
void reverse_condition(char *);
+ void debugflag(char * result, unsigned short flag);
int condlen(char *);
int encodeit(struct affentry * ptr, char * cs);
int build_pfxtree(AffEntry* pfxptr);
diff --git a/src/myspell/atypes.hxx b/src/myspell/atypes.hxx
index 0afb345..0d4db14 100644
--- a/src/myspell/atypes.hxx
+++ b/src/myspell/atypes.hxx
@@ -13,8 +13,8 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
// HUNSTEM def.
#define HUNSTEM
-#include "csutil.hxx"
#include "hashmgr.hxx"
+#include "w_char.hxx"
#define SETSIZE 256
#define CONTSIZE 65536
@@ -70,6 +70,12 @@ struct affentry
char * morphcode;
};
+// One entry of the word list filled in by AffixMgr::expand_rootword().
+struct guessword {
+ char * word; // candidate string (expand_rootword stores mystrdup'ed copies)
+ bool allow; // expand_rootword sets this to false for the phonetic variants
+ char * orig; // NULL, or a copy of the word the phonetic variant was made from
+};
+
struct mapentry {
char * set;
w_char * set_utf16;
@@ -81,10 +87,4 @@ struct flagentry {
int len;
};
-struct guessword {
- char * word;
- bool allow;
- char * orig;
-};
-
#endif
diff --git a/src/myspell/csutil.cxx b/src/myspell/csutil.cxx
index da9ff9f..e282754 100644
--- a/src/myspell/csutil.cxx
+++ b/src/myspell/csutil.cxx
@@ -45,7 +45,7 @@ static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
using namespace std;
#endif
#else
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
@@ -55,8 +55,8 @@ static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instan
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
- char * u8 = dest;
- char * u8_max = u8 + size;
+ signed char * u8 = (signed char *)dest;
+ signed char * u8_max = (signed char *)(u8 + size);
const w_char * u2 = src;
const w_char * u2_max = src + srclen;
while ((u2 < u2_max) && (u8 < u8_max)) {
@@ -103,7 +103,7 @@ char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
/* only UTF-16 (BMP) implementation */
int u8_u16(w_char * dest, int size, const char * src) {
- const char * u8 = src;
+ const signed char * u8 = (const signed char *)src;
w_char * u2 = dest;
w_char * u2_max = u2 + size;
@@ -125,7 +125,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
case 0x90:
case 0xa0:
case 0xb0: {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
break;
@@ -137,7 +137,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -151,12 +151,12 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -350,9 +350,10 @@ char * line_uniq_app(char ** text, char breakchar) {
}
char ** lines;
+ int i;
int linenum = line_tok(*text, &lines, breakchar);
int dup = 0;
- for (int i = 0; i < linenum; i++) {
+ for (i = 0; i < linenum; i++) {
for (int j = 0; j < (i - 1); j++) {
if (strcmp(lines[i], lines[j]) == 0) {
*(lines[i]) = '\0';
@@ -375,7 +376,7 @@ char * line_uniq_app(char ** text, char breakchar) {
return *text;
}
strcpy(*text," ( ");
- for (int i = 0; i < linenum; i++) if (*(lines[i])) {
+ for (i = 0; i < linenum; i++) if (*(lines[i])) {
sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
}
(*text)[strlen(*text) - 2] = ')'; // " ) "
@@ -415,8 +416,8 @@ char * tr(char * text, char oldc, char newc) {
// otherwise return -1
int morphcmp(const char * s, const char * t)
{
- int se;
- int te;
+ int se = 0;
+ int te = 0;
const char * sl;
const char * tl;
const char * olds;
@@ -579,8 +580,9 @@ char * mystrrep(char * word, const char * pat, const char * rep) {
}
int uniqlist(char ** list, int n) {
+ int i;
if (n < 2) return n;
- for (int i = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
for (int j = 0; j < i; j++) {
if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) {
free(list[i]);
@@ -590,7 +592,7 @@ char * mystrrep(char * word, const char * pat, const char * rep) {
}
}
int m = 1;
- for (int i = 1; i < n; i++) if (list[i]) {
+ for (i = 1; i < n; i++) if (list[i]) {
list[m] = list[i];
m++;
}
@@ -681,6 +683,20 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
}
+ // Write the pointer value `source` itself (not the characters it points
+ // to) into the bytes starting at `dest`, using memcpy.
+ void store_pointer(char * dest, char * source)
+ {
+     memcpy(dest, &source, sizeof source);
+ }
+
+ // Read back a pointer value from the bytes starting at `s` (the inverse of
+ // store_pointer): copies sizeof(char *) bytes into a pointer and returns it.
+ char * get_stored_pointer(char * s)
+ {
+     char * stored = NULL;
+     memcpy(&stored, s, sizeof stored);
+     return stored;
+ }
+
// these are simple character mappings for the
// encodings supported
// supplying isupper, tolower, and toupper
@@ -941,7 +957,7 @@ struct cs_info iso1_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1201,7 +1217,7 @@ struct cs_info iso2_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
@@ -1461,7 +1477,7 @@ struct cs_info iso3_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso4_tbl[] = {
@@ -1720,7 +1736,7 @@ struct cs_info iso4_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso5_tbl[] = {
@@ -1979,7 +1995,7 @@ struct cs_info iso5_tbl[] = {
{ 0x00, 0xfc, 0xac },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xae },
-{ 0x00, 0xff, 0xaf },
+{ 0x00, 0xff, 0xaf }
};
struct cs_info iso6_tbl[] = {
@@ -2238,7 +2254,7 @@ struct cs_info iso6_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso7_tbl[] = {
@@ -2497,7 +2513,7 @@ struct cs_info iso7_tbl[] = {
{ 0x00, 0xfc, 0xbc },
{ 0x00, 0xfd, 0xbe },
{ 0x00, 0xfe, 0xbf },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso8_tbl[] = {
@@ -2756,7 +2772,7 @@ struct cs_info iso8_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso9_tbl[] = {
@@ -3015,7 +3031,7 @@ struct cs_info iso9_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0x49 },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso10_tbl[] = {
@@ -3274,7 +3290,7 @@ struct cs_info iso10_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info koi8r_tbl[] = {
@@ -3533,7 +3549,7 @@ struct cs_info koi8r_tbl[] = {
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info koi8u_tbl[] = {
@@ -3792,7 +3808,7 @@ struct cs_info koi8u_tbl[] = {
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
-{ 0x01, 0xdf, 0xff },
+{ 0x01, 0xdf, 0xff }
};
struct cs_info cp1251_tbl[] = {
@@ -4051,7 +4067,7 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xdf },
+{ 0x00, 0xff, 0xdf }
};
struct cs_info iso13_tbl[] = {
@@ -4310,7 +4326,7 @@ struct cs_info iso13_tbl[] = {
{ 0x00, 0xFC, 0xDC },
{ 0x00, 0xFD, 0xDD },
{ 0x00, 0xFE, 0xDE },
-{ 0x00, 0xFF, 0xFF },
+{ 0x00, 0xFF, 0xFF }
};
@@ -4570,7 +4586,7 @@ struct cs_info iso14_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
struct cs_info iso15_tbl[] = {
@@ -4829,7 +4845,7 @@ struct cs_info iso15_tbl[] = {
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
-{ 0x00, 0xff, 0xbe },
+{ 0x00, 0xff, 0xbe }
};
struct cs_info iscii_devanagari_tbl[] = {
@@ -5088,10 +5104,10 @@ struct cs_info iscii_devanagari_tbl[] = {
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xff, 0xff }
};
-struct enc_entry encds[] = {
+static struct enc_entry encds[] = {
{"ISO8859-1",iso1_tbl},
{"ISO8859-2",iso2_tbl},
{"ISO8859-3",iso3_tbl},
@@ -5108,7 +5124,7 @@ struct enc_entry encds[] = {
{"ISO8859-13", iso13_tbl},
{"ISO8859-14", iso14_tbl},
{"ISO8859-15", iso15_tbl},
-{"ISCII-DEVANAGARI", iscii_devanagari_tbl},
+{"ISCII-DEVANAGARI", iscii_devanagari_tbl}
};
struct cs_info * get_current_cs(const char * es) {
@@ -5117,6 +5133,7 @@ struct cs_info * get_current_cs(const char * es) {
for (int i = 0; i < n; i++) {
if (strcmp(es,encds[i].enc_name) == 0) {
ccs = encds[i].cs_table;
+ break;
}
}
return ccs;
@@ -5362,14 +5379,14 @@ int get_captype(char * word, int nl, cs_info * csconv) {
int ncap = 0;
int nneutral = 0;
int firstcap = 0;
-
- for (char * q = word; *q != '\0'; q++) {
- if (csconv[*((unsigned char *)q)].ccase) ncap++;
- if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;
- }
- if (ncap) {
- firstcap = csconv[*((unsigned char *) word)].ccase;
- }
+ if (csconv == NULL) return NOCAP;
+ for (char * q = word; *q != '\0'; q++) {
+ if (csconv[*((unsigned char *)q)].ccase) ncap++;
+ if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++;
+ }
+ if (ncap) {
+ firstcap = csconv[*((unsigned char *) word)].ccase;
+ }
// now finally set the captype
if (ncap == 0) {
diff --git a/src/myspell/csutil.hxx b/src/myspell/csutil.hxx
index df7979b..2a16538 100644
--- a/src/myspell/csutil.hxx
+++ b/src/myspell/csutil.hxx
@@ -3,6 +3,8 @@
// First some base level utility routines
+#include "w_char.hxx"
+
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
@@ -23,6 +25,7 @@
#define MORPH_PHON "ph:"
#define MORPH_HYPH "hy:"
#define MORPH_PART "pa:"
+#define MORPH_FLAG "fl:"
#define MORPH_HENTRY "_H:"
#define MORPH_TAG_LEN strlen(MORPH_STEM)
@@ -30,16 +33,15 @@
#define MSEP_REC '\n'
#define MSEP_ALT '\v'
-
// default flags
#define DEFAULTFLAGS 65510
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
-typedef struct {
- unsigned char l;
- unsigned char h;
-} w_char;
+// hash entry macros
+// HENTRY_DATA(h): NULL when h->var is 0; otherwise the data located
+// h->blen + 1 bytes after &h->word — read through get_stored_pointer()
+// when H_OPT_ALIASM is set in h->var, or used directly as a char pointer.
+// HENTRY_FIND(h,p): strstr() for p inside HENTRY_DATA(h), or NULL.
+// Arguments are parenthesised so that any expression may be passed; note
+// that `h` is still evaluated more than once, so it must be side-effect free.
+#define HENTRY_DATA(h) ((h)->var ? (((h)->var & H_OPT_ALIASM) ? \
+    get_stored_pointer(&((h)->word) + (h)->blen + 1) : &((h)->word) + (h)->blen + 1) : NULL)
+#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), (p)) : NULL)
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
@@ -104,12 +106,6 @@ struct cs_info {
unsigned char cupper;
};
-// two character arrays
-struct replentry {
- char * pattern;
- char * pattern2;
-};
-
// Unicode character encoding information
struct unicode_info {
unsigned short c;
@@ -200,4 +196,10 @@ int morphcmp(const char * s, const char * t);
int get_sfxcount(const char * morph);
+// conversion function for protected memory
+void store_pointer(char * dest, char * source);
+
+// conversion function for protected memory
+char * get_stored_pointer(char * s);
+
#endif
diff --git a/src/myspell/filemgr.cxx b/src/myspell/filemgr.cxx
new file mode 100644
index 0000000..165fc77
--- /dev/null
+++ b/src/myspell/filemgr.cxx
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "filemgr.hxx"
+
+// Report an error on stderr. `err` is handed to fprintf() as the format
+// string and `par` as its single argument; always returns -1.
+int FileMgr::fail(const char * err, const char * par) {
+ fprintf(stderr, err, par);
+ return -1;
+}
+
+// Open `file` for line-wise reading. The plain file is tried first with
+// fopen(); if that fails, a Hunzip reader is created for the name
+// `file` + HZIP_EXTENSION, passing `key` through to it. When neither
+// source is available, fail(MSG_OPEN, file) is called.
+FileMgr::FileMgr(const char * file, const char * key) {
+    hin = NULL;
+    fin = fopen(file, "r");
+    if (!fin) {
+        // check hzipped file
+        // The extra byte holds the terminating NUL appended by strcat();
+        // without it the concatenation writes one byte past the buffer.
+        char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
+        if (st) {
+            strcpy(st, file);
+            strcat(st, HZIP_EXTENSION);
+            hin = new Hunzip(st, key);
+            // NOTE(review): st is never freed. It can be released right here
+            // if Hunzip keeps its own copy of the name — confirm against
+            // hunzip.cxx before adding free(st).
+        }
+    }
+    if (!fin && !hin) fail(MSG_OPEN, file);
+}
+
+// Release whichever input source the constructor set up.
+FileMgr::~FileMgr()
+{
+    if (fin != NULL) fclose(fin);
+    delete hin;  // deleting a null pointer is a no-op
+}
+
+// Return the next line in the member buffer `in`, or NULL when no further
+// line is available. The plain file is used when it is open; otherwise the
+// line delivered by the Hunzip reader is copied into `in`.
+char * FileMgr::getline() {
+    if (fin) return fgets(in, BUFSIZE - 1, fin);
+    if (!hin) return NULL;
+    const char * unpacked = hin->getline();
+    return unpacked ? strcpy(in, unpacked) : NULL;
+}
diff --git a/src/myspell/filemgr.hxx b/src/myspell/filemgr.hxx
new file mode 100644
index 0000000..593228d
--- /dev/null
+++ b/src/myspell/filemgr.hxx
@@ -0,0 +1,19 @@
+/* file manager class - read lines of files [filename] OR [filename.hz] */
+#ifndef _FILEMGR_HXX_
+#define _FILEMGR_HXX_
+#include "hunzip.hxx"
+
+// Line reader over either a plain file or its hzip-compressed counterpart
+// (see the constructor in filemgr.cxx: fopen() first, Hunzip as fallback).
+// NOTE(review): the class holds fin/hin and closes/deletes them in the
+// destructor but does not disable copying — confirm instances are never
+// copied.
+class FileMgr
+{
+protected:
+ // stream returned by fopen() for the plain file, or NULL
+ FILE * fin;
+ // Hunzip reader created only when fopen() failed, otherwise NULL
+ Hunzip * hin;
+ char in[BUFSIZE + 50]; // input buffer
+ // prints `err` (used as a printf format) with `par` to stderr, returns -1
+ int fail(const char * err, const char * par);
+
+public:
+ FileMgr(const char * filename, const char * key = NULL);
+ ~FileMgr();
+ // next line, stored in `in` (overwritten by the following call), or NULL
+ char * getline();
+};
+#endif
diff --git a/src/myspell/hashmgr.cxx b/src/myspell/hashmgr.cxx
index 08e061c..a1ca329 100644
--- a/src/myspell/hashmgr.cxx
+++ b/src/myspell/hashmgr.cxx
@@ -22,14 +22,14 @@
using namespace std;
#endif
#else
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
// build a hash table from a munched word list
-HashMgr::HashMgr(const char * tpath, const char * apath)
+HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
{
tablesize = 0;
tableptr = NULL;
@@ -48,8 +48,8 @@ HashMgr::HashMgr(const char * tpath, const char * apath)
numaliasm = 0;
aliasm = NULL;
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
- load_config(apath);
- int ec = load_tables(tpath);
+ load_config(apath, key);
+ int ec = load_tables(tpath, key);
if (ec) {
/* error condition - what should we do here */
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
@@ -129,7 +129,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
{
bool upcasehomonym = false;
- int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
+ int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
struct hentry* hp =
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
@@ -161,7 +161,8 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
hp->var = H_OPT;
if (aliasm) {
hp->var += H_OPT_ALIASM;
- *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc));
+// *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc));
+ store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
} else {
strcpy(hpw + wbl + 1, desc);
if (complexprefixes) {
@@ -236,12 +237,12 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
flags2[al] = ONLYUPCASEFLAG;
if (utf8) {
- char st[MAXDELEN];
- w_char w[MAXDELEN];
- int wlen = u8_u16(w, MAXDELEN, word);
+ char st[BUFSIZE];
+ w_char w[BUFSIZE];
+ int wlen = u8_u16(w, BUFSIZE, word);
mkallsmall_utf(w, wlen, langnum);
mkallcap_utf(w, 1, langnum);
- u16_u8(st, MAXDELEN, w, wlen);
+ u16_u8(st, BUFSIZE, w, wlen);
return add_word(st,wbl,wcl,flags2,al+1,dp, true);
} else {
mkallsmall(word, csconv);
@@ -256,8 +257,8 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
int len;
if (utf8) {
- w_char dest_utf[MAXDELEN];
- len = u8_u16(dest_utf, MAXDELEN, word);
+ w_char dest_utf[BUFSIZE];
+ len = u8_u16(dest_utf, BUFSIZE, word);
*captype = get_captype_utf8(dest_utf, len, langnum);
} else {
len = wbl;
@@ -266,27 +267,47 @@ int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
return len;
}
-// remove word with FORBIDDENWORD flag (not implemented)
+// remove word (personal dictionary function for standalone applications)
+// Every homonym of `word` that does not already carry forbiddenword gets a
+// new flag vector: the old flags plus forbiddenword, passed through
+// flag_qsort(). Returns 0 when the walk completes (also when the word is
+// not in the table) and 1 when a flag vector cannot be allocated.
int HashMgr::remove(const char * word)
{
- struct hentry * dp = lookup(word);
-/*
- if (!word || (!dp->astr || !TESTAFF(dp->astr, forbiddenword, pt->alen))) {
- int wbl = strlen(word);
- int wcl = get_clen_and_captype(word, wbl, &captype);
- if (aliasf) {
- add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
- } else {
- unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
- if (flags) {
- memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
- add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
- } else return 1;
+ struct hentry * dp = lookup(word);
+ while (dp) {
+ if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+ // room for the existing flags plus the forbidden flag; sized by the
+ // element type (unsigned short), not by a pointer type
+ // NOTE(review): the previous dp->astr is not released here — confirm
+ // whether it may be shared (flag aliases) before freeing it.
+ unsigned short * flags =
+ (unsigned short *) malloc(sizeof(unsigned short) * (dp->alen + 1));
+ if (!flags) return 1;
+ for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
+ flags[dp->alen] = forbiddenword;
+ dp->astr = flags;
+ dp->alen++;
+ flag_qsort(flags, 0, dp->alen);
}
- return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
+ dp = dp->next_homonym;
}
-*/
- return 1;
+ // a value-returning function must not fall off its end (undefined
+ // behaviour); 0 reports success, matching remove_forbidden_flag()
+ return 0;
+}
+
+/* remove forbidden flag to add a personal word to the hash */
+// Returns 1 when `word` is not in the table or a flag vector cannot be
+// allocated, 0 otherwise. Each homonym carrying forbiddenword gets a new
+// flag vector without it (or simply alen = 0 when it was the only flag).
+int HashMgr::remove_forbidden_flag(const char * word) {
+    struct hentry * dp = lookup(word);
+    if (!dp) return 1;
+    while (dp) {
+        if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+            if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
+            else {
+                // one slot fewer than before; sized by the element type
+                // (unsigned short), not by a pointer type
+                // NOTE(review): the previous dp->astr is not released here —
+                // confirm whether it may be shared before freeing it.
+                unsigned short * flags2 =
+                    (unsigned short *) malloc(sizeof(unsigned short) * (dp->alen - 1));
+                if (!flags2) return 1;
+                int i, j = 0;
+                for (i = 0; i < dp->alen; i++) {
+                    if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
+                }
+                dp->alen--;
+                dp->astr = flags2; // XXX allowed forbidden words
+            }
+        }
+        dp = dp->next_homonym;
+    }
+    return 0;
}
// add a custom dic. word to the hash table (public)
@@ -301,17 +322,21 @@ int HashMgr::add(const char * word, char * aff)
flags = NULL;
}
- int captype;
- int wbl = strlen(word);
- int wcl = get_clen_and_captype(word, wbl, &captype);
- add_word(word, wbl, wcl, flags, al, NULL, false);
- return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
+ if (remove_forbidden_flag(word)) {
+ int captype;
+ int wbl = strlen(word);
+ int wcl = get_clen_and_captype(word, wbl, &captype);
+ add_word(word, wbl, wcl, flags, al, NULL, false);
+ return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
+ }
+ return 0;
}
int HashMgr::add_with_affix(const char * word, const char * example)
{
// detect captype and modify word length for UTF-8 encoding
struct hentry * dp = lookup(example);
+ remove_forbidden_flag(word);
if (dp && dp->astr) {
int captype;
int wbl = strlen(word);
@@ -344,22 +369,22 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
}
// load a munched word list and build a hash table on the fly
-int HashMgr::load_tables(const char * tpath)
+int HashMgr::load_tables(const char * tpath, const char * key)
{
int al;
char * ap;
char * dp;
unsigned short * flags;
+ char * ts;
- // raw dictionary - munched file
- FILE * rawdict = fopen(tpath, "r");
- if (rawdict == NULL) return 1;
+ // open dictionary file
+ FileMgr * dict = new FileMgr(tpath, key);
+ if (dict == NULL) return 1;
// first read the first line of file to get hash table size */
- char ts[MAXDELEN];
- if (! fgets(ts, MAXDELEN-1,rawdict)) {
+ if (!(ts = dict->getline())) {
HUNSPELL_WARNING(stderr, "error: empty dic file\n");
- fclose(rawdict);
+ delete dict;
return 2;
}
mychomp(ts);
@@ -373,7 +398,7 @@ int HashMgr::load_tables(const char * tpath)
if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n");
tablesize = atoi(ts);
if (!tablesize) {
- fclose(rawdict);
+ delete dict;
return 4;
}
tablesize = tablesize + 5 + USERWORD;
@@ -382,7 +407,7 @@ int HashMgr::load_tables(const char * tpath)
// allocate the hash table
tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
if (! tableptr) {
- fclose(rawdict);
+ delete dict;
return 3;
}
for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
@@ -390,7 +415,7 @@ int HashMgr::load_tables(const char * tpath)
// loop through all words on much list and add to hash
// table and create word and affix strings
- while (fgets(ts,MAXDELEN-1,rawdict)) {
+ while ((ts = dict->getline())) {
mychomp(ts);
// split each line into word and morphological description
dp = strchr(ts,'\t');
@@ -443,16 +468,15 @@ int HashMgr::load_tables(const char * tpath)
// add the word and its index plus its capitalized form optionally
if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
- fclose(rawdict);
+ delete dict;
return 5;
}
}
- fclose(rawdict);
+ delete dict;
return 0;
}
-
// the hash function is a simple load and rotate
// algorithm borrowed
@@ -506,8 +530,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) {
break;
}
case FLAG_UNI: { // UTF-8 characters
- w_char w[MAXDELEN/2];
- len = u8_u16(w, MAXDELEN/2, flags);
+ w_char w[BUFSIZE/2];
+ len = u8_u16(w, BUFSIZE/2, flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
memcpy(*result, w, len * sizeof(short));
@@ -566,16 +590,13 @@ char * HashMgr::encode_flag(unsigned short f) {
}
// read in aff file and set flag mode
-int HashMgr::load_config(const char * affpath)
+int HashMgr::load_config(const char * affpath, const char * key)
{
+ char * line; // io buffers
int firstline = 1;
-
- // io buffers
- char line[MAXDELEN+1];
// open the affix file
- FILE * afflst;
- afflst = fopen(affpath,"r");
+ FileMgr * afflst = new FileMgr(affpath, key);
if (!afflst) {
HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
return 1;
@@ -584,7 +605,7 @@ int HashMgr::load_config(const char * affpath)
// read in each line ignoring any that do not
// start with a known line type indicator
- while (fgets(line,MAXDELEN,afflst)) {
+ while ((line = afflst->getline())) {
mychomp(line);
/* remove byte order mark */
@@ -608,7 +629,7 @@ int HashMgr::load_config(const char * affpath)
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
char * st = NULL;
if (parse_string(line, &st, "FORBIDDENWORD")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
forbiddenword = decode_flag(st);
@@ -616,7 +637,7 @@ int HashMgr::load_config(const char * affpath)
}
if (strncmp(line, "SET", 3) == 0) {
if (parse_string(line, &enc, "SET")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
if (strcmp(enc, "UTF-8") == 0) {
@@ -630,7 +651,7 @@ int HashMgr::load_config(const char * affpath)
}
if (strncmp(line, "LANG", 4) == 0) {
if (parse_string(line, &lang, "LANG")) {
- fclose(afflst);
+ delete afflst;
return 1;
}
langnum = get_lang_num(lang);
@@ -639,21 +660,21 @@ int HashMgr::load_config(const char * affpath)
/* parse in the ignored characters (for example, Arabic optional diacritics characters */
if (strncmp(line,"IGNORE",6) == 0) {
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
if (parse_aliasf(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
if (parse_aliasm(line, afflst)) {
- fclose(afflst);
+ delete afflst;
return 1;
}
}
@@ -662,12 +683,12 @@ int HashMgr::load_config(const char * affpath)
if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
}
if (csconv == NULL) csconv = get_current_cs("ISO8859-1");
- fclose(afflst);
+ delete afflst;
return 0;
}
/* parse in the ALIAS table */
-int HashMgr::parse_aliasf(char * line, FILE * af)
+int HashMgr::parse_aliasf(char * line, FileMgr * af)
{
if (numaliasf != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n");
@@ -723,9 +744,9 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
}
/* now parse the numaliasf lines to read in the remainder of the table */
- char * nl = line;
+ char * nl;
for (int j=0; j < numaliasf; j++) {
- if (!fgets(nl,MAXDELEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
@@ -788,7 +809,7 @@ int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
}
/* parse morph alias definitions */
-int HashMgr::parse_aliasm(char * line, FILE * af)
+int HashMgr::parse_aliasm(char * line, FileMgr * af)
{
if (numaliasm != 0) {
HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n");
@@ -836,7 +857,7 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
/* now parse the numaliasm lines to read in the remainder of the table */
char * nl = line;
for (int j=0; j < numaliasm; j++) {
- if (!fgets(nl,MAXDELEN,af)) return 1;
+ if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
diff --git a/src/myspell/hashmgr.hxx b/src/myspell/hashmgr.hxx
index d88de48..ce23f71 100644
--- a/src/myspell/hashmgr.hxx
+++ b/src/myspell/hashmgr.hxx
@@ -8,6 +8,7 @@
#endif
#include "htypes.hxx"
+#include "filemgr.hxx"
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
@@ -35,7 +36,7 @@ class HashMgr
public:
- HashMgr(const char * tpath, const char * apath);
+ HashMgr(const char * tpath, const char * apath, const char * key = NULL);
~HashMgr();
struct hentry * lookup(const char *) const;
@@ -55,14 +56,15 @@ public:
private:
int get_clen_and_captype(const char * word, int wbl, int * captype);
- int load_tables(const char * tpath);
+ int load_tables(const char * tpath, const char * key);
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
int al, const char * desc, bool onlyupcase);
- int load_config(const char * affpath);
- int parse_aliasf(char * line, FILE * af);
+ int load_config(const char * affpath, const char * key);
+ int parse_aliasf(char * line, FileMgr * af);
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
unsigned short * flags, int al, char * dp, int captype);
- int parse_aliasm(char * line, FILE * af);
+ int parse_aliasm(char * line, FileMgr * af);
+ int remove_forbidden_flag(const char * word);
};
diff --git a/src/myspell/htypes.hxx b/src/myspell/htypes.hxx
index bc078c3..80647f9 100644
--- a/src/myspell/htypes.hxx
+++ b/src/myspell/htypes.hxx
@@ -1,8 +1,6 @@
#ifndef _HTYPES_HXX_
#define _HTYPES_HXX_
-#define MAXDELEN 8192
-
#define ROTATE_LEN 5
#define ROTATE(v,q) \
@@ -13,10 +11,8 @@
#define H_OPT_ALIASM (1 << 1)
#define H_OPT_PHON (1 << 2)
+// see also csutil.hxx
#define HENTRY_WORD(h) &(h->word)
-#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
- *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL)
-#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
// approx. number of user defined words
#define USERWORD 1000
diff --git a/src/myspell/hunspell.cxx b/src/myspell/hunspell.cxx
index 7075c36..e74b34a 100644
--- a/src/myspell/hunspell.cxx
+++ b/src/myspell/hunspell.cxx
@@ -13,30 +13,31 @@
#include "hunspell.hxx"
#include "hunspell.h"
-
-#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif
+#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
-Hunspell::Hunspell(const char * affpath, const char * dpath)
+Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
{
encoding = NULL;
csconv = NULL;
utf8 = 0;
complexprefixes = 0;
+ affixpath = mystrdup(affpath);
+ maxdic = 0;
/* first set up the hash manager */
- pHMgr = new HashMgr(dpath, affpath);
+ pHMgr[0] = new HashMgr(dpath, affpath, key);
+ if (pHMgr[0]) maxdic = 1;
/* next set up the affix manager */
/* it needs access to the hash manager lookup methods */
- pAMgr = new AffixMgr(affpath,pHMgr);
+ pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
/* get the preferred try string and the dictionary */
/* encoding from the Affix Manager for that dictionary */
@@ -51,25 +52,33 @@ Hunspell::Hunspell(const char * affpath, const char * dpath)
/* and finally set up the suggestion manager */
pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
if (try_string) free(try_string);
-
}
Hunspell::~Hunspell()
{
if (pSMgr) delete pSMgr;
if (pAMgr) delete pAMgr;
- if (pHMgr) delete pHMgr;
+ for (int i = 0; i < maxdic; i++) delete pHMgr[i];
+ maxdic = 0;
pSMgr = NULL;
pAMgr = NULL;
- pHMgr = NULL;
#ifdef MOZILLA_CLIENT
free(csconv);
#endif
csconv= NULL;
if (encoding) free(encoding);
encoding = NULL;
+ if (affixpath) free(affixpath);
+ affixpath = NULL;
}
+// Load one more dictionary (.dic) file; it is read with the affix file that
+// was given to the constructor.
+// Returns 0 when the dictionary was appended, 1 when all MAXDIC slots are
+// already used or no HashMgr object was obtained.
+int Hunspell::add_dic(const char * dpath, const char * key) {
+    if (maxdic == MAXDIC) return 1;
+    HashMgr * extra = new HashMgr(dpath, affixpath, key);
+    pHMgr[maxdic] = extra;
+    if (!extra) return 1;
+    maxdic++;
+    return 0;
+}
// make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording
@@ -337,7 +346,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
int abbv = 0;
int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
int info2 = 0;
- if (wl == 0) return 1;
+ if (wl == 0 || maxdic == 0) return 1;
if (root) *root = NULL;
// allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
@@ -359,7 +368,8 @@ int Hunspell::spell(const char * word, int * info, char ** root)
// LANG_hu section: number(s) + (percent or degree) with suffixes
if (langnum == LANG_hu) {
if ((nstate == NNUM) && ((cw[i] == '%') || ((!utf8 && (cw[i] == '\xB0')) ||
- (utf8 && (strncmp(cw + i, "\xC2\xB0", 2)==0))))
+ (utf8 && (strncmp(cw + i, "\xC2\xB0", 2)==0 || // UTF-8 degree
+ strncmp(cw + i, "\xE2\x80\xB0", 3)==0)))) // UTF-8 per mille
&& checkword(cw + i, info, root)) return 1;
}
// END of LANG_hu section
@@ -562,7 +572,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
{
struct hentry * he = NULL;
- int len;
+ int len, i;
char w2[MAXWORDUTF8LEN];
const char * word;
@@ -589,7 +599,8 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
}
// look word in hash table
- if (pHMgr) he = pHMgr->lookup(word);
+ for (i = 0; (i < maxdic) && !he; i ++) {
+ he = (pHMgr[i])->lookup(word);
// check forbidden and onlyincompound words
if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
@@ -610,6 +621,7 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
(pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
(info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
)) he = he->next_homonym;
+ }
// check with affixes
if (!he && pAMgr) {
@@ -671,7 +683,7 @@ int Hunspell::suggest(char*** slst, const char * word)
int onlycmpdsug = 0;
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (!pSMgr || maxdic == 0) return 0;
w_char unicw[MAXWORDLEN];
int nc = strlen(word);
if (utf8) {
@@ -823,27 +835,27 @@ int Hunspell::suggest(char*** slst, const char * word)
if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
switch(captype) {
case NOCAP: {
- ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
break;
}
case HUHCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
break;
}
case INITCAP: {
capwords = 1;
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
break;
}
case ALLCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall2(wspace, unicw, nc);
int oldns = ns;
- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr);
+ ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
for (int j = oldns; j < ns; j++)
mkallcap((*slst)[j]);
break;
@@ -936,7 +948,7 @@ int Hunspell::suggest_auto(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (!pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1059,13 +1071,13 @@ int Hunspell::stem(char*** slst, char ** desc, int n)
alt = strstr(alt, " | ");
}
int pln = line_tok(tok, &pl, MSEP_ALT);
- for (int i = 0; i < pln; i++) {
+ for (int k = 0; k < pln; k++) {
// add derivational suffixes
- if (strstr(pl[i], MORPH_DERI_SFX)) {
+ if (strstr(pl[k], MORPH_DERI_SFX)) {
// remove inflectional suffixes
- char * is = strstr(pl[i], MORPH_INFL_SFX);
+ char * is = strstr(pl[k], MORPH_INFL_SFX);
if (is) *is = '\0';
- char * sg = pSMgr->suggest_gen(&(pl[i]), 1, pl[i]);
+ char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
if (sg) {
char ** gen;
int genl = line_tok(sg, &gen, MSEP_REC);
@@ -1078,15 +1090,17 @@ int Hunspell::stem(char*** slst, char ** desc, int n)
}
} else {
sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
- if (strstr(pl[i], MORPH_SURF_PFX)) {
- copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
+ if (strstr(pl[k], MORPH_SURF_PFX)) {
+ copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
}
- copy_field(result2 + strlen(result2), pl[i], MORPH_STEM);
+ copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
}
}
freelist(&pl, pln);
}
- return uniqlist(*slst, line_tok(result2, slst, MSEP_REC));
+ int sln = line_tok(result2, slst, MSEP_REC);
+ return uniqlist(*slst, sln);
+
}
int Hunspell::stem(char*** slst, const char * word)
@@ -1103,7 +1117,7 @@ int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (! pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1223,21 +1237,19 @@ int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
int Hunspell::add(const char * word)
{
- if (pHMgr) return pHMgr->add(word, NULL);
+ if (pHMgr[0]) return (pHMgr[0])->add(word, NULL);
return 0;
}
int Hunspell::add_with_affix(const char * word, const char * example)
{
- if (pHMgr) return pHMgr->add_with_affix(word, example);
+ if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
return 0;
}
-/* XXX not implemented yet */
-
int Hunspell::remove(const char * word)
{
- if (pHMgr) return pHMgr->remove(word);
+ if (pHMgr[0]) return (pHMgr[0])->remove(word);
return 0;
}
@@ -1251,21 +1263,20 @@ struct cs_info * Hunspell::get_csconv()
return csconv;
}
-char * Hunspell::cat_result(char * result, char * st)
+void Hunspell::cat_result(char * result, char * st)
{
if (st) {
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
}
- return result;
}
int Hunspell::analyze(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (! pSMgr || maxdic == 0) return 0;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return 0;
@@ -1474,7 +1485,7 @@ int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
{
if (!pSMgr || !pln) return 0;
char **pl2;
- char pl2n = analyze(&pl2, word);
+ int pl2n = analyze(&pl2, word);
int captype = 0;
int abbv = 0;
char cw[MAXWORDUTF8LEN];
@@ -1522,7 +1533,7 @@ int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
int Hunspell::generate(char*** slst, const char * word, const char * pattern)
{
char **pl;
- char pln = analyze(&pl, pattern);
+ int pln = analyze(&pl, pattern);
int n = generate(slst, word, pl, pln);
freelist(&pl, pln);
return uniqlist(*slst, n);
@@ -1534,7 +1545,7 @@ char * Hunspell::morph_with_correction(const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return NULL;
+ if (! pSMgr || maxdic == 0) return NULL;
int wl = strlen(word);
if (utf8) {
if (wl >= MAXWORDUTF8LEN) return NULL;
@@ -1684,6 +1695,12 @@ Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
return (Hunhandle*)(new Hunspell(affpath, dpath));
}
+/* C wrapper: build a Hunspell object from an affix file, a dictionary file
+ * and a key, and hand it back as an opaque handle. */
+Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+    const char * key)
+{
+    Hunspell * checker = new Hunspell(affpath, dpath, key);
+    return (Hunhandle*) checker;
+}
+
void Hunspell_destroy(Hunhandle *pHunspell)
{
delete (Hunspell*)(pHunspell);
@@ -1750,7 +1767,6 @@ int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
}
/* remove word from the run-time dictionary */
- /* NOTE: not implemented yet */
int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
return ((Hunspell*)pHunspell)->remove(word);
diff --git a/src/myspell/hunspell.h b/src/myspell/hunspell.h
index 452599c..a18cec4 100644
--- a/src/myspell/hunspell.h
+++ b/src/myspell/hunspell.h
@@ -7,15 +7,25 @@ extern "C" {
typedef struct Hunhandle Hunhandle;
-Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
-void Hunspell_destroy(Hunhandle *pHunspell);
+#ifdef _MSC_VER
+#define DLL __declspec ( dllexport )
+#else
+#define DLL
+#endif
+
+DLL Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
+
+DLL Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+ const char * key);
+
+DLL void Hunspell_destroy(Hunhandle *pHunspell);
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
*/
-int Hunspell_spell(Hunhandle *pHunspell, const char *);
+DLL int Hunspell_spell(Hunhandle *pHunspell, const char *);
-char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
+DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
@@ -24,17 +34,17 @@ char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
* a newly allocated array of strings (*slts will be NULL when number
* of suggestion equals 0.)
*/
-int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
+DLL int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
-int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
+DLL int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, word) - stemmer function */
-int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
+DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
@@ -43,11 +53,11 @@ int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
* int n2 = Hunspell_stem2(result2, result, n1);
*/
-int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
+DLL int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
-int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
const char * word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
@@ -58,27 +68,25 @@ int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
*/
-int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
+DLL int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
-int Hunspell_add(const char * word);
+DLL int Hunspell_add(Hunhandle *pHunspell, const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
*/
-int Hunspell_add_with_affix(const char * word, const char * example);
+DLL int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
/* remove word from the run-time dictionary */
- /* NOTE: not implemented yet */
-
-int Hunspell_remove(const char * word);
+DLL int Hunspell_remove(Hunhandle *pHunspell, const char * word);
#ifdef __cplusplus
}
diff --git a/src/myspell/hunspell.hxx b/src/myspell/hunspell.hxx
index 38c141e..854f354 100644
--- a/src/myspell/hunspell.hxx
+++ b/src/myspell/hunspell.hxx
@@ -1,7 +1,6 @@
#include "hashmgr.hxx"
#include "affixmgr.hxx"
#include "suggestmgr.hxx"
-#include "csutil.hxx"
#include "langnum.hxx"
#define SPELL_COMPOUND (1 << 0)
@@ -10,6 +9,7 @@
#define SPELL_NOCAP (1 << 3)
#define SPELL_INITCAP (1 << 4)
+#define MAXDIC 20
#define MAXSUGGESTION 15
#define MAXSHARPS 5
@@ -26,32 +26,37 @@
#endif
#endif
-#ifdef W32
+#ifdef WIN32
class DLLEXPORT Hunspell
#else
class Hunspell
#endif
{
AffixMgr* pAMgr;
- HashMgr* pHMgr;
+ HashMgr* pHMgr[MAXDIC];
+ int maxdic;
SuggestMgr* pSMgr;
+ char * affixpath;
char * encoding;
struct cs_info * csconv;
int langnum;
int utf8;
int complexprefixes;
char** wordbreak;
+ char * key;
public:
/* Hunspell(aff, dic) - constructor of Hunspell class
* input: path of affix file and dictionary file
*/
-
- Hunspell(const char * affpath, const char * dpath);
+ Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
~Hunspell();
+ /* load extra dictionaries (only dic files) */
+ int add_dic(const char * dpath, const char * key = NULL);
+
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
*
@@ -123,7 +128,6 @@ public:
int add_with_affix(const char * word, const char * example);
/* remove word from the run-time dictionary */
- /* NOTE: not implemented yet */
int remove(const char * word);
@@ -164,7 +168,7 @@ private:
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
int is_keepcase(const hentry * rv);
int insert_sug(char ***slst, char * word, int ns);
- char * cat_result(char * result, char * st);
+ void cat_result(char * result, char * st);
char * stem_description(const char * desc);
};
diff --git a/src/myspell/hunzip.cxx b/src/myspell/hunzip.cxx
new file mode 100644
index 0000000..f9091b8
--- /dev/null
+++ b/src/myspell/hunzip.cxx
@@ -0,0 +1,196 @@
+#ifndef MOZILLA_CLIENT
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#endif
+
+#include "hunzip.hxx"
+
+#define CODELEN 65536
+#define BASEBITREC 5000
+
+#define UNCOMPRESSED '\002'
+#define MAGIC "hz0"
+#define MAGIC_ENCRYPT "hz1"
+#define MAGICLEN (sizeof(MAGIC) - 1)
+
+// Write the printf-style message `err`, with `par` as its argument, to
+// stderr. Always yields -1 so that callers can `return fail(...)` directly.
+int Hunzip::fail(const char * err, const char * par) {
+    const int failed = -1;
+    fprintf(stderr, err, par);
+    return failed;
+}
+
+// Open the compressed file `file` (decrypting with `key` when it is given and
+// the file needs it), read the code table and decode the first buffer.
+// bufsiz is left at -1 when the file name cannot be stored or the header
+// cannot be read; getline() then returns NULL.
+Hunzip::Hunzip(const char * file, const char * key) {
+    bufsiz = 0;
+    lastbit = 0;
+    inc = 0;
+    inbits = 0;
+    outc = 0;
+    dec = NULL;
+    // the destructor tests fin, so it must be defined even if getcode() never runs
+    fin = NULL;
+    // getline() measures the previous line with strlen(): start with an empty one
+    line[0] = '\0';
+    filename = (char *) malloc(strlen(file) + 1);
+    if (filename) strcpy(filename, file);
+    // without a stored name there is nothing to open
+    if (!filename || getcode(key) == -1) bufsiz = -1;
+    else bufsiz = getbuf();
+}
+
+// Open the file, check its magic number and build the decoding table `dec`.
+//   key: password for files starting with MAGIC_ENCRYPT; it is ignored for
+//        files starting with MAGIC.
+// Returns 0 on success and -1 on failure. Every failure except "cannot open"
+// is reported on stderr through fail().
+int Hunzip::getcode(const char * key) {
+    unsigned char c[2];
+    int i, j, n, p;
+    int allocatedbit = BASEBITREC;
+    const char * enc = key;
+
+    // fin is assigned in every case because the destructor tests it
+    fin = filename ? fopen(filename, "rb") : NULL;
+    if (!fin) return -1;
+
+    // read magic number
+    if ((fread(in, 1, MAGICLEN, fin) < MAGICLEN)
+        || !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
+             strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
+        return fail(MSG_FORMAT, filename);
+    }
+
+    // check encryption
+    if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
+        unsigned char cs;
+        // an empty key has no byte to cycle through below (stepping over it
+        // would read past its terminator), so treat it like a missing key
+        if (!key || !*key) return fail(MSG_KEY, filename);
+        if (fread(c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
+        // the stored byte is the XOR of all key bytes
+        for (cs = 0; *enc; enc++) cs ^= *enc;
+        if (cs != c[0]) return fail(MSG_KEY, filename);
+        enc = key;
+    } else key = NULL;
+
+    // read record count
+    if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
+
+    if (key) {
+        c[0] ^= *enc;
+        if (*(++enc) == '\0') enc = key;
+        c[1] ^= *enc;
+    }
+
+    n = ((int) c[0] << 8) + c[1];
+    dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
+    if (!dec) return fail(MSG_MEMORY, filename);
+    dec[0].v[0] = 0;
+    dec[0].v[1] = 0;
+
+    // read codes
+    for (i = 0; i < n; i++) {
+        unsigned char l;
+        // two output bytes of the record
+        if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
+        if (key) {
+            if (*(++enc) == '\0') enc = key;
+            c[0] ^= *enc;
+            if (*(++enc) == '\0') enc = key;
+            c[1] ^= *enc;
+        }
+        // code length in bits
+        if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
+        if (key) {
+            if (*(++enc) == '\0') enc = key;
+            l ^= *enc;
+        }
+        // the code itself, stored in l/8+1 bytes
+        if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename);
+        if (key) for (j = 0; j <= l/8; j++) {
+            if (*(++enc) == '\0') enc = key;
+            in[j] ^= *enc;
+        }
+        // walk the table along the code bits, adding entries where none exist
+        p = 0;
+        for (j = 0; j < l; j++) {
+            int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
+            int oldp = p;
+            p = dec[p].v[b];
+            if (p == 0) {
+                lastbit++;
+                if (lastbit == allocatedbit) {
+                    allocatedbit += BASEBITREC;
+                    // keep the old table until the larger one exists: on
+                    // failure it stays owned by dec and the destructor frees it
+                    struct bit * grown = (struct bit *)
+                        realloc(dec, allocatedbit * sizeof(struct bit));
+                    if (!grown) return fail(MSG_MEMORY, filename);
+                    dec = grown;
+                }
+                dec[lastbit].v[0] = 0;
+                dec[lastbit].v[1] = 0;
+                dec[oldp].v[b] = lastbit;
+                p = lastbit;
+            }
+        }
+        dec[p].c[0] = c[0];
+        dec[p].c[1] = c[1];
+    }
+    return 0;
+}
+
+// Release the stored file name, the code table and the file, if still open.
+Hunzip::~Hunzip()
+{
+    free(filename);              // free() accepts a null pointer
+    if (fin != NULL) fclose(fin);
+    free(dec);
+}
+
+// Decode the next part of the file into `out`.
+// Returns the number of bytes stored in `out`: BUFSIZE when the buffer was
+// filled, or a smaller count (possibly 0) when the code of table entry
+// `lastbit` was met, in which case the file is closed and fin set to NULL.
+// Returns -1 through fail() when the input runs out before that code.
+int Hunzip::getbuf() {
+    int p = 0;
+    int o = 0;
+    // the file is closed once its end has been decoded; being asked for more
+    // means the decoded text stopped in the middle of a line
+    if (!fin) return fail(MSG_FORMAT, filename);
+    do {
+        // inc != 0 means part of `in` is still unread from the previous call
+        if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8;
+        for (; inc < inbits; inc++) {
+            int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
+            int oldp = p;
+            p = dec[p].v[b];
+            if (p == 0) {
+                // no entry for this bit: the code ending at oldp is complete
+                if (oldp == lastbit) {
+                    fclose(fin);
+                    fin = NULL;
+                    // add last odd byte
+                    if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1];
+                    return o;
+                }
+                out[o++] = dec[oldp].c[0];
+                out[o++] = dec[oldp].c[1];
+                // inc is not advanced here, so the next call starts the
+                // following code with this same bit
+                if (o == BUFSIZE) return o;
+                p = dec[p].v[b];
+            }
+        }
+        inc = 0;
+    } while (inbits == BUFSIZE * 8);
+    return fail(MSG_FORMAT, filename);
+}
+
+// Return the next decoded line, or NULL when nothing is left or the data is
+// damaged. The result points at the member buffer `line`, which the next
+// call overwrites.
+// Each line is stored relative to the previous one:
+//   31      the following byte is copied literally;
+//   33..46  (optional) the line ends with the last (byte - 31) characters of
+//           the previous line;
+//   a byte below 47 other than tab, space and 31 ends the line; the byte read
+//           at that point is the number of leading characters kept from the
+//           previous line (30 stands for 9).
+const char * Hunzip::getline() {
+    // a full buffer plus the suffix borrowed from the previous line must fit
+    char linebuf[BUFSIZE + 50];
+    int l = 0, eol = 0, left = 0, right = 0;
+    int broken = 0;
+    // 0: the end of the data was decoded and everything has been handed out
+    if (bufsiz <= 0) return NULL;
+    // l is bounded by the buffer capacity, not by the size of the buffer being
+    // read: a line may continue into a final buffer shorter than its first part
+    while (bufsiz > 0 && l < BUFSIZE && !eol) {
+        linebuf[l++] = out[outc];
+        switch (out[outc]) {
+            case '\t': break;
+            case 31: { // escape
+                if (++outc == bufsiz) {
+                    bufsiz = fin ? getbuf() : -1;
+                    outc = 0;
+                    if (bufsiz <= 0) broken = 1; // the escaped byte is missing
+                }
+                linebuf[l - 1] = out[outc];
+                break;
+            }
+            case ' ': break;
+            default: if (((unsigned char) out[outc]) < 47) {
+                if (out[outc] > 32) {
+                    right = out[outc] - 31;
+                    if (++outc == bufsiz) {
+                        bufsiz = fin ? getbuf() : -1;
+                        outc = 0;
+                        if (bufsiz <= 0) broken = 1; // the prefix length is missing
+                    }
+                }
+                if (out[outc] == 30) left = 9; else left = out[outc];
+                linebuf[l-1] = '\n';
+                eol = 1;
+            }
+        }
+        if (++outc == bufsiz) {
+            outc = 0;
+            bufsiz = fin ? getbuf(): -1;
+        }
+    }
+    if (!broken && (left || right)) {
+        // both counts refer to the previous line and must lie inside it
+        size_t prevlen = strlen(line);
+        if (left < 0 || (size_t) left > prevlen ||
+            (right && (size_t) right + 1 > prevlen)) broken = 1;
+    }
+    if (!broken) {
+        if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
+        else linebuf[l] = '\0';
+        if ((size_t) left + strlen(linebuf) < sizeof(line)) {
+            strcpy(line + left, linebuf);
+            return line;
+        }
+    }
+    // damaged data: report it and stop handing out lines
+    bufsiz = -1;
+    fail(MSG_FORMAT, filename);
+    return NULL;
+}
diff --git a/src/myspell/hunzip.hxx b/src/myspell/hunzip.hxx
new file mode 100644
index 0000000..52109d1
--- /dev/null
+++ b/src/myspell/hunzip.hxx
@@ -0,0 +1,41 @@
+/* hunzip: file decompression for sorted dictionaries with optional encryption,
+ * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
+
+#ifndef _HUNZIP_HXX_
+#define _HUNZIP_HXX_
+
+#define BUFSIZE 65536
+#define HZIP_EXTENSION ".hz"
+
+#define MSG_OPEN "error: %s: cannot open\n"
+#define MSG_FORMAT "error: %s: not in hzip format\n"
+#define MSG_MEMORY "error: %s: missing memory\n"
+#define MSG_KEY "error: %s: missing or bad password\n"
+
+// one entry of the decoding table built by Hunzip::getcode()
+struct bit {
+ // the two bytes Hunzip::getbuf() writes to the output for this entry
+ unsigned char c[2];
+ // index of the entry to continue with for bit 0 and bit 1; 0 = none
+ int v[2];
+};
+
+// Line-by-line reader for files in hzip format (see the note at the top of
+// this header); optionally decrypts with a key.
+class Hunzip
+{
+
+protected:
+    char * filename;          // copy of the file name, allocated with malloc()
+    FILE * fin;               // compressed input; NULL once closed
+    // bufsiz:  bytes available in `out`, -1 when nothing (more) can be read
+    // lastbit: index of the last entry added to `dec`
+    // inc:     next bit to decode in `in`;  inbits: number of bits in `in`
+    // outc:    next byte to consume in `out`
+    int bufsiz, lastbit, inc, inbits, outc;
+    struct bit * dec;         // code table
+    char in[BUFSIZE];         // input buffer
+    char out[BUFSIZE + 1];    // Huffman-decoded buffer
+    char line[BUFSIZE + 50];  // decoded line
+    // open the file, check its header and fill `dec`; 0 = ok, -1 = failure
+    int getcode(const char * key);
+    // decode the next part of the file into `out`; byte count or -1
+    int getbuf();
+    // print the message `err` with argument `par` to stderr; returns -1
+    int fail(const char * err, const char * par);
+
+public:
+    Hunzip(const char * filename, const char * key = NULL);
+    ~Hunzip();
+    // next line, held in an internal buffer that the following call
+    // overwrites; NULL when no line can be read
+    const char * getline();
+
+private:
+    // not copyable: a copy would free `filename` and `dec` and close `fin` a
+    // second time (declared but never defined)
+    Hunzip(const Hunzip &);
+    Hunzip & operator=(const Hunzip &);
+};
+
+#endif
diff --git a/src/myspell/myspell_checker.cpp b/src/myspell/myspell_checker.cpp
index 390d2a2..7b86ca9 100644
--- a/src/myspell/myspell_checker.cpp
+++ b/src/myspell/myspell_checker.cpp
@@ -38,8 +38,13 @@
#include "enchant.h"
#include "enchant-provider.h"
-/* built against hunspell 1.1.5 on 2007-03-19 */
+/* built against hunspell 1.2.2 on 2008-04-12 */
+
+#ifdef WITH_SYSTEM_MYSPELL
+#include <hunspell/hunspell.hxx>
+#else
#include "hunspell.hxx"
+#endif
ENCHANT_PLUGIN_DECLARE("Myspell")
diff --git a/src/myspell/phonet.cxx b/src/myspell/phonet.cxx
index e4ad5a0..91dc419 100644
--- a/src/myspell/phonet.cxx
+++ b/src/myspell/phonet.cxx
@@ -46,7 +46,7 @@ void init_phonet_hash(phonetable & parms)
{
int i, k;
- for (i = 0; i < parms.hash_size; i++) {
+ for (i = 0; i < HASHSIZE; i++) {
parms.hash[i] = -1;
}
diff --git a/src/myspell/phonet.hxx b/src/myspell/phonet.hxx
index 471ff84..d1cf995 100644
--- a/src/myspell/phonet.hxx
+++ b/src/myspell/phonet.hxx
@@ -30,6 +30,7 @@
#ifndef __PHONETHXX__
#define __PHONETHXX__
+#define HASHSIZE 256
#define MAXPHONETLEN 256
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
@@ -38,8 +39,7 @@ struct phonetable {
cs_info * lang;
int num;
char * * rules;
- static const int hash_size = 256;
- int hash[hash_size];
+ int hash[HASHSIZE];
};
void init_phonet_hash(phonetable & parms);
diff --git a/src/myspell/suggestmgr.cxx b/src/myspell/suggestmgr.cxx
index b1a58f3..c19ba08 100644
--- a/src/myspell/suggestmgr.cxx
+++ b/src/myspell/suggestmgr.cxx
@@ -15,9 +15,10 @@
#include "suggestmgr.hxx"
#include "htypes.hxx"
+#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
-#ifndef W32
+#ifndef WIN32
using namespace std;
#endif
#endif
@@ -1028,7 +1029,7 @@ int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns,
}
// generate a set of suggestions for very poorly spelled words
-int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
+int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md)
{
int i, j;
@@ -1037,8 +1038,6 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
int lp, lpphon;
int nonbmp = 0;
- if (!pHMgr) return ns;
-
// exhaustively search through all root words
// keeping track of the MAX_ROOTS most similar root words
struct hentry * roots[MAX_ROOTS];
@@ -1088,8 +1087,9 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
mkallcap(candidate, csconv);
phonet(candidate, target, n, *ph);
}
-
- while ((hp = pHMgr->walk_hashtable(col, hp))) {
+
+ for (i = 0; i < md; i++) {
+ while ((hp = (pHMgr[i])->walk_hashtable(col, hp))) {
if ((hp->astr) && (pAMgr) &&
(TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) ||
TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
@@ -1135,7 +1135,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
lval = scoresphon[j];
}
}
- }
+ }}
// find minimum threshhold for a passable suggestion
// mangle original word three differnt ways
@@ -1557,7 +1557,7 @@ char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern)
*result = '\0';
int sfxcount = get_sfxcount(pattern);
-// if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL;
+ if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL;
if (HENTRY_DATA(rv)) {
char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
diff --git a/src/myspell/suggestmgr.hxx b/src/myspell/suggestmgr.hxx
index d22884f..0e61572 100644
--- a/src/myspell/suggestmgr.hxx
+++ b/src/myspell/suggestmgr.hxx
@@ -51,7 +51,7 @@ public:
~SuggestMgr();
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
- int ngsuggest(char ** wlst, char * word, int ns, HashMgr* pHMgr);
+ int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
int suggest_auto(char*** slst, const char * word, int nsug);
int suggest_stems(char*** slst, const char * word, int nsug);
int suggest_pos_stems(char*** slst, const char * word, int nsug);
diff --git a/src/myspell/w_char.hxx b/src/myspell/w_char.hxx
new file mode 100644
index 0000000..a3d11c3
--- /dev/null
+++ b/src/myspell/w_char.hxx
@@ -0,0 +1,19 @@
+#ifndef __WCHARHXX__
+#define __WCHARHXX__
+
+// Two-byte character type. Outside WIN32 builds the struct is declared
+// packed.
+// NOTE(review): l and h look like the low and high byte of a 16-bit
+// character - confirm against the users of w_char.
+#ifdef WIN32
+typedef struct {
+#else
+typedef struct __attribute__ ((packed)) {
+#endif
+ unsigned char l;
+ unsigned char h;
+} w_char;
+
+// two character arrays
+// NOTE(review): presumably a text to look for and the text to put in its
+// place - confirm against the code that fills this struct.
+struct replentry {
+ char * pattern;
+ char * pattern2;
+};
+
+#endif