diff options
Diffstat (limited to 'src/myspell/affentry.cxx')
-rw-r--r-- | src/myspell/affentry.cxx | 194 |
1 files changed, 104 insertions, 90 deletions
diff --git a/src/myspell/affentry.cxx b/src/myspell/affentry.cxx index 2436fbb..fef0cca 100644 --- a/src/myspell/affentry.cxx +++ b/src/myspell/affentry.cxx @@ -1,34 +1,20 @@ #include "license.hunspell" #include "license.myspell" -#ifndef MOZILLA_CLIENT -#include <cstdlib> -#include <cstring> -#include <cctype> -#include <cstdio> -#else #include <stdlib.h> #include <string.h> #include <stdio.h> #include <ctype.h> -#endif #include "affentry.hxx" #include "csutil.hxx" -#ifndef MOZILLA_CLIENT -#ifndef WIN32 -using namespace std; -#endif -#endif - - PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) { // register affix manager pmyMgr = pmgr; - // set up its intial values + // set up its initial values aflag = dp->aflag; // flag strip = dp->strip; // string to strip @@ -69,7 +55,8 @@ char * PfxEntry::add(const char * word, int len) { char tword[MAXWORDUTF8LEN + 4]; - if ((len > stripl) && (len >= numconds) && test_condition(word) && + if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word) && (!stripl || (strncmp(word, strip, stripl) == 0)) && ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { /* we have a match so add prefix */ @@ -92,8 +79,9 @@ inline char * PfxEntry::nextchar(char * p) { if (p == c.conds + MAXCONDLEN_1) return c.l.conds2; // end of the MAXCONDLEN length condition } else if (p == c.conds + MAXCONDLEN) return NULL; + return *p ? p : NULL; } - return p; + return NULL; } inline int PfxEntry::test_condition(const char * st) @@ -106,22 +94,27 @@ inline int PfxEntry::test_condition(const char * st) while (1) { switch (*p) { case '\0': return 1; - case '[': { p = nextchar(p); pos = st; break; } - case '^': { p = nextchar(p); neg = true; break; } - case ']': { if ((neg && ingroup) || (!neg && !ingroup)) return 0; - pos = NULL; + case '[': { neg = false; ingroup = false; p = nextchar(p); - st++; - if (*st == '\0' && p && *p != '\0') return 0; // word <= condition + pos = st; break; + } + case '^': { p = nextchar(p); neg = true; break; } + case ']': { + if ((neg && ingroup) || (!neg && !ingroup)) return 0; + pos = NULL; + p = nextchar(p); + // skip the next character + if (!ingroup && *st) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++); + if (*st == '\0' && p) return 0; // word <= condition break; } case '.': if (!pos) { // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++); - if (*st == '\0') return 0; // word <= condition + if (*st == '\0' && p) return 0; // word <= condition break; } default: { @@ -138,8 +131,14 @@ inline int PfxEntry::test_condition(const char * st) p = nextchar(p); st++; } - if (st != pos) ingroup = true; - } else if (pos) ingroup = true; + if (pos && st != pos) { + ingroup = true; + while (p && *p != ']' && (p = nextchar(p))); + } + } else if (pos) { + ingroup = true; + while (p && *p != ']' && (p = nextchar(p))); + } } else if (pos) { // group p = nextchar(p); } else return 0; @@ -163,8 +162,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound tmpl = len - appndl; -// if ((tmpl > 0) && (tmpl + stripl >= numconds)) { - if (tmpl > 0) { + if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) { // generate new root word by removing prefix and adding // back any characters that would have been stripped @@ -201,7 +199,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound //if ((opts & aeXPRODUCT) && in_compound) { if ((opts & aeXPRODUCT)) { - he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL, + he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL, 0, NULL, FLAG_NULL, needflag, in_compound); if (he) return he; } @@ -225,8 +223,8 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, tmpl = len - appndl; - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing prefix and adding // back any characters that would have been stripped @@ -250,7 +248,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, // cross checked combined with a suffix if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { - he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag); + he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, needflag); if (he) return he; } } @@ -272,8 +270,8 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, tmpl = len - appndl; - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing prefix and adding // back any characters that would have been stripped @@ -298,7 +296,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl, - aeXPRODUCT, (AffEntry *)this, needflag); + aeXPRODUCT, this, needflag); } } } @@ -323,8 +321,8 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const tmpl = len - appndl; - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing prefix and adding // back any characters that would have been stripped @@ -351,28 +349,27 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || (contclass && TESTAFF(contclass, needflag, contclasslen)))) { if (morphcode) { - strcat(result, " "); - strcat(result, morphcode); - } else strcat(result,getKey()); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, morphcode, MAXLNLEN); + } else mystrcat(result,getKey(), MAXLNLEN); if (!HENTRY_FIND(he, MORPH_STEM)) { - strcat(result, " "); - strcat(result, MORPH_STEM); - strcat(result,HENTRY_WORD(he)); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_STEM, MAXLNLEN); + mystrcat(result, HENTRY_WORD(he), MAXLNLEN); } // store the pointer of the hash entry -// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, he); if (HENTRY_DATA(he)) { - strcat(result, " "); - strcat(result,HENTRY_DATA(he)); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, HENTRY_DATA2(he), MAXLNLEN); } else { // return with debug information char * flag = pmyMgr->encode_flag(getFlag()); - strcat(result, " "); - strcat(result, MORPH_FLAG); - strcat(result, flag); + mystrcat(result, " ", MAXLNLEN); + mystrcat(result, MORPH_FLAG, MAXLNLEN); + mystrcat(result, flag, MAXLNLEN); free(flag); } - strcat(result, "\n"); + mystrcat(result, "\n", MAXLNLEN); } he = he->next_homonym; } while (he); @@ -383,10 +380,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const // ross checked combined with a suffix if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { - st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, + st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this, FLAG_NULL, needflag); if (st) { - strcat(result, st); + mystrcat(result, st, MAXLNLEN); free(st); } } @@ -402,7 +399,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) // register affix manager pmyMgr = pmgr; - // set up its intial values + // set up its initial values aflag = dp->aflag; // char flag strip = dp->strip; // string to strip appnd = dp->appnd; // string to append @@ -444,7 +441,8 @@ char * SfxEntry::add(const char * word, int len) char tword[MAXWORDUTF8LEN + 4]; /* make sure all conditions match */ - if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) && + if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word + len, word) && (!stripl || (strcmp(word + len - stripl, strip) == 0)) && ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { /* we have a match so add suffix */ @@ -460,13 +458,16 @@ char * SfxEntry::add(const char * word, int len) } inline char * SfxEntry::nextchar(char * p) { - p++; - if (opts & aeLONGCOND) { - // jump to the 2nd part of the condition - if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2; - // end of the MAXCONDLEN length condition - } else if (p == c.conds + MAXCONDLEN) return NULL; - return p; + if (p) { + p++; + if (opts & aeLONGCOND) { + // jump to the 2nd part of the condition + if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2; + // end of the MAXCONDLEN length condition + } else if (p == c.conds + MAXCONDLEN) return NULL; + return *p ? p : NULL; + } + return NULL; } inline int SfxEntry::test_condition(const char * st, const char * beg) @@ -485,22 +486,30 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) case '^': { p = nextchar(p); neg = true; break; } case ']': { if (!neg && !ingroup) return 0; i++; + // skip the next character + if (!ingroup) { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); + st--; + } pos = NULL; neg = false; ingroup = false; p = nextchar(p); - st--; - if (st < beg && p && *p != '\0') return 0; // word <= condition + if (st < beg && p) return 0; // word <= condition break; } case '.': if (!pos) { // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); - if (st < beg) return 0; // word <= condition - if (*st & 0x80) { // head of the UTF-8 character + if (st < beg) { // word <= condition + if (p) return 0; else return 1; + } + if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character st--; - if (st < beg) return 0; // word <= condition + if (st < beg) { // word <= condition + if (p) return 0; else return 1; + } } break; } @@ -524,18 +533,23 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; + while (p && *p != ']' && (p = nextchar(p))); + st--; } - if (p && *p != '\0') p = nextchar(p); + if (p && *p != ']') p = nextchar(p); } else if (pos) { if (neg) return 0; else if (i == numconds) return 1; ingroup = true; + while (p && *p != ']' && (p = nextchar(p))); +// if (p && *p != ']') p = nextchar(p); + st--; } if (!pos) { i++; st--; - if (st < beg && p && *p != '\0') return 0; // word <= condition } + if (st < beg && p && *p != ']') return 0; // word <= condition } else if (pos) { // group p = nextchar(p); } else return 0; @@ -547,14 +561,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) // see if this suffix is present in the word struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, - AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag, + PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag, const FLAG badflag) { int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; char tmpword[MAXWORDUTF8LEN + 4]; - PfxEntry* ep = (PfxEntry *) ppfx; + PfxEntry* ep = ppfx; // if this suffix is being cross checked with a prefix // but it does not support cross products skip it @@ -571,8 +585,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, // the second condition is not enough for UTF-8 strings // it checked in test_condition() - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing suffix and adding // back any characters that would have been stripped or @@ -605,9 +619,9 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && (((optflags & aeXPRODUCT) == 0) || - TESTAFF(he->astr, ep->getFlag(), he->alen) || + (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) || // enabled by prefix - ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) + ((contclass) && (ep && TESTAFF(contclass, ep->getFlag(), contclasslen))) ) && // handle cont. class ((!cclass) || @@ -649,13 +663,13 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, // see if two-level suffix is present in the word struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, - AffEntry* ppfx, const FLAG needflag) + PfxEntry* ppfx, const FLAG needflag) { int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; char tmpword[MAXWORDUTF8LEN + 4]; - PfxEntry* ep = (PfxEntry *) ppfx; + PfxEntry* ep = ppfx; // if this suffix is being cross checked with a prefix @@ -671,8 +685,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, tmpl = len - appndl; - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing suffix and adding // back any characters that would have been stripped or @@ -711,12 +725,12 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, // see if two-level suffix is present in the word char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, - AffEntry* ppfx, const FLAG needflag) + PfxEntry* ppfx, const FLAG needflag) { int tmpl; // length of tmpword unsigned char * cp; char tmpword[MAXWORDUTF8LEN + 4]; - PfxEntry* ep = (PfxEntry *) ppfx; + PfxEntry* ep = ppfx; char * st; char result[MAXLNLEN]; @@ -736,8 +750,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, tmpl = len - appndl; - if ((tmpl > 0) && (tmpl + stripl >= numconds)) { -// if (tmpl > 0) { + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + stripl >= numconds)) { // generate new root word by removing suffix and adding // back any characters that would have been stripped or @@ -764,18 +778,18 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) { st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag); if (st) { - if (((PfxEntry *) ppfx)->getMorph()) { - strcat(result, ((PfxEntry *) ppfx)->getMorph()); - strcat(result, " "); + if (ppfx->getMorph()) { + mystrcat(result, ppfx->getMorph(), MAXLNLEN); + mystrcat(result, " ", MAXLNLEN); } - strcat(result,st); + mystrcat(result,st, MAXLNLEN); free(st); mychomp(result); } } else { st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag); if (st) { - strcat(result, st); + mystrcat(result, st, MAXLNLEN); free(st); mychomp(result); } @@ -783,7 +797,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, } else { st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag); if (st) { - strcat(result, st); + mystrcat(result, st, MAXLNLEN); free(st); mychomp(result); } @@ -795,10 +809,10 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, } // get next homonym with same affix -struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx, +struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, PfxEntry* ppfx, const FLAG cclass, const FLAG needflag) { - PfxEntry* ep = (PfxEntry *) ppfx; + PfxEntry* ep = ppfx; FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL; while (he->next_homonym) { |