summaryrefslogtreecommitdiff
path: root/src/myspell/affentry.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/myspell/affentry.cxx')
-rw-r--r--src/myspell/affentry.cxx194
1 files changed, 104 insertions, 90 deletions
diff --git a/src/myspell/affentry.cxx b/src/myspell/affentry.cxx
index 2436fbb..fef0cca 100644
--- a/src/myspell/affentry.cxx
+++ b/src/myspell/affentry.cxx
@@ -1,34 +1,20 @@
#include "license.hunspell"
#include "license.myspell"
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cctype>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
#include "affentry.hxx"
#include "csutil.hxx"
-#ifndef MOZILLA_CLIENT
-#ifndef WIN32
-using namespace std;
-#endif
-#endif
-
-
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
// register affix manager
pmyMgr = pmgr;
- // set up its intial values
+ // set up its initial values
aflag = dp->aflag; // flag
strip = dp->strip; // string to strip
@@ -69,7 +55,8 @@ char * PfxEntry::add(const char * word, int len)
{
char tword[MAXWORDUTF8LEN + 4];
- if ((len > stripl) && (len >= numconds) && test_condition(word) &&
+ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word) &&
(!stripl || (strncmp(word, strip, stripl) == 0)) &&
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add prefix */
@@ -92,8 +79,9 @@ inline char * PfxEntry::nextchar(char * p) {
if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
// end of the MAXCONDLEN length condition
} else if (p == c.conds + MAXCONDLEN) return NULL;
+ return *p ? p : NULL;
}
- return p;
+ return NULL;
}
inline int PfxEntry::test_condition(const char * st)
@@ -106,22 +94,27 @@ inline int PfxEntry::test_condition(const char * st)
while (1) {
switch (*p) {
case '\0': return 1;
- case '[': { p = nextchar(p); pos = st; break; }
- case '^': { p = nextchar(p); neg = true; break; }
- case ']': { if ((neg && ingroup) || (!neg && !ingroup)) return 0;
- pos = NULL;
+ case '[': {
neg = false;
ingroup = false;
p = nextchar(p);
- st++;
- if (*st == '\0' && p && *p != '\0') return 0; // word <= condition
+ pos = st; break;
+ }
+ case '^': { p = nextchar(p); neg = true; break; }
+ case ']': {
+ if ((neg && ingroup) || (!neg && !ingroup)) return 0;
+ pos = NULL;
+ p = nextchar(p);
+ // skip the next character
+ if (!ingroup && *st) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
+ if (*st == '\0' && p) return 0; // word <= condition
break;
}
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
p = nextchar(p);
// skip the next character
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
- if (*st == '\0') return 0; // word <= condition
+ if (*st == '\0' && p) return 0; // word <= condition
break;
}
default: {
@@ -138,8 +131,14 @@ inline int PfxEntry::test_condition(const char * st)
p = nextchar(p);
st++;
}
- if (st != pos) ingroup = true;
- } else if (pos) ingroup = true;
+ if (pos && st != pos) {
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ }
+ } else if (pos) {
+ ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ }
} else if (pos) { // group
p = nextchar(p);
} else return 0;
@@ -163,8 +162,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
tmpl = len - appndl;
-// if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
- if (tmpl > 0) {
+ if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -201,7 +199,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
//if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
+ he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL,
0, NULL, FLAG_NULL, needflag, in_compound);
if (he) return he;
}
@@ -225,8 +223,8 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -250,7 +248,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag);
+ he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, needflag);
if (he) return he;
}
}
@@ -272,8 +270,8 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -298,7 +296,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
- aeXPRODUCT, (AffEntry *)this, needflag);
+ aeXPRODUCT, this, needflag);
}
}
}
@@ -323,8 +321,8 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -351,28 +349,27 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen)))) {
if (morphcode) {
- strcat(result, " ");
- strcat(result, morphcode);
- } else strcat(result,getKey());
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, morphcode, MAXLNLEN);
+ } else mystrcat(result,getKey(), MAXLNLEN);
if (!HENTRY_FIND(he, MORPH_STEM)) {
- strcat(result, " ");
- strcat(result, MORPH_STEM);
- strcat(result,HENTRY_WORD(he));
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_STEM, MAXLNLEN);
+ mystrcat(result, HENTRY_WORD(he), MAXLNLEN);
}
// store the pointer of the hash entry
-// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, he);
if (HENTRY_DATA(he)) {
- strcat(result, " ");
- strcat(result,HENTRY_DATA(he));
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, HENTRY_DATA2(he), MAXLNLEN);
} else {
// return with debug information
char * flag = pmyMgr->encode_flag(getFlag());
- strcat(result, " ");
- strcat(result, MORPH_FLAG);
- strcat(result, flag);
+ mystrcat(result, " ", MAXLNLEN);
+ mystrcat(result, MORPH_FLAG, MAXLNLEN);
+ mystrcat(result, flag, MAXLNLEN);
free(flag);
}
- strcat(result, "\n");
+ mystrcat(result, "\n", MAXLNLEN);
}
he = he->next_homonym;
} while (he);
@@ -383,10 +380,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
+ st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
}
}
@@ -402,7 +399,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
// register affix manager
pmyMgr = pmgr;
- // set up its intial values
+ // set up its initial values
aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
@@ -444,7 +441,8 @@ char * SfxEntry::add(const char * word, int len)
char tword[MAXWORDUTF8LEN + 4];
/* make sure all conditions match */
- if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
+ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word + len, word) &&
(!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
/* we have a match so add suffix */
@@ -460,13 +458,16 @@ char * SfxEntry::add(const char * word, int len)
}
inline char * SfxEntry::nextchar(char * p) {
- p++;
- if (opts & aeLONGCOND) {
- // jump to the 2nd part of the condition
- if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
- // end of the MAXCONDLEN length condition
- } else if (p == c.conds + MAXCONDLEN) return NULL;
- return p;
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // jump to the 2nd part of the condition
+ if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
+ // end of the MAXCONDLEN length condition
+ } else if (p == c.conds + MAXCONDLEN) return NULL;
+ return *p ? p : NULL;
+ }
+ return NULL;
}
inline int SfxEntry::test_condition(const char * st, const char * beg)
@@ -485,22 +486,30 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
case '^': { p = nextchar(p); neg = true; break; }
case ']': { if (!neg && !ingroup) return 0;
i++;
+ // skip the next character
+ if (!ingroup) {
+ for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
+ st--;
+ }
pos = NULL;
neg = false;
ingroup = false;
p = nextchar(p);
- st--;
- if (st < beg && p && *p != '\0') return 0; // word <= condition
+ if (st < beg && p) return 0; // word <= condition
break;
}
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
p = nextchar(p);
// skip the next character
for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
- if (st < beg) return 0; // word <= condition
- if (*st & 0x80) { // head of the UTF-8 character
+ if (st < beg) { // word <= condition
+ if (p) return 0; else return 1;
+ }
+ if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
st--;
- if (st < beg) return 0; // word <= condition
+ if (st < beg) { // word <= condition
+ if (p) return 0; else return 1;
+ }
}
break;
}
@@ -524,18 +533,23 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
if (neg) return 0;
else if (i == numconds) return 1;
ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+ st--;
}
- if (p && *p != '\0') p = nextchar(p);
+ if (p && *p != ']') p = nextchar(p);
} else if (pos) {
if (neg) return 0;
else if (i == numconds) return 1;
ingroup = true;
+ while (p && *p != ']' && (p = nextchar(p)));
+// if (p && *p != ']') p = nextchar(p);
+ st--;
}
if (!pos) {
i++;
st--;
- if (st < beg && p && *p != '\0') return 0; // word <= condition
}
+ if (st < beg && p && *p != ']') return 0; // word <= condition
} else if (pos) { // group
p = nextchar(p);
} else return 0;
@@ -547,14 +561,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
- AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
+ PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
const FLAG badflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
@@ -571,8 +585,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
// the second condition is not enough for UTF-8 strings
// it checked in test_condition()
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -605,9 +619,9 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
(((optflags & aeXPRODUCT) == 0) ||
- TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+ (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
// enabled by prefix
- ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ ((contclass) && (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))
) &&
// handle cont. class
((!cclass) ||
@@ -649,13 +663,13 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
// see if two-level suffix is present in the word
struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
- AffEntry* ppfx, const FLAG needflag)
+ PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
@@ -671,8 +685,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -711,12 +725,12 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
- AffEntry* ppfx, const FLAG needflag)
+ PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
char * st;
char result[MAXLNLEN];
@@ -736,8 +750,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
-// if (tmpl > 0) {
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + stripl >= numconds)) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -764,18 +778,18 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
- if (((PfxEntry *) ppfx)->getMorph()) {
- strcat(result, ((PfxEntry *) ppfx)->getMorph());
- strcat(result, " ");
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
+ mystrcat(result, " ", MAXLNLEN);
}
- strcat(result,st);
+ mystrcat(result,st, MAXLNLEN);
free(st);
mychomp(result);
}
} else {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
@@ -783,7 +797,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
} else {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
- strcat(result, st);
+ mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
@@ -795,10 +809,10 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
}
// get next homonym with same affix
-struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, PfxEntry* ppfx,
const FLAG cclass, const FLAG needflag)
{
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
while (he->next_homonym) {