summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2005-06-14 22:01:04 +0000
committerBram Moolenaar <Bram@vim.org>2005-06-14 22:01:04 +0000
commit9f30f50471678a0a986c30b50dce705bdcc991dc (patch)
treef4db0c82382dd5997a02e406c91acef45907f9d5
parent9ba0eb850c0f4c94df3b7f7461610bf0b073f712 (diff)
downloadvim-git-9f30f50471678a0a986c30b50dce705bdcc991dc.tar.gz
updated for version 7.0085
-rw-r--r--src/spell.c728
1 files changed, 535 insertions, 193 deletions
diff --git a/src/spell.c b/src/spell.c
index 2b37f632f..54ca1036b 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -13,7 +13,23 @@
* The spell checking mechanism uses a tree (aka trie). Each node in the tree
* has a list of bytes that can appear (siblings). For each byte there is a
* pointer to the node with the byte that follows in the word (child).
- * A NUL byte is used where the word may end.
+ *
+ * A NUL byte is used where the word may end. The bytes are sorted, so that
+ * binary searching can be used and the NUL bytes are at the start. The
+ * number of possible bytes is stored before the list of bytes.
+ *
+ * The tree uses two arrays: "byts" stores the characters, "idxs" stores
+ * either the next index or flags. The tree starts at index 0. For example,
+ * to lookup "vi" this sequence is followed:
+ * i = 0
+ * len = byts[i]
+ * n = where "v" appears in byts[i + 1] to byts[i + len]
+ * i = idxs[n]
+ * len = byts[i]
+ * n = where "i" appears in byts[i + 1] to byts[i + len]
+ * i = idxs[n]
+ * len = byts[i]
+ * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
*
* There are two trees: one with case-folded words and one with words in
* original case. The second one is only used for keep-case words and is
@@ -30,8 +46,17 @@
/*
* Use this to let the score depend in how much a suggestion sounds like the
- * bad word. It's quite slow and doesn't make the sorting much better....
- * #define SOUNDFOLD_SCORE
+ * bad word. It's quite slow and only occasionally makes the sorting better.
+#define SOUNDFOLD_SCORE
+ */
+
+/*
+ * Use this to adjust the score after finding suggestions, based on the
+ * suggested word sounding like the bad word. This is much faster than doing
+ * it for every possible suggestion.
+ * Disadvantage: When "the" is typed as "hte" it sounds different and goes
+ * down in the list.
+#define RESCORE(word_score, sound_score) ((2 * word_score + sound_score) / 3)
*/
/*
@@ -47,8 +72,8 @@
*
* <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
* <charflags> N bytes List of flags (first one is for character 128):
- * 0x01 word character
- * 0x02 upper-case character
+ * 0x01 word character CF_WORD
+ * 0x02 upper-case character CF_UPPER
* <fcharslen> 2 bytes Number of bytes in <fchars>.
* <fchars> N bytes Folded characters, first one is for character 128.
*
@@ -145,7 +170,16 @@
Some places assume a word length fits in a
byte, thus it can't be above 255. */
-/* Flags used for a word. */
+/* Type used for indexes in the word tree need to be at least 3 bytes. If int
+ * is 8 bytes we could use something smaller, but what? */
+#if SIZEOF_INT > 2
+typedef int idx_T;
+#else
+typedef long idx_T;
+#endif
+
+/* Flags used for a word. Only the lowest byte can be used, the region byte
+ * comes above it. */
#define WF_REGION 0x01 /* region byte follows */
#define WF_ONECAP 0x02 /* word with one capital (or all capitals) */
#define WF_ALLCAP 0x04 /* word must be all capitals */
@@ -155,6 +189,9 @@
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)
+#define WF_USED 0x10000 /* Word was found in text. Must be in separate
+ byte before region and flags. */
+
#define BY_NOFLAGS 0 /* end of word without flags or region */
#define BY_FLAGS 1 /* end of word, flag byte follows */
#define BY_INDEX 2 /* child is shared, index follows */
@@ -192,9 +229,9 @@ struct slang_S
char_u *sl_fname; /* name of .spl file */
int sl_add; /* TRUE if it's a .add file. */
char_u *sl_fbyts; /* case-folded word bytes */
- int *sl_fidxs; /* case-folded word indexes */
+ idx_T *sl_fidxs; /* case-folded word indexes */
char_u *sl_kbyts; /* keep-case word bytes */
- int *sl_kidxs; /* keep-case word indexes */
+ idx_T *sl_kidxs; /* keep-case word indexes */
char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
garray_T sl_rep; /* list of fromto_T entries from REP lines */
@@ -267,6 +304,9 @@ typedef struct suggest_S
char_u *st_word; /* suggested word, allocated string */
int st_orglen; /* length of replaced text */
int st_score; /* lower is better */
+#ifdef RESCORE
+ int st_had_bonus; /* bonus already included in score */
+#endif
} suggest_T;
#define SUG(sup, i) (((suggest_T *)(sup)->su_ga.ga_data)[i])
@@ -274,8 +314,14 @@ typedef struct suggest_S
/* Number of suggestions displayed. */
#define SUG_PROMPT_COUNT ((int)Rows - 2)
-/* Threshold for sorting and cleaning up suggestions. */
-#define SUG_CLEANUP_COUNT (SUG_PROMPT_COUNT + 50)
+/* Number of suggestions kept when cleaning up. When rescore_suggestions() is
+ * called the score may change, thus we need to keep more than what is
+ * displayed. */
+#define SUG_CLEAN_COUNT (SUG_PROMPT_COUNT < 25 ? 25 : SUG_PROMPT_COUNT)
+
+/* Threshold for sorting and cleaning up suggestions. Don't want to keep lots
+ * of suggestions that are not going to be displayed. */
+#define SUG_MAX_COUNT (SUG_PROMPT_COUNT + 50)
/* score for various changes */
#define SCORE_SPLIT 99 /* split bad word */
@@ -283,6 +329,7 @@ typedef struct suggest_S
#define SCORE_ALLCAP 120 /* need all-cap case */
#define SCORE_REGION 70 /* word is for different region */
#define SCORE_RARE 180 /* rare word */
+#define SCORE_NOTUSED 11 /* word not found in text yet */
/* score for edit distance */
#define SCORE_SWAP 90 /* swap two characters */
@@ -290,8 +337,8 @@ typedef struct suggest_S
#define SCORE_REP 87 /* REP replacement */
#define SCORE_SUBST 93 /* substitute a character */
#define SCORE_SIMILAR 33 /* substitute a similar character */
-#define SCORE_DEL 96 /* delete a character */
-#define SCORE_INS 94 /* insert a character */
+#define SCORE_DEL 94 /* delete a character */
+#define SCORE_INS 96 /* insert a character */
#define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower.
* 350 allows for about three changes. */
@@ -329,13 +376,14 @@ typedef struct spelltab_S
char_u st_isw[256]; /* flags: is word char */
char_u st_isu[256]; /* flags: is uppercase char */
char_u st_fold[256]; /* chars: folded case */
+ char_u st_upper[256]; /* chars: upper case */
} spelltab_T;
static spelltab_T spelltab;
static int did_set_spelltab;
-#define SPELL_ISWORD 1
-#define SPELL_ISUPPER 2
+#define CF_WORD 0x01
+#define CF_UPPER 0x02
static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T *new_st));
@@ -364,7 +412,7 @@ typedef struct trystate_S
int ts_fidx; /* index in fword[], case-folded bad word */
int ts_fidxtry; /* ts_fidx at which bytes may be changed */
int ts_twordlen; /* valid length of tword[] */
- int ts_arridx; /* index in tree array, start of node */
+ idx_T ts_arridx; /* index in tree array, start of node */
char_u ts_save_prewordlen; /* saved "prewordlen" */
int ts_save_splitoff; /* su_splitoff saved here */
int ts_save_badflags; /* badflags saved here */
@@ -379,30 +427,80 @@ static void spell_load_lang __ARGS((char_u *lang));
static char_u *spell_enc __ARGS((void));
static void spell_load_cb __ARGS((char_u *fname, void *cookie));
static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
-static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx));
+static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx));
static int find_region __ARGS((char_u *rp, char_u *region));
static int captype __ARGS((char_u *word, char_u *end));
static void spell_reload_one __ARGS((char_u *fname, int added_word));
static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
static void write_spell_chartab __ARGS((FILE *fd));
-static int spell_isupper __ARGS((int c));
static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
-static void onecap_copy __ARGS((char_u *word, int len, char_u *wcopy, int upper));
+static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
static void spell_try_change __ARGS((suginfo_T *su));
static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
static void spell_try_soundalike __ARGS((suginfo_T *su));
static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
+#if 0
static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
+#endif
+#ifdef RESCORE
+static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus));
+#else
static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score));
+#endif
static void add_banned __ARGS((suginfo_T *su, char_u *word));
static int was_banned __ARGS((suginfo_T *su, char_u *word));
static void free_banned __ARGS((suginfo_T *su));
-static void cleanup_suggestions __ARGS((suginfo_T *su));
+#ifdef RESCORE
+static void rescore_suggestions __ARGS((suginfo_T *su));
+#endif
+static void cleanup_suggestions __ARGS((suginfo_T *su, int keep));
static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
+#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
+static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u *badsound));
+#endif
static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
+/*
+ * Use our own character-case definitions, because the current locale may
+ * differ from what the .spl file uses.
+ * These must not be called with negative number!
+ */
+#ifndef FEAT_MBYTE
+/* Non-multi-byte implementation. */
+# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
+# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
+# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
+#else
+/* Multi-byte implementation. For Unicode we can call utf_*(), but don't do
+ * that for ASCII, because we don't want to use 'casemap' here. Otherwise use
+ * the "w" library function for characters above 255 if available. */
+# ifdef HAVE_TOWLOWER
+# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
+ : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
+# else
+# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
+ : (c) < 256 ? spelltab.st_fold[c] : (c))
+# endif
+
+# ifdef HAVE_TOWUPPER
+# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
+ : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
+# else
+# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
+ : (c) < 256 ? spelltab.st_upper[c] : (c))
+# endif
+
+# ifdef HAVE_ISWUPPER
+# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
+ : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
+# else
+# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
+ : (c) < 256 ? spelltab.st_isu[c] : (c))
+# endif
+#endif
+
static char *e_format = N_("E759: Format error in spell file");
@@ -489,6 +587,7 @@ spell_check(wp, ptr, attrp)
/* Check for a matching word in case-folded words. */
find_word(&mi, FALSE);
+ /* Check for a matching word in keep-case words. */
find_word(&mi, TRUE);
}
@@ -528,16 +627,16 @@ find_word(mip, keepcap)
matchinf_T *mip;
int keepcap;
{
- int arridx = 0;
+ idx_T arridx = 0;
int endlen[MAXWLEN]; /* length at possible word endings */
- int endidx[MAXWLEN]; /* possible word endings */
+ idx_T endidx[MAXWLEN]; /* possible word endings */
int endidxcnt = 0;
int len;
int wlen = 0;
int flen;
int c;
char_u *ptr;
- unsigned lo, hi, m;
+ idx_T lo, hi, m;
#ifdef FEAT_MBYTE
char_u *s;
#endif
@@ -547,7 +646,7 @@ find_word(mip, keepcap)
slang_T *slang = mip->mi_lp->lp_slang;
unsigned flags;
char_u *byts;
- int *idxs;
+ idx_T *idxs;
if (keepcap)
{
@@ -692,6 +791,11 @@ find_word(mip, keepcap)
for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len)
{
flags = idxs[arridx];
+
+ /* Set a flag for words that were used. The region and case
+ * doesn't matter here, it's only used to rate the suggestions. */
+ idxs[arridx] = flags | WF_USED;
+
if (keepcap)
{
/* For "keepcap" tree the case is always right. */
@@ -823,7 +927,6 @@ spell_move_to(dir, allwords, curline)
if (attr != 0)
{
/* We found a bad word. Check the attribute. */
- /* TODO: check for syntax @Spell cluster. */
if (allwords || attr == highlight_attr[HLF_SPB])
{
/* When searching forward only accept a bad word after
@@ -1073,6 +1176,7 @@ spell_load_file(fname, lang, old_lp, silent)
fromto_T *ftp;
int rr;
short *first;
+ idx_T idx;
fd = mch_fopen((char *)fname, "r");
if (fd == NULL)
@@ -1170,7 +1274,7 @@ formerr:
fol[i] = getc(fd); /* <fchars> */
fol[i] = NUL;
- /* Set the word-char flags and fill spell_isupper() table. */
+ /* Set the word-char flags and fill SPELL_ISUPPER() table. */
i = set_spell_charflags(p, cnt, fol);
vim_free(p);
vim_free(fol);
@@ -1293,19 +1397,19 @@ formerr:
if (p == NULL)
goto endFAIL;
if (round == 1)
- lp->sl_fidxs = (int *)p;
+ lp->sl_fidxs = (idx_T *)p;
else
- lp->sl_kidxs = (int *)p;
+ lp->sl_kidxs = (idx_T *)p;
/* Read the tree and store it in the array. */
- i = read_tree(fd,
+ idx = read_tree(fd,
round == 1 ? lp->sl_fbyts : lp->sl_kbyts,
round == 1 ? lp->sl_fidxs : lp->sl_kidxs,
len, 0);
- if (i == -1)
+ if (idx == -1)
goto truncerr;
- if (i < 0)
+ if (idx < 0)
goto formerr;
}
}
@@ -1348,18 +1452,18 @@ endOK:
* Returns -1 if the file is shorter than expected.
* Returns -2 if there is a format error.
*/
- static int
+ static idx_T
read_tree(fd, byts, idxs, maxidx, startidx)
FILE *fd;
char_u *byts;
- int *idxs;
+ idx_T *idxs;
int maxidx; /* size of arrays */
- int startidx; /* current index in "byts" and "idxs" */
+ idx_T startidx; /* current index in "byts" and "idxs" */
{
int len;
int i;
int n;
- int idx = startidx;
+ idx_T idx = startidx;
int c;
#define SHARED_MASK 0x8000000
@@ -1619,7 +1723,7 @@ captype(word, end)
else
#endif
c = *p++;
- firstcap = allcap = spell_isupper(c);
+ firstcap = allcap = SPELL_ISUPPER(c);
/*
* Need to check all letters to find a word with mixed upper/lower.
@@ -1633,7 +1737,7 @@ captype(word, end)
#else
c = *p;
#endif
- if (!spell_isupper(c))
+ if (!SPELL_ISUPPER(c))
{
/* UUl -> KEEPCAP */
if (past_second && allcap)
@@ -1876,6 +1980,7 @@ spell_read_aff(fname, spin)
int do_sal;
int do_map;
int found_map = FALSE;
+ hashitem_T *hi;
/*
* Open the file.
@@ -2031,7 +2136,8 @@ spell_read_aff(fname, spin)
else
tp = &aff->af_suff;
aff_todo = atoi((char *)items[3]);
- if (!HASHITEM_EMPTY(hash_find(tp, cur_aff->ah_key)))
+ hi = hash_find(tp, cur_aff->ah_key);
+ if (!HASHITEM_EMPTY(hi))
{
smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
fname, lnum, items[1]);
@@ -2171,7 +2277,7 @@ spell_read_aff(fname, spin)
/*
* Don't write a word table for an ASCII file, so that we don't check
* for conflicts with a word table that matches 'encoding'.
- * Don't write one for utf-8 either, we use utf_isupper() and
+ * Don't write one for utf-8 either, we use utf_*() and
* mb_get_class(), the list of chars in the file will be incomplete.
*/
if (!spin->si_ascii
@@ -2336,7 +2442,7 @@ spell_read_dic(fname, spin, affile)
/* Read and ignore the first line: word count. */
(void)vim_fgets(line, MAXLINELEN, fd);
- if (!isdigit(*skipwhite(line)))
+ if (!vim_isdigit(*skipwhite(line)))
EMSG2(_("E760: No word count in %s"), fname);
/*
@@ -2528,12 +2634,14 @@ store_aff_word(word, spin, afflist, ht, xht, comb, flags)
/* Skip chop string. */
#ifdef FEAT_MBYTE
if (has_mbyte)
+ {
i = mb_charlen(ae->ae_chop);
+ for ( ; i > 0; --i)
+ mb_ptr_adv(p);
+ }
else
#endif
- i = STRLEN(ae->ae_chop);
- for ( ; i > 0; --i)
- mb_ptr_adv(p);
+ p += STRLEN(ae->ae_chop);
}
STRCAT(newword, p);
}
@@ -3754,13 +3862,16 @@ init_spellfile()
clear_spell_chartab(sp)
spelltab_T *sp;
{
- int i;
+ int i;
/* Init everything to FALSE. */
vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
for (i = 0; i < 256; ++i)
+ {
sp->st_fold[i] = i;
+ sp->st_upper[i] = i;
+ }
/* We include digits. A word shouldn't start with a digit, but handling
* that is done separately. */
@@ -3773,7 +3884,10 @@ clear_spell_chartab(sp)
sp->st_fold[i] = i + 0x20;
}
for (i = 'a'; i <= 'z'; ++i)
+ {
sp->st_isw[i] = TRUE;
+ sp->st_upper[i] = i - 0x20;
+ }
}
/*
@@ -3799,18 +3913,33 @@ init_spell_chartab()
if (MB_BYTE2LEN(i) == 2)
spelltab.st_isw[i] = TRUE;
}
+ else if (enc_utf8)
+ {
+ for (i = 128; i < 256; ++i)
+ {
+ spelltab.st_isu[i] = utf_isupper(i);
+ spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
+ spelltab.st_fold[i] = utf_fold(i);
+ spelltab.st_upper[i] = utf_toupper(i);
+ }
+ }
else
#endif
{
- /* Rough guess: use isalpha() and isupper() for characters above 128. */
+ /* Rough guess: use locale-dependent library functions. */
for (i = 128; i < 256; ++i)
{
- spelltab.st_isw[i] = MB_ISUPPER(i) || MB_ISLOWER(i);
if (MB_ISUPPER(i))
{
+ spelltab.st_isw[i] = TRUE;
spelltab.st_isu[i] = TRUE;
spelltab.st_fold[i] = MB_TOLOWER(i);
}
+ else if (MB_ISLOWER(i))
+ {
+ spelltab.st_isw[i] = TRUE;
+ spelltab.st_upper[i] = MB_TOUPPER(i);
+ }
}
}
}
@@ -3872,7 +4001,8 @@ set_spell_chartab(fol, low, upp)
}
/* if "UPP" and "FOL" are not the same the "UPP" char needs
- * case-folding and it's upper case. */
+ * case-folding, it's upper case and the "UPP" is the upper case of
+ * "FOL" . */
if (u < 256 && u != f)
{
if (f >= 256)
@@ -3882,6 +4012,7 @@ set_spell_chartab(fol, low, upp)
}
new_st.st_fold[u] = f;
new_st.st_isu[u] = TRUE;
+ new_st.st_upper[f] = u;
}
}
@@ -3908,21 +4039,25 @@ set_spell_charflags(flags, cnt, upp)
spelltab_T new_st;
int i;
char_u *p = upp;
+ int c;
clear_spell_chartab(&new_st);
for (i = 0; i < cnt; ++i)
{
- new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
- new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
+ new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
+ new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
if (*p == NUL)
return FAIL;
#ifdef FEAT_MBYTE
- new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
+ c = mb_ptr2char_adv(&p);
#else
- new_st.st_fold[i + 128] = *p++;
+ c = *p++;
#endif
+ new_st.st_fold[i + 128] = c;
+ if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
+ new_st.st_upper[c] = i + 128;
}
return set_spell_finish(&new_st);
@@ -3941,7 +4076,8 @@ set_spell_finish(new_st)
{
if (spelltab.st_isw[i] != new_st->st_isw[i]
|| spelltab.st_isu[i] != new_st->st_isu[i]
- || spelltab.st_fold[i] != new_st->st_fold[i])
+ || spelltab.st_fold[i] != new_st->st_fold[i]
+ || spelltab.st_upper[i] != new_st->st_upper[i])
{
EMSG(_("E763: Word characters differ between spell files"));
return FAIL;
@@ -3977,9 +4113,9 @@ write_spell_chartab(fd)
{
flags = 0;
if (spelltab.st_isw[i])
- flags |= SPELL_ISWORD;
+ flags |= CF_WORD;
if (spelltab.st_isu[i])
- flags |= SPELL_ISUPPER;
+ flags |= CF_UPPER;
fputc(flags, fd); /* <charflags> */
#ifdef FEAT_MBYTE
@@ -3995,43 +4131,14 @@ write_spell_chartab(fd)
}
/*
- * Return TRUE if "c" is an upper-case character for spelling.
- */
- static int
-spell_isupper(c)
- int c;
-{
-# ifdef FEAT_MBYTE
- if (enc_utf8)
- {
- /* For Unicode we can call utf_isupper(), but don't do that for ASCII,
- * because we don't want to use 'casemap' here. */
- if (c >= 128)
- return utf_isupper(c);
- }
- else if (has_mbyte && c > 256)
- {
- /* For characters above 255 we don't have something specfied.
- * Fall back to locale-dependent iswupper(). If not available
- * simply return FALSE. */
-# ifdef HAVE_ISWUPPER
- return iswupper(c);
-# else
- return FALSE;
-# endif
- }
-# endif
- return spelltab.st_isu[c];
-}
-
-/*
- * Case-fold "p[len]" into "buf[buflen]". Used for spell checking.
+ * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
+ * Uses the character definitions from the .spl file.
* When using a multi-byte 'encoding' the length may change!
* Returns FAIL when something wrong.
*/
static int
-spell_casefold(p, len, buf, buflen)
- char_u *p;
+spell_casefold(str, len, buf, buflen)
+ char_u *str;
int len;
char_u *buf;
int buflen;
@@ -4047,32 +4154,20 @@ spell_casefold(p, len, buf, buflen)
#ifdef FEAT_MBYTE
if (has_mbyte)
{
- int c;
int outi = 0;
+ char_u *p;
+ int c;
/* Fold one character at a time. */
- for (i = 0; i < len; i += mb_ptr2len_check(p + i))
+ for (p = str; p < str + len; )
{
- c = mb_ptr2char(p + i);
- if (enc_utf8)
- /* For Unicode case folding is always the same, no need to use
- * the table from the spell file. */
- c = utf_fold(c);
- else if (c < 256)
- /* Use the table from the spell file. */
- c = spelltab.st_fold[c];
-# ifdef HAVE_TOWLOWER
- else
- /* We don't know what to do, fall back to towlower(), it
- * depends on the current locale. */
- c = towlower(c);
-# endif
if (outi + MB_MAXBYTES > buflen)
{
buf[outi] = NUL;
return FAIL;
}
- outi += mb_char2bytes(c, buf + outi);
+ c = mb_ptr2char_adv(&p);
+ outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
}
buf[outi] = NUL;
}
@@ -4081,7 +4176,7 @@ spell_casefold(p, len, buf, buflen)
{
/* Be quick for non-multibyte encodings. */
for (i = 0; i < len; ++i)
- buf[i] = spelltab.st_fold[p[i]];
+ buf[i] = spelltab.st_fold[str[i]];
buf[i] = NUL;
}
@@ -4136,22 +4231,28 @@ spell_suggest()
/*
* 1. Try inserting/deleting/swapping/changing a letter, use REP entries
* from the .aff file and inserting a space (split the word).
+ *
+ * Set a maximum score to limit the combination of operations that is
+ * tried.
*/
- /* Set a maximum score to limit the combination of operations that is
- * tried. */
sug.su_maxscore = SCORE_MAXINIT;
spell_try_change(&sug);
- cleanup_suggestions(&sug);
/*
* 2. Try finding sound-a-like words.
+ *
+ * Only do this when we don't have a lot of suggestions yet, because it's
+ * very slow and often doesn't find new suggestions.
*/
- /* Allow a higher score if we don't have many suggestions yet. */
- if (sug.su_maxscore == SCORE_MAXINIT)
+ if (sug.su_ga.ga_len < SUG_CLEAN_COUNT)
+ {
+ /* Allow a higher score now. */
sug.su_maxscore = SCORE_MAXMAX;
- spell_try_soundalike(&sug);
+ spell_try_soundalike(&sug);
+ }
/* When CTRL-C was hit while searching do show the results. */
+ ui_breakcheck();
if (got_int)
{
(void)vgetc();
@@ -4162,8 +4263,13 @@ spell_suggest()
MSG(_("Sorry, no suggestions"));
else
{
- /* Cleanup, sort the suggestions and truncate at SUG_PROMPT_COUNT. */
- cleanup_suggestions(&sug);
+#ifdef RESCORE
+ /* Do slow but more accurate computation of the word score. */
+ rescore_suggestions(&sug);
+#endif
+
+ /* Sort the suggestions and truncate at SUG_PROMPT_COUNT. */
+ cleanup_suggestions(&sug, SUG_PROMPT_COUNT);
/* List the suggestions. */
msg_start();
@@ -4184,9 +4290,12 @@ spell_suggest()
vim_strncpy(wcopy + STRLEN(wcopy),
sug.su_badptr + stp->st_orglen,
sug.su_badlen - stp->st_orglen);
- /* TODO: remove score */
- vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"),
+ if (p_verbose > 0)
+ vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"),
i + 1, wcopy, stp->st_score);
+ else
+ vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""),
+ i + 1, wcopy);
msg_puts(IObuff);
lines_left = 3; /* avoid more prompt */
msg_putchar('\n');
@@ -4224,13 +4333,13 @@ spell_suggest()
}
/*
- * Make a copy of "word[len]", with the first letter upper or lower cased,
- * to "wcopy[MAXWLEN]".
+ * Make a copy of "word", with the first letter upper or lower cased, to
+ * "wcopy[MAXWLEN]". "word" must not be empty.
+ * The result is NUL terminated.
*/
static void
-onecap_copy(word, len, wcopy, upper)
+onecap_copy(word, wcopy, upper)
char_u *word;
- int len;
char_u *wcopy;
int upper; /* TRUE: first letter made upper case */
{
@@ -4246,9 +4355,9 @@ onecap_copy(word, len, wcopy, upper)
#endif
c = *p++;
if (upper)
- c = MB_TOUPPER(c);
+ c = SPELL_TOUPPER(c);
else
- c = MB_TOLOWER(c);
+ c = SPELL_TOFOLD(c);
#ifdef FEAT_MBYTE
if (has_mbyte)
l = mb_char2bytes(c, wcopy);
@@ -4258,12 +4367,12 @@ onecap_copy(word, len, wcopy, upper)
l = 1;
wcopy[0] = c;
}
- vim_strncpy(wcopy + l, p, len - (p - word));
+ vim_strncpy(wcopy + l, p, MAXWLEN - l);
}
/*
- * Make a copy of "word[len]" with all the letters upper cased into
- * "wcopy[MAXWLEN]".
+ * Make a copy of "word" with all the letters upper cased into
+ * "wcopy[MAXWLEN]". The result is NUL terminated.
*/
static void
allcap_copy(word, wcopy)
@@ -4283,8 +4392,7 @@ allcap_copy(word, wcopy)
else
#endif
c = *s++;
-
- c = MB_TOUPPER(c); /* TODO: use spell toupper */
+ c = SPELL_TOUPPER(c);
#ifdef FEAT_MBYTE
if (has_mbyte)
@@ -4322,14 +4430,14 @@ spell_try_change(su)
int newscore;
langp_T *lp;
char_u *byts;
- int *idxs;
+ idx_T *idxs;
int depth;
int c;
int n;
int flags;
int badflags;
garray_T *gap;
- int arridx;
+ idx_T arridx;
int len;
char_u *p;
fromto_T *ftp;
@@ -4417,7 +4525,7 @@ spell_try_change(su)
*/
++sp->ts_curi; /* eat one NUL byte */
- flags = idxs[arridx];
+ flags = (int)idxs[arridx];
/*
* Form the word with proper case in preword.
@@ -4451,6 +4559,10 @@ spell_try_change(su)
if (flags & WF_RARE)
newscore += SCORE_RARE;
+ /* Words that were not found in the text get a penalty. */
+ if ((flags & WF_USED) == 0)
+ newscore += SCORE_NOTUSED;
+
if (!spell_valid_case(badflags,
captype(preword + prewordlen, NULL)))
newscore += SCORE_ICASE;
@@ -4458,7 +4570,11 @@ spell_try_change(su)
if (fword[sp->ts_fidx] == 0)
{
/* The badword also ends: add suggestions, */
- add_suggestion(su, preword, sp->ts_score + newscore);
+ add_suggestion(su, preword, sp->ts_score + newscore
+#ifdef RESCORE
+ , FALSE
+#endif
+ );
}
else if (sp->ts_fidx >= sp->ts_fidxtry)
{
@@ -4476,10 +4592,24 @@ spell_try_change(su)
STRCAT(preword, " ");
prewordlen = STRLEN(preword);
splitoff = sp->ts_twordlen;
- /* TODO: when case-folding changed the number of bytes
- * this doesn't work... */
- badflags = captype(su->su_badptr + sp->ts_fidx,
- su->su_badptr + su->su_badlen);
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ int i = 0;
+
+ /* Case-folding may change the number of bytes:
+ * Count nr of chars in fword[sp->ts_fidx] and
+ * advance that many chars in su->su_badptr. */
+ for (p = fword; p < fword + sp->ts_fidx;
+ mb_ptr_adv(p))
+ ++i;
+ for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
+ --i;
+ }
+ else
+#endif
+ p = su->su_badptr + sp->ts_fidx;
+ badflags = captype(p, su->su_badptr + su->su_badlen);
sp->ts_state = STATE_SPLITUNDO;
++depth;
@@ -4535,11 +4665,15 @@ spell_try_change(su)
* even try when the byte was already changed. */
if (c == fword[sp->ts_fidx])
newscore = 0;
- /* TODO: multi-byte characters */
+
+ /* TODO: this is too slow and comparing bytes isn't right
+ * for multi-byte characters. */
+#if 0
else if (lp->lp_slang->sl_map != NULL
- && similar_chars(lp->lp_slang,
+ && similar_chars(lp->lp_slang,
c, fword[sp->ts_fidx]))
newscore = SCORE_SIMILAR;
+#endif
else
newscore = SCORE_SUBST;
if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
@@ -4818,10 +4952,10 @@ find_keepcap_word(slang, fword, kword)
{
char_u uword[MAXWLEN]; /* "fword" in upper-case */
int depth;
- int tryidx;
+ idx_T tryidx;
/* The following arrays are used at each depth in the tree. */
- int arridx[MAXWLEN];
+ idx_T arridx[MAXWLEN];
int round[MAXWLEN];
int fwordidx[MAXWLEN];
int uwordidx[MAXWLEN];
@@ -4831,10 +4965,10 @@ find_keepcap_word(slang, fword, kword)
int l;
int len;
int c;
- unsigned lo, hi, m;
+ idx_T lo, hi, m;
char_u *p;
char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
- int *idxs = slang->sl_kidxs; /* array with indexes */
+ idx_T *idxs = slang->sl_kidxs; /* array with indexes */
if (byts == NULL)
{
@@ -4976,16 +5110,18 @@ spell_try_soundalike(su)
char_u tword[MAXWLEN];
char_u tfword[MAXWLEN];
char_u tsalword[MAXWLEN];
- int arridx[MAXWLEN];
+ idx_T arridx[MAXWLEN];
int curi[MAXWLEN];
langp_T *lp;
char_u *byts;
- int *idxs;
+ idx_T *idxs;
int depth;
int c;
- int n;
+ idx_T n;
int round;
int flags;
+ int score, sound_score;
+ char_u *bp, *sp;
for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
lp->lp_slang != NULL; ++lp)
@@ -5030,7 +5166,7 @@ spell_try_soundalike(su)
if (c == 0)
{
/* End of word, deal with the word. */
- flags = idxs[n];
+ flags = (int)idxs[n];
if (round == 2 || (flags & WF_KEEPCAP) == 0)
{
tword[depth] = NUL;
@@ -5047,19 +5183,63 @@ spell_try_soundalike(su)
tfword, tsalword);
}
- /* TODO: also compare with small changes
- * (insert char, swap char, etc.) */
- if (STRCMP(salword, tsalword) == 0)
+ /*
+ * Accept the word if the sound-folded words
+ * are (almost) equal.
+ */
+ for (bp = salword, sp = tsalword; *bp == *sp;
+ ++bp, ++sp)
+ if (*bp == NUL)
+ break;
+
+ if (*bp == *sp)
+ /* equal */
+ sound_score = 0;
+ else if (*bp != NUL && bp[1] != NUL
+ && *bp == sp[1] && bp[1] == *sp
+ && STRCMP(bp + 2, sp + 2) == 0)
+ /* swap two bytes */
+ sound_score = SCORE_SWAP;
+ else if (STRCMP(bp + 1, sp) == 0)
+ /* delete byte */
+ sound_score = SCORE_DEL;
+ else if (STRCMP(bp, sp + 1) == 0)
+ /* insert byte */
+ sound_score = SCORE_INS;
+ else if (STRCMP(bp + 1, sp + 1) == 0)
+ /* skip one byte */
+ sound_score = SCORE_SUBST;
+ else
+ /* not equal or similar */
+ sound_score = SCORE_MAXMAX;
+
+ if (sound_score < SCORE_MAXMAX)
{
+ char_u cword[MAXWLEN];
+ char_u *p;
+
if (round == 1 && flags != 0)
{
- char_u cword[MAXWLEN];
-
+ /* Need to fix case according to
+ * "flags". */
make_case_word(tword, cword, flags);
- add_suggestion(su, cword, 0);
+ p = cword;
}
else
- add_suggestion(su, tword, 0);
+ p = tword;
+
+ /* Compute the score. */
+ score = spell_edit_score(su->su_badword, p);
+#ifdef RESCORE
+ /* give a bonus for the good word sounding
+ * the same as the bad word */
+ add_suggestion(su, tword,
+ RESCORE(score, sound_score),
+ TRUE);
+#else
+ add_suggestion(su, tword,
+ score + sound_score);
+#endif
}
}
@@ -5078,15 +5258,16 @@ spell_try_soundalike(su)
curi[depth] = 1;
}
}
+
+ line_breakcheck();
}
- line_breakcheck();
}
}
}
}
/*
- * Copy "fword" to "cword", fixing according to "flags".
+ * Copy "fword" to "cword", fixing case according to "flags".
*/
static void
make_case_word(fword, cword, flags)
@@ -5099,12 +5280,13 @@ make_case_word(fword, cword, flags)
allcap_copy(fword, cword);
else if (flags & WF_ONECAP)
/* Make the first letter upper-case */
- onecap_copy(fword, STRLEN(fword), cword, TRUE);
+ onecap_copy(fword, cword, TRUE);
else
/* Use goodword as-is. */
STRCPY(cword, fword);
}
+#if 0
/*
* Return TRUE if "c1" and "c2" are similar characters according to the MAP
* lines in the .aff file.
@@ -5129,6 +5311,7 @@ similar_chars(slang, c1, c2)
return FALSE;
return vim_strchr(p1, '/') == vim_strchr(p2, '/');
}
+#endif
/*
* Add a suggestion to the list of suggestions.
@@ -5137,13 +5320,19 @@ similar_chars(slang, c1, c2)
* with spell_edit_score().
*/
static void
-add_suggestion(su, goodword, use_score)
+add_suggestion(su, goodword, score
+#ifdef RESCORE
+ , had_bonus
+#endif
+ )
suginfo_T *su;
char_u *goodword;
- int use_score;
+ int score;
+#ifdef RESCORE
+ int had_bonus; /* set st_had_bonus */
+#endif
{
suggest_T *stp;
- int score;
int i;
#ifdef SOUNDFOLD_SCORE
char_u fword[MAXWLEN];
@@ -5154,23 +5343,13 @@ add_suggestion(su, goodword, use_score)
if (was_banned(su, goodword))
return;
- /* Compute the score and add the suggestion if it's good enough. */
- if (use_score != 0)
- score = use_score;
- else
- score = spell_edit_score(su->su_badword, goodword);
-
if (score <= su->su_maxscore)
{
#ifdef SOUNDFOLD_SCORE
/* Add to the score when the word sounds differently.
* This is slow... */
if (su->su_slang->sl_sal.ga_len > 0)
- {
- (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN);
- spell_soundfold(su->su_slang, fword, salword);
- score += spell_edit_score(su->su_salword, salword);
- }
+ score += spell_sound_score(su->su_slang, fword, su->su_salword);
#endif
/* Check if the word is already there. */
@@ -5180,7 +5359,12 @@ add_suggestion(su, goodword, use_score)
{
/* Found it. Remember the lowest score. */
if (stp[i].st_score > score)
+ {
stp[i].st_score = score;
+#ifdef RESCORE
+ stp[i].st_had_bonus = had_bonus;
+#endif
+ }
break;
}
@@ -5192,13 +5376,16 @@ add_suggestion(su, goodword, use_score)
if (stp->st_word != NULL)
{
stp->st_score = score;
+#ifdef RESCORE
+ stp->st_had_bonus = had_bonus;
+#endif
stp->st_orglen = su->su_badlen;
++su->su_ga.ga_len;
/* If we have too many suggestions now, sort the list and keep
* the best suggestions. */
- if (su->su_ga.ga_len > SUG_CLEANUP_COUNT)
- cleanup_suggestions(su);
+ if (su->su_ga.ga_len > SUG_MAX_COUNT)
+ cleanup_suggestions(su, SUG_CLEAN_COUNT);
}
}
}
@@ -5233,7 +5420,9 @@ was_banned(su, word)
suginfo_T *su;
char_u *word;
{
- return !HASHITEM_EMPTY(hash_find(&su->su_banned, word));
+ hashitem_T *hi = hash_find(&su->su_banned, word);
+
+ return !HASHITEM_EMPTY(hi);
}
/*
@@ -5258,6 +5447,45 @@ free_banned(su)
hash_clear(&su->su_banned);
}
+#ifdef RESCORE
+/*
+ * Recompute the score if sound-folding is possible. This is slow,
+ * thus only done for the final results.
+ */
+ static void
+rescore_suggestions(su)
+ suginfo_T *su;
+{
+ langp_T *lp;
+ suggest_T *stp;
+ char_u sal_badword[MAXWLEN];
+ int score;
+ int i;
+
+ for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
+ lp->lp_slang != NULL; ++lp)
+ {
+ if (lp->lp_slang->sl_sal.ga_len > 0)
+ {
+ /* soundfold the bad word */
+ spell_soundfold(lp->lp_slang, su->su_fbadword, sal_badword);
+
+ for (i = 0; i < su->su_ga.ga_len; ++i)
+ {
+ stp = &SUG(su, i);
+ if (!stp->st_had_bonus)
+ {
+ score = spell_sound_score(lp->lp_slang, stp->st_word,
+ sal_badword);
+ stp->st_score = RESCORE(stp->st_score, score);
+ }
+ }
+ break;
+ }
+ }
+}
+#endif
+
static int
#ifdef __BORLANDC__
_RTLENTRYF
@@ -5287,8 +5515,9 @@ sug_compare(s1, s2)
* - Remove words that won't be displayed.
*/
static void
-cleanup_suggestions(su)
+cleanup_suggestions(su, keep)
suginfo_T *su;
+ int keep; /* nr of suggestions to keep */
{
suggest_T *stp = &SUG(su, 0);
int i;
@@ -5298,12 +5527,12 @@ cleanup_suggestions(su)
sizeof(suggest_T), sug_compare);
/* Truncate the list to the number of suggestions that will be displayed. */
- if (su->su_ga.ga_len > SUG_PROMPT_COUNT)
+ if (su->su_ga.ga_len > keep)
{
- for (i = SUG_PROMPT_COUNT; i < su->su_ga.ga_len; ++i)
+ for (i = keep; i < su->su_ga.ga_len; ++i)
vim_free(stp[i].st_word);
- su->su_ga.ga_len = SUG_PROMPT_COUNT;
- su->su_maxscore = stp[SUG_PROMPT_COUNT - 1].st_score;
+ su->su_ga.ga_len = keep;
+ su->su_maxscore = stp[keep - 1].st_score;
}
}
@@ -5320,6 +5549,7 @@ spell_soundfold(slang, inword, res)
char_u word[MAXWLEN];
#ifdef FEAT_MBYTE
int l;
+ int found_mbyte = FALSE;
#endif
char_u *s;
char_u *t;
@@ -5333,26 +5563,30 @@ spell_soundfold(slang, inword, res)
int p0 = -333;
int c0;
- /* Remove accents, if wanted.
- * We actually remove all non-word characters. */
+ /* Remove accents, if wanted. We actually remove all non-word characters.
+ * But keep white space. */
if (slang->sl_rem_accents)
{
t = word;
for (s = inword; *s != NUL; )
{
+ if (vim_iswhite(*s))
+ *t++ = *s++;
#ifdef FEAT_MBYTE
- if (has_mbyte)
+ else if (has_mbyte)
{
l = mb_ptr2len_check(s);
if (SPELL_ISWORDP(s))
{
mch_memmove(t, s, l);
t += l;
+ if (l > 1)
+ found_mbyte = TRUE;
}
s += l;
}
- else
#endif
+ else
{
if (SPELL_ISWORDP(s))
*t++ = *s;
@@ -5362,12 +5596,34 @@ spell_soundfold(slang, inword, res)
*t = NUL;
}
else
+ {
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ for (s = inword; *s != NUL; s += l)
+ if ((l = mb_ptr2len_check(s)) > 1)
+ {
+ found_mbyte = TRUE;
+ break;
+ }
+#endif
STRCPY(word, inword);
+ }
+
+#ifdef FEAT_MBYTE
+ /* If there are multi-byte characters in the word return it as-is, because
+ * the following won't work. */
+ if (found_mbyte)
+ {
+ STRCPY(res, word);
+ return;
+ }
+#endif
ftp = (fromto_T *)slang->sl_sal.ga_data;
/*
* This comes from Aspell phonet.cpp. Converted from C++ to C.
+ * Changed to keep spaces.
* TODO: support for multi-byte chars.
*/
i = j = z = 0;
@@ -5433,7 +5689,8 @@ spell_soundfold(slang, inword, res)
if (*s == NUL
|| (*s == '^'
- && (i == 0 || !SPELL_ISWORDP(word + i - 1))
+ && (i == 0 || !(word[i - 1] == ' '
+ || SPELL_ISWORDP(word + i - 1)))
&& (*(s + 1) != '$'
|| (!SPELL_ISWORDP(word + i + k0))))
|| (*s == '$' && i > 0
@@ -5589,6 +5846,11 @@ spell_soundfold(slang, inword, res)
++n;
}
}
+ else if (vim_iswhite(c))
+ {
+ c = ' ';
+ k = 1;
+ }
if (z0 == 0)
{
@@ -5609,11 +5871,46 @@ spell_soundfold(slang, inword, res)
res[j] = NUL;
}
+#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
+/*
+ * Return the score for how much words sound different.
+ */
+ static int
+spell_sound_score(slang, goodword, badsound)
+ slang_T *slang;
+ char_u *goodword; /* good word */
+ char_u *badsound; /* sound-folded bad word */
+{
+ char_u fword[MAXWLEN];
+ char_u goodsound[MAXWLEN];
+ int score;
+
+ /* Case-fold the word, needed for sound folding. */
+ (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN);
+
+ /* sound-fold the good word */
+ spell_soundfold(slang, fword, goodsound);
+
+ /* compute the edit distance-score of the sounds */
+ score = spell_edit_score(badsound, goodsound);
+
+ /* Correction: adding/inserting "*" at the start (word starts with vowel)
+ * shouldn't be counted so much, vowels halfway the word aren't counted at
+ * all. */
+ if (*badsound != *goodsound && (*badsound == '*' || *goodsound == '*'))
+ score -= SCORE_DEL / 2;
+
+ return score;
+}
+#endif
+
/*
* Compute the "edit distance" to turn "badword" into "goodword". The less
* deletes/inserts/swaps are required the lower the score.
+ *
* The algorithm comes from Aspell editdist.cpp, edit_distance().
- * TODO: make this work with multi-byte chars.
+ * It has been converted from C++ to C and modified to support multi-byte
+ * characters.
*/
static int
spell_edit_score(badword, goodword)
@@ -5625,15 +5922,36 @@ spell_edit_score(badword, goodword)
int j, i;
int t;
int bc, gc;
+ int pbc, pgc;
+#ifdef FEAT_MBYTE
+ char_u *p;
+ int wbadword[MAXWLEN];
+ int wgoodword[MAXWLEN];
+
+ if (has_mbyte)
+ {
+ /* Get the characters from the multi-byte strings and put them in an
+ * int array for easy access. */
+ for (p = badword, badlen = 0; *p != NUL; )
+ wbadword[badlen++] = mb_ptr2char_adv(&p);
+ ++badlen;
+ for (p = goodword, goodlen = 0; *p != NUL; )
+ wgoodword[goodlen++] = mb_ptr2char_adv(&p);
+ ++goodlen;
+ }
+ else
+#endif
+ {
+ badlen = STRLEN(badword) + 1;
+ goodlen = STRLEN(goodword) + 1;
+ }
/* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
- badlen = STRLEN(badword) + 1;
- goodlen = STRLEN(goodword) + 1;
cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
TRUE);
- if (cnt == 0)
- return 0;
+ if (cnt == NULL)
+ return 0; /* out of memory */
CNT(0, 0) = 0;
for (j = 1; j <= goodlen; ++j)
@@ -5644,24 +5962,48 @@ spell_edit_score(badword, goodword)
CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
for (j = 1; j <= goodlen; ++j)
{
- bc = badword[i - 1];
- gc = goodword[j - 1];
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ bc = wbadword[i - 1];
+ gc = wgoodword[j - 1];
+ }
+ else
+#endif
+ {
+ bc = badword[i - 1];
+ gc = goodword[j - 1];
+ }
if (bc == gc)
CNT(i, j) = CNT(i - 1, j - 1);
else
{
/* Use a better score when there is only a case difference. */
- if (spelltab.st_fold[bc] == spelltab.st_fold[gc])
+ if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
else
CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
- if (i > 1 && j > 1 && bc == goodword[j - 2]
- && badword[i - 2] == gc)
+ if (i > 1 && j > 1)
{
- t = SCORE_SWAP + CNT(i - 2, j - 2);
- if (t < CNT(i, j))
- CNT(i, j) = t;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ pbc = wbadword[i - 2];
+ pgc = wgoodword[j - 2];
+ }
+ else
+#endif
+ {
+ pbc = badword[i - 2];
+ pgc = goodword[j - 2];
+ }
+ if (bc == pgc && pbc == gc)
+ {
+ t = SCORE_SWAP + CNT(i - 2, j - 2);
+ if (t < CNT(i, j))
+ CNT(i, j) = t;
+ }
}
t = SCORE_DEL + CNT(i - 1, j);
if (t < CNT(i, j))