diff options
author | Bram Moolenaar <Bram@vim.org> | 2005-08-19 20:32:47 +0000 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2005-08-19 20:32:47 +0000 |
commit | 5195e45609b26b3d262a90822a2283a4adcf65c9 (patch) | |
tree | 9ba1a4e7af2e732fa78e70e987b2677ddf943b07 | |
parent | 5b8d8fdb526bd21dbac64a3ea815a1e628b93b41 (diff) | |
download | vim-git-5195e45609b26b3d262a90822a2283a4adcf65c9.tar.gz |
updated for version 7.0132
-rw-r--r-- | runtime/doc/insert.txt | 9 | ||||
-rw-r--r-- | runtime/doc/map.txt | 10 | ||||
-rw-r--r-- | runtime/spell/en.ascii.spl | bin | 566603 -> 566616 bytes | |||
-rw-r--r-- | runtime/spell/en.utf-8.spl | bin | 569129 -> 569151 bytes | |||
-rw-r--r-- | runtime/spell/hu/hu_HU.diff | 78 | ||||
-rw-r--r-- | runtime/spell/main.aap | 9 | ||||
-rw-r--r-- | src/spell.c | 2117 |
7 files changed, 1487 insertions, 736 deletions
diff --git a/runtime/doc/insert.txt b/runtime/doc/insert.txt index 43b3d4f41..ae90eb954 100644 --- a/runtime/doc/insert.txt +++ b/runtime/doc/insert.txt @@ -1,4 +1,4 @@ -*insert.txt* For Vim version 7.0aa. Last change: 2005 Aug 11 +*insert.txt* For Vim version 7.0aa. Last change: 2005 Aug 17 VIM REFERENCE MANUAL by Bram Moolenaar @@ -899,8 +899,11 @@ CTRL-X CTRL-O Guess what kind of item is in front of the cursor and Spelling suggestions *compl-spelling* -The word in front of the cursor is located and correctly spelled words are -suggested to replace it. The word doesn't actually have to be badly spelled. +A word before or at the cursor is located and correctly spelled words are +suggested to replace it. If there is a badly spelled word in the line, before +or under the cursor, the cursor is moved to after it. Otherwise the word just +before the cursor is used for suggestions, even though it isn't badly spelled. + NOTE: CTRL-S suspends display in many Unix terminals. Use 's' instead. Type CTRL-Q to resume displaying. diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt index a135a481a..5fb03e916 100644 --- a/runtime/doc/map.txt +++ b/runtime/doc/map.txt @@ -281,10 +281,7 @@ last defined. Example: > n <C-W>* * <C-W><C-S>* Last set from /home/abcd/.vimrc -When the map was defined by hand there is no "Last set" message. When the map -was defined while executing a function, user command or autocommand, the -script in which it was defined is reported. -{not available when compiled without the +eval feature} +See |:verbose-cmd| for more information. *map_backslash* Note that only CTRL-V is mentioned here as a special character for mappings @@ -877,10 +874,7 @@ last defined. Example: > TOhtml 0 % :call Convert2HTML(<line1>, <line2>) Last set from /usr/share/vim/vim-7.0/plugin/tohtml.vim < -When the command was defined by hand there is no "Last set" message. 
When the -command was defined while executing a function, user command or autocommand, -the script in which it was defined is reported. -{not available when compiled without the +eval feature} +See |:verbose-cmd| for more information. *E174* *E182* :com[mand][!] [{attr}...] {cmd} {rep} diff --git a/runtime/spell/en.ascii.spl b/runtime/spell/en.ascii.spl Binary files differindex 9d2b068c4..360e1ae9c 100644 --- a/runtime/spell/en.ascii.spl +++ b/runtime/spell/en.ascii.spl diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl Binary files differindex 8b2edc692..9ba853ceb 100644 --- a/runtime/spell/en.utf-8.spl +++ b/runtime/spell/en.utf-8.spl diff --git a/runtime/spell/hu/hu_HU.diff b/runtime/spell/hu/hu_HU.diff index de22c794f..2b9a6ddf0 100644 --- a/runtime/spell/hu/hu_HU.diff +++ b/runtime/spell/hu/hu_HU.diff @@ -1,5 +1,5 @@ *** hu_HU.orig.aff Tue Aug 16 18:21:10 2005 ---- hu_HU.aff Tue Aug 16 19:42:34 2005 +--- hu_HU.aff Fri Aug 19 21:28:45 2005 *************** *** 57,62 **** @@ -16,8 +16,9 @@ ! #VERSION Magyar 0.99.4.2 SET ISO8859-2 *************** -*** 65,77 **** - COMPOUNDFLAG Y +*** 64,77 **** + COMPOUNDMIN 2 +! COMPOUNDFLAG Y ! COMPOUNDWORD 2 y ! COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû ! SYLLABLENUM klmc @@ -30,24 +31,85 @@ ! WORDCHARS -.§%°0123456789 ! HU_KOTOHANGZO Z ---- 65,80 ---- - COMPOUNDFLAG Y +--- 64,116 ---- + COMPOUNDMIN 2 ! #COMPOUNDWORD 2 y +! COMPOUNDMAX 2 +! # I don't understand what the "y" is for; if it's to disable compounding simply +! # remove the compound flag from the word. +! ! #COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû +! COMPOUNDSYLMAX 6 +! SYLLABLE a/á/e/é/i/í/o/ó/ö/õ/u/ú/ü/û +! # Strange that every vowel is counted as a syllable, that's how the hunspell +! # code works. +! ! #SYLLABLENUM klmc +! # Don't understand what this is for +! +! #COMPOUNDFLAG Y ! #COMPOUNDFIRST v ! #COMPOUNDLAST x +! COMPOUNDFLAGS Y+ +! COMPOUNDFLAGS vY*x +! COMPOUNDFLAGS Y+x +! COMPOUNDFLAGS vY+ +! ! #FORBIDDENWORD w -! BAD w +! 
# I don't understand what FORBIDDENWORD is needed for, using NEEDAFFIX +! # (ONLYROOT) should be sufficient. +! ! #ONLYROOT u +! NEEDAFFIX u +! ! #ACCENT áéíóöõúüû aeiooouuu +! MAP 5 +! MAP aáä +! MAP eé +! MAP ií +! MAP oóöõ +! MAP uúüû +! ! #CHECKNUM +! # Vim always handles numbers in the same way. +! ! #WORDCHARS -.§%°0123456789 +! FOL ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%° +! LOW ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%° +! UPP ¡¢£¥¦¨©ª«¬®¯±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßÿ-§%° +! MIDWORD . +! ! #HU_KOTOHANGZO Z ! +! # There are soooo many affixes. Postpone the prefixes to keep the time needed +! # for generating the .spl within reasonable limits. ! PFXPOSTPONE *************** +*** 81,96 **** + +! REP 89 +! REP í i +! REP i í +! REP ó o +! REP o ó +! REP o õ +! REP ú u +! REP u ú +! REP u û +! REP û ü +! REP ü û + REP j ly + REP ly j +- REP a ä # Handel->Händel + REP S © # Skoda->©koda +--- 120,124 ---- + +! REP 78 + REP j ly + REP ly j + REP S © # Skoda->©koda +*************** *** 173,241 **** - # character conversion table @@ -118,11 +180,11 @@ - CHR 123 U3 Û - SFX z Y 6 ---- 176,177 ---- +--- 201,202 ---- *************** *** 17678,17681 **** PFX D 0 leg . - - 1 - ---- 17614 ---- +--- 17639 ---- diff --git a/runtime/spell/main.aap b/runtime/spell/main.aap index 1e37c1cc3..5aaaaa102 100644 --- a/runtime/spell/main.aap +++ b/runtime/spell/main.aap @@ -4,19 +4,22 @@ # aap generate all the .spl files # aap diff create all the diff files -LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi +LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi hu +# "hu" is at the end, because it takes so much time. +# # TODO: # Finnish doesn't work, the dictionary fi_FI.zip file contains hyphenation... diff: $*LANG/diff - :print done + :print Done. @for l in string.split(_no.LANG): :child $l/main.aap # The existing .spl files need to be generated when the spell file format # changes. 
Depending on the Vim executable does that, but results in doing it -# much too often. Generate a dummy .spl file and check if it changed. +# much too often. Generate a dummy .spl file and let the .spl depend on it, so +# that they are outdated when it changes. :child check/main.aap *.spl: check/check.latin1.spl diff --git a/src/spell.c b/src/spell.c index a3fc24f72..30b08ae98 100644 --- a/src/spell.c +++ b/src/spell.c @@ -56,16 +56,6 @@ # define SPELL_PRINTTREE #endif -/* SPELL_COMPRESS_CNT is after how many allocated blocks we compress the tree - * to limit the amount of memory used (esp. for Italian and Hungarian). The - * amount of memory used for nodes then is SPELL_COMPRESS_CNT times - * SBLOCKSIZE. - * Then compress again after allocating SPELL_COMPRESS_INC more blocks or - * adding SPELL_COMPRESS_ADDED words and running out of memory again. */ -#define SPELL_COMPRESS_CNT 30000 -#define SPELL_COMPRESS_INC 100 -#define SPELL_COMPRESS_ADDED 500000 - /* * Use this to adjust the score after finding suggestions, based on the * suggested word sounding like the bad word. This is much faster than doing @@ -78,94 +68,102 @@ /* * Vim spell file format: <HEADER> - * <SUGGEST> + * <SECTIONS> * <LWORDTREE> * <KWORDTREE> * <PREFIXTREE> * - * <HEADER>: <fileID> - * <regioncnt> <regionname> ... - * <charflagslen> <charflags> - * <fcharslen> <fchars> - * <midwordlen> <midword> - * <compoundlen> <compoundtype> <compoundinfo> - * <prefcondcnt> <prefcond> ... + * <HEADER>: <fileID> <versionnr> * - * <fileID> 10 bytes "VIMspell10" - * <regioncnt> 1 byte number of regions following (8 supported) - * <regionname> 2 bytes Region name: ca, au, etc. Lower case. - * First <regionname> is region 1. + * <fileID> 8 bytes "VIMspell" + * <versionnr> 1 byte VIMSPELLVERSION * - * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 
- * <charflags> N bytes List of flags (first one is for character 128): - * 0x01 word character CF_WORD - * 0x02 upper-case character CF_UPPER - * <fcharslen> 2 bytes Number of bytes in <fchars>. - * <fchars> N bytes Folded characters, first one is for character 128. * - * <midwordlen> 2 bytes Number of bytes in <midword>. - * <midword> N bytes Characters that are word characters only when used - * in the middle of a word. + * Sections make it possible to add information to the .spl file without + * making it incompatible with previous versions. There are two kinds of + * sections: + * 1. Not essential for correct spell checking. E.g. for making suggestions. + * These are skipped when not supported. + * 2. Optional information, but essential for spell checking when present. + * E.g. conditions for affixes. When this section is present but not + * supported an error message is given. * - * <compoundlen> 2 bytes Number of bytes following for compound info (can - * be used to skip it when it's not understood). + * <SECTIONS>: <section> ... <sectionend> * - * <compoundtype 1 byte 1: compound words using <comp1minlen> and - * <comp1flags> + * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) * - * <comp1minlen> 1 byte minimal word length for compounding + * <sectionID> 1 byte number from 0 to 254 identifying the section * - * <comp1flags> N bytes flags used for compounding words + * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct + * spell checking * + * <sectionlen> 4 bytes length of section contents, MSB first * - * <prefcondcnt> 2 bytes Number of <prefcond> items following. - * - * <prefcond> : <condlen> <condstr> + * <sectionend> 1 byte SN_END * - * <condlen> 1 byte Length of <condstr>. * - * <condstr> N bytes Condition for the prefix. + * sectionID == SN_REGION: <regionname> ... + * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. + * First <regionname> is region 1. 
* + * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> + * <folcharslen> <folchars> + * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). + * <charflags> N bytes List of flags (first one is for character 128): + * 0x01 word character CF_WORD + * 0x02 upper-case character CF_UPPER + * <folcharslen> 2 bytes Number of bytes in <folchars>. + * <folchars> N bytes Folded characters, first one is for character 128. * - * <SUGGEST> : <repcount> <rep> ... - * <salflags> <salcount> <sal> ... - * <maplen> <mapstr> + * sectionID == SN_MIDWORD: <midword> + * <midword> N bytes Characters that are word characters only when used + * in the middle of a word. * - * <repcount> 2 bytes number of <rep> items, MSB first. + * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... + * <prefcondcnt> 2 bytes Number of <prefcond> items following. + * <prefcond> : <condlen> <condstr> + * <condlen> 1 byte Length of <condstr>. + * <condstr> N bytes Condition for the prefix. * + * sectionID == SN_REP: <repcount> <rep> ... + * <repcount> 2 bytes number of <rep> items, MSB first. * <rep> : <repfromlen> <repfrom> <reptolen> <repto> + * <repfromlen> 1 byte length of <repfrom> + * <repfrom> N bytes "from" part of replacement + * <reptolen> 1 byte length of <repto> + * <repto> N bytes "to" part of replacement * - * <repfromlen> 1 byte length of <repfrom> - * - * <repfrom> N bytes "from" part of replacement - * - * <reptolen> 1 byte length of <repto> - * - * <repto> N bytes "to" part of replacement - * - * <salflags> 1 byte flags for soundsalike conversion: + * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
+ * <salflags> 1 byte flags for soundsalike conversion: * SAL_F0LLOWUP * SAL_COLLAPSE * SAL_REM_ACCENTS - * SAL_SOFO: SOFOFROM and SOFOTO used instead of SAL - * - * <salcount> 2 bytes number of <sal> items following - * + * <salcount> 2 bytes number of <sal> items following * <sal> : <salfromlen> <salfrom> <saltolen> <salto> + * <salfromlen> 1 byte length of <salfrom> + * <salfrom> N bytes "from" part of soundsalike + * <saltolen> 1 byte length of <salto> + * <salto> N bytes "to" part of soundsalike * - * <salfromlen> 1-2 bytes length of <salfrom> (2 bytes for SAL_SOFO) - * - * <salfrom> N bytes "from" part of soundsalike - * - * <saltolen> 1-2 bytes length of <salto> (2 bytes for SAL_SOFO) - * - * <salto> N bytes "to" part of soundsalike + * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> + * <sofofromlen> 2 bytes length of <sofofrom> + * <sofofrom> N bytes "from" part of soundfold + * <sofotolen> 2 bytes length of <sofoto> + * <sofoto> N bytes "to" part of soundfold * - * <maplen> 2 bytes length of <mapstr>, MSB first - * - * <mapstr> N bytes String with sequences of similar characters, + * sectionID == SN_MAP: <mapstr> + * <mapstr> N bytes String with sequences of similar characters, * separated by slashes. * + * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags> + * <compmax> 1 byte Maximum nr of words in compound word. + * <compminlen> 1 byte Minimal word length for compounding. + * <compsylmax> 1 byte Maximum nr of syllables in compound word. + * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by + * slashes. + * + * sectionID == SN_SYLLABLE: <syllable> + * <syllable> N bytes String from SYLLABLE item. * * <LWORDTREE>: <wordtree> * @@ -333,6 +331,12 @@ typedef int salfirst_T; typedef short salfirst_T; #endif +/* Values for SP_*ERROR are negative, positive values are used by + * read_cnt_string(). 
*/ +#define SP_TRUNCERROR -1 /* spell file truncated error */ +#define SP_FORMERROR -2 /* format error in spell file */ +#define SP_ERROR -3 /* other error while reading spell file */ + /* * Structure used to store words and other info for one language, loaded from * a .spl file. @@ -367,8 +371,14 @@ struct slang_S char_u *sl_midword; /* MIDWORD string or NULL */ - int sl_compminlen; /* COMPOUNDMIN */ - char_u *sl_compflags; /* COMPOUNDFLAGS (NULL when no compounding) */ + int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */ + int sl_compminlen; /* COMPOUNDMIN (default: MAXWLEN) */ + int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ + regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp progrm + * (NULL when no compounding) */ + char_u *sl_compstartflags; /* flags for first compound word */ + char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ + garray_T sl_syl_items; /* syllable items */ int sl_prefixcnt; /* number of items in "sl_prefprog" */ regprog_T **sl_prefprog; /* table with regprogs for prefixes */ @@ -402,7 +412,6 @@ static slang_T *first_lang = NULL; #define SAL_F0LLOWUP 1 #define SAL_COLLAPSE 2 #define SAL_REM_ACCENTS 4 -#define SAL_SOFO 8 /* SOFOFROM and SOFOTO instead of SAL */ /* * Structure used in "b_langp", filled from 'spelllang'. @@ -417,6 +426,25 @@ typedef struct langp_S #define REGION_ALL 0xff /* word valid in all regions */ +#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ +#define VIMSPELLMAGICL 8 +#define VIMSPELLVERSION 50 + +/* Section IDs. Only renumber them when VIMSPELLVERSION changes! 
*/ +#define SN_REGION 0 /* <regionname> section */ +#define SN_CHARFLAGS 1 /* charflags section */ +#define SN_MIDWORD 2 /* <midword> section */ +#define SN_PREFCOND 3 /* <prefcond> section */ +#define SN_REP 4 /* REP items section */ +#define SN_SAL 5 /* SAL items section */ +#define SN_SOFO 6 /* soundfolding section */ +#define SN_MAP 7 /* MAP items section */ +#define SN_COMPOUND 8 /* compound words section */ +#define SN_SYLLABLE 9 /* syllable section */ +#define SN_END 255 /* end of sections */ + +#define SNF_REQUIRED 1 /* <sectionflags>: required section */ + /* Result values. Lower number is accepted over higher one. */ #define SP_BANNED -1 #define SP_OK 0 @@ -424,9 +452,6 @@ typedef struct langp_S #define SP_LOCAL 2 #define SP_BAD 3 -#define VIMSPELLMAGIC "VIMspell10" /* string at start of Vim spell file */ -#define VIMSPELLMAGICL 10 - /* file used for "zG" and "zW" */ static char_u *int_wordlist = NULL; @@ -472,7 +497,7 @@ typedef struct suggest_S /* score for various changes */ #define SCORE_SPLIT 149 /* split bad word */ #define SCORE_ICASE 52 /* slightly different case */ -#define SCORE_REGION 70 /* word is for different region */ +#define SCORE_REGION 200 /* word is for different region */ #define SCORE_RARE 180 /* rare word */ #define SCORE_SWAP 90 /* swap two characters */ #define SCORE_SWAP3 110 /* swap two characters in three */ @@ -527,6 +552,8 @@ typedef struct matchinf_S /* for when checking a compound word */ int mi_compoff; /* start of following word offset */ + char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ + int mi_complen; /* nr of compound words used */ /* others */ int mi_result; /* result so far: SP_BAD, SP_OK, etc. 
*/ @@ -559,7 +586,7 @@ static int spell_iswordp_nmw __ARGS((char_u *p)); #ifdef FEAT_MBYTE static int spell_iswordp_w __ARGS((int *p, buf_T *buf)); #endif -static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); +static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); /* * For finding suggestions: At each node in the tree these states are tried: @@ -608,8 +635,9 @@ typedef struct trystate_S char_u ts_isdiff; /* DIFF_ values */ char_u ts_fcharstart; /* index in fword where badword char started */ #endif - char_u ts_save_prewordlen; /* saved "prewordlen" */ - char_u ts_save_splitoff; /* su_splitoff saved here */ + char_u ts_prewordlen; /* length of word in "preword[]" */ + char_u ts_splitoff; /* index in "tword" after last split */ + char_u ts_complen; /* nr of compound words used */ char_u ts_save_badflags; /* su_badflags saved here */ } trystate_T; @@ -634,7 +662,7 @@ static slang_T *slang_alloc __ARGS((char_u *lang)); static void slang_free __ARGS((slang_T *lp)); static void slang_clear __ARGS((slang_T *lp)); static void find_word __ARGS((matchinf_T *mip, int mode)); -static int can_compound __ARGS((slang_T *slang, int flags)); +static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); static void find_prefix __ARGS((matchinf_T *mip)); static int fold_more __ARGS((matchinf_T *mip)); @@ -646,6 +674,16 @@ static void int_wordlist_spl __ARGS((char_u *fname)); static void spell_load_cb __ARGS((char_u *fname, void *cookie)); static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp)); +static char_u *read_string __ARGS((FILE *fd, int cnt)); +static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); +static int read_charflags_section __ARGS((FILE *fd)); +static int 
read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); +static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); +static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); +static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); +static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); +static int init_syl_tab __ARGS((slang_T *slang)); +static int count_syllables __ARGS((slang_T *slang, char_u *word)); static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to)); static void set_sal_first __ARGS((slang_T *lp)); #ifdef FEAT_MBYTE @@ -658,9 +696,8 @@ static int find_region __ARGS((char_u *rp, char_u *region)); static int captype __ARGS((char_u *word, char_u *end)); static int badword_captype __ARGS((char_u *word, char_u *end)); static void spell_reload_one __ARGS((char_u *fname, int added_word)); -static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); +static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); -static void write_spell_chartab __ARGS((FILE *fd)); static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); @@ -777,11 +814,13 @@ spell_check(wp, ptr, attrp, capcol) be passed to functions quickly. */ int nrlen = 0; /* found a number first */ int c; + int wrongcaplen = 0; /* A word never starts at a space or a control character. Return quickly * then, skipping over the character. */ if (*ptr <= ' ') return 1; + vim_memset(&mi, 0, sizeof(matchinf_T)); /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and * 0X99FF. But when a word character follows do check spelling to find @@ -818,10 +857,7 @@ spell_check(wp, ptr, attrp, capcol) /* Check word starting with capital letter. 
*/ c = PTR2CHAR(ptr); if (!SPELL_ISUPPER(c)) - { - *attrp = highlight_attr[HLF_SPC]; - return (int)(mi.mi_fend - ptr); - } + wrongcaplen = (int)(mi.mi_fend - ptr); } } if (capcol != NULL) @@ -832,12 +868,10 @@ spell_check(wp, ptr, attrp, capcol) mi.mi_end = mi.mi_fend; /* Check caps type later. */ - mi.mi_capflags = 0; - mi.mi_cend = NULL; mi.mi_buf = wp->w_buffer; - /* Include one non-word character so that we can check for the - * word end. */ + /* case-fold the word with one non-word character, so that we can check + * for the word end. */ if (*mi.mi_fend != NUL) mb_ptr_adv(mi.mi_fend); @@ -897,6 +931,10 @@ spell_check(wp, ptr, attrp, capcol) #endif return 1; } + else if (mi.mi_end == ptr) + /* Always include at least one character. Required for when there + * is a mixup in "midword". */ + mb_ptr_adv(mi.mi_end); if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) *attrp = highlight_attr[HLF_SPB]; @@ -906,6 +944,13 @@ spell_check(wp, ptr, attrp, capcol) *attrp = highlight_attr[HLF_SPL]; } + if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) + { + /* Report SpellCap only when the word isn't badly spelled. */ + *attrp = highlight_attr[HLF_SPC]; + return wrongcaplen; + } + return (int)(mi.mi_end - ptr); } @@ -1085,7 +1130,7 @@ find_word(mip, mode) #endif if (spell_iswordp(ptr + wlen, mip->mi_buf)) { - if (slang->sl_compflags == NULL) + if (slang->sl_compprog == NULL) continue; /* next char is a word character */ word_ends = FALSE; } @@ -1157,16 +1202,45 @@ find_word(mip, mode) if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND || !word_ends) { - /* Makes you wonder why someone puts a compound flag on a word + /* If there is no flag or the word is shorter than + * COMPOUNDMIN reject it quickly. + * Makes you wonder why someone puts a compound flag on a word * that's too short... Myspell compatibility requires this * anyway. 
*/ - if (wlen < slang->sl_compminlen) + if (((unsigned)flags >> 24) == 0 || wlen < slang->sl_compminlen) + continue; + + /* Limit the number of compound words to COMPOUNDMAX. */ + if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax) continue; - /* The word doesn't end or it comes after another: it must - * have a compound flag. */ - if (!can_compound(slang, flags)) + /* At start of word quickly check if compounding is possible + * with this flag. */ + if (mip->mi_complen == 0 + && vim_strchr(slang->sl_compstartflags, + ((unsigned)flags >> 24)) == NULL) continue; + + /* If the word ends the sequence of compound flags of the + * words must match with one of the COMPOUNDFLAGS items and + * the number of syllables must not be too large. */ + mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); + mip->mi_compflags[mip->mi_complen + 1] = NUL; + if (word_ends) + { + char_u fword[MAXWLEN]; + + if (slang->sl_compsylmax < MAXWLEN) + { + /* "fword" is only needed for checking syllables. */ + if (ptr == mip->mi_word) + (void)spell_casefold(ptr, wlen, fword, MAXWLEN); + else + vim_strncpy(fword, ptr, endlen[endidxcnt]); + } + if (!can_compound(slang, fword, mip->mi_compflags)) + continue; + } } if (!word_ends) @@ -1193,13 +1267,17 @@ find_word(mip, mode) } } #endif + ++mip->mi_complen; find_word(mip, FIND_COMPOUND); + --mip->mi_complen; if (mip->mi_result == SP_OK) break; /* Find following word in keep-case tree. */ mip->mi_compoff = wlen; + ++mip->mi_complen; find_word(mip, FIND_KEEPCOMPOUND); + --mip->mi_complen; if (mip->mi_result == SP_OK) break; continue; @@ -1239,16 +1317,29 @@ find_word(mip, mode) } /* - * Return TRUE if "flags" has a valid compound flag. - * TODO: check flags in a more advanced way. + * Return TRUE if "flags" is a valid sequence of compound flags and + * "word[len]" does not have too many syllables. 
*/ static int -can_compound(slang, flags) +can_compound(slang, word, flags) slang_T *slang; - int flags; + char_u *word; + char_u *flags; { - return slang->sl_compflags != NULL - && *slang->sl_compflags == ((unsigned)flags >> 24); + regmatch_T regmatch; + + if (slang->sl_compprog == NULL) + return FALSE; + regmatch.regprog = slang->sl_compprog; + regmatch.rm_ic = FALSE; + if (!vim_regexec(&regmatch, flags, 0)) + return FALSE; + + /* Count the number of syllables. This may be slow, do it last. */ + if (slang->sl_compsylmax < MAXWLEN + && count_syllables(slang, word) > slang->sl_compsylmax) + return FALSE; + return TRUE; } /* @@ -1480,6 +1571,8 @@ no_spell_checking() /* * Move to next spell error. * "curline" is TRUE for "z?": find word under/after cursor in the same line. + * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move + * to after badly spelled word before the cursor. * Return OK if found, FAIL otherwise. */ int @@ -1599,6 +1692,10 @@ spell_move_to(dir, allwords, curline) vim_free(buf); return OK; } + else if (curline) + /* Insert mode completion: put cursor after + * the bad word. */ + found_pos.col += len; } } } @@ -1609,19 +1706,20 @@ spell_move_to(dir, allwords, curline) capcol -= len; } + if (dir == BACKWARD && found_pos.lnum != 0) + { + /* Use the last match in the line. */ + curwin->w_cursor = found_pos; + vim_free(buf); + return OK; + } + if (curline) break; /* only check cursor line */ /* Advance to next line. */ if (dir == BACKWARD) { - if (found_pos.lnum != 0) - { - /* Use the last match in the line. */ - curwin->w_cursor = found_pos; - vim_free(buf); - return OK; - } if (lnum == 1) break; --lnum; @@ -1715,8 +1813,8 @@ spell_load_lang(lang) } if (r == FAIL) - smsg((char_u *)_("Warning: Cannot find word list \"%s\""), - fname_enc + 6); + smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), + lang, spell_enc(), lang); else if (*langcp != NUL) { /* Load all the additions.
*/ @@ -1767,6 +1865,9 @@ slang_alloc(lang) { lp->sl_name = vim_strsave(lang); ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); + lp->sl_compmax = MAXWLEN; + lp->sl_compminlen = MAXWLEN; + lp->sl_compsylmax = MAXWLEN; } return lp; } @@ -1853,8 +1954,14 @@ slang_clear(lp) vim_free(lp->sl_midword); lp->sl_midword = NULL; - vim_free(lp->sl_compflags); - lp->sl_compflags = NULL; + vim_free(lp->sl_compprog); + vim_free(lp->sl_compstartflags); + lp->sl_compprog = NULL; + lp->sl_compstartflags = NULL; + + vim_free(lp->sl_syllable); + lp->sl_syllable = NULL; + ga_clear(&lp->sl_syl_items); #ifdef FEAT_MBYTE { @@ -1870,6 +1977,11 @@ slang_clear(lp) } hash_clear(&lp->sl_map_hash); #endif + + lp->sl_compmax = MAXWLEN; + lp->sl_compminlen = MAXWLEN; + lp->sl_compsylmax = MAXWLEN; + lp->sl_regions[0] = NUL; } /* @@ -1902,7 +2014,7 @@ spell_load_file(fname, lang, old_lp, silent) int silent; /* no error if file doesn't exist */ { FILE *fd; - char_u buf[MAXWLEN + 1]; + char_u buf[VIMSPELLMAGICL]; char_u *p; char_u *bp; idx_T *ip; @@ -1912,15 +2024,10 @@ spell_load_file(fname, lang, old_lp, silent) int round; char_u *save_sourcing_name = sourcing_name; linenr_T save_sourcing_lnum = sourcing_lnum; - int cnt, ccnt; - char_u *fol; slang_T *lp = NULL; - garray_T *gap; - fromto_T *ftp; - salitem_T *smp; - short *first; idx_T idx; int c = 0; + int res; fd = mch_fopen((char *)fname, "r"); if (fd == NULL) @@ -1964,332 +2071,122 @@ spell_load_file(fname, lang, old_lp, silent) sourcing_lnum = 0; /* <HEADER>: <fileID> - * <regioncnt> <regionname> ... - * <charflagslen> <charflags> - * <fcharslen> <fchars> - * <midwordlen> <midword> - * <compoundlen> <compoundtype> <compoundinfo> - * <prefcondcnt> <prefcond> ... 
*/ for (i = 0; i < VIMSPELLMAGICL; ++i) buf[i] = getc(fd); /* <fileID> */ if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { - EMSG(_("E757: Wrong file ID in spell file")); + EMSG(_("E757: This does not look like a spell file")); goto endFAIL; } - - cnt = getc(fd); /* <regioncnt> */ - if (cnt < 0) + c = getc(fd); /* <versionnr> */ + if (c < VIMSPELLVERSION) { -truncerr: - EMSG(_(e_spell_trunc)); + EMSG(_("E771: Old spell file, needs to be updated")); goto endFAIL; } - if (cnt > 8) + else if (c > VIMSPELLVERSION) { -formerr: - EMSG(_(e_format)); + EMSG(_("E772: Spell file is for newer version of Vim")); goto endFAIL; } - for (i = 0; i < cnt; ++i) - { - lp->sl_regions[i * 2] = getc(fd); /* <regionname> */ - lp->sl_regions[i * 2 + 1] = getc(fd); - } - lp->sl_regions[cnt * 2] = NUL; - /* <charflagslen> <charflags> */ - p = read_cnt_string(fd, 1, &cnt); - if (cnt < 0) - goto endFAIL; - /* <fcharslen> <fchars> */ - fol = read_cnt_string(fd, 2, &ccnt); - if (ccnt < 0) + /* + * <SECTIONS>: <section> ... <sectionend> + * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) + */ + for (;;) { - vim_free(p); - goto endFAIL; - } - - /* Set the word-char flags and fill SPELL_ISUPPER() table. */ - if (p != NULL && fol != NULL) - i = set_spell_charflags(p, cnt, fol); - - vim_free(p); - vim_free(fol); - - /* When <charflagslen> is zero then <fcharlen> must also be zero. 
*/ - if ((p == NULL) != (fol == NULL)) - goto formerr; - - /* <midwordlen> <midword> */ - lp->sl_midword = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) - goto endFAIL; + n = getc(fd); /* <sectionID> or <sectionend> */ + if (n == SN_END) + break; + c = getc(fd); /* <sectionflags> */ + len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); + /* <sectionlen> */ + if (len < 0) + goto truncerr; - /* <compoundlen> <compoundtype> <compoundinfo> */ - cnt = (getc(fd) << 8) + getc(fd); /* <compoundlen> */ - if (cnt < 0) - goto endFAIL; - if (cnt > 0) - { - --cnt; - c = getc(fd); /* <compoundtype> */ - if (c != 1) + res = 0; + switch (n) { - /* Unknown kind of compound words, skip the info. */ - while (cnt-- > 0) - getc(fd); - } - else if (cnt < 2) - goto formerr; - else - { - --cnt; - c = getc(fd); /* <comp1minlen> */ - if (c < 1 || c > 50) - c = 3; - lp->sl_compminlen = c; + case SN_REGION: + res = read_region_section(fd, lp, len); + break; - p = alloc(cnt + 1); - if (p == NULL) - goto endFAIL; - lp->sl_compflags = p; - while (cnt-- > 0) - *p++ = getc(fd); /* <comp1flags> */ - *p = NUL; - } - } + case SN_CHARFLAGS: + res = read_charflags_section(fd); + break; + case SN_MIDWORD: + lp->sl_midword = read_string(fd, len); /* <midword> */ + if (lp->sl_midword == NULL) + goto endFAIL; + break; - /* <prefcondcnt> <prefcond> ... */ - cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ - if (cnt > 0) - { - lp->sl_prefprog = (regprog_T **)alloc_clear( - (unsigned)sizeof(regprog_T *) * cnt); - if (lp->sl_prefprog == NULL) - goto endFAIL; - lp->sl_prefixcnt = cnt; + case SN_PREFCOND: + res = read_prefcond_section(fd, lp); + break; - for (i = 0; i < cnt; ++i) - { - /* <prefcond> : <condlen> <condstr> */ - n = getc(fd); /* <condlen> */ - if (n < 0 || n >= MAXWLEN) - goto formerr; - /* When <condlen> is zero we have an empty condition. Otherwise - * compile the regexp program used to check for the condition. 
*/ - if (n > 0) - { - buf[0] = '^'; /* always match at one position only */ - p = buf + 1; - while (n-- > 0) - *p++ = getc(fd); /* <condstr> */ - *p = NUL; - lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); - } - } - } + case SN_REP: + res = read_rep_section(fd, lp); + break; + case SN_SAL: + res = read_sal_section(fd, lp); + break; - /* <SUGGEST> : <repcount> <rep> ... - * <salflags> <salcount> <sal> ... - * <maplen> <mapstr> */ + case SN_SOFO: + res = read_sofo_section(fd, lp); + break; - cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ - if (cnt < 0) - goto formerr; + case SN_MAP: + p = read_string(fd, len); /* <mapstr> */ + if (p == NULL) + goto endFAIL; + set_map_str(lp, p); + vim_free(p); + break; - gap = &lp->sl_rep; - if (ga_grow(gap, cnt) == FAIL) - goto endFAIL; + case SN_COMPOUND: + res = read_compound(fd, lp, len); + break; - /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ - for (; gap->ga_len < cnt; ++gap->ga_len) - { - ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; - ftp->ft_from = read_cnt_string(fd, 1, &i); - if (i <= 0) - goto endFAIL; - ftp->ft_to = read_cnt_string(fd, 1, &i); - if (i <= 0) + case SN_SYLLABLE: + lp->sl_syllable = read_string(fd, len); /* <syllable> */ + if (lp->sl_syllable == NULL) + goto endFAIL; + if (init_syl_tab(lp) == FAIL) + goto endFAIL; + break; + + default: + /* Unsupported section. When it's required give an error + * message. When it's not required skip the contents. */ + if (c & SNF_REQUIRED) + { + EMSG(_("E770: Unsupported section in spell file")); + goto endFAIL; + } + while (--len >= 0) + if (getc(fd) < 0) + goto truncerr; + break; + } + if (res == SP_FORMERROR) { - vim_free(ftp->ft_from); +formerr: + EMSG(_(e_format)); goto endFAIL; } - } - - /* Fill the first-index table. 
*/ - first = lp->sl_rep_first; - for (i = 0; i < 256; ++i) - first[i] = -1; - for (i = 0; i < gap->ga_len; ++i) - { - ftp = &((fromto_T *)gap->ga_data)[i]; - if (first[*ftp->ft_from] == -1) - first[*ftp->ft_from] = i; - } - - i = getc(fd); /* <salflags> */ - if (i & SAL_F0LLOWUP) - lp->sl_followup = TRUE; - if (i & SAL_COLLAPSE) - lp->sl_collapse = TRUE; - if (i & SAL_REM_ACCENTS) - lp->sl_rem_accents = TRUE; - if (i & SAL_SOFO) - lp->sl_sofo = TRUE; - else - lp->sl_sofo = FALSE; - - cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */ - if (cnt < 0) - goto formerr; - - if (lp->sl_sofo) - { - /* - * SOFOFROM and SOFOTO items come in one <salfrom> and <salto> - */ - if (cnt != 1) - goto formerr; - - /* <salfromlen> <salfrom> */ - bp = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) - goto endFAIL; - - /* <saltolen> <salto> */ - fol = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) + if (res == SP_TRUNCERROR) { - vim_free(bp); +truncerr: + EMSG(_(e_spell_trunc)); goto endFAIL; } - - /* Store the info in lp->sl_sal and/or lp->sl_sal_first. */ - if (bp != NULL && fol != NULL) - i = set_sofo(lp, bp, fol); - else if (bp != NULL || fol != NULL) - i = FAIL; /* only one of two strings is an error */ - else - i = OK; - - vim_free(bp); - vim_free(fol); - if (i == FAIL) - goto formerr; - } - else - { - /* - * SAL items - */ - gap = &lp->sl_sal; - ga_init2(gap, sizeof(salitem_T), 10); - if (ga_grow(gap, cnt) == FAIL) + if (res == SP_ERROR) goto endFAIL; - - /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ - for (; gap->ga_len < cnt; ++gap->ga_len) - { - smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; - ccnt = getc(fd); /* <salfromlen> */ - if (ccnt < 0) - goto formerr; - if ((p = alloc(ccnt + 2)) == NULL) - goto endFAIL; - smp->sm_lead = p; - - /* Read up to the first special char into sm_lead. 
*/ - for (i = 0; i < ccnt; ++i) - { - c = getc(fd); /* <salfrom> */ - if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) - break; - *p++ = c; - } - smp->sm_leadlen = p - smp->sm_lead; - *p++ = NUL; - - /* Put (abc) chars in sm_oneof, if any. */ - if (c == '(') - { - smp->sm_oneof = p; - for (++i; i < ccnt; ++i) - { - c = getc(fd); /* <salfrom> */ - if (c == ')') - break; - *p++ = c; - } - *p++ = NUL; - if (++i < ccnt) - c = getc(fd); - } - else - smp->sm_oneof = NULL; - - /* Any following chars go in sm_rules. */ - smp->sm_rules = p; - if (i < ccnt) - /* store the char we got while checking for end of sm_lead */ - *p++ = c; - for (++i; i < ccnt; ++i) - *p++ = getc(fd); /* <salfrom> */ - *p++ = NUL; - - /* <saltolen> <salto> */ - smp->sm_to = read_cnt_string(fd, 1, &ccnt); - if (ccnt < 0) - { - vim_free(smp->sm_lead); - goto formerr; - } - -#ifdef FEAT_MBYTE - if (has_mbyte) - { - /* convert the multi-byte strings to wide char strings */ - smp->sm_lead_w = mb_str2wide(smp->sm_lead); - smp->sm_leadlen = mb_charlen(smp->sm_lead); - if (smp->sm_oneof == NULL) - smp->sm_oneof_w = NULL; - else - smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); - if (smp->sm_to == NULL) - smp->sm_to_w = NULL; - else - smp->sm_to_w = mb_str2wide(smp->sm_to); - if (smp->sm_lead_w == NULL - || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) - || (smp->sm_to_w == NULL && smp->sm_to != NULL)) - { - vim_free(smp->sm_lead); - vim_free(smp->sm_to); - vim_free(smp->sm_lead_w); - vim_free(smp->sm_oneof_w); - vim_free(smp->sm_to_w); - goto endFAIL; - } - } -#endif - } - - /* Fill the first-index table. */ - set_sal_first(lp); - } - - /* <maplen> <mapstr> */ - p = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) - goto endFAIL; - if (p != NULL) - { - set_map_str(lp, p); - vim_free(p); } /* round 1: <LWORDTREE> @@ -2367,7 +2264,8 @@ endOK: * Read a length field from "fd" in "cnt_bytes" bytes. * Allocate memory, read the string into it and add a NUL at the end. * Returns NULL when the count is zero. 
- * Sets "*cntp" to -1 when there is an error, length of the result otherwise. + * Sets "*cntp" to SP_*ERROR when there is an error, length of the result + * otherwise. */ static char_u * read_cnt_string(fd, cnt_bytes, cntp) @@ -2384,33 +2282,600 @@ read_cnt_string(fd, cnt_bytes, cntp) cnt = (cnt << 8) + getc(fd); if (cnt < 0) { - EMSG(_(e_spell_trunc)); - *cntp = -1; + *cntp = SP_TRUNCERROR; return NULL; } *cntp = cnt; if (cnt == 0) return NULL; /* nothing to read, return NULL */ + str = read_string(fd, cnt); + if (str == NULL) + *cntp = SP_ERROR; + return str; +} + +/* + * Read a string of length "cnt" from "fd" into allocated memory. + * Returns NULL when out of memory. + */ + static char_u * +read_string(fd, cnt) + FILE *fd; + int cnt; +{ + char_u *str; + int i; + /* allocate memory */ str = alloc((unsigned)cnt + 1); - if (str == NULL) + if (str != NULL) { - *cntp = -1; - return NULL; + /* Read the string. Doesn't check for truncated file. */ + for (i = 0; i < cnt; ++i) + str[i] = getc(fd); + str[i] = NUL; + } + return str; +} + +/* + * Read SN_REGION: <regionname> ... + * Return SP_*ERROR flags. + */ + static int +read_region_section(fd, lp, len) + FILE *fd; + slang_T *lp; + int len; +{ + int i; + + if (len > 16) + return SP_FORMERROR; + for (i = 0; i < len; ++i) + lp->sl_regions[i] = getc(fd); /* <regionname> */ + lp->sl_regions[len] = NUL; + return 0; +} + +/* + * Read SN_CHARFLAGS section: <charflagslen> <charflags> + * <folcharslen> <folchars> + * Return SP_*ERROR flags. + */ + static int +read_charflags_section(fd) + FILE *fd; +{ + char_u *flags; + char_u *fol; + int flagslen, follen; + + /* <charflagslen> <charflags> */ + flags = read_cnt_string(fd, 1, &flagslen); + if (flagslen < 0) + return flagslen; + + /* <folcharslen> <folchars> */ + fol = read_cnt_string(fd, 2, &follen); + if (follen < 0) + { + vim_free(flags); + return follen; } - /* Read the string. Doesn't check for truncated file. */ + /* Set the word-char flags and fill SPELL_ISUPPER() table. 
*/ + if (flags != NULL && fol != NULL) + set_spell_charflags(flags, flagslen, fol); + + vim_free(flags); + vim_free(fol); + + /* When <charflagslen> is zero then <fcharlen> must also be zero. */ + if ((flags == NULL) != (fol == NULL)) + return SP_FORMERROR; + return 0; +} + +/* + * Read SN_PREFCOND section. + * Return SP_*ERROR flags. + */ + static int +read_prefcond_section(fd, lp) + FILE *fd; + slang_T *lp; +{ + int cnt; + int i; + int n; + char_u *p; + char_u buf[MAXWLEN + 1]; + + /* <prefcondcnt> <prefcond> ... */ + cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ + if (cnt <= 0) + return SP_FORMERROR; + + lp->sl_prefprog = (regprog_T **)alloc_clear( + (unsigned)sizeof(regprog_T *) * cnt); + if (lp->sl_prefprog == NULL) + return SP_ERROR; + lp->sl_prefixcnt = cnt; + for (i = 0; i < cnt; ++i) - str[i] = getc(fd); - str[i] = NUL; + { + /* <prefcond> : <condlen> <condstr> */ + n = getc(fd); /* <condlen> */ + if (n < 0 || n >= MAXWLEN) + return SP_FORMERROR; + + /* When <condlen> is zero we have an empty condition. Otherwise + * compile the regexp program used to check for the condition. */ + if (n > 0) + { + buf[0] = '^'; /* always match at one position only */ + p = buf + 1; + while (n-- > 0) + *p++ = getc(fd); /* <condstr> */ + *p = NUL; + lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); + } + } + return 0; +} - return str; +/* + * Read REP items section from "fd": <repcount> <rep> ... + * Return SP_*ERROR flags. 
+ */ + static int +read_rep_section(fd, slang) + FILE *fd; + slang_T *slang; +{ + int cnt; + garray_T *gap; + fromto_T *ftp; + short *first; + int i; + + cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ + if (cnt < 0) + return SP_TRUNCERROR; + + gap = &slang->sl_rep; + if (ga_grow(gap, cnt) == FAIL) + return SP_ERROR; + + /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ + for (; gap->ga_len < cnt; ++gap->ga_len) + { + ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; + ftp->ft_from = read_cnt_string(fd, 1, &i); + if (i < 0) + return i; + if (i == 0) + return SP_FORMERROR; + ftp->ft_to = read_cnt_string(fd, 1, &i); + if (i <= 0) + { + vim_free(ftp->ft_from); + if (i < 0) + return i; + return SP_FORMERROR; + } + } + + /* Fill the first-index table. */ + first = slang->sl_rep_first; + for (i = 0; i < 256; ++i) + first[i] = -1; + for (i = 0; i < gap->ga_len; ++i) + { + ftp = &((fromto_T *)gap->ga_data)[i]; + if (first[*ftp->ft_from] == -1) + first[*ftp->ft_from] = i; + } + return 0; +} + +/* + * Read SN_SAL section: <salflags> <salcount> <sal> ... + * Return SP_*ERROR flags. 
+ */ + static int +read_sal_section(fd, slang) + FILE *fd; + slang_T *slang; +{ + int i; + int cnt; + garray_T *gap; + salitem_T *smp; + int ccnt; + char_u *p; + int c; + + slang->sl_sofo = FALSE; + + i = getc(fd); /* <salflags> */ + if (i & SAL_F0LLOWUP) + slang->sl_followup = TRUE; + if (i & SAL_COLLAPSE) + slang->sl_collapse = TRUE; + if (i & SAL_REM_ACCENTS) + slang->sl_rem_accents = TRUE; + + cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */ + if (cnt < 0) + return SP_TRUNCERROR; + + gap = &slang->sl_sal; + ga_init2(gap, sizeof(salitem_T), 10); + if (ga_grow(gap, cnt) == FAIL) + return SP_ERROR; + + /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ + for (; gap->ga_len < cnt; ++gap->ga_len) + { + smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; + ccnt = getc(fd); /* <salfromlen> */ + if (ccnt < 0) + return SP_TRUNCERROR; + if ((p = alloc(ccnt + 2)) == NULL) + return SP_ERROR; + smp->sm_lead = p; + + /* Read up to the first special char into sm_lead. */ + for (i = 0; i < ccnt; ++i) + { + c = getc(fd); /* <salfrom> */ + if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) + break; + *p++ = c; + } + smp->sm_leadlen = p - smp->sm_lead; + *p++ = NUL; + + /* Put (abc) chars in sm_oneof, if any. */ + if (c == '(') + { + smp->sm_oneof = p; + for (++i; i < ccnt; ++i) + { + c = getc(fd); /* <salfrom> */ + if (c == ')') + break; + *p++ = c; + } + *p++ = NUL; + if (++i < ccnt) + c = getc(fd); + } + else + smp->sm_oneof = NULL; + + /* Any following chars go in sm_rules. 
*/ + smp->sm_rules = p; + if (i < ccnt) + /* store the char we got while checking for end of sm_lead */ + *p++ = c; + for (++i; i < ccnt; ++i) + *p++ = getc(fd); /* <salfrom> */ + *p++ = NUL; + + /* <saltolen> <salto> */ + smp->sm_to = read_cnt_string(fd, 1, &ccnt); + if (ccnt < 0) + { + vim_free(smp->sm_lead); + return ccnt; + } + +#ifdef FEAT_MBYTE + if (has_mbyte) + { + /* convert the multi-byte strings to wide char strings */ + smp->sm_lead_w = mb_str2wide(smp->sm_lead); + smp->sm_leadlen = mb_charlen(smp->sm_lead); + if (smp->sm_oneof == NULL) + smp->sm_oneof_w = NULL; + else + smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); + if (smp->sm_to == NULL) + smp->sm_to_w = NULL; + else + smp->sm_to_w = mb_str2wide(smp->sm_to); + if (smp->sm_lead_w == NULL + || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) + || (smp->sm_to_w == NULL && smp->sm_to != NULL)) + { + vim_free(smp->sm_lead); + vim_free(smp->sm_to); + vim_free(smp->sm_lead_w); + vim_free(smp->sm_oneof_w); + vim_free(smp->sm_to_w); + return SP_ERROR; + } + } +#endif + } + + /* Fill the first-index table. */ + set_sal_first(slang); + + return 0; +} + +/* + * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> + * Return SP_*ERROR flags. + */ + static int +read_sofo_section(fd, slang) + FILE *fd; + slang_T *slang; +{ + int cnt; + char_u *from, *to; + int res; + + slang->sl_sofo = TRUE; + + /* <sofofromlen> <sofofrom> */ + from = read_cnt_string(fd, 2, &cnt); + if (cnt < 0) + return cnt; + + /* <sofotolen> <sofoto> */ + to = read_cnt_string(fd, 2, &cnt); + if (cnt < 0) + { + vim_free(from); + return cnt; + } + + /* Store the info in slang->sl_sal and/or slang->sl_sal_first. 
*/ + if (from != NULL && to != NULL) + res = set_sofo(slang, from, to); + else if (from != NULL || to != NULL) + res = SP_FORMERROR; /* only one of two strings is an error */ + else + res = 0; + + vim_free(from); + vim_free(to); + return res; +} + +/* + * Read the compound section from the .spl file: + * <compmax> <compminlen> <compsylmax> <compflags> + * Returns SP_*ERROR flags. + */ + static int +read_compound(fd, slang, len) + FILE *fd; + slang_T *slang; + int len; +{ + int todo = len; + int c; + int atstart; + char_u *pat; + char_u *pp; + char_u *cp; + + if (todo < 2) + return SP_FORMERROR; /* need at least two bytes */ + + --todo; + c = getc(fd); /* <compmax> */ + if (c < 2) + c = MAXWLEN; + slang->sl_compmax = c; + + --todo; + c = getc(fd); /* <compminlen> */ + if (c < 1) + c = 3; + slang->sl_compminlen = c; + + --todo; + c = getc(fd); /* <compsylmax> */ + if (c < 1) + c = MAXWLEN; + slang->sl_compsylmax = c; + + /* Turn the COMPOUNDFLAGS items into a regexp pattern: + * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". + * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. */ + pat = alloc((unsigned)todo * 2 + 7); + if (pat == NULL) + return SP_ERROR; + + /* We also need a list of all flags that can appear at the start. */ + cp = alloc(todo + 1); + if (cp == NULL) + { + vim_free(pat); + return SP_ERROR; + } + slang->sl_compstartflags = cp; + *cp = NUL; + + pp = pat; + *pp++ = '^'; + *pp++ = '\\'; + *pp++ = '('; + + atstart = 1; + while (todo-- > 0) + { + c = getc(fd); /* <compflags> */ + if (atstart != 0) + { + /* At start of item: copy flags to "sl_compstartflags". For a + * [abc] item set "atstart" to 2 and copy up to the ']'. 
*/ + if (c == '[') + atstart = 2; + else if (c == ']') + atstart = 0; + else + { + if (vim_strchr(slang->sl_compstartflags, c) == NULL) + { + *cp++ = c; + *cp = NUL; + } + if (atstart == 1) + atstart = 0; + } + } + if (c == '/') /* slash separates two items */ + { + *pp++ = '\\'; + *pp++ = '|'; + atstart = 1; + } + else /* normal char, "[abc]" and '*' are copied as-is */ + { + if (c == '+') + *pp++ = '\\'; /* "a+" becomes "a\+" */ + *pp++ = c; + } + } + + *pp++ = '\\'; + *pp++ = ')'; + *pp++ = '$'; + *pp = NUL; + + slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); + vim_free(pat); + if (slang->sl_compprog == NULL) + return SP_FORMERROR; + + return 0; +} + +#define SY_MAXLEN 30 +typedef struct syl_item_S +{ + char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ + int sy_len; +} syl_item_T; + +/* + * Truncate "slang->sl_syllable" at the first slash and put the following items + * in "slang->sl_syl_items". + */ + static int +init_syl_tab(slang) + slang_T *slang; +{ + char_u *p; + char_u *s; + int l; + syl_item_T *syl; + + ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); + p = vim_strchr(slang->sl_syllable, '/'); + while (p != NULL) + { + *p++ = NUL; + if (p == NUL) + break; + s = p; + p = vim_strchr(p, '/'); + if (p == NULL) + l = STRLEN(s); + else + l = p - s; + if (l >= SY_MAXLEN) + return SP_FORMERROR; + if (ga_grow(&slang->sl_syl_items, 1) == FAIL) + return SP_ERROR; + syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + + slang->sl_syl_items.ga_len++; + vim_strncpy(syl->sy_chars, s, l); + syl->sy_len = l; + } + return OK; +} + +/* + * Count the number of syllables in "word". + * When "word" contains spaces the syllables after the last space are counted. + * Returns zero if syllables are not defines. 
+ */ + static int +count_syllables(slang, word) + slang_T *slang; + char_u *word; +{ + int cnt = 0; + int skip = FALSE; + char_u *p; + int len; + int i; + syl_item_T *syl; + int c; + + if (slang->sl_syllable == NULL) + return 0; + + for (p = word; *p != NUL; p += len) + { + /* When running into a space reset counter. */ + if (*p == ' ') + { + len = 1; + cnt = 0; + continue; + } + + /* Find longest match of syllable items. */ + len = 0; + for (i = 0; i < slang->sl_syl_items.ga_len; ++i) + { + syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; + if (syl->sy_len > len + && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) + len = syl->sy_len; + } + if (len != 0) /* found a match, count syllable */ + { + ++cnt; + skip = FALSE; + } + else + { + /* No recognized syllable item, at least a syllable char then? */ +#ifdef FEAT_MBYTE + c = mb_ptr2char(p); + len = (*mb_ptr2len)(p); +#else + c = *p; + len = 1; +#endif + if (vim_strchr(slang->sl_syllable, c) == NULL) + skip = FALSE; /* No, search for next syllable */ + else if (!skip) + { + ++cnt; /* Yes, count it */ + skip = TRUE; /* don't count following syllable chars */ + } + } + } + return cnt; } /* * Set the SOFOFROM and SOFOTO items in language "lp". - * Returns FAIL when there is something wrong. + * Returns SP_*ERROR flags when there is something wrong. */ static int set_sofo(lp, from, to) @@ -2436,7 +2901,7 @@ set_sofo(lp, from, to) gap = &lp->sl_sal; ga_init2(gap, sizeof(int *), 1); if (ga_grow(gap, 256) == FAIL) - return FAIL; + return SP_ERROR; vim_memset(gap->ga_data, 0, sizeof(int *) * 256); gap->ga_len = 256; @@ -2450,7 +2915,7 @@ set_sofo(lp, from, to) ++lp->sl_sal_first[c & 0xff]; } if (*p != NUL || *s != NUL) /* lengths differ */ - return FAIL; + return SP_FORMERROR; /* Allocate the lists. 
*/ for (i = 0; i < 256; ++i) @@ -2458,7 +2923,7 @@ set_sofo(lp, from, to) { p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); if (p == NULL) - return FAIL; + return SP_ERROR; ((int **)gap->ga_data)[i] = (int *)p; *(int *)p = 0; } @@ -2491,14 +2956,14 @@ set_sofo(lp, from, to) { /* mapping bytes to bytes is done in sl_sal_first[] */ if (STRLEN(from) != STRLEN(to)) - return FAIL; + return SP_FORMERROR; for (i = 0; to[i] != NUL; ++i) lp->sl_sal_first[from[i]] = to[i]; lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ } - return OK; + return 0; } /* @@ -3248,8 +3713,12 @@ typedef struct afffile_S int af_rar; /* RAR ID for rare word */ int af_kep; /* KEP ID for keep-case word */ int af_bad; /* BAD ID for banned word */ - char_u *af_compflags; /* COMPOUNDFLAG or COMPOUNDFLAGS */ + int af_needaffix; /* NEEDAFFIX ID */ + char_u *af_compflags; /* COMPOUNDFLAG and COMPOUNDFLAGS concat'ed */ + int af_compmax; /* COMPOUNDMAX */ int af_compminlen; /* COMPOUNDMIN */ + int af_compsylmax; /* COMPOUNDSYLMAX */ + char_u *af_syllable; /* SYLLABLE */ int af_pfxpostpone; /* postpone prefixes without chop string */ hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ @@ -3264,7 +3733,8 @@ struct affentry_S char_u *ae_add; /* text to add to basic word (can be NULL) */ char_u *ae_cond; /* condition (NULL for ".") */ regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ - int ae_rare; /* rare affix */ + char_u ae_rare; /* rare affix */ + char_u ae_nocomp; /* word with affix not compoundable */ }; #define AH_KEY_LEN 10 @@ -3373,7 +3843,8 @@ typedef struct spellinfo_S int si_msg_count; /* number of words added since last message */ int si_region_count; /* number of regions supported (1 when there are no regions) */ - char_u si_region_name[16]; /* region names (if count > 1) */ + char_u si_region_name[16]; /* region names; used only if + * si_region_count > 1) */ garray_T si_rep; /* list of fromto_T 
entries from REP lines */ garray_T si_sal; /* list of fromto_T entries from SAL lines */ @@ -3384,8 +3855,11 @@ typedef struct spellinfo_S int si_rem_accents; /* soundsalike: remove accents */ garray_T si_map; /* MAP info concatenated */ char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ + int si_compmax; /* max nr of words for compounding */ int si_compminlen; /* minimal length for compounding */ + int si_compsylmax; /* max nr of syllables for compounding */ char_u *si_compflags; /* flags used for compounding */ + char_u *si_syllable; /* syllable string */ garray_T si_prefcond; /* table with conditions for postponed * prefixes, each stored as a string */ int si_newID; /* current value for ah_newID */ @@ -3398,15 +3872,15 @@ static int sal_to_bool __ARGS((char_u *s)); static int has_non_ascii __ARGS((char_u *s)); static void spell_free_aff __ARGS((afffile_T *aff)); static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile)); -static char_u *get_pfxlist __ARGS((spellinfo_T *spin, afffile_T *affile, char_u *afflist)); -static char_u *get_compflags __ARGS((spellinfo_T *spin, char_u *afflist)); -static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist)); +static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); +static void get_compflags __ARGS((spellinfo_T *spin, char_u *afflist, char_u *store_afflist)); +static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen)); static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname)); static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align)); static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s)); static void free_blocks __ARGS((sblock_T *bl)); static wordnode_T *wordtree_alloc 
__ARGS((spellinfo_T *spin)); -static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist)); +static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); @@ -3425,6 +3899,13 @@ static void init_spellfile __ARGS((void)); * Use a negative number with the lower 8 bits zero. */ #define PFX_FLAGS -256 +/* + * Tunable parameters for when the tree is compressed. See 'mkspellmem'. + */ +static long compress_start = 30000; /* memory / SBLOCKSIZE */ +static long compress_inc = 100; /* memory / SBLOCKSIZE */ +static long compress_added = 500000; /* word count */ + #ifdef SPELL_PRINTTREE /* * For debugging the tree code: print the current tree in a (more or less) @@ -3718,17 +4199,55 @@ spell_read_aff(spin, fname) if (items[1][1] != NUL) smsg((char_u *)_(e_affname), fname, lnum, items[1]); } + else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2 + && aff->af_needaffix == 0) + { + aff->af_needaffix = items[1][0]; + if (items[1][1] != NUL) + smsg((char_u *)_(e_affname), fname, lnum, items[1]); + } else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2 - && aff->af_compflags == 0) + && aff->af_compflags == NULL) { - aff->af_compflags = getroom_save(spin, items[1]); + p = getroom(spin, 3, FALSE); + if (p != NULL) + { + /* Turn single flag "c" into COMPOUNDFLAGS compatible + * string "c+". 
*/ + p[0] = items[1][0]; + p[1] = '+'; + p[2] = NUL; + aff->af_compflags = p; + } if (items[1][1] != NUL) smsg((char_u *)_(e_affname), fname, lnum, items[1]); } - else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2 - && aff->af_compflags == 0) + else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2) + { + /* Concatenate this string to previously defined ones, using a + * slash to separate them. */ + l = STRLEN(items[1]) + 1; + if (aff->af_compflags != NULL) + l += STRLEN(aff->af_compflags) + 1; + p = getroom(spin, l, FALSE); + if (p != NULL) + { + if (aff->af_compflags != NULL) + { + STRCPY(p, aff->af_compflags); + STRCAT(p, "/"); + } + STRCAT(p, items[1]); + aff->af_compflags = p; + } + } + else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2 + && aff->af_compmax == 0) { - aff->af_compflags = getroom_save(spin, items[1]); + aff->af_compmax = atoi((char *)items[1]); + if (aff->af_compmax == 0) + smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"), + fname, lnum, items[1]); } else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2 && aff->af_compminlen == 0) @@ -3738,6 +4257,19 @@ spell_read_aff(spin, fname) smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), fname, lnum, items[1]); } + else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2 + && aff->af_compsylmax == 0) + { + aff->af_compsylmax = atoi((char *)items[1]); + if (aff->af_compsylmax == 0) + smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), + fname, lnum, items[1]); + } + else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 + && aff->af_syllable == NULL) + { + aff->af_syllable = getroom_save(spin, items[1]); + } else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) { aff->af_pfxpostpone = TRUE; @@ -3811,14 +4343,25 @@ spell_read_aff(spin, fname) { affentry_T *aff_entry; int rare = FALSE; + int nocomp = FALSE; int upper = FALSE; int lasti = 5; - /* Check for "rare" after the other info. 
*/ - if (itemcnt > 5 && STRICMP(items[5], "rare") == 0) + /* Check for "rare" and "nocomp" after the other info. */ + while (itemcnt > lasti) { - rare = TRUE; - lasti = 6; + if (!rare && STRICMP(items[lasti], "rare") == 0) + { + rare = TRUE; + ++lasti; + } + else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0) + { + nocomp = TRUE; + ++lasti; + } + else + break; } /* Myspell allows extra text after the item, but that might @@ -3833,6 +4376,7 @@ spell_read_aff(spin, fname) if (aff_entry == NULL) break; aff_entry->ae_rare = rare; + aff_entry->ae_nocomp = nocomp; if (STRCMP(items[2], "0") != 0) aff_entry->ae_chop = getroom_save(spin, items[2]); @@ -4129,6 +4673,14 @@ spell_read_aff(spin, fname) } /* Use compound specifications of the .aff file for the spell info. */ + if (aff->af_compmax != 0) + { + if (spin->si_compmax != 0 && spin->si_compmax != aff->af_compmax) + smsg((char_u *)_("COMPOUNDMAX value differs from what is used in another .aff file")); + else + spin->si_compmax = aff->af_compmax; + } + if (aff->af_compminlen != 0) { if (spin->si_compminlen != 0 @@ -4138,6 +4690,18 @@ spell_read_aff(spin, fname) spin->si_compminlen = aff->af_compminlen; } + if (aff->af_compsylmax != 0) + { + if (aff->af_syllable == NULL) + smsg((char_u *)_("COMPOUNDSYLMAX without SYLLABLE")); + + if (spin->si_compsylmax != 0 + && spin->si_compsylmax != aff->af_compsylmax) + smsg((char_u *)_("COMPOUNDSYLMAX value differs from what is used in another .aff file")); + else + spin->si_compsylmax = aff->af_compsylmax; + } + if (aff->af_compflags != NULL) { if (spin->si_compflags != NULL @@ -4147,6 +4711,15 @@ spell_read_aff(spin, fname) spin->si_compflags = aff->af_compflags; } + if (aff->af_syllable != NULL) + { + if (spin->si_syllable != NULL + && STRCMP(spin->si_syllable, aff->af_syllable) != 0) + smsg((char_u *)_("SYLLABLE value differs from what is used in another .aff file")); + else + spin->si_syllable = aff->af_syllable; + } + vim_free(pc); fclose(fd); return aff; @@ -4269,7 
+4842,9 @@ spell_read_dic(spin, fname, affile) char_u line[MAXLINELEN]; char_u *p; char_u *afflist; - char_u *store_afflist; + char_u store_afflist[MAXWLEN]; + int pfxlen; + int need_affix; char_u *dw; char_u *pc; char_u *w; @@ -4417,7 +4992,9 @@ spell_read_dic(spin, fname, affile) hash_add_item(&ht, hi, dw, hash); flags = 0; - store_afflist = NULL; + store_afflist[0] = NUL; + pfxlen = 0; + need_affix = FALSE; if (afflist != NULL) { /* Check for affix name that stands for keep-case word and stands @@ -4431,40 +5008,23 @@ spell_read_dic(spin, fname, affile) if (affile->af_bad != NUL && vim_strchr(afflist, affile->af_bad) != NULL) flags |= WF_BANNED; + if (affile->af_needaffix != NUL + && vim_strchr(afflist, affile->af_needaffix) != NULL) + need_affix = TRUE; if (affile->af_pfxpostpone) /* Need to store the list of prefix IDs with the word. */ - store_afflist = get_pfxlist(spin, affile, afflist); + pfxlen = get_pfxlist(affile, afflist, store_afflist); - if (spin->si_compflags) - { - /* Need to store the list of compound flags with the word. */ - p = get_compflags(spin, afflist); - if (p != NULL) - { - if (store_afflist != NULL) - { - char_u *s; - - /* Concatenate the prefix IDs with the compound flags. - */ - s = getroom(spin, STRLEN(store_afflist) - + STRLEN(p) + 1, FALSE); - if (s != NULL) - { - STRCPY(s, store_afflist); - STRCAT(s, p); - store_afflist = s; - } - } - else - store_afflist = p; - } - } + if (spin->si_compflags != NULL) + /* Need to store the list of compound flags with the word. + * Concatenate them to the list of prefix IDs. */ + get_compflags(spin, afflist, store_afflist + pfxlen); } /* Add the word to the word tree(s). */ - if (store_word(spin, dw, flags, spin->si_region, store_afflist) == FAIL) + if (store_word(spin, dw, flags, spin->si_region, + store_afflist, need_affix) == FAIL) retval = FAIL; if (afflist != NULL) @@ -4473,13 +5033,13 @@ spell_read_dic(spin, fname, affile) * Additionally do matching prefixes that combine. 
*/ if (store_aff_word(spin, dw, afflist, affile, &affile->af_suff, &affile->af_pref, - FALSE, flags, store_afflist) == FAIL) + FALSE, flags, store_afflist, pfxlen) == FAIL) retval = FAIL; /* Find all matching prefixes and add the resulting words. */ if (store_aff_word(spin, dw, afflist, affile, &affile->af_pref, NULL, - FALSE, flags, store_afflist) == FAIL) + FALSE, flags, store_afflist, pfxlen) == FAIL) retval = FAIL; } } @@ -4498,93 +5058,56 @@ spell_read_dic(spin, fname, affile) /* * Get the list of prefix IDs from the affix list "afflist". * Used for PFXPOSTPONE. - * Returns a string allocated with getroom(). NULL when there are no prefixes - * or when out of memory. + * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL + * and return the number of affixes. */ - static char_u * -get_pfxlist(spin, affile, afflist) - spellinfo_T *spin; + static int +get_pfxlist(affile, afflist, store_afflist) afffile_T *affile; char_u *afflist; + char_u *store_afflist; { char_u *p; - int cnt; - int round; - char_u *res = NULL; + int cnt = 0; char_u key[2]; hashitem_T *hi; key[1] = NUL; - - /* round 1: count the number of prefix IDs. - * round 2: move prefix IDs to "res" */ - for (round = 1; round <= 2; ++round) + for (p = afflist; *p != NUL; ++p) { - cnt = 0; - for (p = afflist; *p != NUL; ++p) - { - key[0] = *p; - hi = hash_find(&affile->af_pref, key); - if (!HASHITEM_EMPTY(hi)) - { - /* This is a prefix ID, use the new number. */ - if (round == 2) - res[cnt] = HI2AH(hi)->ah_newID; - ++cnt; - } - } - if (round == 1 && cnt > 0) - res = getroom(spin, cnt + 1, FALSE); - if (res == NULL) - break; + key[0] = *p; + hi = hash_find(&affile->af_pref, key); + if (!HASHITEM_EMPTY(hi)) + /* This is a prefix ID, use the new number. 
*/ + store_afflist[cnt++] = HI2AH(hi)->ah_newID; } - if (res != NULL) - res[cnt] = NUL; - return res; + store_afflist[cnt] = NUL; + return cnt; } /* * Get the list of affix IDs from the affix list "afflist" that are used for * compound words. - * Returns a string allocated with getroom(). NULL when there are no relevant - * affixes or when out of memory. + * Puts the flags in "store_afflist[]". */ - static char_u * -get_compflags(spin, afflist) + static void +get_compflags(spin, afflist, store_afflist) spellinfo_T *spin; char_u *afflist; + char_u *store_afflist; { char_u *p; - int cnt; - int round; - char_u *res = NULL; + int cnt = 0; - /* round 1: count the number of affix IDs. - * round 2: move affix IDs to "res" */ - for (round = 1; round <= 2; ++round) - { - cnt = 0; - for (p = afflist; *p != NUL; ++p) - { - if (*p != ',' && *p != '-' - && vim_strchr(spin->si_compflags, *p) != NULL) - { - /* This is a compount affix ID. */ - if (round == 2) - res[cnt] = *p; - ++cnt; - } - } - if (round == 1 && cnt > 0) - res = getroom(spin, cnt + 1, FALSE); - if (res == NULL) - break; - } + for (p = afflist; *p != NUL; ++p) + /* A flag is a compound flag if it appears in "si_compflags" and + * it's not a special character. */ + if (vim_strchr(spin->si_compflags, *p) != NULL + && vim_strchr((char_u *)"+*[]/", *p) == NULL) + store_afflist[cnt++] = *p; - if (res != NULL) - res[cnt] = NUL; - return res; + store_afflist[cnt] = NUL; } /* @@ -4597,7 +5120,8 @@ get_compflags(spin, afflist) * Returns FAIL when out of memory. 
*/ static int -store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) +store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, + pfxlist, pfxlen) spellinfo_T *spin; /* spell info */ char_u *word; /* basic word start */ char_u *afflist; /* list of names of supported affixes */ @@ -4607,6 +5131,8 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) int comb; /* only use affixes that combine */ int flags; /* flags for the word */ char_u *pfxlist; /* list of prefix IDs */ + int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest + * is compound flags */ { int todo; hashitem_T *hi; @@ -4619,8 +5145,9 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) char_u *p; int use_flags; char_u *use_pfxlist; + char_u pfx_pfxlist[MAXWLEN]; int c; - int wordlen = STRLEN(word); + size_t wordlen = STRLEN(word); todo = ht->ht_used; for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) @@ -4701,7 +5228,15 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) use_flags = flags | WF_RARE; else use_flags = flags; + + /* Obey the "nocomp" flag of the affix: don't use the + * compound flags. */ use_pfxlist = pfxlist; + if (ae->ae_nocomp && pfxlist != NULL) + { + vim_strncpy(pfx_pfxlist, pfxlist, pfxlen); + use_pfxlist = pfx_pfxlist; + } /* When there are postponed prefixes... */ if (spin->si_prefroot != NULL @@ -4711,14 +5246,15 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) use_flags |= WF_HAS_AFF; /* ... don't use a prefix list if combining - * affixes is not allowed */ - if (!ah->ah_combine || comb) - use_pfxlist = NULL; + * affixes is not allowed. But do use the + * compound flags after them. */ + if ((!ah->ah_combine || comb) && pfxlist != NULL) + use_pfxlist += pfxlen; } /* Store the modified word. 
*/ if (store_word(spin, newword, use_flags, - spin->si_region, use_pfxlist) == FAIL) + spin->si_region, use_pfxlist, FALSE) == FAIL) retval = FAIL; /* When added a suffix and combining is allowed also @@ -4726,7 +5262,7 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist) if (xht != NULL && ah->ah_combine) if (store_aff_word(spin, newword, afflist, affile, xht, NULL, TRUE, - use_flags, use_pfxlist) == FAIL) + use_flags, use_pfxlist, pfxlen) == FAIL) retval = FAIL; } } @@ -4929,7 +5465,7 @@ spell_read_wordfile(spin, fname) } /* Normal word: store it. */ - if (store_word(spin, line, flags, regionmask, NULL) == FAIL) + if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) { retval = FAIL; break; @@ -5046,12 +5582,13 @@ wordtree_alloc(spin) * compound flag. */ static int -store_word(spin, word, flags, region, pfxlist) +store_word(spin, word, flags, region, pfxlist, need_affix) spellinfo_T *spin; char_u *word; int flags; /* extra flags, WF_BANNED */ int region; /* supported region(s) */ char_u *pfxlist; /* list of prefix IDs or NULL */ + int need_affix; /* only store word with affix ID */ { int len = STRLEN(word); int ct = captype(word, word + len); @@ -5062,7 +5599,8 @@ store_word(spin, word, flags, region, pfxlist) (void)spell_casefold(word, len, foldword, MAXWLEN); for (p = pfxlist; res == OK; ++p) { - res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, + if (!need_affix || (p != NULL && *p != NUL)) + res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, region, p == NULL ? 0 : *p); if (p == NULL || *p == NUL) break; @@ -5073,7 +5611,8 @@ store_word(spin, word, flags, region, pfxlist) { for (p = pfxlist; res == OK; ++p) { - res = tree_add_word(spin, word, spin->si_keeproot, flags, + if (!need_affix || (p != NULL && *p != NUL)) + res = tree_add_word(spin, word, spin->si_keeproot, flags, region, p == NULL ? 
0 : *p); if (p == NULL || *p == NUL) break; @@ -5209,7 +5748,7 @@ tree_add_word(spin, word, root, flags, region, affixID) { if (--spin->si_compress_cnt == 1) /* Did enough words to lower the block count limit. */ - spin->si_blocks_cnt += SPELL_COMPRESS_INC; + spin->si_blocks_cnt += compress_inc; } /* @@ -5217,25 +5756,25 @@ tree_add_word(spin, word, root, flags, region, affixID) * to free up some room. But compression is slow, and we might actually * need that room, thus only compress in the following situations: * 1. When not compressed before (si_compress_cnt == 0): when using - * SPELL_COMPRESS_CNT blocks. - * 2. When compressed before and used SPELL_COMPRESS_INC blocks before - * adding SPELL_COMPRESS_ADDED words (si_compress_cnt > 1). - * 3. When compressed before, added SPELL_COMPRESS_ADDED words + * "compress_start" blocks. + * 2. When compressed before and used "compress_inc" blocks before + * adding "compress_added" words (si_compress_cnt > 1). + * 3. When compressed before, added "compress_added" words * (si_compress_cnt == 1) and the number of free nodes drops below the * maximum word length. */ #ifndef SPELL_PRINTTREE if (spin->si_compress_cnt == 1 ? spin->si_free_count < MAXWLEN - : spin->si_blocks_cnt >= SPELL_COMPRESS_CNT) + : spin->si_blocks_cnt >= compress_start) #endif { /* Decrement the block counter. The effect is that we compress again - * when the freed up room has been used and another SPELL_COMPRESS_INC - * blocks have been allocated. Unless SPELL_COMPRESS_ADDED words have + * when the freed up room has been used and another "compress_inc" + * blocks have been allocated. Unless "compress_added" words have * been added, then the limit is put back again. 
*/ - spin->si_blocks_cnt -= SPELL_COMPRESS_INC; - spin->si_compress_cnt = SPELL_COMPRESS_ADDED; + spin->si_blocks_cnt -= compress_inc; + spin->si_compress_cnt = compress_added; if (spin->si_verbose) { @@ -5258,6 +5797,47 @@ tree_add_word(spin, word, root, flags, region, affixID) } /* + * Check the 'mkspellmem' option. Return FAIL if it's wrong. + * Sets "sps_flags". + */ + int +spell_check_msm() +{ + char_u *p = p_msm; + long start = 0; + long inc = 0; + long added = 0; + + if (!VIM_ISDIGIT(*p)) + return FAIL; + /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ + start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); + if (*p != ',') + return FAIL; + ++p; + if (!VIM_ISDIGIT(*p)) + return FAIL; + inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); + if (*p != ',') + return FAIL; + ++p; + if (!VIM_ISDIGIT(*p)) + return FAIL; + added = getdigits(&p) * 1024; + if (*p != NUL) + return FAIL; + + if (start == 0 || inc == 0 || added == 0 || inc > start) + return FAIL; + + compress_start = start; + compress_inc = inc; + compress_added = added; + return OK; +} + + +/* * Get a wordnode_T, either from the list of previously freed nodes or * allocate a new one. */ @@ -5532,7 +6112,7 @@ rep_compare(s1, s2) } /* - * Write the Vim spell file "fname". + * Write the Vim .spl file "fname". */ static void write_vim_spell(spin, fname) @@ -5558,94 +6138,142 @@ write_vim_spell(spin, fname) return; } - /* <HEADER>: <fileID> <regioncnt> <regionname> ... - * <charflagslen> <charflags> - * <fcharslen> <fchars> - * <midwordlen> <midword> - * <compoundlen> <compoundtype> <compoundinfo> - * <prefcondcnt> <prefcond> ... */ - + /* <HEADER>: <fileID> <versionnr> */ /* <fileID> */ if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) EMSG(_(e_write)); + putc(VIMSPELLVERSION, fd); /* <versionnr> */ - /* write the region names if there is more than one */ + /* + * <SECTIONS>: <section> ... <sectionend> + */ + + /* SN_REGION: <regionname> ... 
+ * Write the region names only if there is more than one. */ if (spin->si_region_count > 1) { - putc(spin->si_region_count, fd); /* <regioncnt> <regionname> ... */ - fwrite(spin->si_region_name, (size_t)(spin->si_region_count * 2), - (size_t)1, fd); + putc(SN_REGION, fd); /* <sectionID> */ + putc(SNF_REQUIRED, fd); /* <sectionflags> */ + l = spin->si_region_count * 2; + put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ + fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); + /* <regionname> ... */ regionmask = (1 << spin->si_region_count) - 1; } else - { - putc(0, fd); regionmask = 0; - } - /* - * Write the table with character flags and table for case folding. - * <charflagslen> <charflags> <fcharlen> <fchars> + /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> + * + * The table with character flags and the table for case folding. + * This makes sure the same characters are recognized as word characters + * when generating an when using a spell file. * Skip this for ASCII, the table may conflict with the one used for * 'encoding'. * Also skip this for an .add.spl file, the main spell file must contain * the table (avoids that it conflicts). File is shorter too. */ - if (spin->si_ascii || spin->si_add) + if (!spin->si_ascii && !spin->si_add) { - putc(0, fd); - putc(0, fd); - putc(0, fd); - } - else - write_spell_chartab(fd); + char_u folchars[128 * 8]; + int flags; + putc(SN_MIDWORD, fd); /* <sectionID> */ + putc(SNF_REQUIRED, fd); /* <sectionflags> */ - if (spin->si_midword == NULL) - put_bytes(fd, 0L, 2); /* <midwordlen> */ - else + /* Form the <folchars> string first, we need to know its length. 
*/ + l = 0; + for (i = 128; i < 256; ++i) + { +#ifdef FEAT_MBYTE + if (has_mbyte) + l += mb_char2bytes(spelltab.st_fold[i], folchars + l); + else +#endif + folchars[l++] = spelltab.st_fold[i]; + } + put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ + + fputc(128, fd); /* <charflagslen> */ + for (i = 128; i < 256; ++i) + { + flags = 0; + if (spelltab.st_isw[i]) + flags |= CF_WORD; + if (spelltab.st_isu[i]) + flags |= CF_UPPER; + fputc(flags, fd); /* <charflags> */ + } + + put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ + fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ + } + + /* SN_MIDWORD: <midword> */ + if (spin->si_midword != NULL) { + putc(SN_MIDWORD, fd); /* <sectionID> */ + putc(SNF_REQUIRED, fd); /* <sectionflags> */ + i = STRLEN(spin->si_midword); - put_bytes(fd, (long_u)i, 2); /* <midwordlen> */ + put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ } - - /* Write the compound info. */ - if (spin->si_compflags == NULL) - put_bytes(fd, 0L, 2); /* <compoundlen> */ - else + /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ + if (spin->si_prefcond.ga_len > 0) { - l = STRLEN(spin->si_compflags); - put_bytes(fd, (long_u)(l + 2), 2); /* <compoundlen> */ - putc(1, fd); /* <compoundtype> */ - putc(spin->si_compminlen, fd); /* <comp1minlen> */ - fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); - /* <comp1flags> */ - } + putc(SN_PREFCOND, fd); /* <sectionID> */ + putc(SNF_REQUIRED, fd); /* <sectionflags> */ + l = write_spell_prefcond(NULL, &spin->si_prefcond); + put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ - /* Write the prefix conditions. */ - write_spell_prefcond(fd, &spin->si_prefcond); + write_spell_prefcond(fd, &spin->si_prefcond); + } - /* <SUGGEST> : <repcount> <rep> ... - * <salflags> <salcount> <sal> ... - * <maplen> <mapstr> */ + /* SN_REP: <repcount> <rep> ... + * SN_SAL: <salflags> <salcount> <sal> ... */ /* Sort the REP items. 
*/ qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, sizeof(fromto_T), rep_compare); - /* round 1: REP items - * round 2: SAL items (unless SOFO is used) */ + /* round 1: SN_REP section + * round 2: SN_SAL section (unless SN_SOFO is used) */ for (round = 1; round <= 2; ++round) { if (round == 1) + { gap = &spin->si_rep; + putc(SN_REP, fd); /* <sectionID> */ + } else { + if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) + /* using SN_SOFO section instead of SN_SAL */ + break; gap = &spin->si_sal; + putc(SN_SAL, fd); /* <sectionID> */ + } + /* This is for making suggestions, section is not required. */ + putc(0, fd); /* <sectionflags> */ + + /* Compute the length of what follows. */ + l = 2; /* count <repcount> or <salcount> */ + for (i = 0; i < gap->ga_len; ++i) + { + ftp = &((fromto_T *)gap->ga_data)[i]; + l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ + l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ + } + if (round == 2) + ++l; /* count <salflags> */ + put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ + + if (round == 2) + { i = 0; if (spin->si_followup) i |= SAL_F0LLOWUP; @@ -5653,11 +6281,7 @@ write_vim_spell(spin, fname) i |= SAL_COLLAPSE; if (spin->si_rem_accents) i |= SAL_REM_ACCENTS; - if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) - i |= SAL_SOFO; putc(i, fd); /* <salflags> */ - if (i & SAL_SOFO) - break; } put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ @@ -5674,26 +6298,73 @@ write_vim_spell(spin, fname) fwrite(p, l, (size_t)1, fd); } } + } - /* SOFOFROM and SOFOTO */ + /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> + * This is for making suggestions, section is not required. 
*/ if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) { - put_bytes(fd, 1L, 2); /* <salcount> */ + putc(SN_SOFO, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ l = STRLEN(spin->si_sofofr); - put_bytes(fd, (long_u)l, 2); /* <salfromlen> */ - fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <salfrom> */ + put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); + /* <sectionlen> */ + + put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ + fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ l = STRLEN(spin->si_sofoto); - put_bytes(fd, (long_u)l, 2); /* <saltolen> */ - fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <salto> */ + put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ + fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ + } + + /* SN_MAP: <mapstr> + * This is for making suggestions, section is not required. */ + if (spin->si_map.ga_len > 0) + { + putc(SN_MAP, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + l = spin->si_map.ga_len; + put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ + fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); + /* <mapstr> */ + } + + /* SN_COMPOUND: compound info. + * We don't mark it required, when not supported all compound words will + * be bad words. */ + if (spin->si_compflags != NULL) + { + putc(SN_COMPOUND, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + + l = STRLEN(spin->si_compflags); + put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */ + putc(spin->si_compmax, fd); /* <compmax> */ + putc(spin->si_compminlen, fd); /* <compminlen> */ + putc(spin->si_compsylmax, fd); /* <compsylmax> */ + /* <compflags> */ + fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); + } + + /* SN_SYLLABLE: syllable info. + * We don't mark it required, when not supported syllables will not be + * counted. 
*/ + if (spin->si_syllable != NULL) + { + putc(SN_SYLLABLE, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + + l = STRLEN(spin->si_syllable); + put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ + fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */ } - put_bytes(fd, (long_u)spin->si_map.ga_len, 2); /* <maplen> */ - if (spin->si_map.ga_len > 0) /* <mapstr> */ - fwrite(spin->si_map.ga_data, (size_t)spin->si_map.ga_len, - (size_t)1, fd); + /* end of <SECTIONS> */ + putc(SN_END, fd); /* <sectionend> */ + /* * <LWORDTREE> <KWORDTREE> <PREFIXTREE> @@ -6540,7 +7211,7 @@ set_spell_chartab(fol, low, upp) /* * Set the spell character tables from strings in the .spl file. */ - static int + static void set_spell_charflags(flags, cnt, fol) char_u *flags; int cnt; /* length of "flags" */ @@ -6576,7 +7247,7 @@ set_spell_charflags(flags, cnt, fol) } } - return set_spell_finish(&new_st); + (void)set_spell_finish(&new_st); } static int @@ -6716,8 +7387,9 @@ spell_iswordp_w(p, buf) /* * Write the table with prefix conditions to the .spl file. + * When "fd" is NULL only count the length of what is written. */ - static void + static int write_spell_prefcond(fd, gap) FILE *fd; garray_T *gap; @@ -6725,58 +7397,32 @@ write_spell_prefcond(fd, gap) int i; char_u *p; int len; + int totlen; - put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ + if (fd != NULL) + put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ + + totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ for (i = 0; i < gap->ga_len; ++i) { /* <prefcond> : <condlen> <condstr> */ p = ((char_u **)gap->ga_data)[i]; - if (p == NULL) - fputc(0, fd); - else + if (p != NULL) { len = STRLEN(p); - fputc(len, fd); - fwrite(p, (size_t)len, (size_t)1, fd); + if (fd != NULL) + { + fputc(len, fd); + fwrite(p, (size_t)len, (size_t)1, fd); + } + totlen += len; } - } -} - -/* - * Write the current tables into the .spl file. 
- * This makes sure the same characters are recognized as word characters when - * generating an when using a spell file. - */ - static void -write_spell_chartab(fd) - FILE *fd; -{ - char_u charbuf[256 * 4]; - int len = 0; - int flags; - int i; - - fputc(128, fd); /* <charflagslen> */ - for (i = 128; i < 256; ++i) - { - flags = 0; - if (spelltab.st_isw[i]) - flags |= CF_WORD; - if (spelltab.st_isu[i]) - flags |= CF_UPPER; - fputc(flags, fd); /* <charflags> */ - -#ifdef FEAT_MBYTE - if (has_mbyte) - len += mb_char2bytes(spelltab.st_fold[i], charbuf + len); - else -#endif - charbuf[len++] = spelltab.st_fold[i]; + else if (fd != NULL) + fputc(0, fd); } - put_bytes(fd, (long_u)len, 2); /* <fcharlen> */ - fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */ + return totlen; } /* @@ -6837,26 +7483,36 @@ spell_casefold(str, len, buf, buflen) #define SPS_DOUBLE 4 static int sps_flags = SPS_BEST; +static int sps_limit = 9999; /* * Check the 'spellsuggest' option. Return FAIL if it's wrong. - * Sets "sps_flags". + * Sets "sps_flags" and "sps_limit". */ int spell_check_sps() { char_u *p; + char_u *s; char_u buf[MAXPATHL]; int f; sps_flags = 0; + sps_limit = 9999; for (p = p_sps; *p != NUL; ) { copy_option_part(&p, buf, MAXPATHL, ","); f = 0; - if (STRCMP(buf, "best") == 0) + if (VIM_ISDIGIT(*buf)) + { + s = buf; + sps_limit = getdigits(&s); + if (*s != NUL && !VIM_ISDIGIT(*s)) + f = -1; + } + else if (STRCMP(buf, "best") == 0) f = SPS_BEST; else if (STRCMP(buf, "fast") == 0) f = SPS_FAST; @@ -6869,6 +7525,7 @@ spell_check_sps() if (f == -1 || (sps_flags != 0 && f != 0)) { sps_flags = SPS_BEST; + sps_limit = 9999; return FAIL; } if (f != 0) @@ -6902,6 +7559,7 @@ spell_suggest() suggest_T *stp; int mouse_used; int need_cap; + int limit; /* Find the start of the badly spelled word. 
*/ if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL @@ -6937,8 +7595,13 @@ spell_suggest() line = ml_get_curline(); - /* Get the list of suggestions */ - spell_find_suggest(line + curwin->w_cursor.col, &sug, (int)Rows - 2, + /* Get the list of suggestions. Limit to 'lines' - 2 or the number in + * 'spellsuggest', whatever is smaller. */ + if (sps_limit > (int)Rows - 2) + limit = (int)Rows - 2; + else + limit = sps_limit; + spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, TRUE, need_cap); if (sug.su_ga.ga_len == 0) @@ -7151,8 +7814,8 @@ ex_spellrepall(eap) int addlen; char_u *line; char_u *p; - int didone = FALSE; int save_ws = p_ws; + linenr_T prev_lnum = 0; if (repl_from == NULL || repl_to == NULL) { @@ -7167,6 +7830,8 @@ ex_spellrepall(eap) sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); p_ws = FALSE; + sub_nsubs = 0; + sub_nlines = 0; curwin->w_cursor.lnum = 0; while (!got_int) { @@ -7188,7 +7853,13 @@ ex_spellrepall(eap) STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); ml_replace(curwin->w_cursor.lnum, p, FALSE); changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); - didone = TRUE; + + if (curwin->w_cursor.lnum != prev_lnum) + { + ++sub_nlines; + prev_lnum = curwin->w_cursor.lnum; + } + ++sub_nsubs; } curwin->w_cursor.col += STRLEN(repl_to); } @@ -7197,8 +7868,10 @@ ex_spellrepall(eap) curwin->w_cursor = pos; vim_free(frompat); - if (!didone) + if (sub_nsubs == 0) EMSG2(_("E753: Not found: %s"), repl_from); + else + do_sub_msg(FALSE); } /* @@ -7652,10 +8325,12 @@ suggest_try_change(su) char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ char_u tword[MAXWLEN]; /* good word collected so far */ trystate_T stack[MAXWLEN]; - char_u preword[MAXWLEN * 3]; /* word found with proper case (appended - * to for word split) */ - char_u prewordlen = 0; /* length of word in "preword" */ - int splitoff = 0; /* index in tword after last split */ + char_u preword[MAXWLEN * 3]; /* word found with proper case; + * concatanation of prefix 
compound + * words and split word. NUL terminated + * when going deeper but not when coming + * back. */ + char_u compflags[MAXWLEN]; /* compound flags, one for each word */ trystate_T *sp; int newscore; langp_T *lp; @@ -7696,16 +8371,9 @@ suggest_try_change(su) */ depth = 0; sp = &stack[0]; + vim_memset(sp, 0, sizeof(trystate_T)); sp->ts_state = STATE_START; - sp->ts_score = 0; sp->ts_curi = 1; - sp->ts_fidx = 0; - sp->ts_fidxtry = 0; - sp->ts_twordlen = 0; - sp->ts_arridx = 0; -#ifdef FEAT_MBYTE - sp->ts_tcharlen = 0; -#endif /* * When there are postponed prefixes we need to use these first. At @@ -7792,10 +8460,10 @@ suggest_try_change(su) /* Move the prefix to preword[] with the right case * and make find_keepcap_word() works. */ - splitoff = sp->ts_twordlen; - tword[splitoff] = NUL; + sp->ts_splitoff = sp->ts_twordlen; + tword[sp->ts_splitoff] = NUL; make_case_word(tword, preword, flags); - prewordlen = STRLEN(preword); + sp->ts_prewordlen = STRLEN(preword); } break; } @@ -7814,6 +8482,9 @@ suggest_try_change(su) ++sp->ts_curi; /* eat one NUL byte */ flags = (int)idxs[arridx]; + fword_ends = (fword[sp->ts_fidx] == NUL + || !spell_iswordp(fword + sp->ts_fidx, curbuf)); + tword[sp->ts_twordlen] = NUL; if (sp->ts_prefixdepth == PFD_COMPOUND) { @@ -7821,8 +8492,13 @@ suggest_try_change(su) * word does not support compounding then give up * (splitting is tried for the word without compound * flag). 
*/ - if (sp->ts_twordlen - splitoff < slang->sl_compminlen - || !can_compound(slang, flags)) + if (((unsigned)flags >> 24) == 0 + || sp->ts_twordlen - sp->ts_splitoff + < slang->sl_compminlen) + break; + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + if (fword_ends && !can_compound(slang, tword, compflags)) break; } else if (sp->ts_prefixdepth < MAXWLEN) @@ -7839,7 +8515,7 @@ suggest_try_change(su) if (c > 0) { c = valid_word_prefix(c, n, flags, - tword + splitoff, slang, FALSE); + tword + sp->ts_splitoff, slang, FALSE); if (c == 0) break; @@ -7853,11 +8529,10 @@ suggest_try_change(su) * Form the word with proper case in preword. * If there is a word from a previous split, append. */ - tword[sp->ts_twordlen] = NUL; if (flags & WF_KEEPCAP) /* Must find the word in the keep-case tree. */ - find_keepcap_word(slang, tword + splitoff, - preword + prewordlen); + find_keepcap_word(slang, tword + sp->ts_splitoff, + preword + sp->ts_prewordlen); else { /* Include badflags: if the badword is onecap or allcap @@ -7872,18 +8547,18 @@ suggest_try_change(su) #endif ) c = WF_ONECAP; - make_case_word(tword + splitoff, - preword + prewordlen, flags | c); + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, flags | c); } /* Don't use a banned word. It may appear again as a good * word, thus remember it. 
*/ if (flags & WF_BANNED) { - add_banned(su, preword + prewordlen); + add_banned(su, preword + sp->ts_prewordlen); break; } - if (was_banned(su, preword + prewordlen) + if (was_banned(su, preword + sp->ts_prewordlen) || was_banned(su, preword)) break; @@ -7895,11 +8570,9 @@ suggest_try_change(su) newscore += SCORE_RARE; if (!spell_valid_case(su->su_badflags, - captype(preword + prewordlen, NULL))) + captype(preword + sp->ts_prewordlen, NULL))) newscore += SCORE_ICASE; - fword_ends = (fword[sp->ts_fidx] == NUL - || !spell_iswordp(fword + sp->ts_fidx, curbuf)); if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry) { /* The badword also ends: add suggestions. Give a penalty @@ -7948,11 +8621,18 @@ suggest_try_change(su) * the following word is valid. */ if (!fword_ends - && spell_iswordp(fword + sp->ts_fidx, curbuf) - && sp->ts_twordlen - splitoff + && ((unsigned)flags >> 24) != 0 + && sp->ts_twordlen - sp->ts_splitoff >= slang->sl_compminlen - && can_compound(slang, flags)) + && sp->ts_complen + 1 <= slang->sl_compmax + && (sp->ts_complen > 0 + || vim_strchr(slang->sl_compstartflags, + ((unsigned)flags >> 24)) != NULL)) + { try_compound = TRUE; + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + } else { try_compound = FALSE; @@ -7963,9 +8643,7 @@ suggest_try_change(su) if (try_deeper(su, stack, depth, newscore)) { /* Save things to be restored at STATE_SPLITUNDO. */ - sp->ts_save_prewordlen = prewordlen; sp->ts_save_badflags = su->su_badflags; - sp->ts_save_splitoff = splitoff; sp->ts_state = STATE_SPLITUNDO; ++depth; @@ -7974,8 +8652,8 @@ suggest_try_change(su) /* Append a space to preword when splitting. */ if (!try_compound && !fword_ends) STRCAT(preword, " "); - prewordlen = STRLEN(preword); - splitoff = sp->ts_twordlen; + sp->ts_prewordlen = STRLEN(preword); + sp->ts_splitoff = sp->ts_twordlen; /* If the badword has a non-word character at this * position skip it. 
That means replacing the @@ -7996,10 +8674,10 @@ suggest_try_change(su) if (fword_ends) { /* Copy the skipped character to preword. */ - mch_memmove(preword + prewordlen, + mch_memmove(preword + sp->ts_prewordlen, fword + sp->ts_fidx, l); - prewordlen += l; - preword[prewordlen] = NUL; + sp->ts_prewordlen += l; + preword[sp->ts_prewordlen] = NUL; } else sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; @@ -8008,7 +8686,10 @@ suggest_try_change(su) /* set flag to check compound flag on following word */ if (try_compound) + { sp->ts_prefixdepth = PFD_COMPOUND; + ++sp->ts_complen; + } else sp->ts_prefixdepth = PFD_NOPREFIX; @@ -8032,8 +8713,6 @@ suggest_try_change(su) case STATE_SPLITUNDO: /* Undo the changes done for word split or compound word. */ su->su_badflags = sp->ts_save_badflags; - splitoff = sp->ts_save_splitoff; - prewordlen = sp->ts_save_prewordlen; /* Continue looking for NUL bytes. */ sp->ts_state = STATE_START; @@ -8644,7 +9323,6 @@ suggest_try_change(su) /* Continue in or go back to the prefix tree. */ byts = pbyts; idxs = pidxs; - splitoff = 0; } /* Don't check for CTRL-C too often, it takes time. */ @@ -9075,6 +9753,7 @@ suggest_try_soundalike(su) int round; int flags; int sound_score; + int local_score; /* Do this for all languages that support sound folding. */ for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); @@ -9138,6 +9817,15 @@ suggest_try_soundalike(su) * sound-a-like words. */ sound_score = soundalike_score(salword, tsalword); + + /* Add a penalty for words in another region. */ + if ((flags & WF_REGION) && (((unsigned)flags + >> 16) & lp->lp_region) == 0) + local_score = SCORE_REGION; + else + local_score = 0; + sound_score += local_score; + if (sound_score < SCORE_MAXMAX) { char_u cword[MAXWLEN]; @@ -9163,7 +9851,8 @@ suggest_try_soundalike(su) { /* Compute the score. 
*/ score = spell_edit_score( - su->su_badword, p); + su->su_badword, p) + + local_score; if (sps_flags & SPS_BEST) /* give a bonus for the good word * sounding the same as the bad |