diff options
author | Bram Moolenaar <Bram@vim.org> | 2006-01-12 23:22:24 +0000 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2006-01-12 23:22:24 +0000 |
commit | 4770d09abd866bb53d95895dc6a5c5fe7cccb619 (patch) | |
tree | b9ca6f4a66c7591a84cfe88fb21edb31db906a4e | |
parent | 1cbe5f739d4e75b5e16b85ae79ff0434a641b03d (diff) | |
download | vim-git-4770d09abd866bb53d95895dc6a5c5fe7cccb619.tar.gz |
updated for version 7.0179 (v7.0179)
68 files changed, 5372 insertions, 1820 deletions
@@ -77,6 +77,7 @@ SRC_ALL = \ src/testdir/*.in \ src/testdir/test[0-9]*.ok \ src/testdir/test49.vim \ + src/testdir/test60.vim \ src/proto.h \ src/proto/buffer.pro \ src/proto/charset.pro \ @@ -683,6 +684,9 @@ LANG_GEN_BIN = \ runtime/spell/en.ascii.spl \ runtime/spell/en.latin1.spl \ runtime/spell/en.utf-8.spl \ + runtime/spell/en.ascii.sug \ + runtime/spell/en.latin1.sug \ + runtime/spell/en.utf-8.sug \ # all files for lang archive LANG_SRC = \ diff --git a/runtime/autoload/syntaxcomplete.vim b/runtime/autoload/syntaxcomplete.vim new file mode 100644 index 000000000..367847c9b --- /dev/null +++ b/runtime/autoload/syntaxcomplete.vim @@ -0,0 +1,179 @@ +" Vim completion script +" Language: All languages, uses existing syntax highlighting rules +" Maintainer: David Fishburn <fishburn@ianywhere.com> +" Version: 1.0 +" Last Change: Sun Jan 08 2006 10:17:51 PM + +" Set completion with CTRL-X CTRL-O to autoloaded function. +if exists('&ofu') + setlocal ofu=syntaxcomplete#Complete +endif + +if exists('g:loaded_syntax_completion') + finish +endif +let g:loaded_syntax_completion = 1 + +" This script will build a completion list based on the syntax +" elements defined by the files in $VIMRUNTIME/syntax. + +let s:syn_remove_words = 'match,matchgroup=,contains,'. + \ 'links to,start=,end=,nextgroup=' + +let s:cache_name = [] +let s:cache_list = [] + +" This function is used for the 'omnifunc' option. +function! syntaxcomplete#Complete(findstart, base) + + if a:findstart + " Locate the start of the item, including "." + let line = getline('.') + let start = col('.') - 1 + let lastword = -1 + while start > 0 + if line[start - 1] =~ '\w' + let start -= 1 + elseif line[start - 1] =~ '\.' + " The user must be specifying a column name + if lastword == -1 + let lastword = start + endif + let start -= 1 + let b:sql_compl_type = 'column' + else + break + endif + endwhile + + " Return the column of the last word, which is going to be changed. 
+ " Remember the text that comes before it in s:prepended. + if lastword == -1 + let s:prepended = '' + return start + endif + let s:prepended = strpart(line, start, lastword - start) + return lastword + endif + + let base = s:prepended . a:base + + let list_idx = index(s:cache_name, &filetype, 0, &ignorecase) + if list_idx > -1 + let compl_list = s:cache_list[list_idx] + else + let compl_list = s:SyntaxList() + let s:cache_name = add( s:cache_name, &filetype ) + let s:cache_list = add( s:cache_list, compl_list ) + endif + + " Return list of matches. + + if base =~ '\w' + let compstr = join(compl_list, ' ') + let compstr = substitute(compstr, '\<\%('.base.'\)\@!\w\+\s*', '', 'g') + let compl_list = split(compstr, '\s\+') + endif + + return compl_list +endfunc + +function! s:SyntaxList() + let saveL = @l + + " Loop through all the syntax groupnames, and build a + " syntax file which contains these names. This can + " work generically for any filetype that does not already + " have a plugin defined. + " This ASSUMES the syntax groupname BEGINS with the name + " of the filetype. From my casual viewing of the vim7\sytax + " directory. + redir @l + silent! exec 'syntax list ' + redir END + + let syntax_groups = @l + let @l = saveL + + if syntax_groups =~ 'E28' + \ || syntax_groups =~ 'E411' + \ || syntax_groups =~ 'E415' + \ || syntax_groups =~ 'No sytax items' + return -1 + endif + + " Abort names - match, links to, matchgroup=, start=, contains=, contained, + " cluster=, nextgroup=, end= + let next_group_regex = '\n' . + \ '\zs'.&filetype.'\w\+\ze'. + \ '\s\+xxx\s\+'. + \ '\<\('. + \ substitute(s:syn_remove_words, ',', '\\|', 'g'). + \ '\)\@!' + let syn_list = '' + let index = 0 + let index = match(syntax_groups, next_group_regex, index) + + + while index > 0 + let group_name = matchstr( syntax_groups, '\w\+', index ) + + let extra_syn_list = s:SyntaxGroupItems(group_name) + + let syn_list = syn_list . extra_syn_list . 
"\n" + + let index = index + strlen(group_name) + let index = match(syntax_groups, next_group_regex, index) + endwhile + + return sort(split(syn_list)) +endfunction + +function! s:SyntaxGroupItems( group_name ) + let saveL = @l + + " Generate (based on the syntax highlight rules) a list of + " the Statements, functions, keywords and so on available + " If this needs updating, the syntax\sql.vim file should be + " updated + redir @l + silent! exec 'syntax list ' . a:group_name + redir END + + if @l !~ 'E28' + " let syn_list = substitute( @l, '^.*xxx\s*\%(contained\s*\)\?', "", '' ) + let syn_list = substitute( @l, '^.*xxx\s*', "", '' ) + + " We only want the words for the lines begining with + " containedin, but there could be other items. + + " Tried to remove all lines that do not begin with contained + " but this does not work in all cases since you can have + " contained nextgroup=... + " So this will strip off the ending of lines with known + " keywords. + let syn_list = substitute( syn_list, '\<\('. + \ substitute( + \ escape( s:syn_remove_words, '\\/.*$^~[]') + \ , ',', '\\|', 'g'). + \ '\).\{-}\%($\|'."\n".'\)' + \ , "\n", 'g' ) + + " Now strip off the newline + blank space + contained + let syn_list = substitute( syn_list, '\%(^\|\n\)\@<=\s*\<\('. + \ 'contained\)' + \ , "", 'g' ) + + " There are a number of items which have non-word characters in + " them, *'T_F1'*. vim.vim is one such file. + " This will replace non-word characters with spaces. + let syn_list = substitute( syn_list, '[^0-9A-Za-z_ ]', ' ', 'g' ) + else + let syn_list = '' + endif + + let @l = saveL + + return syn_list +endfunction + diff --git a/runtime/doc/autocmd.txt b/runtime/doc/autocmd.txt index 28edb4d4b..e83309a8d 100644 --- a/runtime/doc/autocmd.txt +++ b/runtime/doc/autocmd.txt @@ -1,4 +1,4 @@ -*autocmd.txt* For Vim version 7.0aa. Last change: 2005 Dec 18 +*autocmd.txt* For Vim version 7.0aa. 
Last change: 2006 Jan 08 VIM REFERENCE MANUAL by Bram Moolenaar @@ -330,7 +330,7 @@ BufEnter After entering a buffer. Useful for setting *BufFilePost* BufFilePost After changing the name of the current buffer with the ":file" or ":saveas" command. - *BufReadCmd* + *BufFilePre* BufFilePre Before changing the name of the current buffer with the ":file" or ":saveas" command. *BufHidden* @@ -368,10 +368,10 @@ BufRead or BufReadPost When starting to edit a new buffer, after This does NOT work for ":r file". Not used when the file doesn't exist. Also used after successfully recovering a file. - *BufReadPre* *E200* *E201* + *BufReadCmd* BufReadCmd Before starting to edit a new buffer. Should read the file into the buffer. |Cmd-event| - *BufFilePre* + *BufReadPre* *E200* *E201* BufReadPre When starting to edit a new buffer, before reading the file into the buffer. Not used if the file doesn't exist. diff --git a/runtime/doc/cmdline.txt b/runtime/doc/cmdline.txt index 72b3c295f..d12c1ea77 100644 --- a/runtime/doc/cmdline.txt +++ b/runtime/doc/cmdline.txt @@ -1,4 +1,4 @@ -*cmdline.txt* For Vim version 7.0aa. Last change: 2005 Dec 27 +*cmdline.txt* For Vim version 7.0aa. Last change: 2005 Dec 30 VIM REFERENCE MANUAL by Bram Moolenaar @@ -198,6 +198,8 @@ CTRL-\ e {expr} *c_CTRL-\_e* The cursor position is unchanged, except when the cursor was at the end of the line, then it stays at the end. |setcmdpos()| can be used to set the cursor position. + The |sandbox| is used for evaluating the expression to avoid + nasty side effects. Example: > :cmap <F7> <C-\>eAppendSome()<CR> :func AppendSome() diff --git a/runtime/doc/develop.txt b/runtime/doc/develop.txt index 498833c5a..4d12d166c 100644 --- a/runtime/doc/develop.txt +++ b/runtime/doc/develop.txt @@ -1,4 +1,4 @@ -*develop.txt* For Vim version 7.0aa. Last change: 2005 Sep 01 +*develop.txt* For Vim version 7.0aa. 
Last change: 2006 Jan 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -382,8 +382,8 @@ checking engine in Vim, for various reasons: them separately from Vim. That's mostly not impossible, but a drawback. - Performance: A few tests showed that it's possible to check spelling on the fly (while redrawing), just like syntax highlighting. But the mechanisms - used by other code are much slower. Myspell uses a simplistic hashtable, - for example. + used by other code are much slower. Myspell uses a hashtable, for example. + The affix compression that most spell checkers use makes it slower too. - For using an external program like aspell a communication mechanism would have to be setup. That's complicated to do in a portable way (Unix-only would be relatively simple, but that's not good enough). And performance @@ -399,14 +399,88 @@ checking engine in Vim, for various reasons: another program or library would be acceptable. But the word lists probably differ, the suggestions may be wrong words. + +Spelling suggestions *develop-spell-suggestions* + +For making suggestions there are two basic mechanisms: +1. Try changing the bad word a little bit and check for a match with a good + word. Or go through the list of good words, change them a little bit and + check for a match with the bad word. The changes are deleting a character, + inserting a character, swapping two characters, etc. +2. Perform soundfolding on both the bad word and the good words and then find + matches, possibly with a few changes like with the first mechanism. + +The first is good for finding typing mistakes. After experimenting with +hashtables and looking at solutions from other spell checkers the conclusion +was that a trie (a kind of tree structure) is ideal for this. Both for +reducing memory use and being able to try sensible changes. For example, when +inserting a character only characters that lead to good words need to be +tried. 
Other mechanisms (with hashtables) need to try all possible letters at +every position in the word. Also, a hashtable has the requirement that word +boundaries are identified separately, while a trie does not require this. +That makes the mechanism a lot simpler. + +Soundfolding is useful when someone knows how the word sounds but doesn't +know how it is spelled. For example, the word "dictionary" might be written +as "daktonerie". The number of changes that the first method would need to +try is very big, it's hard to find the good word that way. After soundfolding +the words become "tktnr" and "tkxnry", these differ by only two letters. + +To find words by their soundfolded equivalent (soundalike word) we need a list +of all soundfolded words. A few experiments have been done to find out what +the best method is. Alternatives: +1. Do the sound folding on the fly when looking for suggestions. This means + walking through the trie of good words, soundfolding each word and + checking how different it is from the bad word. This is very efficient for + memory use, but takes a long time. On a fast PC it takes a couple of + seconds for English, which can be acceptable for interactive use. But for + some languages it takes more than ten seconds (e.g., German, Catalan), + which is unacceptably slow. For batch processing (automatic corrections) + it's too slow for all languages. +2. Use a trie for the soundfolded words, so that searching can be done just + like how it works without soundfolding. This requires remembering a list + of good words for each soundfolded word. This makes finding matches very + fast but requires quite a lot of memory, in the order of 1 to 10 Mbyte. + For some languages more than the original word list. +3. Like the second alternative, but reduce the amount of memory by using affix + compression and store only the soundfolded basic word. This is what Aspell + does.
Disadvantage is that affixes need to be stripped from the bad word + before soundfolding it, which means that mistakes at the start and/or end + of the word will cause the mechanism to fail. Also, this becomes slow when + the bad word is quite different from the good word. + +The choice made is to use the second mechanism and use a separate file. This +way a user with sufficient memory can get very good suggestions while a user +who is short of memory or just wants the spell checking and no suggestions +doesn't use so much memory. + + +Word frequency + +For sorting suggestions it helps to know which words are common. In theory we +could store a word frequency with the word in the dictionary. However, this +requires storing a count per word. That degrades word tree compression a lot. +And maintaining the word frequency for all languages will be a heavy task. +Also, it would be nice to prefer words that are already in the text. This way +the words that appear in the specific text are preferred for suggestions. + +What has been implemented is to count words that have been seen during +displaying. A hashtable is used to quickly find the word count. The count is +initialized from words listed in COMMON items in the affix file, so that it +also works when starting a new file. + +This isn't ideal, because the longer Vim is running the higher the counts +become. But in practice it is a noticeable improvement over not using the word +count. + ============================================================================== 4. Assumptions *design-assumptions* Size of variables: char 8 bit signed char_u 8 bit unsigned -int 16, 32 or 64 bit signed -unsigned 16, 32 or 64 bit unsigned +int 32 or 64 bit signed (16 might be possible with limited features) +unsigned 32 or 64 bit unsigned (16 as with ints) long 32 or 64 bit signed, can hold a pointer Note that some compilers cannot handle long lines or strings.
The C89 diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 7d7058201..4c527e1a1 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -1,4 +1,4 @@ -*eval.txt* For Vim version 7.0aa. Last change: 2005 Dec 27 +*eval.txt* For Vim version 7.0aa. Last change: 2006 Jan 09 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1419,7 +1419,7 @@ v:swapchoice |SwapExists| autocommands can set this to the selected choice no SwapExists autocommand. The default is empty. *v:swapcommand* *swapcommand-variable* -v:swapcommand Normal mode ommand to be executed after a file has been +v:swapcommand Normal mode command to be executed after a file has been opened. Can be used for a |SwapExists| autocommand to have another Vim open the file and jump to the right place. For example, when jumping to a tag the value is ":tag tagname\r". @@ -4381,6 +4381,10 @@ system({expr} [, {input}]) *system()* *E677* The resulting error code can be found in |v:shell_error|. This function will fail in |restricted-mode|. + + Note that any wrong value in the options mentioned above may + make the function fail. It has also been reported to fail + when using a security agent application. Unlike ":!cmd" there is no automatic check for changed files. Use |:checktime| to force a check. @@ -6814,6 +6818,7 @@ These items are not allowed in the sandbox: - executing a shell command - reading or writing a file - jumping to another buffer or editing a file + - executing Python, Perl, etc. commands This is not guaranteed 100% secure, but it should block most attacks. *:san* *:sandbox* diff --git a/runtime/doc/if_mzsch.txt b/runtime/doc/if_mzsch.txt index 0580891e2..d0fd793a2 100644 --- a/runtime/doc/if_mzsch.txt +++ b/runtime/doc/if_mzsch.txt @@ -1,4 +1,4 @@ -*if_mzsch.txt* For Vim version 7.0aa. Last change: 2005 May 08 +*if_mzsch.txt* For Vim version 7.0aa. Last change: 2006 Jan 05 VIM REFERENCE MANUAL by Sergey Khorev @@ -10,6 +10,7 @@ The MzScheme Interface to Vim *mzscheme* *MzScheme* 2. 
Examples |mzscheme-examples| 3. Threads |mzscheme-threads| 4. The Vim access procedures |mzscheme-vim| +5. Dynamic loading |mzscheme-dynamic| {Vi does not have any of these commands} @@ -243,5 +244,23 @@ Windows *mzscheme-window* a pair (linenr . column). (set-cursor (line . col) [window]) Set cursor position. +============================================================================== +5. Dynamic loading *mzscheme-dynamic* + +On MS-Windows the MzScheme libraries can be loaded dynamically. The |:version| +output then includes |+mzscheme/dyn|. + +This means that Vim will search for the MzScheme DLL files only when needed. +When you don't use the MzScheme interface you don't need them, thus you can +use Vim without these DLL files. + +To use the MzScheme interface the MzScheme DLLs must be in your search path. +In a console window type "path" to see what directories are used. + +The names of the DLLs must match the MzScheme version Vim was compiled with. +For MzScheme version 209 they will be "libmzsch209_000.dll" and +"libmzgc209_000.dll". To know for sure edit "gvim.exe" and search for +"libmzsch\d\d\d_\d\d\d\.dll\c". + ====================================================================== vim:tw=78:ts=8:sts=4:ft=help:norl: diff --git a/runtime/doc/index.txt b/runtime/doc/index.txt index 6c5037c88..96a6c612d 100644 --- a/runtime/doc/index.txt +++ b/runtime/doc/index.txt @@ -1,4 +1,4 @@ -*index.txt* For Vim version 7.0aa. Last change: 2005 Dec 23 +*index.txt* For Vim version 7.0aa. Last change: 2006 Jan 11 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1069,7 +1069,8 @@ The commands are sorted on the non-optional part of their name. 
|:cNfile| :cNf[ile] go to last error in previous file |:cabbrev| :ca[bbrev] like ":abbreviate" but for Command-line mode |:cabclear| :cabc[lear] clear all abbreviations for Command-line mode -|:caddfile| :cad[dfile] add error message to current quickfix list +|:caddexpr| :cad[dexpr] add errors from expr +|:caddfile| :caddf[ile] add error message to current quickfix list |:call| :cal[l] call a function |:catch| :cat[ch] part of a :try command |:cbuffer| :cb[uffer] parse error messages and jump to first error diff --git a/runtime/doc/insert.txt b/runtime/doc/insert.txt index f7db100cb..0a28038dc 100644 --- a/runtime/doc/insert.txt +++ b/runtime/doc/insert.txt @@ -1,4 +1,4 @@ -*insert.txt* For Vim version 7.0aa. Last change: 2005 Dec 28 +*insert.txt* For Vim version 7.0aa. Last change: 2006 Jan 08 VIM REFERENCE MANUAL by Bram Moolenaar @@ -354,7 +354,7 @@ CTRL-G CTRL-J cursor one line down, insert start column *i_CTRL-G_CTRL-J* <MouseUp> scroll three lines up *i_<MouseUp>* <S-MouseUp> scroll a full page up *i_<S-MouseUp>* CTRL-O execute one command, return to Insert mode *i_CTRL-O* -CTRL-\ CTRL-O like CTRL-O but don't move the cursor *i_CTRL-\_CTRL-O* +CTRL-\ CTRL-O like CTRL-O but don't move the cursor *i_CTRL-\_CTRL-O* CTRL-L when 'insertmode' is set: go to Normal mode *i_CTRL-L* CTRL-G u break undo sequence, start new change *i_CTRL-G_u* ----------------------------------------------------------------------- @@ -963,8 +963,8 @@ The menu is used when: While the menu is displayed these keys have a special meaning: <CR> and <Enter>: Accept the currently selected match -<Up>: Select the previous match, as if CTRL-P was used -<Down>: Select the next match, as if CTRL-N was used +<Up>: Select the previous match, as if CTRL-P was used +<Down>: Select the next match, as if CTRL-N was used <PageUp>: Select a match several entries back <PageDown>: Select a match several entries further @@ -1010,14 +1010,14 @@ When the same structure name appears in multiple places all 
possible members are included. -CSS *ft-css-omni* +CSS *ft-css-omni* Complete properties and their appropriate values according to CSS 2.1 specification. -(X)HTML *ft-html-omni* - *ft-xhtml-omni* +(X)HTML *ft-html-omni* + *ft-xhtml-omni* CTRL-X CTRL-O provides completion of various elements of (X)HTML files. It is designed to support writing of XHTML 1.0 Strict files but will @@ -1040,7 +1040,26 @@ Note: When used first time completion menu will be shown with little delay - this is time needed for loading of data file. -XML *ft-xml-omni* +SYNTAX *ft-syntax-omni* + +This uses the current syntax highlighting for completion. It can be used for +any filetype and provides a minimal language-sensitive completion. + +To enable code completion do: > + source $VIMRUNTIME/autoload/syntaxcomplete.vim + +You can automate this by placing this in your vimrc (after any ":filetype" +command): > + autocmd Filetype * + \ if exists('&ofu') && &ofu == "" | + \ source $VIMRUNTIME/autoload/syntaxcomplete.vim | + \ endif + +The above will set completion to this script only if a proper one does not +already exist for that filetype. + + +XML *ft-xml-omni* Vim 7 provides mechanism to context aware completion of XML files. It depends on special |xml-omni-datafile| and two commands: |:XMLns| and |:XMLent|. @@ -1056,7 +1075,7 @@ Features are: with "<!ENTITY" declarations - when used after "</" CTRL-X CTRL-O will close the last opened tag -Format of XML data file *xml-omni-datafile* +Format of XML data file *xml-omni-datafile* Vim distribution provides two data files as examples (xhtml10s.vim, xsl.vim) @@ -1105,7 +1124,7 @@ xsl.vim for example. Commands -:XMLns {name} [{namespace}] *:XMLns* +:XMLns {name} [{namespace}] *:XMLns* Vim has to know which data file should be used and with which namespace. 
For loading of data file and connecting data with prope namespace use |:XMLns| @@ -1118,24 +1137,24 @@ to use XML completion in .xsl files: > :XMLns xsl xsl -:XMLent {name} *:XMLent* +:XMLent {name} *:XMLent* By default entities will be completed from data file of default namespace. XMLent command should be used in case when there is no default namespace: > - :XMLent xhtml10s + :XMLent xhtml10s Usage While used in situation (after declarations from previous part, | is cursor position): > - <| + <| Will complete to appropriate XHTML tag, and in this situation: > - <xsl:| + <xsl:| Will complete to appropriate XSL tag. @@ -1143,7 +1162,7 @@ File xmlcomplete.vim provides through |autoload| mechanism GetLastOpenTag function which can be used in XML files to get name of last open tag with (b:unaryTagsStack has to be defined): > - :echo xmlcomplete#GetLastOpenTag("b:unaryTagsStack") + :echo xmlcomplete#GetLastOpenTag("b:unaryTagsStack") diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt index 90ebe06eb..85155b439 100644 --- a/runtime/doc/map.txt +++ b/runtime/doc/map.txt @@ -1,4 +1,4 @@ -*map.txt* For Vim version 7.0aa. Last change: 2005 Dec 17 +*map.txt* For Vim version 7.0aa. Last change: 2006 Jan 09 VIM REFERENCE MANUAL by Bram Moolenaar @@ -946,11 +946,10 @@ local function or uses a local mapping. Otherwise, using "<SID>" outside of a script context is an error. If you need to get the script number to use in a complicated script, you can -use this trick: > - :map <SID>xx <SID>xx - :let s:sid = maparg("<SID>xx") - :unmap <SID>xx -And remove the trailing "xx". +use this function: > + function s:SID() + return matchstr(expand('<sfile>'), '<SNR>\zs\d\+\ze_SID$') + endfun The "<SNR>" will be shown when listing functions and mappings. This is useful to find out what they are defined to. 
diff --git a/runtime/doc/message.txt b/runtime/doc/message.txt index c64b4540b..bb94867b3 100644 --- a/runtime/doc/message.txt +++ b/runtime/doc/message.txt @@ -1,4 +1,4 @@ -*message.txt* For Vim version 7.0aa. Last change: 2005 Oct 10 +*message.txt* For Vim version 7.0aa. Last change: 2006 Jan 08 VIM REFERENCE MANUAL by Bram Moolenaar @@ -19,7 +19,8 @@ The ":messages" command can be used to view previously given messages. This is especially useful when messages have been overwritten or truncated. This depends on the 'shortmess' option. -The number of remembered messages is fixed at 20. +The number of remembered messages is fixed at 20 for the tiny version and 100 +for other versions. *g<* The "g<" command can be used to see the last page of previous command output. diff --git a/runtime/doc/motion.txt b/runtime/doc/motion.txt index dd95b6cc6..5570d7303 100644 --- a/runtime/doc/motion.txt +++ b/runtime/doc/motion.txt @@ -1,4 +1,4 @@ -*motion.txt* For Vim version 7.0aa. Last change: 2005 Dec 12 +*motion.txt* For Vim version 7.0aa. Last change: 2006 Jan 02 VIM REFERENCE MANUAL by Bram Moolenaar @@ -386,10 +386,11 @@ These commands move over words or WORDS. *word* A word consists of a sequence of letters, digits and underscores, or a sequence of other non-blank characters, separated with white space (spaces, -tabs, <EOL>). This can be changed with the 'iskeyword' option. +tabs, <EOL>). This can be changed with the 'iskeyword' option. An empty line +is also considered to be a word. *WORD* A WORD consists of a sequence of non-blank characters, separated with white -space. An empty line is also considered to be a word and a WORD. +space. An empty line is also considered to be a WORD. A sequence of folded lines is counted for one word of a single character. 
"w" and "W", "e" and "E" move to the start/end of the first word or WORD after diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index e121a77c6..c6535366d 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -1,4 +1,4 @@ -*options.txt* For Vim version 7.0aa. Last change: 2005 Dec 29 +*options.txt* For Vim version 7.0aa. Last change: 2006 Jan 04 VIM REFERENCE MANUAL by Bram Moolenaar @@ -4597,12 +4597,12 @@ A jump table for the options with a short description can be found at |Q_op|. This defines what bases Vim will consider for numbers when using the CTRL-A and CTRL-X commands for adding to and subtracting from a number respectively; see |CTRL-A| for more info on these commands. - alpha if included, single alphabetical characters will be + alpha If included, single alphabetical characters will be incremented or decremented. This is useful for a list with a letter index a), b), etc. - octal if included, numbers that start with a zero will be considered + octal If included, numbers that start with a zero will be considered to be octal. Example: Using CTRL-A on "007" results in "010". - hex if included, numbers starting with "0x" or "0X" will be + hex If included, numbers starting with "0x" or "0X" will be considered to be hexadecimal. Example: Using CTRL-X on "0x100" results in "0x0ff". Numbers which simply begin with a digit in the range 1-9 are always @@ -6050,6 +6050,7 @@ A jump table for the options with a short description can be found at |Q_op|. a S Argument list status as in default title. ({current} of {max}) Empty if the argument file count is zero or one. { NF Evaluate expression between '{' and '}' and substitute result. + Note that there is no '%' before the closing '}'. ( - Start of item group. Can be used for setting the width and alignment of a section. Must be followed by %) somewhere. ) - End of item group. No width fields allowed. 
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 2cc592189..191a8d587 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1,4 +1,4 @@ -*pattern.txt* For Vim version 7.0aa. Last change: 2005 Sep 12 +*pattern.txt* For Vim version 7.0aa. Last change: 2006 Jan 05 VIM REFERENCE MANUAL by Bram Moolenaar @@ -256,9 +256,13 @@ switched off by setting the 's' flag in the 'shortmess' option. The highlight method 'w' is used for this message (default: standout). *search-range* -You cannot limit the search command "/" to a certain range of lines. A trick -to do this anyway is to use the ":substitute" command with the 'c' flag. -Example: > +You can limit the search command "/" to a certain range of lines by including +\%>l items. For example, to match the word "limit" below line 199 and above +line 300: > + /\%>199l\%<300llimit +Also see |/\%>l|. + +Another way is to use the ":substitute" command with the 'c' flag. Example: > :.,300s/Pattern//gc This command will search from the cursor position until line 300 for "Pattern". At the match, you will be asked to type a character. Type 'q' to @@ -800,8 +804,8 @@ $ At end of pattern or in front of "\|" or "\)" ("|" or ")" after "\v"): */\%l* */\%>l* */\%<l* \%23l Matches in a specific line. -\%<23l Matches above a specific line. -\%>23l Matches below a specific line. +\%<23l Matches above a specific line (lower line number). +\%>23l Matches below a specific line (higher line number). These three can be used to match specific lines in a buffer. The "23" can be any line number. The first line is 1. {not in Vi} WARNING: When inserting or deleting lines Vim does not automatically diff --git a/runtime/doc/quickfix.txt b/runtime/doc/quickfix.txt index 0049a5464..09eccb52e 100644 --- a/runtime/doc/quickfix.txt +++ b/runtime/doc/quickfix.txt @@ -1,4 +1,4 @@ -*quickfix.txt* For Vim version 7.0aa. Last change: 2005 Sep 27 +*quickfix.txt* For Vim version 7.0aa. 
Last change: 2006 Jan 11 VIM REFERENCE MANUAL by Bram Moolenaar @@ -110,8 +110,8 @@ The following quickfix commands can be used: Read the error file. Just like ":cfile" but don't jump to the first error. - *:cad* *:caddfile* -:cad[dfile] [errorfile] Read the error file and add the errors from the + *:caddf* *:caddfile* +:caddf[ile] [errorfile] Read the error file and add the errors from the errorfile to the current quickfix list. If a quickfix list is not present, then a new list is created. @@ -124,18 +124,28 @@ The following quickfix commands can be used: Otherwise all lines in the buffer are used. *:cex* *:cexpr* -:cex[pr][!] {expr} Create a quickfix list using the result of {expr}. - If {expr} is a String, then each new-line terminated - line in the String is processed using 'errorformat' - and the result is added to the quickfix list. - If {expr} is a List, then each String item in the list - is processed and added to the quickfix list. - Non String items in the List are ignored. See |:cc| +:cex[pr][!] {expr} Create a quickfix list using the result of {expr} and + jump to the first error. If {expr} is a String, then + each new-line terminated line in the String is + processed using 'errorformat' and the result is added + to the quickfix list. If {expr} is a List, then each + String item in the list is processed and added to the + quickfix list. Non String items in the List are + ignored. See |:cc| for [!]. Examples: > :cexpr system('grep -n xyz *') :cexpr getline(1, '$') < + *:cad* *:caddexpr* +:cad[dexpr][!] {expr} Evaluate {expr} and add the resulting lines to the + current quickfix list. If a quickfix list is not + present, then a new list is created. The current + cursor position will not be changed. See |:cexpr| for + more information. + Example: > + :g/mypattern/caddexpr expand("%") . ":" . line(".") . ":" . getline(".") +< *:cl* *:clist* :cl[ist] [from] [, [to]] List all errors that are valid |quickfix-valid|. 
diff --git a/runtime/doc/quickref.txt b/runtime/doc/quickref.txt index 687e5cff5..251fcc2b5 100644 --- a/runtime/doc/quickref.txt +++ b/runtime/doc/quickref.txt @@ -1,4 +1,4 @@ -*quickref.txt* For Vim version 7.0aa. Last change: 2005 Dec 12 +*quickref.txt* For Vim version 7.0aa. Last change: 2006 Jan 11 VIM REFERENCE MANUAL by Bram Moolenaar @@ -938,7 +938,9 @@ Short explanation of each option: *option-list* |:clist| :cl list all errors |:cfile| :cf read errors from the file 'errorfile' |:cgetfile| :cg like :cfile but don't jump to the first error -|:caddfile| :cad add errors from the error file to the current +|:caddfile| :caddf add errors from the error file to the current + quickfix list +|:caddexpr| :cad add errors from an expression to the current quickfix list |:cbuffer| :cb read errors from text in a buffer |:cexpr| :cex read errors from an expression diff --git a/runtime/doc/spell.txt b/runtime/doc/spell.txt index 101e9b518..a2989d783 100644 --- a/runtime/doc/spell.txt +++ b/runtime/doc/spell.txt @@ -1,4 +1,4 @@ -*spell.txt* For Vim version 7.0aa. Last change: 2005 Dec 29 +*spell.txt* For Vim version 7.0aa. Last change: 2006 Jan 11 VIM REFERENCE MANUAL by Bram Moolenaar @@ -172,6 +172,12 @@ When there is a line break right after a sentence the highlighting of the next line may be postponed. Use |CTRL-L| when needed. Also see |set-spc-auto| for how it can be set automatically when 'spelllang' is set. +Vim counts the number of times a good word is encountered. This is used to +sort the suggestions: words that have been seen before get a small bonus, +words that have been seen often get a bigger bonus. The COMMON item in the +affix file can be used to define common words, so that this mechanism also +works in a new or short file |spell-COMMON|. + ============================================================================== 2. Remarks on spell checking *spell-remarks* @@ -296,6 +302,11 @@ A spell file might not be available in the current 'encoding'. 
See |spell-mkspell| about how to create a spell file. Converting a spell file with "iconv" will NOT work! + *spell-sug-file* +If there is a file with exactly the same name as the ".spl" file but ending in +".sug", that file will be used for giving better suggestions. It isn't loaded +before suggestions are made to reduce memory use. + *E758* *E759* When loading a spell file Vim checks that it is properly formatted. If you get an error the file may be truncated, modified or intended for another Vim @@ -531,6 +542,11 @@ used spelling files, use this command: Note: For some languages the result may be enormous, causing Vim to run out of memory. +:spelld[ump]! Like ":spelldump" and include the word count. This is + the number of times the word was found while + updating the screen. Words that are in COMMON items + get a starting count of 10. + The format of the word list is used |spell-wordlist-format|. You should be able to read it with ":mkspell" to generate one .spl file that includes all the words. @@ -569,13 +585,16 @@ Additionally the following items are recognized: - Empty and blank lines are ignored. + # comment ~ - Lines starting with a # are ignored (comment lines). + /encoding=utf-8 ~ - A line starting with "/encoding=", before any word, specifies the encoding of the file. After the second '=' comes an encoding name. This tells Vim to setup conversion from the specified encoding to 'encoding'. Thus you can use one word list for several target encodings. + /regions=usca ~ - A line starting with "/regions=" specifies the region names that are supported. Each region name must be two ASCII letters. The first one is region 1. Thus "/regions=usca" has region 1 "us" and region 2 "ca". @@ -583,7 +602,8 @@ Additionally the following items are recognized: list! - Other lines starting with '/' are reserved for future use. The ones that - are not recognized are ignored (but you do get a warning message). + are not recognized are ignored. 
You do get a warning message, so that you + know something won't work. - A "/" may follow the word with the following items: = Case must match exactly. @@ -608,17 +628,18 @@ accepted. This is different from a word with mixed case that is automatically marked as keep-case, those words may appear in all upper-case letters. -FORMAT WITH AFFIX COMPRESSION +FORMAT WITH .AFF and .DIC FILES -There are two files: the basic word list and an affix file. The affixes are +There are two files: the basic word list and an affix file. The affix file +specifies settings for the language and can contain affixes. The affixes are used to modify the basic words to get the full word list. This significantly reduces the number of words, especially for a language like Polish. This is called affix compression. -The basic word list and the affix file are combined and turned into a binary -spell file. All the preprocessing has been done, thus this file loads fast. -The binary spell file format is described in the source code (src/spell.c). -But only developers need to know about it. +The basic word list and the affix file are combined with the ":mkspell" +command and results in a binary spell file. All the preprocessing has been +done, thus this file loads fast. The binary spell file format is described in +the source code (src/spell.c). But only developers need to know about it. The preprocessing also allows us to take the Myspell language files and modify them before the Vim word list is made. The tools for this can be found in the @@ -630,39 +651,47 @@ here: http://lingucomponent.openoffice.org/affix.readme ~ Note that affixes are case sensitive, this isn't obvious from the description. -Vim does not use the TRY item, it is ignored. For making suggestions the -possible characters in the words are used. - Vim supports quite a few extras. They are described below |spell-affix-vim|. 
Attempts have been made to keep this compatible with other spell checkers, so -that the same files can be used. +that the same files can often be used. One other project that offers more +than Myspell is Hunspell ( http://hunspell.sf.net ). WORD LIST FORMAT *spell-dic-format* -A very short example, with line numbers: - - 1 1234 - 2 aan - 3 Als - 4 Etten-Leur - 5 et al. - 6 's-Gravenhage - 7 's-Gravenhaags - 8 bedel/P - 9 kado/1 - 10 cadeau/2 - 11 TCP,IP +A short example, with line numbers: + + 1 1234 ~ + 2 aan ~ + 3 Als ~ + 4 Etten-Leur ~ + 5 et al. ~ + 6 's-Gravenhage ~ + 7 's-Gravenhaags ~ + 8 # word that differs between regions ~ + 9 kado/1 ~ + 10 cadeau/2 ~ + 11 TCP,IP ~ + 12 /the S affix may add a 's' ~ + 13 bedel/S ~ The first line contains the number of words. Vim ignores it, but you do get an error message if it's not there. *E760* -What follows is one word per line. There should be no white space before or -after the word. After the word there is an optional slash and flags. Most of -these flags are letters that indicate the affixes that can be used with this -word. These are specified with SFX and PFX lines in the .aff file. See the -Myspell documentation. Vim allows using other flag types with the FLAG item -in the affix file |spell-FLAG|. +What follows is one word per line. White space at the end of the line is +ignored, all other white space matters. The encoding is specified in the +affix file |spell-SET|. + +Comment lines start with '#' or '/'. See the example lines 8 and 12. Note +that putting a comment after a word is NOT allowed: + + someword # comment that causes an error! ~ + +After the word there is an optional slash and flags. Most of these flags are +letters that indicate the affixes that can be used with this word. These are +specified with SFX and PFX lines in the .aff file, see |spell-SFX| and +|spell-PFX|. Vim allows using other flag types with the FLAG item in the +affix file |spell-FLAG|. 
When the word only has lower-case letters it will also match with the word starting with an upper-case letter. @@ -672,7 +701,7 @@ is required at this position. The same word with a lower-case letter at this position will not match. When some of the other letters are upper-case it will not match either. -The word with all upper-case characters will always be OK. +The word with all upper-case characters will always be OK, word list matches does not match ~ als als Als ALS ALs AlS aLs aLS @@ -683,46 +712,57 @@ The word with all upper-case characters will always be OK. The KEEPCASE affix ID can be used to specifically match a word with identical case only, see below |spell-KEEPCASE|. -Note in line 5 to 7 that non-word characters are used. You can include -any character in a word. When checking the text a word still only matches -when it appears with a non-word character before and after it. For Myspell a -word starting with a non-word character probably won't work. +Note: in line 5 to 7 non-word characters are used. You can include any +character in a word. When checking the text a word still only matches when it +appears with a non-word character before and after it. For Myspell a word +starting with a non-word character probably won't work. In line 12 the word "TCP/IP" is defined. Since the slash has a special meaning the comma is used instead. This is defined with the SLASH item in the -affix file, see |spell-SLASH|. Note that without this SLASH item the -word will be "TCP,IP". +affix file, see |spell-SLASH|. Note that without this SLASH item the word +will be "TCP,IP". - *spell-affix-vim* -A flag that Vim adds and is not in Myspell is the flag defined with KEEPCASE -in the affix file. This has the meaning that case matters. This can be used -if the word does not have the first letter in upper case at the start of a -sentence. 
Example (assuming that = was used for KEEPCASE): - word list matches does not match ~ - 's morgens/= 's morgens 'S morgens 's Morgens 'S MORGENS - 's Morgens 's Morgens 'S MORGENS 'S morgens 's morgens +AFFIX FILE FORMAT *spell-aff-format* *spell-affix-vim* -The flag can also be used to avoid that the word matches when it is in all -upper-case letters. + *spell-affix-comment* +Comment lines in the .aff file start with a '#': + + # comment line ~ +With some items it's also possible to put a comment after it, but this isn't +supported in general. + + +ENCODING *spell-SET* + +The affix file can be in any encoding that is supported by "iconv". However, +in some cases the current locale should also be set properly at the time +|:mkspell| is invoked. Adding FOL/LOW/UPP lines removes this requirement +|spell-FOL|. + +The encoding should be specified before anything where the encoding matters. +The encoding applies both to the affix file and the dictionary file. It is +done with a SET line: + + SET utf-8 ~ + +The encoding can be different from the value of the 'encoding' option at the +time ":mkspell" is used. Vim will then convert everything to 'encoding' and +generate a spell file for 'encoding'. If some of the used characters do not +fit in 'encoding' you will get an error message. *spell-affix-mbyte* -The basic word list is normally in an 8-bit encoding, which is mentioned in -the affix file. The affix file must always be in the same encoding as the -word list. This is compatible with Myspell. For Vim the encoding may also be -something else, any encoding that "iconv" supports. The "SET" line must -specify the name of the encoding. When using a multi-byte encoding it's -possible to use more different affixes (but Myspell doesn't support that, thus -you may not want to use it anyway). +When using a multi-byte encoding it's possible to use more different affix +flags. But Myspell doesn't support that, thus you may not want to use it +anyway. 
For compatibility use an 8-bit encoding. CHARACTER TABLES *spell-affix-chars* When using an 8-bit encoding the affix file should define what characters are -word characters (as specified with ENC). This is because the system where -":mkspell" is used may not support a locale with this encoding and isalpha() -won't work. For example when using "cp1250" on Unix. - +word characters. This is because the system where ":mkspell" is used may not +support a locale with this encoding and isalpha() won't work. For example +when using "cp1250" on Unix. *E761* *E762* *spell-FOL* *spell-LOW* *spell-UPP* Three lines in the affix file are needed. Simplistic example: @@ -774,7 +814,7 @@ the word. This is needed to detect a spelling error such as they'are. That should be they're, but since "they" and "are" are words themselves that would go unnoticed. -These characters are defined with MIDWORD in the .aff file: +These characters are defined with MIDWORD in the .aff file. Example: MIDWORD '- ~ @@ -808,9 +848,58 @@ The usual PFX (prefix) and SFX (suffix) lines are supported (see the Myspell documentation or the Aspell manual: http://aspell.net/man-html/Affix-Compression.html). -Note that Myspell ignores any extra text after the relevant info. Vim -requires this text to start with a "#" so that mistakes don't go unnoticed. -Example: +Summary: + SFX L Y 2 ~ + SFX L 0 re [^x] ~ + SFX L 0 ro x ~ + +The first line is a header and has four fields: + SFX {flag} {combine} {count} + +{flag} The name used for the suffix. Mostly it's a single letter, + but other characters can be used, see |spell-FLAG|. + +{combine} Can be 'Y' or 'N'. When 'Y' then the word plus suffix can + also have a prefix. When 'N' then a prefix is not allowed. + +{count} The number of lines following. If this is wrong you will get + an error message. + +For PFX the fields are exactly the same. 
+ +The basic format for the following lines is: + SFX {flag} {strip} {add} {condition} + +{flag} Must be the same as the {flag} used in the first line. + +{strip} Characters removed from the basic word. There is no check if + the characters are actually there, only the length is used (in + bytes). This better match the {condition}, otherwise strange + things may happen. If the {strip} length is equal to or + longer than the basic word the suffix won't be used. + When {strip} is 0 (zero) then nothing is stripped. + +{add} Characters added to the basic word, after removing {strip}. + +{condition} A simplistic pattern. Only when this matches with a basic + word will the suffix be used for that word. This is normally + for using one suffix letter with different {add} and {strip} + fields for words with different endings. + When {condition} is a . (dot) there is no condition. + The pattern may contain: + - Literal characters. + - A set of characters in []. [abc] matches a, b and c. + A dash is allowed for a range [a-c], but this is + Vim-specific. + - A set of characters that starts with a ^, meaning the + complement of the specified characters. [^abc] matches any + character but a, b and c. + +For PFX the fields are the same, but the {strip}, {add} and {condition} apply +to the start of the word. + +Note: Myspell ignores any extra text after the relevant info. Vim requires +this text to start with a "#" so that mistakes don't go unnoticed. Example: SFX F 0 in [^i]n # Spion > Spionin ~ SFX F 0 nen in # Bauerin > Bauerinnen ~ @@ -826,16 +915,49 @@ Myspell that use this feature apparently have this flag. Example: SFX a 0 en . ~ SFX a 0 on . ~ + +AFFIX FLAGS *spell-affix-flags* + +This is a feature that comes from Hunspell: The affix may specify flags. This +works similar to flags specified on a basic word. The flags apply to the +basic word plus the affix. Example: + + SFX S Y 1 ~ + SFX S 0 s . ~ + + SFX A Y 1 ~ + SFX A 0 able/S . 
~ + +When the dictionary file contains "drink/AS" then these words are possible: + + drink + drinks uses S suffix + drinkable uses A suffix + drinkables uses A suffix and then S suffix + +Generally the flags of the suffix are added to the flags of the basic word, +both are used for the word plus suffix. But the flags of the basic word are +only used once for affixes, except that both one prefix and one suffix can be +used when both support combining. + +Specifically, the affix flags can be used for: +- Affixes on affixes, as in the example above. +- Making the word with the affix rare, by using the |spell-RARE| flag. +- Exclude the word with the affix from compounding, by using the + |spell-COMPOUNDFORBIDFLAG| flag. + +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +OLD STUFF *spell-affix-rare* An extra item for Vim is the "rare" flag. It must come after the other fields, before a comment. When used then all words that use the affix will be -marked as rare words. Example: +marked as rare words. Examples: PFX F 0 nene . rare ~ SFX F 0 oin n rare # hardly ever used ~ -However, if the word also appears as a good word in another way it won't be -marked as rare. +However, if the word also appears as a good word in another way (e.g., in +another region) it won't be marked as rare. *spell-affix-nocomp* Another extra item for Vim is the "nocomp" flag. It must come after the other @@ -852,6 +974,7 @@ Example: util/ac ~ This allows for "wordutil" and "wordutils" but not "wordutilize". +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- *spell-PFXPOSTPONE* When an affix file has very many prefixes that apply to many words it's not @@ -891,7 +1014,16 @@ for keep-case words. Example: KEEPCASE = ~ -See above for an example |spell-affix-vim|. +This flag is not supported by Myspell. It has the meaning that case matters. +This can be used if the word does not have the first letter in upper case at +the start of a sentence. 
Example: + + word list matches does not match ~ + 's morgens/= 's morgens 'S morgens 's Morgens 'S MORGENS + 's Morgens 's Morgens 'S MORGENS 'S morgens 's morgens + +The flag can also be used to avoid that the word matches when it is in all +upper-case letters. RARE WORDS *spell-RARE* @@ -922,18 +1054,15 @@ This can be used to exclude words that would otherwise be good. For example Once a word has been marked as bad it won't be undone by encountering the same word as good. +The flag also applies to the word with affixes, thus this can be used to mark +a whole bunch of related words as bad. + *spell-NEEDAFFIX* The NEEDAFFIX flag is used to require that a word is used with an affix. The -word itself is not a good word. Example: +word itself is not a good word (unless there is an empty affix). Example: NEEDAFFIX + ~ - *spell-NEEDCOMPOUND* -The NEEDCOMPOUND flag is used to require that a word is used as part of a -compound word The word itself is not a good word. Example: - - NEEDCOMPOUND & ~ - COMPOUND WORDS *spell-compound* @@ -944,8 +1073,8 @@ call this character a flag here. Obviously these flags must be different from any affix IDs used. *spell-COMPOUNDFLAG* -The Myspell compatible method uses one flag, specified with COMPOUNDFLAG. -All words with this flag combine in any order. This means there is no control +The Myspell compatible method uses one flag, specified with COMPOUNDFLAG. All +words with this flag combine in any order. This means there is no control over which word comes first. Example: COMPOUNDFLAG c ~ @@ -1006,6 +1135,12 @@ A specific example: Allow a compound to be made of two words and a dash: This allows for the word "start-end", but not "startend". + *spell-NEEDCOMPOUND* +The NEEDCOMPOUND flag is used to require that a word is used as part of a +compound word. The word itself is not a good word. Example: + + NEEDCOMPOUND & ~ + *spell-COMPOUNDMIN* The minimal character length of a word used for compounding is specified with COMPOUNDMIN. 
Example: @@ -1037,6 +1172,17 @@ If both COMPOUNDMAX and COMPOUNDSYLMAX are defined, a compound word is accepted if it fits one of the criteria, thus is either made from up to COMPOUNDMAX words or contains up to COMPOUNDSYLMAX syllables. + *spell-COMPOUNDFORBIDFLAG* +The COMPOUNDFORBIDFLAG specifies a flag that can be used on an affix. It +means that the word plus affix cannot be used in a compound word. +NOT IMPLEMENTED YET. + + *spell-COMPOUNDPERMITFLAG* +The COMPOUNDPERMITFLAG specifies a flag that can be used on an affix. It +means that the word plus affix can also be used in a compound word in a way +where the affix ends up halfway the word. +NOT IMPLEMENTED YET. + *spell-SYLLABLE* The SYLLABLE item defines characters or character sequences that are used to count the number of syllables in a word. Example: @@ -1105,6 +1251,30 @@ lists that support this. >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + *spell-COMMON* +Common words can be specified with the COMMON item. This will give better +suggestions when editing a short file. Example: + + COMMON the of to and a in is it you that he was for on are ~ + +The words must be separated by white space, up to 25 per line. +When multiple regions are specified in a ":mkspell" command the common words +for all regions are combined and used for all regions. + + *spell-NOSPLITSUGS* +This item indicates that suggestions for splitting a word will not appear: + + NOSPLITSUGS ~ + + *spell-NOSUGGEST* +The flag specified with NOSUGGEST can be used for words that will not be +suggested. Can be used for obscene words. + + NOSUGGEST % ~ + +NOT IMPLEMENTED YET. + + REPLACEMENTS *spell-REP* In the affix file REP items can be used to define common mistakes. This is @@ -1118,7 +1288,7 @@ used to make spelling suggestions. The items define the "from" text and the REP ch k ~ The first line specifies the number of REP lines following. Vim ignores the -number, but it must be there. 
+number, but it must be there (for compatibility with Myspell). Don't include simple one-character replacements or swaps. Vim will try these anyway. You can include whole words if you want to, but you might want to use @@ -1146,6 +1316,17 @@ Each letter must appear in only one of the MAP items. It's a bit more efficient if the first letter is ASCII or at least one without accents. +.SUG FILE *spell-NOSUGFILE* + +When soundfolding is specified in the affix file then ":mkspell" will normally +produce a .sug file next to the .spl file. This is used to find suggestions by +their sound-a-like form quickly. At the cost of a lot of memory. + +To avoid producing a .sug file use this item in the affix file: + + NOSUGFILE ~ + + SOUND-A-LIKE *spell-SAL* In the affix file SAL items can be used to define the sounds-a-like mechanism @@ -1197,4 +1378,105 @@ You can use the |soundfold()| function to try out the results. Or set the 'verbose' option to see the score in the output of the |z=| command. +UNSUPPORTED ITEMS *spell-affix-not-supported* + +These items appear in the affix file of other spell checkers. In Vim they are +ignored, not supported or defined in another way. + +ACCENT (Hunspell) *spell-ACCENT* + Use MAP instead. |spell-MAP| + +CHECKCOMPOUNDCASE (Hunspell) *spell-CHECKCOMPOUNDCASE* + Disallow uppercase letters at compound word boundaries. + Not supported. + +CHECKCOMPOUNDDUP (Hunspell) *spell-CHECKCOMPOUNDDUP* + Disallow using the same word twice in a compound. Not + supported. + +CHECKCOMPOUNDREP (Hunspell) *spell-CHECKCOMPOUNDREP* + Something about using REP items and compound words. Not + supported. + +CHECKCOMPOUNDTRIPLE (Hunspell) *spell-CHECKCOMPOUNDTRIPLE* + Forbid three identical characters when compounding. Not + supported. + +CHECKCOMPOUNDPATTERN (Hunspell) *spell-CHECKCOMPOUNDPATTERN* + Forbid compounding when patterns match. Not supported. + +CIRCUMFIX (Hunspell) *spell-CIRCUMFIX* + This means a prefix and suffix must be added at the same time. 
+ Instead only specify the suffix, and give that suffix two + flags: The required prefix and the NEEDAFFIX flag. + |spell-NEEDAFFIX| + +COMPLEXPREFIXES (Hunspell) *spell-COMPLEXPREFIXES* + Enables using two prefixes. Not supported. + +COMPOUNDBEGIN (Hunspell) *spell-COMPOUNDBEGIN* + Use COMPOUNDFLAGS instead. |spell-COMPOUNDFLAGS| + +COMPOUNDEND (Hunspell) *spell-COMPOUNDEND* + Use COMPOUNDFLAGS instead. |spell-COMPOUNDFLAGS| + +COMPOUNDMIDDLE (Hunspell) *spell-COMPOUNDMIDDLE* + Use COMPOUNDFLAGS instead. |spell-COMPOUNDFLAGS| + +COMPOUNDROOT (Hunspell) *spell-COMPOUNDROOT* + Flag for words in the dictionary that are already a compound. + Vim doesn't use it. + +COMPOUNDSYLLABLE (Hunspell) *spell-COMPOUNDSYLLABLE* + Use SYLLABLE and COMPOUNDSYLMAX instead. |spell-SYLLABLE| + |spell-COMPOUNDSYLMAX| + +COMPOUNDWORDMAX (Hunspell) *spell-COMPOUNDWORDMAX* + Use COMPOUNDMAX instead. |spell-COMPOUNDMAX| + +FORBIDDENWORD (Hunspell) *spell-FORBIDDENWORD* + Use BAD instead. |spell-BAD| + +HOME (Hunspell) *spell-HOME* + Specifies the website for the language. Not supported. + +LANG (Hunspell) *spell-LANG* + This specifies language-specific behavior. This actually + moves part of the language knowledge into the program, + therefore Vim does not support it. Each language property + must be specified separately. + +LEMMA_PRESENT (Hunspell) *spell-LEMMA_PRESENT* + Only needed for morphological analysis. + +MAXNGRAMSUGS (Hunspell) *spell-MAXNGRAMSUGS* + Not supported. + +NAME (Hunspell) *spell-NAME* + Specifies the name of the language. Not supported. + +ONLYINCOMPOUND (Hunspell) *spell-ONLYINCOMPOUND* + Use NEEDCOMPOUND instead. |spell-NEEDCOMPOUND| + +PSEUDOROOT (Hunspell) *spell-PSEUDOROOT* + Use NEEDAFFIX instead. |spell-NEEDAFFIX| + +SUGSWITHDOTS (Hunspell) *spell-SUGSWITHDOTS* + Adds dots to suggestions. Vim doesn't need this. + +SYLLABLENUM (Hunspell) *spell-SYLLABLENUM* + Not supported. 
+ +TRY (Myspell, Hunspell, others) *spell-TRY* + Vim does not use the TRY item, it is ignored. For making + suggestions the actual characters in the words are used. + +VERSION (Hunspell) *spell-VERSION* + Specifies the version for the language. Not supported. + +WORDCHARS (Hunspell) *spell-WORDCHARS* + Used to recognize words. Vim doesn't need it, because there + is no need to separate words before checking them (using a + trie instead of a hashtable). + vim:tw=78:sw=4:ts=8:ft=help:norl: diff --git a/runtime/doc/syntax.txt b/runtime/doc/syntax.txt index 02a4ea016..2ded3f2eb 100644 --- a/runtime/doc/syntax.txt +++ b/runtime/doc/syntax.txt @@ -1,4 +1,4 @@ -*syntax.txt* For Vim version 7.0aa. Last change: 2005 Nov 30 +*syntax.txt* For Vim version 7.0aa. Last change: 2005 Dec 31 VIM REFERENCE MANUAL by Bram Moolenaar @@ -3245,7 +3245,7 @@ A more complicated Example: > < abcfoostringbarabc mmmmmmmmmmm match - ssrrrreee highlight start/region/end ("Foo", "Exa" and "Bar") + sssrrreee highlight start/region/end ("Foo", "Exa" and "Bar") Leading context *:syn-lc* *:syn-leading* *:syn-context* diff --git a/runtime/doc/tags b/runtime/doc/tags index c2ec052d6..187a06e1b 100644 --- a/runtime/doc/tags +++ b/runtime/doc/tags @@ -1782,7 +1782,9 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME* :cabbrev map.txt /*:cabbrev* :cabc map.txt /*:cabc* :cabclear map.txt /*:cabclear* -:cad quickfix.txt /*:cad* +:cadde quickfix.txt /*:cadde* +:caddexpr quickfix.txt /*:caddexpr* +:caddf quickfix.txt /*:caddf* :caddfile quickfix.txt /*:caddfile* :cal eval.txt /*:cal* :call eval.txt /*:call* diff --git a/runtime/doc/todo.txt b/runtime/doc/todo.txt index 745e4b789..542e6f133 100644 --- a/runtime/doc/todo.txt +++ b/runtime/doc/todo.txt @@ -1,4 +1,4 @@ -*todo.txt* For Vim version 7.0aa. Last change: 2005 Dec 29 +*todo.txt* For Vim version 7.0aa. Last change: 2006 Jan 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -30,25 +30,81 @@ be worked on, but only if you sponsor Vim development. See |sponsor|. 
*known-bugs* -------------------- Known bugs and current work ----------------------- +Find E999 and hand out numbers. + +Compress list of word numbers: sort them, computer differences, store as utf-8 +bytes. + +Undo bug: Gerald Lai Jan 3. + +Syntax HL: when region start has an offset that happens to be after the end of +the line then strange things happen. (Brett Stahlman Dec 31) + +Add Python complete script (Aaron Griffin) + +Evaluating CTRL-R = in the sandbox causes trouble (G. Sumner Hayes). Can the +rules for the commandline window be used? + +Evaluate 'balloonexpr' in the sandbox only when it was set from an unsafe +place (e.g., modeline)? Patch from Sumner Hayes, Jan 12. Also use for other +options? + +":saveas asdf.c" should set 'filetype' to c when it's empty. Also for ":w +asdf.c" when it sets the buffer filename. + ccomplete: +- When using page-up/page-down in menu it sometimes jumps more than a page. - When an option is set: In completion mode and the user types (identifier) characters, advance to the first match instead of removing the popup menu. If there is no match remove the selection. (Yegappan Lakshmanan) - Complete the longest common match instead of the first match? For all kinds of completions? Configurable? -- Window resize when poup is displayed +- Window resize when poup is displayed. - When completing something that is a structure, add the "." or "->" right away. How to figure out if it's a pointer or not? - When a typedef or struct is local to a file only use it in that file? +- Extra info for each entry to show in a tooltip kind of thing. +- Special mappings for when the popup menu is visible? Would allow for making + a specific selection (e.g, methods vs variables). +- Provide a function to popup the menu, so that an insert mode mapping can + start it (with a specific selection). - !_TAG_FILE_FORMAT and it's ilk are listed in the global completions Can't reproduce it right now... 
spelling: -- Hunspell has NOSUGGEST flag (use for obscene words?) -- Check out Hunspell 1.1.2. +- NL woordenlijst naar Adri sturen. +- Include script to cleanup a .add file. (Antonio Colombo, Jan 9) +- suggestions for "macARONI" doesn't include "macaroni", they are all allcap. + suggestion for "KG" to "kg" when it's keepcase. +- Autocommand event for when a spell file is missing. Allows making a plugin + that fetches the file over internet. Pattern == language. +- Using KEEPCASE flag still allows all-upper word, docs say it doesn't. + Don't allow it, because there is no other way to do this. +- Implement NOSUGGEST flag (used for obscene words). +- Implement NOSPLITSUGS. +- Rename COMPOUNDFLAGS to COMPOUNDPATTERN or COMPOUNDRULE? + Hunspell now uses COMPOUND with a count. +- Check out Hunspell 1.1.3. + what does MAXNGRAMSUGS do? + See announcement (Nemeth, 5 jan) + use "\/" instead of SLASH item? + is COMPLEXPREFIXES necessary now that we have flags for affixes? - Look into hungarian dictionary: - http://magyarispell.sourceforge.net/rc3-beta2.zip -- Support breakpoint character · 0xb7 and ignore it? + http://magyarispell.sourceforge.net/hu_HU-1.0.tar.gz +- Support flags on a suffix. Used for second level affixes, rare and + nocomp. The flags may also be used for compounding. Default is an OR + mechanism with the flags of the word. Adding "compset" on the affixes + means the compound flags of the word are not used. + Instead of "SFX a 0 add/FLAGS ." we could use "SFX a 0 add . /FLAGS" (or + support both). +- When compounding Hunspell doesn't allow affixes inside the compound word, + only before and after it. COMPOUNDPERMITFLAG can be used to allow it. + Check Myspell and Aspell if they also work this way. + Thus a word + suffix needs a flag that it can't be used with a following + compound, and word + prefix can't be after another word in a compound. +- Implement COMPOUNDFORBIDFLAG. +- Support breakpoint character · 0xb7 and ignore it? 
Makes it possible to use + same wordlist for hyphenation. 8 Alternate Dutch word list at www.nederlandsewoorden.nl (use script to obtain). But new Myspell wordlist will come (Hagen) - Finding suggestions with sound folding is slow. Somehow store the @@ -56,6 +112,9 @@ spelling: - Also use the spelling dictionary for dictionary completion. - Have "zg" and "zw" report the file that was modified. (Marvin Renich) - Add a command like "zg" that selects one of the files 'spellfile'. +- Add a "zug" command that undoes "zg"? Deletes the good word instead of + adding a bad word like "zw" would. Use "zuw" to undo "zw"? (Antonio + Colombo) GTK: get an X error while exiting quickly after starting (running the tests). Caused by new GTK library? @@ -66,6 +125,18 @@ Support saving and restoring session for X windows? It should work to do gui_x11_wm_protocol_handler() already takes care of the rest. global_event_filter() for GTK. +Is it easy to have an item in a pattern that matches with a mark location? +Similar to |/\%>l| and |/\%c|. (Benji Fisher) + +Patch to support lists and dicts for the Python interface. (G. Sumner Hayes, +Jan 12). Docs in a previous patch. +Use free_tv() instead of clear_tv() and vim_free(). + +Win32 installer: Default _vimrc contains absolute path to diff.exe. After +upgrading it becomes invalid. Fix it automatically somehow? Use $VIMRUNTIME +in the path instead of filling it the path? At least give a clear error +message. + In diff mode deleting lines is very slow. E.g., when diffing two .po files and then sourcing po/cleaup.vim. @@ -278,6 +349,7 @@ PLANNED FOR VERSION 7.0: 8 Support four composing/combining characters, needed for Hebrew. (Ron Aaron) Add the 'maxcombining' option to set the nr. of composing characters. At the same time support more colors (use two bytes when necessary). +8 "ga" should show all composing characters, also if there are more than 2. 8 Searching for a composing character by itself should work. Perhaps "." 
with a composing char should work too. - Add a few more things to 'diffopt': "horizontal", "vertical", @@ -368,6 +440,7 @@ Add gui_mch_browsedir() for Motif, Mac OS/X. Add extra list of file locations. A bit like the quickfix list, but there is one per window. Can be used with: :ltag list of matching tags, like :tselect +Patch from Yegappan Lakshmanan, Jan 9. Commands to use the location list: :lnext next location :lprevious :lNext previous location @@ -404,6 +477,11 @@ Add more tests for all new functionality in Vim 7. Especially new functions. Updated Ruby interface. (Ryan Paul) +'errorformat' docs are a bit unclear. Suggestions by Charles Campbell (2006 +Jan 6) +Add a flag to check for a match with the next item first? Helps for +continuation lines that may contain just about anything. + Awaiting updated patches: --- awaiting updated patch --- 8 Add ":n" to fnamemodify(): normalize path, remove "../" when possible. @@ -1395,12 +1473,8 @@ Spell checking: - Compound word is accepted if nr of words is <= COMPOUNDMAX OR nr of syllables <= COMPOUNDSYLMAX. Specify using AND in the affix file? - COMPOUNDMAX -> COMPOUNDWORDMAX? -- Support flags on a suffix. Used for second level affixes. The flags may - also be used for compounding. Default is an OR mechanism with the flags - of the word. Adding "compset" on the affixes means the compound flags of - the word are not used. Instead of "SFX a 0 add/FLAGS ." we could use "SFX - a 0 add . /FLAGS" (or support both). -- NEEDCOMPOUND also used for affix? Or use "needcomp" after affix? +- NEEDCOMPOUND also used for affix? Or is this called ONLYINCOMPOUND now? + Or is ONLYINCOMPOUND only for inside a compound, not at start or end? - Do we need a flag for the rule that when compounding is done the following word doesn't have a capital after a word character, even for Onecap words? - New hunspell home page: http://hunspell.sourceforge.net/ @@ -1425,8 +1499,8 @@ Spell checking: - Add flags to count extra syllables in a word. 
SYLLABLEADD1 SYLLABLEADD2, etc.? Or make it possible to specify the syllable count of a word directly, e.g., after another slash: /abc/3 -- MORPHO item in affix file: ignore morphological fields after word and - affix. +- MORPHO item in affix file: ignore TAB and morphological field after + word/flags and affix. - Implement multiple flags for compound words and CMP item? Await comments from other spell checking authors. - Also see tklspell: http://tkltrans.sourceforge.net/ @@ -1487,8 +1561,8 @@ Folding: - 'foldmethod' "textobject": fold on sections and paragraph text objects. - Add 'hidecomment' option: don't display comments in /* */ and after //. Or is the conceal patch from Vince Negri a more generic solution? -- "zu": undo change in manual fold. "zU" redo change in manual fold. How to - implement this? +- "zuf": undo change in manual fold. "zUf" redo change in manual fold. How + to implement this? - "zJ" command: add the line or fold below the fold in the fold under the cursor. - 'foldmethod' "syntax": "fold=3": set fold level for a region. @@ -1525,7 +1599,6 @@ Multi-byte characters: 8 Should add test for using various commands with multi-byte characters. 8 'infercase' doesn't work with multi-byte characters. 8 toupper() function doesn't handle byte count changes. -8 "ga" should show all composing characters, also if there are more than 2. 7 When searching, should order of composing characters be ignored? 8 Should implement 'delcombine' for command line editing. 8 Detect overlong UTF-8 sequences and handle them like illegal bytes. @@ -2814,8 +2887,9 @@ Incsearch: Searching: -7 Add "g/" and "gb" to search for a pattern in the Visually selected text? +8 Add "g/" and "gb" to search for a pattern in the Visually selected text? "g?" is already used for rot13. + Can use "g/" in Normal mode, uses the '< to '> area. 8 Add a mechanism for recursiveness: "\@(([^()]*\@g[^()]*)\)". \@g stands for "go recursive here" and \@( \) marks the recursive part. 
Perl does it this way: diff --git a/runtime/doc/various.txt b/runtime/doc/various.txt index b4172ccb1..4cb9687cd 100644 --- a/runtime/doc/various.txt +++ b/runtime/doc/various.txt @@ -1,4 +1,4 @@ -*various.txt* For Vim version 7.0aa. Last change: 2005 Oct 14 +*various.txt* For Vim version 7.0aa. Last change: 2006 Jan 08 VIM REFERENCE MANUAL by Bram Moolenaar @@ -321,6 +321,7 @@ B *+multi_byte* Korean and other languages |multibyte| *+multi_byte_ime* Win32 input method for multibyte chars |multibyte-ime| N *+multi_lang* non-English language support |multi-lang| m *+mzscheme* Mzscheme interface |mzscheme| +m *+mzscheme/dyn* Mzscheme interface |mzscheme-dynamic| |/dyn| m *+netbeans_intg* |netbeans| m *+ole* Win32 GUI only: |ole-interface| *+osfiletype* Support for the 'osfiletype' option and filetype diff --git a/runtime/doc/version7.txt b/runtime/doc/version7.txt index 8518f8280..560b7ffa3 100644 --- a/runtime/doc/version7.txt +++ b/runtime/doc/version7.txt @@ -1,4 +1,4 @@ -*version7.txt* For Vim version 7.0aa. Last change: 2005 Dec 28 +*version7.txt* For Vim version 7.0aa. Last change: 2006 Jan 09 VIM REFERENCE MANUAL by Bram Moolenaar @@ -435,6 +435,9 @@ Win32: The ":winpos" command now also works in the console. (Vipin Aravind) |:cexpr| Read error messages from a Vim expression (Yegappan Lakshmanan). +|:caddexpr| Add error messages from a Vim expression to an + existing quickfix list. (Yegappan Lakshmanan). + Ex command modifiers: ~ @@ -919,6 +922,9 @@ without losing the last inserted text. The exists() function now supports checking for autocmd group definition and for supported autocommand events. (Yegappan Lakshmanan) +Allow using ":global" in the sandbox, it doesn't do anything harmful by +itself. + ============================================================================== COMPILE TIME CHANGES *compile-changes-7* @@ -1523,7 +1529,7 @@ string, because it may cause trouble in Insert mode. 
When evaluating an expression for CTRL-R = on the command line it was possible to open a new window, resulting in errors for incremental search, and many other nasty things were possible. Now evaluate the expression in the sandbox -to protect from unexpected behavior. +to protect from unexpected behavior. Same for CTRL-\ e. "d(" deleted the character under the cursor, while the documentation specified an exclusive motion. Vi also doesn't delete the character under the cursor. @@ -1533,4 +1539,10 @@ when it just fits in the window. In coladvance() don't stop at the window edge when filling with spaces and when in Insert mode. In mswin.vim avoid getting a beep from the "l" command. +Win32 GUI: When Alt-F4 is used to close the window and Cancel is selected in +the dialog then Vim would insert <M-F4> in the text. Now it's ignored. + +When ":silent! {cmd}" caused the swap file dialog, which isn't displayed, +there would still be a hit-enter prompt. + vim:tw=78:ts=8:ft=help:norl: diff --git a/runtime/doc/vi_diff.txt b/runtime/doc/vi_diff.txt index 76893e255..4fe7ff970 100644 --- a/runtime/doc/vi_diff.txt +++ b/runtime/doc/vi_diff.txt @@ -1,4 +1,4 @@ -*vi_diff.txt* For Vim version 7.0aa. Last change: 2005 Apr 01 +*vi_diff.txt* For Vim version 7.0aa. Last change: 2006 Jan 02 VIM REFERENCE MANUAL by Bram Moolenaar @@ -87,7 +87,9 @@ Length of an expanded string option Maximum display width Unix and Win32: 1024 characters, otherwise 255 characters Maximum lhs of a mapping 50 characters. 
-Number of highlighting different types: 223 +Number of different highlighting types: over 30000 +Range of a Number variable: -2147483648 to 2147483647 (more on 64 bit + systems) Information for undo and text in registers is kept in memory, thus when making (big) changes the amount of (virtual) memory available limits the number of diff --git a/runtime/filetype.vim b/runtime/filetype.vim index e2b1d1041..3a4d70ff9 100644 --- a/runtime/filetype.vim +++ b/runtime/filetype.vim @@ -1,7 +1,7 @@ " Vim support file to detect file types " " Maintainer: Bram Moolenaar <Bram@vim.org> -" Last Change: 2005 Nov 23 +" Last Change: 2006 Jan 12 " Listen very carefully, I will say this only once if exists("did_load_filetypes") @@ -399,7 +399,7 @@ fun! s:FTent() setf dtd endfun -" Clipper (or FoxPro) +" Clipper (or FoxPro; could also be eviews) au BufNewFile,BufRead *.prg \ if exists("g:filetype_prg") | \ exe "setf " . g:filetype_prg | @@ -606,6 +606,9 @@ au BufNewFile,BufRead *.gpi setf gnuplot " GrADS scripts au BufNewFile,BufRead *.gs setf grads +" Gretl +au BufNewFile,BufRead *.gretl setf gretl + " Groovy au BufNewFile,BufRead *.groovy setf groovy diff --git a/runtime/makemenu.vim b/runtime/makemenu.vim index 4cb0ddfb9..3728041cc 100644 --- a/runtime/makemenu.vim +++ b/runtime/makemenu.vim @@ -1,6 +1,6 @@ " Script to define the syntax menu in synmenu.vim " Maintainer: Bram Moolenaar <Bram@vim.org> -" Last Change: 2005 Dec 01 +" Last Change: 2006 Jan 12 " This is used by "make menu" in the src directory. 
edit <sfile>:p:h/synmenu.vim @@ -186,7 +186,9 @@ SynMenu FG.Grub:grub SynMenu FG.GNU\ Server\ Pages:gsp SynMenu FG.GNUplot:gnuplot SynMenu FG.GrADS\ scripts:grads +SynMenu FG.Gretl:gretl SynMenu FG.Groff:groff +SynMenu FG.Groovy:groovy SynMenu FG.GTKrc:gtkrc SynMenu HIJK.Haskell.Haskell:haskell diff --git a/runtime/optwin.vim b/runtime/optwin.vim index 5c53b0560..67538dfb5 100644 --- a/runtime/optwin.vim +++ b/runtime/optwin.vim @@ -1,7 +1,7 @@ " These commands create the option window. " " Maintainer: Bram Moolenaar <Bram@vim.org> -" Last Change: 2005 Oct 02 +" Last Change: 2006 Jan 13 " If there already is an option window, jump to that one. if bufwinnr("option-window") > 0 @@ -725,6 +725,8 @@ if has("digraphs") endif call append("$", "tildeop\tthe \"~\" command behaves like an operator") call <SID>BinOptionG("top", &top) +call append("$", "operatorfunc\tfunction called for the\"g@\" operator") +call <SID>OptionG("opfunc", &opfunc) call append("$", "showmatch\tWhen inserting a bracket, briefly jump to its match") call <SID>BinOptionG("sm", &sm) call append("$", "matchtime\ttenth of a second to show a match for 'showmatch'") diff --git a/runtime/spell/en.ascii.spl b/runtime/spell/en.ascii.spl Binary files differindex 10ed3b6a2..33cb8202a 100644 --- a/runtime/spell/en.ascii.spl +++ b/runtime/spell/en.ascii.spl diff --git a/runtime/spell/en.ascii.sug b/runtime/spell/en.ascii.sug Binary files differnew file mode 100644 index 000000000..a254ed27b --- /dev/null +++ b/runtime/spell/en.ascii.sug diff --git a/runtime/spell/en.latin1.spl b/runtime/spell/en.latin1.spl Binary files differindex fb522ea5a..68cade004 100644 --- a/runtime/spell/en.latin1.spl +++ b/runtime/spell/en.latin1.spl diff --git a/runtime/spell/en.latin1.sug b/runtime/spell/en.latin1.sug Binary files differnew file mode 100644 index 000000000..7aa6bb5fc --- /dev/null +++ b/runtime/spell/en.latin1.sug diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl Binary files differindex 
49ddd8ae2..b0dd947eb 100644 --- a/runtime/spell/en.utf-8.spl +++ b/runtime/spell/en.utf-8.spl diff --git a/runtime/spell/en.utf-8.sug b/runtime/spell/en.utf-8.sug Binary files differnew file mode 100644 index 000000000..62f59e531 --- /dev/null +++ b/runtime/spell/en.utf-8.sug diff --git a/runtime/syntax/eviews.vim b/runtime/syntax/eviews.vim new file mode 100644 index 000000000..911e1bedf --- /dev/null +++ b/runtime/syntax/eviews.vim @@ -0,0 +1,104 @@ +" Vim syntax file +" Language: Eviews (http://www.eviews.com) +" Maintainer: Vaidotas Zemlys <zemlys@gmail.com> +" Last Change: 2006 Jan 11 +" Filenames: *.prg +" URL: http://uosis.mif.vu.lt/~zemlys/vim-syntax/eviews.vim +" For version 5.x: Clear all syntax items +" For version 6.x: Quit when a syntax file was already loaded +if version < 600 + syntax clear +elseif exists("b:current_syntax") + finish +endif + +if version >= 600 + setlocal iskeyword=@,48-57,_,. +else + set iskeyword=@,48-57,_,. +endif + +syn case match + +" Comment +syn match eComment /\'.*/ + +" Constant +" string enclosed in double quotes +syn region eString start=/"/ skip=/\\\\\|\\"/ end=/"/ +" number with no fractional part or exponent +syn match eNumber /\d\+/ +" floating point number with integer and fractional parts and optional exponent +syn match eFloat /\d\+\.\d*\([Ee][-+]\=\d\+\)\=/ +" floating point number with no integer part and optional exponent +syn match eFloat /\.\d\+\([Ee][-+]\=\d\+\)\=/ +" floating point number with no fractional part and optional exponent +syn match eFloat /\d\+[Ee][-+]\=\d\+/ + +" Identifier +" identifier with leading letter and optional following keyword characters +syn match eIdentifier /\a\k*/ + +" Eviews Programing Language +syn keyword eProgLang @date else endif @errorcount @evpath exitloop for if @isobject next poff pon return statusline step stop @temppath then @time to @toc wend while include call subroutine endsub and or + +" Eviews Objects, Views and Procedures +syn keyword eOVP alpha coef equation 
graph group link logl matrix model pool rowvector sample scalar series sspace sym system table text valmap var vector + + +" Standard Eviews Commands +syn keyword eStdCmd 3sls add addassign addinit addtext align alpha append arch archtest area arlm arma arroots auto axis bar bdstest binary block boxplot boxplotby bplabel cause ccopy cd cdfplot cellipse censored cfetch checkderivs chow clabel cleartext close coef coefcov coint comment control copy cor correl correlsq count cov create cross data datelabel dates db dbcopy dbcreate dbdelete dbopen dbpack dbrebuild dbrename dbrepair decomp define delete derivs describe displayname do draw driconvert drop dtable ec edftest endog eqs equation errbar exclude exit expand fetch fill fiml fit forecast freeze freq frml garch genr gmm grads graph group hconvert hfetch hilo hist hlabel hpf impulse jbera kdensity kerfit label laglen legend line linefit link linkto load logit logl ls makecoint makederivs makeendog makefilter makegarch makegrads makegraph makegroup makelimits makemodel makeregs makeresids makesignals makestates makestats makesystem map matrix means merge metafile ml model msg name nnfit open options ordered output override pageappend pagecontract pagecopy pagecreate pagedelete pageload pagerename pagesave pageselect pagestack pagestruct pageunstack param pcomp pie pool predict print probit program qqplot qstats range read rename representations resample reset residcor residcov resids results rls rndint rndseed rowvector run sample save scalar scale scat scatmat scenario seas seasplot series set setbpelem setcell setcolwidth setconvert setelem setfillcolor setfont setformat setheight setindent setjust setline setlines setmerge settextcolor setwidth sheet show signalgraphs smooth smpl solve solveopt sort spec spike sspace statby statefinal stategraphs stateinit stats statusline stomna store structure sur svar sym system table template testadd testbtw testby testdrop testexog testfit testlags teststat text tic toc 
trace tramoseats tsls unlink update updatecoefs uroot usage valmap var vars vector wald wfcreate wfopen wfsave wfselect white wls workfile write wtsls x11 x12 xy xyline xypair + +" Constant Identifier +syn match eConstant /\!\k*/ +" String Identifier +syn match eStringId /%\k*/ +" Command Identifier +syn match eCommand /@\k*/ + +" Special +syn match eDelimiter /[,;:]/ + +" Error +syn region eRegion matchgroup=Delimiter start=/(/ matchgroup=Delimiter end=/)/ transparent contains=ALLBUT,rError,rBraceError,rCurlyError +syn region eRegion matchgroup=Delimiter start=/{/ matchgroup=Delimiter end=/}/ transparent contains=ALLBUT,rError,rBraceError,rParenError +syn region eRegion matchgroup=Delimiter start=/\[/ matchgroup=Delimiter end=/]/ transparent contains=ALLBUT,rError,rCurlyError,rParenError +syn match eError /[)\]}]/ +syn match eBraceError /[)}]/ contained +syn match eCurlyError /[)\]]/ contained +syn match eParenError /[\]}]/ contained + +" Define the default highlighting. +" For version 5.7 and earlier: only when not done already +" For version 5.8 and later: only when an item doesn't have highlighting yet +if version >= 508 || !exists("did_r_syn_inits") + if version < 508 + let did_r_syn_inits = 1 + command -nargs=+ HiLink hi link <args> + else + command -nargs=+ HiLink hi def link <args> + endif + HiLink eComment Comment + HiLink eConstant Identifier + HiLink eStringId Identifier + HiLink eCommand Type + HiLink eString String + HiLink eNumber Number + HiLink eBoolean Boolean + HiLink eFloat Float + HiLink eConditional Conditional + HiLink eProgLang Statement + HiLink eOVP Statement + HiLink eStdCmd Statement + HiLink eIdentifier Normal + HiLink eDelimiter Delimiter + HiLink eError Error + HiLink eBraceError Error + HiLink eCurlyError Error + HiLink eParenError Error + delcommand HiLink +endif + +let b:current_syntax="eviews" + +" vim: ts=8 sw=2 diff --git a/runtime/syntax/gretl.vim b/runtime/syntax/gretl.vim new file mode 100644 index 000000000..9999ff0f4 --- 
/dev/null +++ b/runtime/syntax/gretl.vim @@ -0,0 +1,102 @@ +" Vim syntax file +" Language: gretl (http://gretl.sf.net) +" Maintainer: Vaidotas Zemlys <zemlys@gmail.com> +" Last Change: 2006 Jan 6 +" Filenames: *.inp *.gretl +" URL: http://uosis.mif.vu.lt/~zemlys/vim-syntax/gretl.vim +" For version 5.x: Clear all syntax items +" For version 6.x: Quit when a syntax file was already loaded + +if version < 600 + syntax clear +elseif exists("b:current_syntax") + finish +endif + +if version >= 600 + setlocal iskeyword=@,48-57,_,. +else + set iskeyword=@,48-57,_,. +endif + +syn case match + +" Constant +" string enclosed in double quotes +syn region gString start=/"/ skip=/\\\\\|\\"/ end=/"/ +" number with no fractional part or exponent +syn match gNumber /\d\+/ +" floating point number with integer and fractional parts and optional exponent +syn match gFloat /\d\+\.\d*\([Ee][-+]\=\d\+\)\=/ +" floating point number with no integer part and optional exponent +syn match gFloat /\.\d\+\([Ee][-+]\=\d\+\)\=/ +" floating point number with no fractional part and optional exponent +syn match gFloat /\d\+[Ee][-+]\=\d\+/ + +" Gretl commands +syn keyword gCommands add addobs addto adf append ar arch arma break boxplot chow coeffsum coint coint2 corc corr corrgm criteria critical cusum data delete diff else end endif endloop eqnprint equation estimate fcast fcasterr fit freq function funcerr garch genr gnuplot graph hausman hccm help hilu hsk hurst if import include info kpss label labels lad lags ldiff leverage lmtest logistic logit logs loop mahal meantest mle modeltab mpols multiply nls nulldata ols omit omitfrom open outfile panel pca pergm plot poisson pooled print printf probit pvalue pwe quit remember rename reset restrict rhodiff rmplot run runs scatters sdiff set setobs setmiss shell sim smpl spearman square store summary system tabprint testuhat tobit transpos tsls var varlist vartest vecm vif wls + +"Gretl genr functions +syn keyword gGenrFunc log exp sin cos tan atan diff 
ldiff sdiff mean sd min max sort int ln coeff abs rho sqrt sum nobs firstobs lastobs normal uniform stderr cum missing ok misszero corr vcv var sst cov median zeromiss pvalue critical obsnum mpow dnorm cnorm gamma lngamma resample hpfilt bkfilt fracdiff varnum isvector islist nelem + +" Identifier +" identifier with leading letter and optional following keyword characters +syn match gIdentifier /\a\k*/ + +" Variable with leading $ +syn match gVariable /\$\k*/ +" Arrow +syn match gArrow /<-/ + +" Special +syn match gDelimiter /[,;:]/ + +" Error +syn region gRegion matchgroup=Delimiter start=/(/ matchgroup=Delimiter end=/)/ transparent contains=ALLBUT,rError,rBraceError,rCurlyError,gBCstart,gBCend +syn region gRegion matchgroup=Delimiter start=/{/ matchgroup=Delimiter end=/}/ transparent contains=ALLBUT,rError,rBraceError,rParenError +syn region gRegion matchgroup=Delimiter start=/\[/ matchgroup=Delimiter end=/]/ transparent contains=ALLBUT,rError,rCurlyError,rParenError +syn match gError /[)\]}]/ +syn match gBraceError /[)}]/ contained +syn match gCurlyError /[)\]]/ contained +syn match gParenError /[\]}]/ contained + +" Comment +syn match gComment /#.*/ +syn match gBCstart /(\*/ +syn match gBCend /\*)/ + +syn region gBlockComment matchgroup=gCommentStart start="(\*" end="\*)" + +" Define the default highlighting. 
+" For version 5.7 and earlier: only when not done already +" For version 5.8 and later: only when an item doesn't have highlighting yet +if version >= 508 || !exists("did_r_syn_inits") + if version < 508 + let did_r_syn_inits = 1 + command -nargs=+ HiLink hi link <args> + else + command -nargs=+ HiLink hi def link <args> + endif + HiLink gComment Comment + HiLink gCommentStart Comment + HiLink gBlockComment Comment + HiLink gString String + HiLink gNumber Number + HiLink gBoolean Boolean + HiLink gFloat Float + HiLink gCommands Repeat + HiLink gGenrFunc Type + HiLink gDelimiter Delimiter + HiLink gError Error + HiLink gBraceError Error + HiLink gCurlyError Error + HiLink gParenError Error + HiLink gIdentifier Normal + HiLink gVariable Identifier + HiLink gArrow Repeat + delcommand HiLink +endif + +let b:current_syntax="gretl" + +" vim: ts=8 sw=2 diff --git a/runtime/syntax/r.vim b/runtime/syntax/r.vim index c2ecca4ef..8432c208e 100644 --- a/runtime/syntax/r.vim +++ b/runtime/syntax/r.vim @@ -1,9 +1,13 @@ " Vim syntax file " Language: R (GNU S) -" Maintainer: Tom Payne <tom@tompayne.org> -" Last Change: 2003 May 11 -" Filenames: *.r -" URL: http://www.tompayne.org/vim/syntax/r.vim +" Maintainer: Vaidotas Zemlys <zemlys@gmail.com> +" Last Change: 2006 January 12 +" Filenames: *.R *.Rout *.r *.Rhistory *.Rt *.Rout.save *.Rout.fail +" URL: http://uosis.mif.vu.lt/~zemlys/vim-syntax/r.vim + +" First maintainer Tom Payne <tom@tompayne.org> +" Modified to make syntax less colourful and added the highlighting of +" R assignment arrow " For version 5.x: Clear all syntax items " For version 6.x: Quit when a syntax file was already loaded @@ -54,9 +58,10 @@ syn keyword rConstant LETTERS letters month.ab month.name pi syn keyword rConstant NULL syn keyword rBoolean FALSE TRUE syn keyword rNumber NA +syn match rArrow /<\{1,2}-/ " Type -syn keyword rType array category character complex double function integer list logical matrix numeric vector +syn keyword rType array category 
character complex double function integer list logical matrix numeric vector data.frame " Special syn match rDelimiter /[,;:]/ @@ -89,8 +94,9 @@ if version >= 508 || !exists("did_r_syn_inits") HiLink rStatement Statement HiLink rConditional Conditional HiLink rRepeat Repeat - HiLink rIdentifier Identifier - HiLink rType Type + HiLink rIdentifier Normal + HiLink rArrow Statement + HiLink rType Type HiLink rDelimiter Delimiter HiLink rError Error HiLink rBraceError Error @@ -102,3 +108,4 @@ endif let b:current_syntax="r" " vim: ts=8 sw=2 + diff --git a/runtime/syntax/vim.vim b/runtime/syntax/vim.vim index 52ca8ae1b..21715b7ba 100644 --- a/runtime/syntax/vim.vim +++ b/runtime/syntax/vim.vim @@ -1,8 +1,8 @@ " Vim syntax file " Language: Vim 7.0 script " Maintainer: Dr. Charles E. Campbell, Jr. <NdrOchipS@PcampbellAfamily.Mbiz> -" Last Change: December 09, 2005 -" Version: 7.0-21 +" Last Change: Jan 09, 2006 +" Version: 7.0-22 " Automatically generated keyword lists: {{{1 " Quit when a syntax file was already loaded {{{2 @@ -175,7 +175,7 @@ endif syn match vimComment excludenl +\s"[^\-:.%#=*].*$+lc=1 contains=@vimCommentGroup,vimCommentString syn match vimComment +\<endif\s\+".*$+lc=5 contains=@vimCommentGroup,vimCommentString syn match vimComment +\<else\s\+".*$+lc=4 contains=@vimCommentGroup,vimCommentString -syn region vimCommentString contained oneline start='\S\s\+"'ms=s+1 end='"' +syn region vimCommentString contained oneline start='\S\s\+"'ms=e end='"' " Environment Variables: {{{2 " ===================== @@ -422,7 +422,7 @@ syn match vimHiBang contained "!" 
skipwhite nextgroup=@vimHighlightCluster syn match vimHiGroup contained "\i\+" syn case ignore -syn keyword vimHiAttrib contained none bold inverse italic reverse standout underline +syn keyword vimHiAttrib contained none bold inverse italic reverse standout underline undercurl syn keyword vimFgBgAttrib contained none bg background fg foreground syn case match syn match vimHiAttribList contained "\i\+" contains=vimHiAttrib @@ -450,7 +450,7 @@ syn match vimHiCTerm contained "\ccterm="he=e-1 nextgroup=vimHiAttribList syn match vimHiCtermFgBg contained "\ccterm[fb]g="he=e-1 nextgroup=vimNumber,vimHiCtermColor,vimFgBgAttrib,vimHiCtermError syn match vimHiGui contained "\cgui="he=e-1 nextgroup=vimHiAttribList syn match vimHiGuiFont contained "\cfont="he=e-1 nextgroup=vimHiFontname -syn match vimHiGuiFgBg contained "\cgui[fb]g="he=e-1 nextgroup=vimHiGroup,vimHiGuiFontname,vimHiGuiRgb,vimFgBgAttrib +syn match vimHiGuiFgBg contained "\cgui\%([fb]g\|sp\)="he=e-1 nextgroup=vimHiGroup,vimHiGuiFontname,vimHiGuiRgb,vimFgBgAttrib syn match vimHiTermcap contained "\S\+" contains=vimNotation " Highlight: clear {{{2 diff --git a/src/Makefile b/src/Makefile index 8dd1be3e6..810dd7b78 100644 --- a/src/Makefile +++ b/src/Makefile @@ -532,9 +532,12 @@ LINT_OPTIONS = -beprxzF # PROFILING - Uncomment the next two lines to do profiling with gcc and gprof. # Might not work with GUI or Perl. +# For unknown reasons adding "-lc" fixes a linking problem with GCC. That's +# probably a bug in the "-pg" implementation. # Need to recompile everything after changing this: "make clean" "make". #PROFILE_CFLAGS = -pg -g #PROFILE_LIBS = -pg +#PROFILE_LIBS = -pg -lc # MEMORY LEAK DETECTION # Requires installing the ccmalloc library. 
diff --git a/src/buffer.c b/src/buffer.c index f252489c2..119011791 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -82,7 +82,7 @@ open_buffer(read_stdin, eap) && (curbuf->b_flags & BF_NEVERLOADED)) curbuf->b_p_ro = TRUE; - if (ml_open() == FAIL) + if (ml_open(curbuf) == FAIL) { /* * There MUST be a memfile, otherwise we can't do anything @@ -1505,6 +1505,8 @@ buflist_new(ffname, sfname, lnum, flags) * buffer. Otherwise: Need to allocate a new buffer structure. * * This is the ONLY place where a new buffer structure is allocated! + * (A spell file buffer is allocated in spell.c, but that's not a normal + * buffer.) */ buf = NULL; if ((flags & BLN_CURBUF) @@ -5191,7 +5193,7 @@ buf_contents_changed(buf) curwin->w_buffer = newbuf; #endif - if (ml_open() == OK + if (ml_open(curbuf) == OK && readfile(buf->b_ffname, buf->b_fname, (linenr_T)0, (linenr_T)0, (linenr_T)MAXLNUM, &ea, READ_NEW | READ_DUMMY) == OK) diff --git a/src/edit.c b/src/edit.c index 23dd57eb9..c037d1f96 100644 --- a/src/edit.c +++ b/src/edit.c @@ -987,6 +987,15 @@ doESCkey: case K_IGNORE: /* Something mapped to nothing */ break; +#ifdef FEAT_GUI_W32 + /* On Win32 ignore <M-F4>, we get it when closing the window was + * cancelled. 
*/ + case K_F4: + if (mod_mask != MOD_MASK_ALT) + goto normalchar; + break; +#endif + #ifdef FEAT_GUI case K_VER_SCROLLBAR: ins_scroll(); diff --git a/src/eval.c b/src/eval.c index eef865a4b..482359331 100644 --- a/src/eval.c +++ b/src/eval.c @@ -654,7 +654,6 @@ static int get_var_tv __ARGS((char_u *name, int len, typval_T *rettv, int verbos static int handle_subscript __ARGS((char_u **arg, typval_T *rettv, int evaluate, int verbose)); static typval_T *alloc_tv __ARGS((void)); static typval_T *alloc_string_tv __ARGS((char_u *string)); -static void free_tv __ARGS((typval_T *varp)); static void init_tv __ARGS((typval_T *varp)); static long get_tv_number __ARGS((typval_T *varp)); static linenr_T get_tv_lnum __ARGS((typval_T *argvars)); @@ -1323,7 +1322,9 @@ get_spellword(list, pp) #endif /* - * Top level evaluation function, + * Top level evaluation function. + * Returns an allocated typval_T with the result. + * Returns NULL when there is an error. */ typval_T * eval_expr(arg, nextcmd) @@ -1333,13 +1334,10 @@ eval_expr(arg, nextcmd) typval_T *tv; tv = (typval_T *)alloc(sizeof(typval_T)); - if (!tv) - return NULL; - - if (eval0(arg, tv, nextcmd, TRUE) == FAIL) + if (tv != NULL && eval0(arg, tv, nextcmd, TRUE) == FAIL) { vim_free(tv); - return NULL; + tv = NULL; } return tv; @@ -13930,7 +13928,7 @@ f_spellbadword(argvars, rettv) /* Check the argument for spelling. */ while (*str != NUL) { - len = spell_check(curwin, str, &attr, &capcol); + len = spell_check(curwin, str, &attr, &capcol, FALSE); if (attr != HLF_COUNT) { word = str; @@ -13996,7 +13994,7 @@ f_spellsuggest(argvars, rettv) else maxcount = 25; - spell_suggest_list(&ga, str, maxcount, need_capital); + spell_suggest_list(&ga, str, maxcount, need_capital, FALSE); for (i = 0; i < ga.ga_len; ++i) { @@ -15904,7 +15902,7 @@ alloc_string_tv(s) /* * Free the memory for a variable type-value. 
*/ - static void + void free_tv(varp) typval_T *varp; { @@ -16910,7 +16908,10 @@ ex_execute(eap) if (ret != FAIL && ga.ga_data != NULL) { if (eap->cmdidx == CMD_echomsg) + { MSG_ATTR(ga.ga_data, echo_attr); + out_flush(); + } else if (eap->cmdidx == CMD_echoerr) { /* We don't want to abort following commands, restore did_emsg. */ diff --git a/src/ex_cmds.c b/src/ex_cmds.c index 006733c4d..c4fd0c4bd 100644 --- a/src/ex_cmds.c +++ b/src/ex_cmds.c @@ -111,9 +111,9 @@ do_ascii(eap) IObuff[len++] = ' '; IObuff[len++] = '<'; if (utf_iscomposing(c) -#ifdef USE_GUI +# ifdef USE_GUI && !gui.in_use -#endif +# endif ) IObuff[len++] = ' '; /* draw composing char on top of a space */ len += (*mb_char2bytes)(c, IObuff + len); diff --git a/src/ex_cmds.h b/src/ex_cmds.h index 4c4ab6ff0..a15ed464d 100644 --- a/src/ex_cmds.h +++ b/src/ex_cmds.h @@ -187,6 +187,8 @@ EX(CMD_cabbrev, "cabbrev", ex_abbreviate, EXTRA|TRLBAR|NOTRLCOM|USECTRLV|CMDWIN), EX(CMD_cabclear, "cabclear", ex_abclear, EXTRA|TRLBAR|CMDWIN), +EX(CMD_caddexpr, "caddexpr", ex_cexpr, + NEEDARG|WORD1|NOTRLCOM|TRLBAR|BANG), EX(CMD_caddfile, "caddfile", ex_cfile, TRLBAR|FILE1), EX(CMD_call, "call", ex_call, @@ -394,7 +396,7 @@ EX(CMD_for, "for", ex_while, EX(CMD_function, "function", ex_function, EXTRA|BANG|CMDWIN), EX(CMD_global, "global", ex_global, - RANGE|WHOLEFOLD|BANG|EXTRA|DFLALL|CMDWIN), + RANGE|WHOLEFOLD|BANG|EXTRA|DFLALL|SBOXOK|CMDWIN), EX(CMD_goto, "goto", ex_goto, RANGE|NOTADR|COUNT|TRLBAR|SBOXOK|CMDWIN), EX(CMD_grep, "grep", ex_make, @@ -768,7 +770,7 @@ EX(CMD_spellgood, "spellgood", ex_spell, EX(CMD_spellwrong, "spellwrong", ex_spell, BANG|RANGE|NOTADR|NEEDARG|EXTRA|TRLBAR), EX(CMD_spelldump, "spelldump", ex_spelldump, - TRLBAR), + BANG|TRLBAR), EX(CMD_spellrepall, "spellrepall", ex_spellrepall, TRLBAR), EX(CMD_sprevious, "sprevious", ex_previous, diff --git a/src/ex_docmd.c b/src/ex_docmd.c index 1eb2bb9a5..e15c6c5d7 100644 --- a/src/ex_docmd.c +++ b/src/ex_docmd.c @@ -8234,8 +8234,11 @@ ex_mkrc(eap) failed = 
TRUE; if (put_line(fd, "doautoall SessionLoadPost") == FAIL) failed = TRUE; - if (put_line(fd, "unlet SessionLoad") == FAIL) - failed = TRUE; + if (eap->cmdidx == CMD_mksession) + { + if (put_line(fd, "unlet SessionLoad") == FAIL) + failed = TRUE; + } } #endif if (put_line(fd, "\" vim: set ft=vim :") == FAIL) diff --git a/src/ex_getln.c b/src/ex_getln.c index 335f2a411..95109534a 100644 --- a/src/ex_getln.c +++ b/src/ex_getln.c @@ -645,8 +645,8 @@ getcmdline(firstc, count, indent) /* * Replace the command line with the result of an expression. - * Need to save the current command line, to be able to enter - * a new one... + * Need to save and restore the current command line, to be + * able to enter a new one... */ if (ccline.cmdpos == ccline.cmdlen) new_cmdpos = 99999; /* keep it at the end */ @@ -658,8 +658,17 @@ getcmdline(firstc, count, indent) restore_cmdline(&save_ccline); if (c == '=') { + /* Need to save and restore ccline. And go into the + * sandbox to avoid nasty things like going to another + * buffer when evaluating an expression. */ save_cmdline(&save_ccline); +#ifdef HAVE_SANDBOX + ++sandbox; +#endif p = get_expr_line(); +#ifdef HAVE_SANDBOX + --sandbox; +#endif restore_cmdline(&save_ccline); if (p != NULL && realloc_cmdbuff((int)STRLEN(p) + 1) == OK) @@ -1192,6 +1201,18 @@ getcmdline(firstc, count, indent) case K_IGNORE: goto cmdline_not_changed; /* Ignore mouse */ +#ifdef FEAT_GUI_W32 + /* On Win32 ignore <M-F4>, we get it when closing the window was + * cancelled. */ + case K_F4: + if (mod_mask == MOD_MASK_ALT) + { + redrawcmd(); /* somehow the cmdline is cleared */ + goto cmdline_not_changed; + } + break; +#endif + #ifdef FEAT_MOUSE case K_MIDDLEDRAG: case K_MIDDLERELEASE: diff --git a/src/feature.h b/src/feature.h index 69a784e44..efe791567 100644 --- a/src/feature.h +++ b/src/feature.h @@ -127,6 +127,15 @@ #endif /* + * Message history is fixed at 100 message, 20 for the tiny version. 
+ */ +#ifdef FEAT_SMALL +# define MAX_MSG_HIST_LEN 100 +#else +# define MAX_MSG_HIST_LEN 20 +#endif + +/* * +jumplist Jumplist, CTRL-O and CTRL-I commands. */ #ifdef FEAT_SMALL diff --git a/src/fileio.c b/src/fileio.c index e05fb54db..c5c926299 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -6405,7 +6405,7 @@ buf_reload(buf) /* Open the memline. */ curbuf = savebuf; curwin->w_buffer = savebuf; - saved = ml_open(); + saved = ml_open(curbuf); curbuf = buf; curwin->w_buffer = buf; } diff --git a/src/globals.h b/src/globals.h index 8c2110569..c22466c06 100644 --- a/src/globals.h +++ b/src/globals.h @@ -528,7 +528,12 @@ EXTERN int starting INIT(= NO_SCREEN); /* first NO_SCREEN, then NO_BUFFERS and then * set to 0 when starting up finished */ EXTERN int exiting INIT(= FALSE); - /* TRUE when abandoning Vim */ + /* TRUE when planning to exit Vim. Might + * still keep on running if there is a changed + * buffer. */ +EXTERN int really_exiting INIT(= FALSE); + /* TRUE when we are sure to exit, e.g., after + * a deadly signal */ EXTERN int full_screen INIT(= FALSE); /* TRUE when doing full-screen output * otherwise only writing some messages */ diff --git a/src/gui_gtk_x11.c b/src/gui_gtk_x11.c index 1502e0c1c..84c5fe406 100644 --- a/src/gui_gtk_x11.c +++ b/src/gui_gtk_x11.c @@ -2328,6 +2328,7 @@ sm_client_die(GnomeClient *client, gpointer data) vim_strncpy(IObuff, _("Vim: Received \"die\" request from session manager\n"), IOSIZE - 1); + really_exiting = TRUE; preserve_exit(); } diff --git a/src/hashtable.c b/src/hashtable.c index 904be4704..066f0f3cb 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -86,6 +86,31 @@ hash_clear(ht) } /* + * Free the array of a hash table and all the keys it contains. The keys must + * have been allocated. "off" is the offset from the start of the allocated + * memory to the location of the key (it's always positive).
+ */ + void +hash_clear_all(ht, off) + hashtab_T *ht; + int off; +{ + int todo; + hashitem_T *hi; + + todo = ht->ht_used; + for (hi = ht->ht_array; todo > 0; ++hi) + { + if (!HASHITEM_EMPTY(hi)) + { + vim_free(hi->hi_key - off); + --todo; + } + } + hash_clear(ht); +} + +/* * Find "key" in hashtable "ht". "key" must not be NULL. * Always returns a pointer to a hashitem. If the item was not found then * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key diff --git a/src/memline.c b/src/memline.c index 3eba98d59..9abf15523 100644 --- a/src/memline.c +++ b/src/memline.c @@ -13,10 +13,11 @@ /* * memline.c: Contains the functions for appending, deleting and changing the - * text lines. The memfile functions are used to store the information in blocks - * of memory, backed up by a file. The structure of the information is a tree. - * The root of the tree is a pointer block. The leaves of the tree are data - * blocks. In between may be several layers of pointer blocks, forming branches. + * text lines. The memfile functions are used to store the information in + * blocks of memory, backed up by a file. The structure of the information is + * a tree. The root of the tree is a pointer block. The leaves of the tree + * are data blocks. In between may be several layers of pointer blocks, + * forming branches. * * Three types of blocks are used: * - Block nr 0 contains information for recovery @@ -169,7 +170,7 @@ struct block0 }; /* - * Note: b0_fname and b0_flags are put at the end of the file name. For very + * Note: b0_dirty and b0_flags are put at the end of the file name. For very * long file names in older versions of Vim they are invalid. * The 'fileencoding' comes before b0_flags, with a NUL in front. But only * when there is room, for very long file names it's omitted. @@ -245,12 +246,13 @@ static void ml_updatechunk __ARGS((buf_T *buf, long line, long len, int updtype) #endif /* - * open a new memline for 'curbuf' + * Open a new memline for "buf". 
* - * return FAIL for failure, OK otherwise + * Return FAIL for failure, OK otherwise. */ int -ml_open() +ml_open(buf) + buf_T *buf; { memfile_T *mfp; bhdr_T *hp = NULL; @@ -258,36 +260,36 @@ ml_open() PTR_BL *pp; DATA_BL *dp; -/* - * init fields in memline struct - */ - curbuf->b_ml.ml_stack_size = 0; /* no stack yet */ - curbuf->b_ml.ml_stack = NULL; /* no stack yet */ - curbuf->b_ml.ml_stack_top = 0; /* nothing in the stack */ - curbuf->b_ml.ml_locked = NULL; /* no cached block */ - curbuf->b_ml.ml_line_lnum = 0; /* no cached line */ + /* + * init fields in memline struct + */ + buf->b_ml.ml_stack_size = 0; /* no stack yet */ + buf->b_ml.ml_stack = NULL; /* no stack yet */ + buf->b_ml.ml_stack_top = 0; /* nothing in the stack */ + buf->b_ml.ml_locked = NULL; /* no cached block */ + buf->b_ml.ml_line_lnum = 0; /* no cached line */ #ifdef FEAT_BYTEOFF - curbuf->b_ml.ml_chunksize = NULL; + buf->b_ml.ml_chunksize = NULL; #endif -/* - * When 'updatecount' is non-zero, flag that a swap file may be opened later. - */ - if (p_uc && curbuf->b_p_swf) - curbuf->b_may_swap = TRUE; + /* + * When 'updatecount' is non-zero swap file may be opened later. + */ + if (p_uc && buf->b_p_swf) + buf->b_may_swap = TRUE; else - curbuf->b_may_swap = FALSE; + buf->b_may_swap = FALSE; -/* - * Open the memfile. No swap file is created yet. - */ + /* + * Open the memfile. No swap file is created yet. 
+ */ mfp = mf_open(NULL, 0); if (mfp == NULL) goto error; - curbuf->b_ml.ml_mfp = mfp; - curbuf->b_ml.ml_flags = ML_EMPTY; - curbuf->b_ml.ml_line_count = 1; + buf->b_ml.ml_mfp = mfp; + buf->b_ml.ml_flags = ML_EMPTY; + buf->b_ml.ml_line_count = 1; #ifdef FEAT_LINEBREAK curwin->w_nrwidth_line_count = 0; #endif @@ -296,7 +298,7 @@ ml_open() /* for 16 bit MS-DOS create a swapfile now, because we run out of * memory very quickly */ if (p_uc != 0) - ml_open_file(curbuf); + ml_open_file(buf); #endif /* @@ -313,36 +315,40 @@ ml_open() b0p->b0_id[0] = BLOCK0_ID0; b0p->b0_id[1] = BLOCK0_ID1; - b0p->b0_dirty = curbuf->b_changed ? B0_DIRTY : 0; - b0p->b0_flags = get_fileformat(curbuf) + 1; b0p->b0_magic_long = (long)B0_MAGIC_LONG; b0p->b0_magic_int = (int)B0_MAGIC_INT; b0p->b0_magic_short = (short)B0_MAGIC_SHORT; b0p->b0_magic_char = B0_MAGIC_CHAR; - STRNCPY(b0p->b0_version, "VIM ", 4); STRNCPY(b0p->b0_version + 4, Version, 6); - set_b0_fname(b0p, curbuf); long_to_char((long)mfp->mf_page_size, b0p->b0_page_size); - (void)get_user_name(b0p->b0_uname, B0_UNAME_SIZE); - b0p->b0_uname[B0_UNAME_SIZE - 1] = NUL; - mch_get_host_name(b0p->b0_hname, B0_HNAME_SIZE); - b0p->b0_hname[B0_HNAME_SIZE - 1] = NUL; - long_to_char(mch_get_pid(), b0p->b0_pid); + + if (!B_SPELL(buf)) + { + b0p->b0_dirty = buf->b_changed ? B0_DIRTY : 0; + b0p->b0_flags = get_fileformat(buf) + 1; + set_b0_fname(b0p, buf); + (void)get_user_name(b0p->b0_uname, B0_UNAME_SIZE); + b0p->b0_uname[B0_UNAME_SIZE - 1] = NUL; + mch_get_host_name(b0p->b0_hname, B0_HNAME_SIZE); + b0p->b0_hname[B0_HNAME_SIZE - 1] = NUL; + long_to_char(mch_get_pid(), b0p->b0_pid); + } /* * Always sync block number 0 to disk, so we can check the file name in - * the swap file in findswapname(). Don't do this for help files though. + * the swap file in findswapname(). Don't do this for help files though + * and spell buffer though. * Only works when there's a swapfile, otherwise it's done when the file * is created. 
*/ mf_put(mfp, hp, TRUE, FALSE); - if (!curbuf->b_help) + if (!buf->b_help && !B_SPELL(buf)) (void)mf_sync(mfp, 0); -/* - * fill in root pointer block and write page 1 - */ + /* + * Fill in root pointer block and write page 1. + */ if ((hp = ml_new_ptr(mfp)) == NULL) goto error; if (hp->bh_bnum != 1) @@ -358,9 +364,9 @@ ml_open() pp->pb_pointer[0].pe_line_count = 1; /* line count after insertion */ mf_put(mfp, hp, TRUE, FALSE); -/* - * allocate first data block and create an empty line 1. - */ + /* + * Allocate first data block and create an empty line 1. + */ if ((hp = ml_new_data(mfp, FALSE, 1)) == NULL) goto error; if (hp->bh_bnum != 2) @@ -384,7 +390,7 @@ error: mf_put(mfp, hp, FALSE, FALSE); mf_close(mfp, TRUE); /* will also free(mfp->mf_fname) */ } - curbuf->b_ml.ml_mfp = NULL; + buf->b_ml.ml_mfp = NULL; return FAIL; } @@ -518,6 +524,18 @@ ml_open_file(buf) if (mfp == NULL || mfp->mf_fd >= 0 || !buf->b_p_swf) return; /* nothing to do */ +#ifdef FEAT_SYN_HL + /* For a spell buffer use a temp file name. */ + if (buf->b_spell) + { + fname = vim_tempname('s'); + if (fname != NULL) + (void)mf_open_file(mfp, fname); /* consumes fname! */ + buf->b_may_swap = FALSE; + return; + } +#endif + /* * Try all directories in 'directory' option. */ @@ -886,7 +904,7 @@ ml_recover() goto theend; /* out of memory */ /* When called from main() still need to initialize storage structure */ - if (called_from_main && ml_open() == FAIL) + if (called_from_main && ml_open(curbuf) == FAIL) getout(1); /* @@ -2100,6 +2118,28 @@ ml_append(lnum, line, len, newfile) return ml_append_int(curbuf, lnum, line, len, newfile, FALSE); } +#if defined(FEAT_SYN_HL) || defined(PROTO) +/* + * Like ml_append() but for an arbitrary buffer. The buffer must already have + * a memline. 
+ */ + int +ml_append_buf(buf, lnum, line, len, newfile) + buf_T *buf; + linenr_T lnum; /* append after this line (can be 0) */ + char_u *line; /* text of the new line */ + colnr_T len; /* length of new line, including NUL, or 0 */ + int newfile; /* flag, see above */ +{ + if (buf->b_ml.ml_mfp == NULL) + return FAIL; + + if (buf->b_ml.ml_line_lnum != 0) + ml_flush_line(buf); + return ml_append_int(buf, lnum, line, len, newfile, FALSE); +} +#endif + static int ml_append_int(buf, lnum, line, len, newfile, mark) buf_T *buf; @@ -2599,7 +2639,7 @@ ml_append_int(buf, lnum, line, len, newfile, mark) } /* - * replace line lnum, with buffering, in current buffer + * Replace line lnum, with buffering, in current buffer. * * If copy is TRUE, make a copy of the line, otherwise the line has been * copied to allocated memory already. @@ -2643,7 +2683,7 @@ ml_replace(lnum, line, copy) } /* - * delete line 'lnum' + * Delete line 'lnum' in the current buffer. * * Check: The caller of this function should probably also call * deleted_lines() after this. @@ -4114,7 +4154,9 @@ findswapname(buf, dirp, old_fname) #endif { MSG_PUTS("\n"); - need_wait_return = TRUE; /* call wait_return later */ + if (msg_silent == 0) + /* call wait_return() later */ + need_wait_return = TRUE; } #ifdef CREATE_DUMMY_FILE diff --git a/src/message.c b/src/message.c index 8ea49ae20..465b25e7e 100644 --- a/src/message.c +++ b/src/message.c @@ -752,7 +752,7 @@ add_msg_hist(s, len, attr) return; /* Don't let the message history get too big */ - while (msg_hist_len > 20) + while (msg_hist_len > MAX_MSG_HIST_LEN) (void)delete_first_msg(); /* allocate an entry and add the message at the end of the history */ diff --git a/src/misc1.c b/src/misc1.c index a5a1805c2..08cd20c91 100644 --- a/src/misc1.c +++ b/src/misc1.c @@ -7952,6 +7952,10 @@ preserve_exit() prepare_to_exit(); + /* Setting this will prevent free() calls. That avoids calling free() + * recursively when free() was invoked with a bad pointer. 
*/ + really_exiting = TRUE; + out_str(IObuff); screen_start(); /* don't know where cursor is now */ out_flush(); diff --git a/src/misc2.c b/src/misc2.c index c3f2b3798..f5b13e762 100644 --- a/src/misc2.c +++ b/src/misc2.c @@ -1426,13 +1426,15 @@ copy_option_part(option, buf, maxlen, sep_chars) } /* - * replacement for free() that ignores NULL pointers + * Replacement for free() that ignores NULL pointers. + * Also skip free() when exiting for sure, this helps when we caught a deadly + * signal that was caused by a crash in free(). */ void vim_free(x) void *x; { - if (x != NULL) + if (x != NULL && !really_exiting) { #ifdef MEM_PROFILE mem_pre_free(&x); diff --git a/src/po/it.po b/src/po/it.po index ed25f47fc..dba546c95 100644 --- a/src/po/it.po +++ b/src/po/it.po @@ -12,9 +12,8 @@ msgid "" msgstr "" "Project-Id-Version: vim 7.0\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2005-08-25 09:30+0200\n" -"PO-Revision-Date: 2005-08-29 21:30+0200\n" +"POT-Creation-Date: 2006-01-03 16:07+0100\n" +"PO-Revision-Date: 2006-01-06 13:50+0100\n" "Last-Translator: Vlad Sandrini <marco@sandrini.biz>\n" "Language-Team: Italian" " Antonio Colombo <azc10@yahoo.com>" @@ -22,6 +21,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=ISO_8859-1\n" "Content-Transfer-Encoding: 8-bit\n" +"Report-Msgid-Bugs-To: \n" msgid "E82: Cannot allocate any buffer, exiting..." msgstr "E82: Non riesco ad allocare alcun buffer, esco..." 
@@ -254,8 +254,8 @@ msgstr " Completamento linea comandi (^V^N^P)" msgid " User defined completion (^U^N^P)" msgstr " Completamento definito dall'utente (^U^N^P)" -msgid " Occult completion (^O^N^P)" -msgstr " Completamento nascosto (^O^N^P)" +msgid " Omni completion (^O^N^P)" +msgstr " Completamento globale (^O^N^P)" msgid " Spelling suggestion (^S^N^P)" msgstr " Suggerimento ortografico (^S^N^P)" @@ -267,10 +267,10 @@ msgid "Hit end of paragraph" msgstr "Giunto alla fine del paragrafo" msgid "'dictionary' option is empty" -msgstr "l'opzione 'dictionary' è vuota" +msgstr "l'opzione 'dictionary' non è impostata" msgid "'thesaurus' option is empty" -msgstr "l'opzione 'thesaurus' è vuota" +msgstr "l'opzione 'thesaurus' non è impostata" #, c-format msgid "Scanning dictionary: %s" @@ -1131,21 +1131,8 @@ msgstr "E177: Non si può specificare due volte il contatore" msgid "E178: Invalid default value for count" msgstr "E178: Valore predefinito del contatore non valido" -msgid "E179: argument required for complete" -msgstr "E179: argomento necessario per complete" - -#, c-format -msgid "E180: Invalid complete value: %s" -msgstr "E180: Valore %s non valido per 'complete'" - -msgid "E468: Completion argument only allowed for custom completion" -msgstr "" -"E468: Argomento di completamento permesso solo per completamento " -"personalizzato" - -msgid "E467: Custom completion requires a function argument" -msgstr "" -"E467: Il completamento personalizzato richiede un argomento di funzione" +msgid "E179: argument required for -complete" +msgstr "E179: argomento necessario per -complete" #, c-format msgid "E181: Invalid attribute: %s" @@ -1163,6 +1150,19 @@ msgid "E184: No such user-defined command: %s" msgstr "E184: Comando definito dall'utente %s inesistente" #, c-format +msgid "E180: Invalid complete value: %s" +msgstr "E180: Valore %s non valido per 'complete'" + +msgid "E468: Completion argument only allowed for custom completion" +msgstr "" +"E468: Argomento di 
completamento permesso solo per completamento " +"personalizzato" + +msgid "E467: Custom completion requires a function argument" +msgstr "" +"E467: Il completamento personalizzato richiede un argomento di funzione" + +#, c-format msgid "E185: Cannot find color scheme %s" msgstr "E185: Non riesco a trovare schema colore %s" @@ -1420,6 +1420,12 @@ msgstr "non è un file" msgid "[New File]" msgstr "[File nuovo]" +msgid "[New DIRECTORY]" +msgstr "[Nuova DIRECTORY]" + +msgid "[File too big]" +msgstr "[File troppo grande]" + msgid "[Permission Denied]" msgstr "[Tipo di accesso non consentito]" @@ -1469,8 +1475,9 @@ msgstr "[convertito]" msgid "[crypted]" msgstr "[cifrato]" -msgid "[CONVERSION ERROR]" -msgstr "[ERRORE DI CONVERSIONE]" +#, c-format +msgid "[CONVERSION ERROR in line %ld]" +msgstr "[ERRORE DI CONVERSIONE alla linea %ld]" #, c-format msgid "[ILLEGAL BYTE in line %ld]" @@ -3024,25 +3031,6 @@ msgstr "" msgid "--socketid <xid>\tOpen Vim inside another GTK widget" msgstr "--socketid <xid>\tApri Vim dentro un altro 'widget' GTK" -msgid "" -"\n" -"Arguments recognised by kvim (KDE version):\n" -msgstr "" -"\n" -"Argomenti accettati da kvim (versione KDE):\n" - -msgid "-black\t\tUse reverse video" -msgstr "-black\t\tUsa colori invertiti" - -msgid "-tip\t\t\tDisplay the tip dialog on startup" -msgstr "-tip\t\t\tMostra la finestra consigli all'avvio" - -msgid "-notip\t\tDisable the tip dialog" -msgstr "-notip\t\tDisabilita la finestra consigli" - -msgid "--display <display>\tRun vim on <display>" -msgstr "--display <schermo>\tEsegui vim su <schermo>" - msgid "-P <parent title>\tOpen Vim inside parent application" msgstr "-P <titolo padre>\tApri Vim in un'applicazione padre" @@ -3468,6 +3456,10 @@ msgstr "Dimensione 'stack' aumentata" msgid "E317: pointer block id wrong 2" msgstr "E317: ID blocco puntatori errato 2" +#, c-format +msgid "E773: Symlink loop for \"%s\"" +msgstr "E773: Collegamento ricorsivo per \"%s\"" + msgid "E325: ATTENTION" msgstr "E325: ATTENZIONE" 
@@ -3641,6 +3633,10 @@ msgstr "Interruzione: " msgid "Press ENTER or type command to continue" msgstr "Premi INVIO o un comando per proseguire" +#, c-format +msgid "%s line %ld" +msgstr "%s linea %ld" + msgid "-- More --" msgstr "-- Ancora --" @@ -3830,6 +3826,12 @@ msgstr "E658: Connessione NetBeans persa per il buffer %ld" msgid "E505: " msgstr "E505: " +msgid "E774: 'operatorfunc' is empty" +msgstr "E774: opzione 'operatorfunc' non impostata" + +msgid "E775: Eval feature not available" +msgstr "E775: Tipo di valorizzazione [eval] non disponibile" + msgid "Warning: terminal cannot highlight" msgstr "Attenzione: il terminale non è in grado di evidenziare" @@ -3843,7 +3845,7 @@ msgid "E352: Cannot erase folds with current 'foldmethod'" msgstr "E352: Non posso togliere piegature con il 'foldmethod' in uso" msgid "E664: changelist is empty" -msgstr "E664: lista modifiche vuota" +msgstr "E664: lista modifiche non impostata" msgid "E662: At start of changelist" msgstr "E662: All'inizio della lista modifiche" @@ -4225,14 +4227,6 @@ msgstr "Vim: Intercettato segnale fatale\n" msgid "Opening the X display took %ld msec" msgstr "Attivazione visualizzazione X ha richiesto %ld msec" -#. KDE sometimes produces X error that we want to ignore -msgid "" -"\n" -"Vim: Got X error but we continue...\n" -msgstr "" -"\n" -"Vim: Preso errore X ma andiamo avanti...\n" - msgid "" "\n" "Vim: Got X error\n" @@ -4562,12 +4556,6 @@ msgstr " SELEZIONA BLOCCO" msgid "recording" msgstr "registrazione" -msgid "search hit TOP, continuing at BOTTOM" -msgstr "raggiunta la CIMA nella ricerca, continuo dal FONDO" - -msgid "search hit BOTTOM, continuing at TOP" -msgstr "raggiunto il FONDO nella ricerca, continuo dalla CIMA" - #, c-format msgid "E383: Invalid search string: %s" msgstr "E383: Stringa di ricerca non valida: %s" @@ -4645,7 +4633,7 @@ msgid "Compressing word tree..." msgstr "Comprimo albero di parole..." 
msgid "E756: Spell checking is not enabled" -msgstr "E756: Il controllo ortografico non è abilitato" +msgstr "E756: Controllo ortografico non abilitato" #, c-format msgid "Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\"" @@ -4659,7 +4647,7 @@ msgid "E757: This does not look like a spell file" msgstr "E757: Questo non sembra un file ortografico" msgid "E771: Old spell file, needs to be updated" -msgstr "E771: File ortografico obsoleto, necessario aggiornamento" +msgstr "E771: File ortografico obsoleto, è necessario aggiornarlo" msgid "E772: Spell file is for newer version of Vim" msgstr "E772: Il file ortografico è per versioni di Vim più recenti" @@ -4712,14 +4700,26 @@ msgid "Wrong COMPOUNDSYLMAX value in %s line %d: %s" msgstr "Valore errato per COMPOUNDSYLMAX in %s linea %d: %s" #, c-format -msgid "Expected Y or N in %s line %d: %s" -msgstr "Y o N deve essere presente in %s linea %d: %s" +msgid "Different combining flag in continued affix block in %s line %d: %s" +msgstr "" +"Flag combinazione diverso in blocco affissi continuo in %s linea %d: %s" #, c-format msgid "Duplicate affix in %s line %d: %s" msgstr "Affisso duplicato in %s linea %d: %s" #, c-format +msgid "" +"Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %" +"s" +msgstr "" +"Affisso usato anche per BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND in %s linea %d: %s" + +#, c-format +msgid "Expected Y or N in %s line %d: %s" +msgstr "Y o N deve essere presente in %s linea %d: %s" + +#, c-format msgid "Broken condition in %s line %d: %s" msgstr "Condizione non rispettata in %s linea %d: %s" @@ -4747,13 +4747,13 @@ msgid "COMPOUNDSYLMAX used without SYLLABLE" msgstr "COMPOUNDSYLMAX usato senza SYLLABLE" msgid "Too many postponed prefixes" -msgstr "Troppi prefissi posposti" +msgstr "Troppi suffissi" msgid "Too many compound flags" msgstr "Troppi flag composti" msgid "Too many posponed prefixes and/or compound flags" -msgstr "Troppi prefissi posposti e/o flag composti" 
+msgstr "Troppi suffissi e/o flag composti" #, c-format msgid "Missing SOFO%s line in %s" @@ -4867,11 +4867,8 @@ msgstr "Fatto!" msgid "Estimated runtime memory use: %d bytes" msgstr "Uso stimato di memoria durante esecuzione: %d bytes" -msgid "E764: 'spellfile' is not set" -msgstr "E764: opzione 'spellfile' vuota" - #, c-format -msgid "E765: 'spellfile' does not have %ld enties" +msgid "E765: 'spellfile' does not have %ld entries" msgstr "E765: 'spellfile' non ha %ld elementi" msgid "E763: Word characters differ between spell files" @@ -5406,9 +5403,6 @@ msgstr "con GUI Cocoa." msgid "with (classic) GUI." msgstr "con GUI (classica)." -msgid "with KDE GUI." -msgstr "con GUI KDE." - msgid " Features included (+) or not (-):\n" msgstr " Opzioni incluse (+) o escluse (-):\n" @@ -5862,7 +5856,7 @@ msgid "E49: Invalid scroll size" msgstr "E49: Quantità di 'scroll' non valida" msgid "E91: 'shell' option is empty" -msgstr "E91: opzione 'shell' vuota" +msgstr "E91: opzione 'shell' non impostata" msgid "E255: Couldn't read in sign data!" msgstr "E255: Errore -- non sono riuscito a leggere i dati del 'sign'!" 
@@ -5871,7 +5865,7 @@ msgid "E72: Close error on swap file" msgstr "E72: Errore durante chiusura swap file" msgid "E73: tag stack empty" -msgstr "E73: tag stack vuota" +msgstr "E73: tag stack non impostata" msgid "E74: Command too complex" msgstr "E74: Comando troppo complesso" @@ -5933,3 +5927,13 @@ msgstr "E682: Espressione o delimitatore di ricerca non validi" msgid "E139: File is loaded in another buffer" msgstr "E139: File già caricato in un altro buffer" + +#, c-format +msgid "E764: Option '%s' is not set" +msgstr "E764: opzione '%s' non impostata" + +msgid "search hit TOP, continuing at BOTTOM" +msgstr "raggiunta la CIMA nella ricerca, continuo dal FONDO" + +msgid "search hit BOTTOM, continuing at TOP" +msgstr "raggiunto il FONDO nella ricerca, continuo dalla CIMA" diff --git a/src/proto/eval.pro b/src/proto/eval.pro index 7c0ca8402..0b76ff6af 100644 --- a/src/proto/eval.pro +++ b/src/proto/eval.pro @@ -61,6 +61,7 @@ void set_reg_var __ARGS((int c)); char_u *v_exception __ARGS((char_u *oldval)); char_u *v_throwpoint __ARGS((char_u *oldval)); char_u *set_cmdarg __ARGS((exarg_T *eap, char_u *oldarg)); +void free_tv __ARGS((typval_T *varp)); void clear_tv __ARGS((typval_T *varp)); long get_tv_number_chk __ARGS((typval_T *varp, int *denote)); char_u *get_tv_string_chk __ARGS((typval_T *varp)); diff --git a/src/proto/hashtable.pro b/src/proto/hashtable.pro index aebb458d6..90723765c 100644 --- a/src/proto/hashtable.pro +++ b/src/proto/hashtable.pro @@ -1,6 +1,7 @@ /* hashtable.c */ void hash_init __ARGS((hashtab_T *ht)); void hash_clear __ARGS((hashtab_T *ht)); +void hash_clear_all __ARGS((hashtab_T *ht, int off)); hashitem_T *hash_find __ARGS((hashtab_T *ht, char_u *key)); hashitem_T *hash_lookup __ARGS((hashtab_T *ht, char_u *key, hash_T hash)); void hash_debug_results __ARGS((void)); diff --git a/src/proto/memline.pro b/src/proto/memline.pro index 90d328559..de75a7dcf 100644 --- a/src/proto/memline.pro +++ b/src/proto/memline.pro @@ -1,32 +1,33 @@ /* memline.c 
*/ -extern int ml_open __ARGS((void)); -extern void ml_setname __ARGS((buf_T *buf)); -extern void ml_open_files __ARGS((void)); -extern void ml_open_file __ARGS((buf_T *buf)); -extern void check_need_swap __ARGS((int newfile)); -extern void ml_close __ARGS((buf_T *buf, int del_file)); -extern void ml_close_all __ARGS((int del_file)); -extern void ml_close_notmod __ARGS((void)); -extern void ml_timestamp __ARGS((buf_T *buf)); -extern void ml_recover __ARGS((void)); -extern int recover_names __ARGS((char_u **fname, int list, int nr)); -extern void ml_sync_all __ARGS((int check_file, int check_char)); -extern void ml_preserve __ARGS((buf_T *buf, int message)); -extern char_u *ml_get __ARGS((linenr_T lnum)); -extern char_u *ml_get_pos __ARGS((pos_T *pos)); -extern char_u *ml_get_curline __ARGS((void)); -extern char_u *ml_get_cursor __ARGS((void)); -extern char_u *ml_get_buf __ARGS((buf_T *buf, linenr_T lnum, int will_change)); -extern int ml_line_alloced __ARGS((void)); -extern int ml_append __ARGS((linenr_T lnum, char_u *line, colnr_T len, int newfile)); -extern int ml_replace __ARGS((linenr_T lnum, char_u *line, int copy)); -extern int ml_delete __ARGS((linenr_T lnum, int message)); -extern void ml_setmarked __ARGS((linenr_T lnum)); -extern linenr_T ml_firstmarked __ARGS((void)); -extern void ml_clearmarked __ARGS((void)); -extern char_u *makeswapname __ARGS((char_u *fname, char_u *ffname, buf_T *buf, char_u *dir_name)); -extern char_u *get_file_in_dir __ARGS((char_u *fname, char_u *dname)); -extern void ml_setflags __ARGS((buf_T *buf)); -extern long ml_find_line_or_offset __ARGS((buf_T *buf, linenr_T lnum, long *offp)); -extern void goto_byte __ARGS((long cnt)); +int ml_open __ARGS((buf_T *buf)); +void ml_setname __ARGS((buf_T *buf)); +void ml_open_files __ARGS((void)); +void ml_open_file __ARGS((buf_T *buf)); +void check_need_swap __ARGS((int newfile)); +void ml_close __ARGS((buf_T *buf, int del_file)); +void ml_close_all __ARGS((int del_file)); +void 
ml_close_notmod __ARGS((void)); +void ml_timestamp __ARGS((buf_T *buf)); +void ml_recover __ARGS((void)); +int recover_names __ARGS((char_u **fname, int list, int nr)); +void ml_sync_all __ARGS((int check_file, int check_char)); +void ml_preserve __ARGS((buf_T *buf, int message)); +char_u *ml_get __ARGS((linenr_T lnum)); +char_u *ml_get_pos __ARGS((pos_T *pos)); +char_u *ml_get_curline __ARGS((void)); +char_u *ml_get_cursor __ARGS((void)); +char_u *ml_get_buf __ARGS((buf_T *buf, linenr_T lnum, int will_change)); +int ml_line_alloced __ARGS((void)); +int ml_append __ARGS((linenr_T lnum, char_u *line, colnr_T len, int newfile)); +int ml_append_buf __ARGS((buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, int newfile)); +int ml_replace __ARGS((linenr_T lnum, char_u *line, int copy)); +int ml_delete __ARGS((linenr_T lnum, int message)); +void ml_setmarked __ARGS((linenr_T lnum)); +linenr_T ml_firstmarked __ARGS((void)); +void ml_clearmarked __ARGS((void)); +char_u *makeswapname __ARGS((char_u *fname, char_u *ffname, buf_T *buf, char_u *dir_name)); +char_u *get_file_in_dir __ARGS((char_u *fname, char_u *dname)); +void ml_setflags __ARGS((buf_T *buf)); +long ml_find_line_or_offset __ARGS((buf_T *buf, linenr_T lnum, long *offp)); +void goto_byte __ARGS((long cnt)); /* vim: set ft=c : */ diff --git a/src/proto/spell.pro b/src/proto/spell.pro index 87261a244..791ec240b 100644 --- a/src/proto/spell.pro +++ b/src/proto/spell.pro @@ -1,5 +1,5 @@ /* spell.c */ -int spell_check __ARGS((win_T *wp, char_u *ptr, hlf_T *attrp, int *capcol)); +int spell_check __ARGS((win_T *wp, char_u *ptr, hlf_T *attrp, int *capcol, int docount)); int spell_move_to __ARGS((win_T *wp, int dir, int allwords, int curline, hlf_T *attrp)); void spell_cat_line __ARGS((char_u *buf, char_u *line, int maxlen)); char_u *did_set_spelllang __ARGS((buf_T *buf)); @@ -14,7 +14,7 @@ void init_spell_chartab __ARGS((void)); int spell_check_sps __ARGS((void)); void spell_suggest __ARGS((int count)); void 
ex_spellrepall __ARGS((exarg_T *eap)); -void spell_suggest_list __ARGS((garray_T *gap, char_u *word, int maxcount, int need_cap)); +void spell_suggest_list __ARGS((garray_T *gap, char_u *word, int maxcount, int need_cap, int interactive)); char_u *eval_soundfold __ARGS((char_u *word)); void ex_spelldump __ARGS((exarg_T *eap)); char_u *spell_to_word_end __ARGS((char_u *start, buf_T *buf)); diff --git a/src/quickfix.c b/src/quickfix.c index d0a2e153e..f550f116a 100644 --- a/src/quickfix.c +++ b/src/quickfix.c @@ -86,6 +86,7 @@ struct eformat /* 'O' overread (partial) message */ char_u flags; /* additional flags given in prefix */ /* '-' do not include this line */ + /* '+' include whole line in message */ }; static int qf_init_ext __ARGS((char_u *efile, buf_T *buf, typval_T *tv, char_u *errorformat, int newlist, linenr_T lnumfirst, linenr_T lnumlast)); @@ -578,7 +579,7 @@ restofline: col = (int)atol((char *)regmatch.startp[i]); if ((i = (int)fmt_ptr->addr[4]) > 0) /* %t */ type = *regmatch.startp[i]; - if (fmt_ptr->flags == '+' && !multiscan) /* %+ */ + if (fmt_ptr->flags == '+' && !multiscan) /* %+ */ STRCPY(errmsg, IObuff); else if ((i = (int)fmt_ptr->addr[5]) > 0) /* %m */ { @@ -613,9 +614,9 @@ restofline: } } multiscan = FALSE; - if (!fmt_ptr || idx == 'D' || idx == 'X') + if (fmt_ptr == NULL || idx == 'D' || idx == 'X') { - if (fmt_ptr) + if (fmt_ptr != NULL) { if (idx == 'D') /* enter directory */ { @@ -634,10 +635,10 @@ restofline: lnum = 0; /* don't jump to this line */ valid = FALSE; STRCPY(errmsg, IObuff); /* copy whole line to error message */ - if (!fmt_ptr) + if (fmt_ptr == NULL) multiline = multiignore = FALSE; } - else if (fmt_ptr) + else if (fmt_ptr != NULL) { if (vim_strchr((char_u *)"AEWI", idx) != NULL) multiline = TRUE; /* start of a multi-line message */ @@ -2747,7 +2748,7 @@ load_dummy_buffer(fname) /* Need to set the filename for autocommands. 
*/ (void)setfname(curbuf, fname, NULL, FALSE); - if (ml_open() == OK) + if (ml_open(curbuf) == OK) { /* Create swap file now to avoid the ATTENTION message. */ check_need_swap(TRUE); @@ -2978,7 +2979,7 @@ ex_cbuffer(eap) #if defined(FEAT_EVAL) || defined(PROTO) /* - * ":cexpr {expr}" command. + * ":cexpr {expr}" and ":caddexpr {expr}" command. */ void ex_cexpr(eap) @@ -2986,16 +2987,23 @@ ex_cexpr(eap) { typval_T *tv; + /* Evaluate the expression. When the result is a string or a list we can + * use it to fill the errorlist. */ tv = eval_expr(eap->arg, NULL); - if (!tv || (tv->v_type != VAR_STRING && tv->v_type != VAR_LIST) || - (tv->v_type == VAR_STRING && !tv->vval.v_string) || - (tv->v_type == VAR_LIST && !tv->vval.v_list)) - return; - - if (qf_init_ext(NULL, NULL, tv, p_efm, TRUE, (linenr_T)0, (linenr_T)0) > 0) - qf_jump(0, 0, eap->forceit); /* display first error */ - - clear_tv(tv); + if (tv != NULL) + { + if ((tv->v_type == VAR_STRING && tv->vval.v_string != NULL) + || (tv->v_type == VAR_LIST && tv->vval.v_list != NULL)) + { + if (qf_init_ext(NULL, NULL, tv, p_efm, eap->cmdidx == CMD_cexpr, + (linenr_T)0, (linenr_T)0) > 0 + && eap->cmdidx == CMD_cexpr) + qf_jump(0, 0, eap->forceit); /* display first error */ + } + else + EMSG(_("E999: String or List expected")); + free_tv(tv); + } } #endif diff --git a/src/screen.c b/src/screen.c index 09821bd1a..fb5593447 100644 --- a/src/screen.c +++ b/src/screen.c @@ -134,7 +134,7 @@ static void fold_line __ARGS((win_T *wp, long fold_count, foldinfo_T *foldinfo, static void fill_foldcolumn __ARGS((char_u *p, win_T *wp, int closed, linenr_T lnum)); static void copy_text_attr __ARGS((int off, char_u *buf, int len, int attr)); #endif -static int win_line __ARGS((win_T *, linenr_T, int, int)); +static int win_line __ARGS((win_T *, linenr_T, int, int, int nochange)); static int char_needs_redraw __ARGS((int off_from, int off_to, int cols)); #ifdef FEAT_RIGHTLEFT static void screen_line __ARGS((int row, int coloff, int endcol, 
int clear_width, int rlflag)); @@ -1681,7 +1681,7 @@ win_update(wp) /* * Display one line. */ - row = win_line(wp, lnum, srow, wp->w_height); + row = win_line(wp, lnum, srow, wp->w_height, mod_top == 0); #ifdef FEAT_FOLDING wp->w_lines[idx].wl_folded = FALSE; @@ -2446,12 +2446,14 @@ fill_foldcolumn(p, wp, closed, lnum) * * Return the number of last row the line occupies. */ +/* ARGSUSED */ static int -win_line(wp, lnum, startrow, endrow) +win_line(wp, lnum, startrow, endrow, nochange) win_T *wp; linenr_T lnum; int startrow; int endrow; + int nochange; /* not updating for changed text */ { int col; /* visual column on screen */ unsigned off; /* offset in ScreenLines/ScreenAttrs */ @@ -3744,7 +3746,8 @@ win_line(wp, lnum, startrow, endrow) else p = prev_ptr; cap_col -= (prev_ptr - line); - len = spell_check(wp, p, &spell_hlf, &cap_col); + len = spell_check(wp, p, &spell_hlf, &cap_col, + nochange); word_end = v + len; /* In Insert mode only highlight a word that diff --git a/src/spell.c b/src/spell.c index ffe5a207c..a1ab89c82 100644 --- a/src/spell.c +++ b/src/spell.c @@ -43,6 +43,9 @@ * * Thanks to Olaf Seibert for providing an example implementation of this tree * and the compression mechanism. + * LZ trie ideas: + * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf + * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html * * Matching involves checking the caps type: Onecap ALLCAP KeepCap. * @@ -56,17 +59,28 @@ # define SPELL_PRINTTREE #endif +/* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk(). */ +#if 0 +# define DEBUG_TRIEWALK +#endif + /* * Use this to adjust the score after finding suggestions, based on the * suggested word sounding like the bad word. This is much faster than doing * it for every possible suggestion. - * Disadvantage: When "the" is typed as "hte" it sounds different and goes - * down in the list. 
+ * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" + * vs "ht") and goes down in the list. * Used when 'spellsuggest' is set to "best". */ #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) /* + * Do the opposite: based on a maximum end score and a known sound score, + * compute the the maximum word score that can be used. + */ +#define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) + +/* * Vim spell file format: <HEADER> * <SECTIONS> * <LWORDTREE> @@ -133,6 +147,9 @@ * <reptolen> 1 byte length of <repto> * <repto> N bytes "to" part of replacement * + * sectionID == SN_REPSAL: <repcount> <rep> ... + * just like SN_REP but for soundfolded words + * * sectionID == SN_SAL: <salflags> <salcount> <sal> ... * <salflags> 1 byte flags for soundsalike conversion: * SAL_F0LLOWUP @@ -151,6 +168,12 @@ * <sofotolen> 2 bytes length of <sofoto> * <sofoto> N bytes "to" part of soundfold * + * sectionID == SN_SUGFILE: <timestamp> + * <timestamp> 8 bytes time in seconds that must match with .sug file + * + * sectionID == SN_WORDS: <word> ... + * <word> N bytes NUL terminated common word + * * sectionID == SN_MAP: <mapstr> * <mapstr> N bytes String with sequences of similar characters, * separated by slashes. @@ -236,6 +259,32 @@ * All text characters are in 'encoding', but stored as single bytes. */ +/* + * Vim .sug file format: <SUGHEADER> + * <SUGWORDTREE> + * <SUGTABLE> + * + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + * + * <fileID> 6 bytes "VIMsug" + * <versionnr> 1 byte VIMSUGVERSION + * <timestamp> 8 bytes timestamp that must match with .spl file + * + * + * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) + * + * + * <SUGTABLE>: <sugwcount> <sugline> ... + * + * <sugwcount> 4 bytes number of <sugline> following + * + * <sugline>: <sugnr> ... 
NUL + * + * <sugnr>: X bytes word number that results in this soundfolded word, + * stored as an offset to the previous number in as + * few bytes as possible, see offset2bytes()) + */ + #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) # include <io.h> /* for lseek(), must be before vim.h */ #endif @@ -248,6 +297,10 @@ # include <fcntl.h> #endif +#ifndef UNIX /* it's in os_unix.h for Unix */ +# include <time.h> /* for time_t */ +#endif + #define MAXWLEN 250 /* Assume max. word len is this many bytes. Some places assume a word length fits in a byte, thus it can't be above 255. */ @@ -302,8 +355,8 @@ typedef long idx_T; * follow; never used in prefix tree */ #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ -/* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep, - * and si_sal. Not for sl_sal! +/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, + * si_repsal, sl_rep, and si_sal. Not for sl_sal! * One replacement: from "ft_from" to "ft_to". */ typedef struct fromto_S { @@ -374,6 +427,8 @@ struct slang_S char_u *sl_midword; /* MIDWORD string or NULL */ + hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */ + int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */ int sl_compminlen; /* COMPOUNDMIN (default: 0) */ int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ @@ -394,12 +449,23 @@ struct slang_S garray_T sl_sal; /* list of salitem_T entries from SAL lines */ salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if there is none */ - int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: - * "sl_sal_first" maps chars, when has_mbyte - * "sl_sal" is a list of wide char lists. 
*/ int sl_followup; /* SAL followup */ int sl_collapse; /* SAL collapse_result */ int sl_rem_accents; /* SAL remove_accents */ + int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: + * "sl_sal_first" maps chars, when has_mbyte + * "sl_sal" is a list of wide char lists. */ + garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ + short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ + + /* Info from the .sug file. Loaded on demand. */ + time_t sl_sugtime; /* timestamp for .sug file */ + char_u *sl_sbyts; /* soundfolded word bytes */ + idx_T *sl_sidxs; /* soundfolded word indexes */ + buf_T *sl_sugbuf; /* buffer with word number table */ + int sl_sugloaded; /* TRUE when .sug file was loaded or failed to + load */ + int sl_has_map; /* TRUE if there is a MAP line */ #ifdef FEAT_MBYTE hashtab_T sl_map_hash; /* MAP for multi-byte chars */ @@ -407,6 +473,8 @@ struct slang_S #else char_u sl_map_array[256]; /* MAP for first 256 chars */ #endif + hashtab_T sl_sounddone; /* table with soundfolded words that have + handled, see add_sound_suggest() */ }; /* First language that is loaded, start of the linked list of loaded @@ -437,6 +505,10 @@ typedef struct langp_S #define VIMSPELLMAGICL 8 #define VIMSPELLVERSION 50 +#define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ +#define VIMSUGMAGICL 6 +#define VIMSUGVERSION 1 + /* Section IDs. Only renumber them when VIMSPELLVERSION changes! 
*/ #define SN_REGION 0 /* <regionname> section */ #define SN_CHARFLAGS 1 /* charflags section */ @@ -449,6 +521,9 @@ typedef struct langp_S #define SN_COMPOUND 8 /* compound words section */ #define SN_SYLLABLE 9 /* syllable section */ #define SN_NOBREAK 10 /* NOBREAK section */ +#define SN_SUGFILE 11 /* timestamp for .sug file */ +#define SN_REPSAL 12 /* REPSAL items section */ +#define SN_WORDS 13 /* common words */ #define SN_END 255 /* end of sections */ #define SNF_REQUIRED 1 /* <sectionflags>: required section */ @@ -463,6 +538,17 @@ typedef struct langp_S /* file used for "zG" and "zW" */ static char_u *int_wordlist = NULL; +typedef struct wordcount_S +{ + short_u wc_count; /* nr of times word was seen */ + char_u wc_word[1]; /* word, actually longer */ +} wordcount_T; + +static wordcount_T dumwc; +#define WC_KEY_OFF (dumwc.wc_word - (char_u *)&dumwc) +#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) +#define MAXWORDCOUNT 0xffff + /* * Information used when looking for suggestions. */ @@ -471,6 +557,7 @@ typedef struct suginfo_S garray_T su_ga; /* suggestions, contains "suggest_T" */ int su_maxcount; /* max. 
number of suggestions displayed */ int su_maxscore; /* maximum score for adding to su_ga */ + int su_sfmaxscore; /* idem, for when doing soundfold words */ garray_T su_sga; /* like su_ga, sound-folded scoring */ char_u *su_badptr; /* start of bad word in line */ int su_badlen; /* length of detected bad word in line */ @@ -478,7 +565,6 @@ typedef struct suginfo_S char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ - slang_T *su_slang_first; /* slang_T used for su_sal_badword */ hashtab_T su_banned; /* table with banned words */ slang_T *su_sallang; /* default language for sound folding */ } suginfo_T; @@ -487,6 +573,7 @@ typedef struct suginfo_S typedef struct suggest_S { char_u *st_word; /* suggested word, allocated string */ + int st_wordlen; /* STRLEN(st_word) */ int st_orglen; /* length of replaced text */ int st_score; /* lower is better */ int st_altscore; /* used when st_score compares equal */ @@ -497,21 +584,24 @@ typedef struct suggest_S #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) -/* Number of suggestions kept when cleaning up. When rescore_suggestions() is - * called the score may change, thus we need to keep more than what is - * displayed. */ -#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount) +/* TRUE if a word appears in the list of banned words. */ +#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) + +/* Number of suggestions kept when cleaning up. we need to keep more than + * what is displayed, because when rescore_suggestions() is called the score + * may change and wrong suggestions may be removed later. */ +#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots * of suggestions that are not going to be displayed. 
*/ -#define SUG_MAX_COUNT(su) ((su)->su_maxcount + 50) +#define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50) /* score for various changes */ #define SCORE_SPLIT 149 /* split bad word */ #define SCORE_ICASE 52 /* slightly different case */ #define SCORE_REGION 200 /* word is for different region */ #define SCORE_RARE 180 /* rare word */ -#define SCORE_SWAP 90 /* swap two characters */ +#define SCORE_SWAP 75 /* swap two characters */ #define SCORE_SWAP3 110 /* swap two characters in three */ #define SCORE_REP 65 /* REP replacement */ #define SCORE_SUBST 93 /* substitute a character */ @@ -529,8 +619,27 @@ typedef struct suggest_S #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. * 350 allows for about three changes. */ +#define SCORE_COMMON1 30 /* subtracted for words seen before */ +#define SCORE_COMMON2 40 /* subtracted for words often seen */ +#define SCORE_COMMON3 50 /* subtracted for words very often seen */ +#define SCORE_THRES2 10 /* word count threshold for COMMON2 */ +#define SCORE_THRES3 100 /* word count threshold for COMMON3 */ + +/* When trying changed soundfold words it becomes slow when trying more than + * two changes. With less then two changes it's slightly faster but we miss a + * few good suggestions. In rare cases we need to try three of four changes. + */ +#define SCORE_SFMAX1 200 /* maximum score for first try */ +#define SCORE_SFMAX2 300 /* maximum score for second try */ +#define SCORE_SFMAX3 400 /* maximum score for third try */ + #define SCORE_BIG SCORE_INS * 3 /* big difference */ -#define SCORE_MAXMAX 999999 /* accept any score */ +#define SCORE_MAXMAX 999999 /* accept any score */ +#define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ + +/* for spell_edit_score_limit() we need to know the minimum value of + * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ +#define SCORE_EDIT_MIN SCORE_SIMILAR /* * Structure to store info for word matching. 
@@ -617,6 +726,7 @@ typedef enum STATE_ENDNUL, /* Past NUL bytes at start of the node. */ STATE_PLAIN, /* Use each byte of the node. */ STATE_DEL, /* Delete a byte from the bad word. */ + STATE_INS_PREP, /* Prepare for inserting bytes. */ STATE_INS, /* Insert a byte in the bad word. */ STATE_SWAP, /* Swap two bytes. */ STATE_UNSWAP, /* Undo swap two characters. */ @@ -657,6 +767,8 @@ typedef struct trystate_S char_u ts_complen; /* nr of compound words used */ char_u ts_compsplit; /* index for "compflags" where word was spit */ char_u ts_save_badflags; /* su_badflags saved here */ + char_u ts_delidx; /* index in fword for char that was deleted, + valid when "ts_flags" has TSF_DIDDEL */ } trystate_T; /* values for ts_isdiff */ @@ -667,11 +779,12 @@ typedef struct trystate_S /* values for ts_flags */ #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ #define TSF_DIDSPLIT 2 /* tried split at this point */ +#define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ /* special values ts_prefixdepth */ #define PFD_NOPREFIX 0xff /* not using prefixes */ #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ -#define PFD_NOTSPECIAL 0xfd /* first value that's not special */ +#define PFD_NOTSPECIAL 0xfd /* highest value that's not special */ /* mode values for find_word */ #define FIND_FOLDWORD 0 /* find word case-folded */ @@ -683,6 +796,7 @@ typedef struct trystate_S static slang_T *slang_alloc __ARGS((char_u *lang)); static void slang_free __ARGS((slang_T *lp)); static void slang_clear __ARGS((slang_T *lp)); +static void slang_clear_sug __ARGS((slang_T *lp)); static void find_word __ARGS((matchinf_T *mip, int mode)); static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); @@ -700,8 +814,11 @@ static char_u *read_string __ARGS((FILE *fd, int cnt)); static int read_region_section __ARGS((FILE *fd, slang_T 
*slang, int len)); static int read_charflags_section __ARGS((FILE *fd)); static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); -static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); +static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first)); static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); +static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len)); +static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count)); +static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split)); static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); static int byte_in_str __ARGS((char_u *str, int byte)); @@ -712,7 +829,8 @@ static void set_sal_first __ARGS((slang_T *lp)); #ifdef FEAT_MBYTE static int *mb_str2wide __ARGS((char_u *s)); #endif -static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); +static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt)); +static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); static void clear_midword __ARGS((buf_T *buf)); static void use_midword __ARGS((slang_T *lp, buf_T *buf)); static int find_region __ARGS((char_u *rp, char_u *region)); @@ -723,18 +841,21 @@ static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); -static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); +static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int 
maxcount, int banbadword, int need_cap, int interactive)); #ifdef FEAT_EVAL static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); #endif static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); -static void spell_suggest_intern __ARGS((suginfo_T *su)); +static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive)); +static void suggest_load_files __ARGS((void)); +static void tree_count_words __ARGS((char_u *byts, idx_T *idxs)); static void spell_find_cleanup __ARGS((suginfo_T *su)); static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); static void suggest_try_special __ARGS((suginfo_T *su)); static void suggest_try_change __ARGS((suginfo_T *su)); -static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); +static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold)); +static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add)); #ifdef FEAT_MBYTE static int nofold_len __ARGS((char_u *fword, int flen, char_u *word)); #endif @@ -742,14 +863,17 @@ static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kwo static void score_comp_sal __ARGS((suginfo_T *su)); static void score_combine __ARGS((suginfo_T *su)); static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); +static void suggest_try_soundalike_prep __ARGS((void)); static void suggest_try_soundalike __ARGS((suginfo_T *su)); +static void suggest_try_soundalike_finish __ARGS((void)); +static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp)); +static int soundfold_find __ARGS((slang_T *slang, char_u *word)); static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); static void set_map_str __ARGS((slang_T *lp, char_u *map)); static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); 
-static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); +static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf)); +static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap)); static void add_banned __ARGS((suginfo_T *su, char_u *word)); -static int was_banned __ARGS((suginfo_T *su, char_u *word)); -static void free_banned __ARGS((suginfo_T *su)); static void rescore_suggestions __ARGS((suginfo_T *su)); static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); @@ -760,9 +884,15 @@ static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u * static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); #endif static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); -static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); -static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum)); +static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword)); +static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); +#ifdef FEAT_MBYTE +static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); +#endif +static void dump_word __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T lnum)); static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum)); +static buf_T *open_spellbuf __ARGS((void)); +static void close_spellbuf __ARGS((buf_T *buf)); /* * Use our own character-case definitions, because the current locale may @@ -831,11 +961,12 @@ static char *msg_compressing = N_("Compressing word tree..."); * 
caller can skip over the word. */ int -spell_check(wp, ptr, attrp, capcol) +spell_check(wp, ptr, attrp, capcol, docount) win_T *wp; /* current window */ char_u *ptr; hlf_T *attrp; int *capcol; /* column to check for Capital */ + int docount; /* count good words */ { matchinf_T mi; /* Most things are put in "mi" so that it can be passed to functions quickly. */ @@ -843,6 +974,7 @@ spell_check(wp, ptr, attrp, capcol) int c; int wrongcaplen = 0; int lpi; + int count_word = docount; /* A word never starts at a space or a control character. Return quickly * then, skipping over the character. */ @@ -905,8 +1037,8 @@ spell_check(wp, ptr, attrp, capcol) /* * Loop over the languages specified in 'spelllang'. - * We check them all, because a matching word may be longer than an - * already found matching word. + * We check them all, because a word may be matched longer in another + * language. */ for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) { @@ -934,6 +1066,14 @@ spell_check(wp, ptr, attrp, capcol) mi.mi_result = mi.mi_result2; mi.mi_end = mi.mi_end2; } + + /* Count the word in the first language where it's found to be OK. */ + if (count_word && mi.mi_result == SP_OK) + { + count_common_word(mi.mi_lp->lp_slang, ptr, + (int)(mi.mi_end - ptr), 1); + count_word = FALSE; + } } if (mi.mi_result != SP_OK) @@ -1897,7 +2037,7 @@ spell_move_to(wp, dir, allwords, curline, attrp) /* start of word */ attr = HLF_COUNT; - len = spell_check(wp, p, &attr, &capcol); + len = spell_check(wp, p, &attr, &capcol, FALSE); if (attr != HLF_COUNT) { @@ -2140,7 +2280,7 @@ int_wordlist_spl(fname) } /* - * Allocate a new slang_T. + * Allocate a new slang_T for language "lang". "lang" can be NULL. * Caller must fill "sl_next". 
*/ static slang_T * @@ -2152,11 +2292,15 @@ slang_alloc(lang) lp = (slang_T *)alloc_clear(sizeof(slang_T)); if (lp != NULL) { - lp->sl_name = vim_strsave(lang); + if (lang != NULL) + lp->sl_name = vim_strsave(lang); ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); + ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); lp->sl_compmax = MAXWLEN; lp->sl_compsylmax = MAXWLEN; + hash_init(&lp->sl_wordcount); } + return lp; } @@ -2184,6 +2328,7 @@ slang_clear(lp) fromto_T *ftp; salitem_T *smp; int i; + int round; vim_free(lp->sl_fbyts); lp->sl_fbyts = NULL; @@ -2199,14 +2344,17 @@ slang_clear(lp) vim_free(lp->sl_pidxs); lp->sl_pidxs = NULL; - gap = &lp->sl_rep; - while (gap->ga_len > 0) + for (round = 1; round <= 2; ++round) { - ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; - vim_free(ftp->ft_from); - vim_free(ftp->ft_to); + gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; + while (gap->ga_len > 0) + { + ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; + vim_free(ftp->ft_from); + vim_free(ftp->ft_to); + } + ga_clear(gap); } - ga_clear(gap); gap = &lp->sl_sal; if (lp->sl_sofo) @@ -2253,21 +2401,16 @@ slang_clear(lp) lp->sl_syllable = NULL; ga_clear(&lp->sl_syl_items); -#ifdef FEAT_MBYTE - { - int todo = lp->sl_map_hash.ht_used; - hashitem_T *hi; + hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); + hash_init(&lp->sl_wordcount); - for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi) - if (!HASHITEM_EMPTY(hi)) - { - --todo; - vim_free(hi->hi_key); - } - } - hash_clear(&lp->sl_map_hash); +#ifdef FEAT_MBYTE + hash_clear_all(&lp->sl_map_hash, 0); #endif + /* Clear info from .sug file. */ + slang_clear_sug(lp); + lp->sl_compmax = MAXWLEN; lp->sl_compminlen = 0; lp->sl_compsylmax = MAXWLEN; @@ -2275,6 +2418,23 @@ slang_clear(lp) } /* + * Clear the info from the .sug file in "lp". 
+ */ + static void +slang_clear_sug(lp) + slang_T *lp; +{ + vim_free(lp->sl_sbyts); + lp->sl_sbyts = NULL; + vim_free(lp->sl_sidxs); + lp->sl_sidxs = NULL; + close_spellbuf(lp->sl_sugbuf); + lp->sl_sugbuf = NULL; + lp->sl_sugloaded = FALSE; + lp->sl_sugtime = 0; +} + +/* * Load one spell file and store the info into a slang_T. * Invoked through do_in_runtimepath(). */ @@ -2303,11 +2463,13 @@ spell_load_cb(fname, cookie) /* * Load one spell file and store the info into a slang_T. * - * This is invoked in two ways: + * This is invoked in three ways: * - From spell_load_cb() to load a spell file for the first time. "lang" is * the language name, "old_lp" is NULL. Will allocate an slang_T. * - To reload a spell file that was changed. "lang" is NULL and "old_lp" * points to the existing slang_T. + * - Just after writing a .spl file; it's read back to produce the .sug file. + * "old_lp" is NULL and "lang" is a dummy name. Will allocate an slang_T. * Returns the slang_T the spell file was loaded into. NULL for error. 
*/ static slang_T * @@ -2320,16 +2482,12 @@ spell_load_file(fname, lang, old_lp, silent) FILE *fd; char_u buf[VIMSPELLMAGICL]; char_u *p; - char_u *bp; - idx_T *ip; int i; int n; int len; - int round; char_u *save_sourcing_name = sourcing_name; linenr_T save_sourcing_lnum = sourcing_lnum; slang_T *lp = NULL; - idx_T idx; int c = 0; int res; @@ -2374,7 +2532,8 @@ spell_load_file(fname, lang, old_lp, silent) sourcing_name = fname; sourcing_lnum = 0; - /* <HEADER>: <fileID> + /* + * <HEADER>: <fileID> */ for (i = 0; i < VIMSPELLMAGICL; ++i) buf[i] = getc(fd); /* <fileID> */ @@ -2433,7 +2592,11 @@ spell_load_file(fname, lang, old_lp, silent) break; case SN_REP: - res = read_rep_section(fd, lp); + res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); + break; + + case SN_REPSAL: + res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); break; case SN_SAL: @@ -2452,6 +2615,15 @@ spell_load_file(fname, lang, old_lp, silent) vim_free(p); break; + case SN_WORDS: + res = read_words_section(fd, lp, len); + break; + + case SN_SUGFILE: + for (i = 7; i >= 0; --i) /* <timestamp> */ + lp->sl_sugtime += getc(fd) << (i * 8); + break; + case SN_COMPOUND: res = read_compound(fd, lp, len); break; @@ -2481,9 +2653,9 @@ spell_load_file(fname, lang, old_lp, silent) goto truncerr; break; } +someerror: if (res == SP_FORMERROR) { -formerr: EMSG(_(e_format)); goto endFAIL; } @@ -2497,48 +2669,21 @@ truncerr: goto endFAIL; } - /* round 1: <LWORDTREE> - * round 2: <KWORDTREE> - * round 3: <PREFIXTREE> */ - for (round = 1; round <= 3; ++round) - { - /* The tree size was computed when writing the file, so that we can - * allocate it as one long block. <nodecount> */ - len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); - if (len < 0) - goto truncerr; - if (len > 0) - { - /* Allocate the byte array. 
*/ - bp = lalloc((long_u)len, TRUE); - if (bp == NULL) - goto endFAIL; - if (round == 1) - lp->sl_fbyts = bp; - else if (round == 2) - lp->sl_kbyts = bp; - else - lp->sl_pbyts = bp; + /* <LWORDTREE> */ + res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); + if (res != 0) + goto someerror; - /* Allocate the index array. */ - ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); - if (ip == NULL) - goto endFAIL; - if (round == 1) - lp->sl_fidxs = ip; - else if (round == 2) - lp->sl_kidxs = ip; - else - lp->sl_pidxs = ip; + /* <KWORDTREE> */ + res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); + if (res != 0) + goto someerror; - /* Read the tree and store it in the array. */ - idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt); - if (idx == -1) - goto truncerr; - if (idx < 0) - goto formerr; - } - } + /* <PREFIXTREE> */ + res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, + lp->sl_prefixcnt); + if (res != 0) + goto someerror; /* For a new file link it in the list of spell files. */ if (old_lp == NULL) @@ -2733,25 +2878,23 @@ read_prefcond_section(fd, lp) } /* - * Read REP items section from "fd": <repcount> <rep> ... + * Read REP or REPSAL items section from "fd": <repcount> <rep> ... * Return SP_*ERROR flags. */ static int -read_rep_section(fd, slang) +read_rep_section(fd, gap, first) FILE *fd; - slang_T *slang; + garray_T *gap; + short *first; { int cnt; - garray_T *gap; fromto_T *ftp; - short *first; int i; cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ if (cnt < 0) return SP_TRUNCERROR; - gap = &slang->sl_rep; if (ga_grow(gap, cnt) == FAIL) return SP_OTHERERROR; @@ -2775,7 +2918,6 @@ read_rep_section(fd, slang) } /* Fill the first-index table. */ - first = slang->sl_rep_first; for (i = 0; i < 256; ++i) first[i] = -1; for (i = 0; i < gap->ga_len; ++i) @@ -2941,6 +3083,119 @@ read_sal_section(fd, slang) } /* + * Read SN_WORDS: <word> ... + * Return SP_*ERROR flags. 
+ */ + static int +read_words_section(fd, lp, len) + FILE *fd; + slang_T *lp; + int len; +{ + int done = 0; + int i; + char_u word[MAXWLEN]; + + while (done < len) + { + /* Read one word at a time. */ + for (i = 0; ; ++i) + { + word[i] = getc(fd); + if (word[i] == NUL) + break; + if (i == MAXWLEN - 1) + return SP_FORMERROR; + } + + /* Init the count to 10. */ + count_common_word(lp, word, -1, 10); + done += i + 1; + } + return 0; +} + +/* + * Add a word to the hashtable of common words. + * If it's already there then the counter is increased. + */ + static void +count_common_word(lp, word, len, count) + slang_T *lp; + char_u *word; + int len; /* word length, -1 for upto NUL */ + int count; /* 1 to count once, 10 to init */ +{ + hash_T hash; + hashitem_T *hi; + wordcount_T *wc; + char_u buf[MAXWLEN]; + char_u *p; + + if (len == -1) + p = word; + else + { + vim_strncpy(buf, word, len); + p = buf; + } + + hash = hash_hash(p); + hi = hash_lookup(&lp->sl_wordcount, p, hash); + if (HASHITEM_EMPTY(hi)) + { + wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p)); + if (wc == NULL) + return; + STRCPY(wc->wc_word, p); + wc->wc_count = count; + hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); + } + else + { + wc = HI2WC(hi); + if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ + wc->wc_count = MAXWORDCOUNT; + } +} + +/* + * Adjust the score of common words. 
+ */ + static int +score_wordcount_adj(slang, score, word, split) + slang_T *slang; + int score; + char_u *word; + int split; /* word was split, less bonus */ +{ + hashitem_T *hi; + wordcount_T *wc; + int bonus; + int newscore; + + hi = hash_find(&slang->sl_wordcount, word); + if (!HASHITEM_EMPTY(hi)) + { + wc = HI2WC(hi); + if (wc->wc_count < SCORE_THRES2) + bonus = SCORE_COMMON1; + else if (wc->wc_count < SCORE_THRES3) + bonus = SCORE_COMMON2; + else + bonus = SCORE_COMMON3; + if (split) + newscore = score - bonus / 2; + else + newscore = score - bonus; + if (newscore < 0) + return 0; + return newscore; + } + return score; +} + +/* * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> * Return SP_*ERROR flags. */ @@ -3434,17 +3689,63 @@ mb_str2wide(s) #endif /* + * Read a tree from the .spl or .sug file. + * Allocates the memory and stores pointers in "bytsp" and "idxsp". + * This is skipped when the tree has zero length. + * Returns zero when OK, SP_ value for an error. + */ + static int +spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt) + FILE *fd; + char_u **bytsp; + idx_T **idxsp; + int prefixtree; /* TRUE for the prefix tree */ + int prefixcnt; /* when "prefixtree" is TRUE: prefix count */ +{ + int len; + int idx; + char_u *bp; + idx_T *ip; + + /* The tree size was computed when writing the file, so that we can + * allocate it as one long block. <nodecount> */ + len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); + if (len < 0) + return SP_TRUNCERROR; + if (len > 0) + { + /* Allocate the byte array. */ + bp = lalloc((long_u)len, TRUE); + if (bp == NULL) + return SP_OTHERERROR; + *bytsp = bp; + + /* Allocate the index array. */ + ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); + if (ip == NULL) + return SP_OTHERERROR; + *idxsp = ip; + + /* Recursively read the tree and store it in the array. 
*/ + idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); + if (idx < 0) + return idx; + } + return 0; +} + +/* * Read one row of siblings from the spell file and store it in the byte array * "byts" and index array "idxs". Recursively read the children. * - * NOTE: The code here must match put_node(). + * NOTE: The code here must match put_node()! * - * Returns the index follosing the siblings. - * Returns -1 if the file is shorter than expected. - * Returns -2 if there is a format error. + * Returns the index (>= 0) following the siblings. + * Returns SP_TRUNCERROR if the file is shorter than expected. + * Returns SP_FORMERROR if there is a format error. */ static idx_T -read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) +read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) FILE *fd; char_u *byts; idx_T *idxs; @@ -3463,10 +3764,10 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) len = getc(fd); /* <siblingcount> */ if (len <= 0) - return -1; + return SP_TRUNCERROR; if (startidx + len >= maxidx) - return -2; + return SP_FORMERROR; byts[idx++] = len; /* Read the byte values, flag/region bytes and shared indexes. 
*/ @@ -3474,7 +3775,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) { c = getc(fd); /* <byte> */ if (c < 0) - return -1; + return SP_TRUNCERROR; if (c <= BY_SPECIAL) { if (c == BY_NOFLAGS && !prefixtree) @@ -3500,7 +3801,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ if (n >= maxprefcondnr) - return -2; + return SP_FORMERROR; c |= (n << 8); } else /* c must be BY_FLAGS or BY_FLAGS2 */ @@ -3526,7 +3827,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) /* <nodeidx> */ n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); if (n < 0 || n >= maxidx) - return -2; + return SP_FORMERROR; idxs[idx] = n + SHARED_MASK; c = getc(fd); /* <xbyte> */ } @@ -3545,7 +3846,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) else { idxs[startidx + i] = idx; - idx = read_tree(fd, byts, idxs, maxidx, idx, + idx = read_tree_node(fd, byts, idxs, maxidx, idx, prefixtree, maxprefcondnr); if (idx < 0) break; @@ -3820,7 +4121,7 @@ did_set_spelllang(buf) /* language has REP items itself */ lp->lp_replang = lp->lp_slang; else - /* find first similar language that does sound folding */ + /* find first similar language that has REP items */ for (j = 0; j < ga.ga_len; ++j) { lp2 = LANGP_ENTRY(ga, j); @@ -4239,11 +4540,15 @@ struct wordnode_S siblings, in following siblings it is always one. */ char_u wn_byte; /* Byte for this node. NUL for word end */ - char_u wn_affixID; /* when "wn_byte" is NUL: supported/required - prefix ID or 0 */ - short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */ - short wn_region; /* when "wn_byte" is NUL: region mask; for - PREFIXTREE it's the prefcondnr */ + + /* Info for when "wn_byte" is NUL. + * In PREFIXTREE "wn_region" is used for the prefcondnr. + * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and + * "wn_region" the LSW of the wordnr. 
*/ + char_u wn_affixID; /* supported/required prefix ID or 0 */ + short_u wn_flags; /* WF_ flags */ + short wn_region; /* region mask */ + #ifdef SPELL_PRINTTREE int wn_nr; /* sequence nr for printing */ #endif @@ -4266,6 +4571,8 @@ typedef struct spellinfo_S wordnode_T *si_prefroot; /* tree with postponed prefixes */ + long si_sugtree; /* creating the soundfolding trie */ + sblock_T *si_blocks; /* memory blocks used */ long si_blocks_cnt; /* memory blocks allocated */ long si_compress_cnt; /* words to add before lowering @@ -4276,7 +4583,7 @@ typedef struct spellinfo_S #ifdef SPELL_PRINTTREE int si_wordnode_nr; /* sequence nr for nodes */ #endif - + buf_T *si_spellbuf; /* buffer used to store soundfold word table */ int si_ascii; /* handling only ASCII words */ int si_add; /* addition file */ @@ -4292,11 +4599,15 @@ typedef struct spellinfo_S * si_region_count > 1) */ garray_T si_rep; /* list of fromto_T entries from REP lines */ + garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ garray_T si_sal; /* list of fromto_T entries from SAL lines */ char_u *si_sofofr; /* SOFOFROM text */ char_u *si_sofoto; /* SOFOTO text */ + int si_nosugfile; /* NOSUGFILE item found */ int si_followup; /* soundsalike: ? */ int si_collapse; /* soundsalike: ? 
*/ + hashtab_T si_commonwords; /* hashtable for common words */ + time_t si_sugtime; /* timestamp for .sug file */ int si_rem_accents; /* soundsalike: remove accents */ garray_T si_map; /* MAP info concatenated */ char_u *si_midword; /* MIDWORD chars or NULL */ @@ -4337,15 +4648,24 @@ static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin)); static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); -static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); +static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n)); static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root)); static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot)); static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); +static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd)); static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname)); static void clear_node __ARGS((wordnode_T *node)); static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); +static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname)); +static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang)); +static int sug_maketable __ARGS((spellinfo_T *spin)); +static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap)); +static int offset2bytes __ARGS((int nr, char_u *buf)); +static int bytes2offset __ARGS((char_u **pp)); +static void sug_write __ARGS((spellinfo_T *spin, char_u *fname)); static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); +static void 
spell_message __ARGS((spellinfo_T *spin, char_u *str)); static void init_spellfile __ARGS((void)); /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, @@ -4475,7 +4795,7 @@ spell_read_aff(spin, fname) char_u rline[MAXLINELEN]; char_u *line; char_u *pc = NULL; -#define MAXITEMCNT 7 +#define MAXITEMCNT 30 char_u *(items[MAXITEMCNT]); int itemcnt; char_u *p; @@ -4488,6 +4808,7 @@ spell_read_aff(spin, fname) char_u *fol = NULL; char_u *upp = NULL; int do_rep; + int do_repsal; int do_sal; int do_map; int found_map = FALSE; @@ -4513,19 +4834,15 @@ spell_read_aff(spin, fname) return NULL; } - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading affix file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); + spell_message(spin, IObuff); /* Only do REP lines when not done in another .aff file already. */ do_rep = spin->si_rep.ga_len == 0; + /* Only do REPSAL lines when not done in another .aff file already. */ + do_repsal = spin->si_repsal.ga_len == 0; + /* Only do SAL lines when not done in another .aff file already. 
*/ do_sal = spin->si_sal.ga_len == 0; @@ -4756,6 +5073,10 @@ spell_read_aff(spin, fname) { spin->si_nobreak = TRUE; } + else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1) + { + spin->si_nosugfile = TRUE; + } else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) { aff->af_pfxpostpone = TRUE; @@ -5061,21 +5382,25 @@ spell_read_aff(spin, fname) { upp = vim_strsave(items[1]); } - else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) + else if ((STRCMP(items[0], "REP") == 0 + || STRCMP(items[0], "REPSAL") == 0) + && itemcnt == 2) { - /* Ignore REP count */; + /* Ignore REP/REPSAL count */; if (!isdigit(*items[1])) - smsg((char_u *)_("Expected REP count in %s line %d"), + smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), fname, lnum); } - else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3) + else if ((STRCMP(items[0], "REP") == 0 + || STRCMP(items[0], "REPSAL") == 0) + && itemcnt >= 3) { - /* REP item */ + /* REP/REPSAL item */ /* Myspell ignores extra arguments, we require it starts with * # to detect mistakes. */ if (itemcnt > 3 && items[3][0] != '#') smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); - if (do_rep) + if (items[0][3] == 'S' ? do_repsal : do_rep) { /* Replace underscore with space (can't include a space * directly). */ @@ -5085,7 +5410,9 @@ spell_read_aff(spin, fname) for (p = items[2]; *p != NUL; mb_ptr_adv(p)) if (*p == '_') *p = ' '; - add_fromto(spin, &spin->si_rep, items[1], items[2]); + add_fromto(spin, items[0][3] == 'S' + ? 
&spin->si_repsal + : &spin->si_rep, items[1], items[2]); } } else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) @@ -5156,6 +5483,22 @@ spell_read_aff(spin, fname) { sofoto = getroom_save(spin, items[1]); } + else if (STRCMP(items[0], "COMMON") == 0) + { + int i; + + for (i = 1; i < itemcnt; ++i) + { + if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, + items[i]))) + { + p = vim_strsave(items[i]); + if (p == NULL) + break; + hash_add(&spin->si_commonwords, p); + } + } + } else smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), fname, lnum, items[0]); @@ -5665,15 +6008,9 @@ spell_read_dic(spin, fname, affile) /* The hashtable is only used to detect duplicated words. */ hash_init(&ht); - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading dictionary file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, + _("Reading dictionary file %s ..."), fname); + spell_message(spin, IObuff); /* start with a message for the first line */ spin->si_msg_count = 999999; @@ -6122,15 +6459,8 @@ spell_read_wordfile(spin, fname) return FAIL; } - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading word file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); + spell_message(spin, IObuff); /* * Read all the lines in the file one by one. 
@@ -6294,15 +6624,13 @@ spell_read_wordfile(spin, fname) vim_free(pc); fclose(fd); - if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2)) + if (spin->si_ascii && non_ascii > 0) { - if (p_verbose > 2) - verbose_enter(); - smsg((char_u *)_("Ignored %d words with non-ASCII characters"), - non_ascii); - if (p_verbose > 2) - verbose_leave(); + vim_snprintf((char *)IObuff, IOSIZE, + _("Ignored %d words with non-ASCII characters"), non_ascii); + spell_message(spin, IObuff); } + return retval; } @@ -6442,7 +6770,7 @@ store_word(spin, word, flags, region, pfxlist, need_affix) /* * Add word "word" to a word tree at "root". - * When "flags" < 0 we are adding to the prefix tree where flags is used for + * When "flags" < 0 we are adding to the prefix tree where "flags" is used for * "rare" and "region" is the condition nr. * Returns FAIL when out of memory. */ @@ -6507,10 +6835,13 @@ tree_add_word(spin, word, root, flags, region, affixID) && (node->wn_byte < word[i] || (node->wn_byte == NUL && (flags < 0 - ? node->wn_affixID < affixID - : node->wn_flags < (flags & WN_MASK) + ? node->wn_affixID < (unsigned)affixID + : (node->wn_flags < (unsigned)(flags & WN_MASK) || (node->wn_flags == (flags & WN_MASK) - && node->wn_affixID < affixID))))) + && (spin->si_sugtree + ? (node->wn_region & 0xffff) < region + : node->wn_affixID + < (unsigned)affixID))))))) { prev = &node->wn_sibling; node = *prev; @@ -6519,6 +6850,7 @@ tree_add_word(spin, word, root, flags, region, affixID) || node->wn_byte != word[i] || (word[i] == NUL && (flags < 0 + || spin->si_sugtree || node->wn_flags != (flags & WN_MASK) || node->wn_affixID != affixID))) { @@ -6606,9 +6938,11 @@ tree_add_word(spin, word, root, flags, region, affixID) /* Compress both trees. Either they both have many nodes, which makes * compression useful, or one of them is small, which means - * compression goes fast. */ + * compression goes fast. But when filling the souldfold word tree + * there is no keep-case tree. 
*/ wordtree_compress(spin, spin->si_foldroot); - wordtree_compress(spin, spin->si_keeproot); + if (affixID >= 0) + wordtree_compress(spin, spin->si_keeproot); } return OK; @@ -6684,21 +7018,28 @@ get_wordnode(spin) * Decrement the reference count on a node (which is the head of a list of * siblings). If the reference count becomes zero free the node and its * siblings. + * Returns the number of nodes actually freed. */ - static void + static int deref_wordnode(spin, node) spellinfo_T *spin; wordnode_T *node; { - wordnode_T *np; + wordnode_T *np; + int cnt = 0; if (--node->wn_refs == 0) + { for (np = node; np != NULL; np = np->wn_sibling) { if (np->wn_child != NULL) - deref_wordnode(spin, np->wn_child); + cnt += deref_wordnode(spin, np->wn_child); free_wordnode(spin, np); + ++cnt; } + ++cnt; /* length field */ + } + return cnt; } /* @@ -6739,18 +7080,16 @@ wordtree_compress(spin, root) if (spin->si_verbose || p_verbose > 2) #endif { - if (!spin->si_verbose) - verbose_enter(); if (tot > 1000000) perc = (tot - n) / (tot / 100); else if (tot == 0) perc = 0; else perc = (tot - n) * 100 / tot; - smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), - n, tot, perc); - if (p_verbose > 2) - verbose_leave(); + vim_snprintf((char *)IObuff, IOSIZE, + _("Compressed %d of %d nodes; %d (%d%%) remaining"), + n, tot, tot - n, perc); + spell_message(spin, IObuff); } #ifdef SPELL_PRINTTREE spell_print_tree(root->wn_sibling); @@ -6784,24 +7123,24 @@ node_compress(spin, node, ht, tot) * Go through the list of siblings. Compress each child and then try * finding an identical child to replace it. * Note that with "child" we mean not just the node that is pointed to, - * but the whole list of siblings, of which the node is the first. + * but the whole list of siblings of which the child node is the first. */ for (np = node; np != NULL && !got_int; np = np->wn_sibling) { ++len; if ((child = np->wn_child) != NULL) { - /* Compress the child. This fills hashkey. 
*/ + /* Compress the child first. This fills hashkey. */ compressed += node_compress(spin, child, ht, tot); /* Try to find an identical child. */ hash = hash_hash(child->wn_u1.hashkey); hi = hash_lookup(ht, child->wn_u1.hashkey, hash); - tp = NULL; if (!HASHITEM_EMPTY(hi)) { - /* There are children with an identical hash value. Now check - * if there is one that is really identical. */ + /* There are children we encountered before with a hash value + * identical to the current child. Now check if there is one + * that is really identical. */ for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) if (node_equal(child, tp)) { @@ -6809,9 +7148,8 @@ node_compress(spin, node, ht, tot) * current one. This means the current child and all * its siblings is unlinked from the tree. */ ++tp->wn_refs; - deref_wordnode(spin, child); + compressed += deref_wordnode(spin, child); np->wn_child = tp; - ++compressed; break; } if (tp == NULL) @@ -6830,7 +7168,7 @@ node_compress(spin, node, ht, tot) hash_add_item(ht, hi, child->wn_u1.hashkey, hash); } } - *tot += len; + *tot += len + 1; /* add one for the node that stores the length */ /* * Make a hash key for the node and its siblings, so that we can quickly @@ -6906,6 +7244,30 @@ put_bytes(fd, nr, len) putc((int)(nr >> (i * 8)), fd); } +/* + * Write spin->si_sugtime to file "fd". + */ + static void +put_sugtime(spin, fd) + spellinfo_T *spin; + FILE *fd; +{ + int c; + int i; + + /* time_t can be up to 8 bytes in size, more than long_u, thus we + * can't use put_bytes() here. */ + for (i = 7; i >= 0; --i) + if (i + 1 > sizeof(time_t)) + /* ">>" doesn't work well when shifting more bits than avail */ + putc(0, fd); + else + { + c = (unsigned)spin->si_sugtime >> (i * 8); + putc(c, fd); + } +} + static int #ifdef __BORLANDC__ _RTLENTRYF @@ -7056,29 +7418,37 @@ write_vim_spell(spin, fname) } /* SN_REP: <repcount> <rep> ... - * SN_SAL: <salflags> <salcount> <sal> ... */ - - /* Sort the REP items. 
*/ - qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, - sizeof(fromto_T), rep_compare); + * SN_SAL: <salflags> <salcount> <sal> ... + * SN_REPSAL: <repcount> <rep> ... */ /* round 1: SN_REP section - * round 2: SN_SAL section (unless SN_SOFO is used) */ - for (round = 1; round <= 2; ++round) + * round 2: SN_SAL section (unless SN_SOFO is used) + * round 3: SN_REPSAL section */ + for (round = 1; round <= 3; ++round) { if (round == 1) - { gap = &spin->si_rep; - putc(SN_REP, fd); /* <sectionID> */ - } - else + else if (round == 2) { + /* Don't write SN_SAL when using a SN_SOFO section */ if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) - /* using SN_SOFO section instead of SN_SAL */ - break; + continue; gap = &spin->si_sal; - putc(SN_SAL, fd); /* <sectionID> */ } + else + gap = &spin->si_repsal; + + /* Don't write the section if there are no items. */ + if (gap->ga_len == 0) + continue; + + /* Sort the REP/REPSAL items. */ + if (round != 2) + qsort(gap->ga_data, (size_t)gap->ga_len, + sizeof(fromto_T), rep_compare); + + i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); + putc(i, fd); /* <sectionID> */ /* This is for making suggestions, section is not required. */ putc(0, fd); /* <sectionflags> */ @@ -7143,6 +7513,36 @@ write_vim_spell(spin, fname) fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ } + /* SN_WORDS: <word> ... + * This is for making suggestions, section is not required. 
*/ + if (spin->si_commonwords.ht_used > 0) + { + putc(SN_WORDS, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + + /* round 1: count the bytes + * round 2: write the bytes */ + for (round = 1; round <= 2; ++round) + { + int todo; + int len = 0; + hashitem_T *hi; + + todo = spin->si_commonwords.ht_used; + for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) + if (!HASHITEM_EMPTY(hi)) + { + l = STRLEN(hi->hi_key) + 1; + len += l; + if (round == 2) /* <word> */ + fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); + --todo; + } + if (round == 1) + put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ + } + } + /* SN_MAP: <mapstr> * This is for making suggestions, section is not required. */ if (spin->si_map.ga_len > 0) @@ -7155,6 +7555,24 @@ write_vim_spell(spin, fname) /* <mapstr> */ } + /* SN_SUGFILE: <timestamp> + * This is used to notify that a .sug file may be available and at the + * same time allows for checking that a .sug file that is found matches + * with this .spl file. That's because the word numbers must be exactly + * right. */ + if (!spin->si_nosugfile + && (spin->si_sal.ga_len > 0 + || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) + { + putc(SN_SUGFILE, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ + + /* Set si_sugtime and write it to the file. */ + spin->si_sugtime = time(NULL); + put_sugtime(spin, fd); /* <timestamp> */ + } + /* SN_COMPOUND: compound info. * We don't mark it required, when not supported all compound words will * be bad words. */ @@ -7267,9 +7685,9 @@ clear_node(node) * This first writes the list of possible bytes (siblings). Then for each * byte recursively write the children. * - * NOTE: The code here must match the code in read_tree(), since assumptions - * are made about the indexes (so that we don't have to write them in the - * file). 
+ * NOTE: The code here must match the code in read_tree_node(), since + * assumptions are made about the indexes (so that we don't have to write them + * in the file). * * Returns the number of nodes used. */ @@ -7427,6 +7845,520 @@ ex_mkspell(eap) } /* + * Create the .sug file. + * Uses the soundfold info in "spin". + * Writes the file with the name "wfname", with ".spl" changed to ".sug". + */ + static void +spell_make_sugfile(spin, wfname) + spellinfo_T *spin; + char_u *wfname; +{ + char_u fname[MAXPATHL]; + int len; + slang_T *slang; + int free_slang = FALSE; + + /* + * Read back the .spl file that was written. This fills the required + * info for soundfolding. This also uses less memory than the + * pointer-linked version of the trie. And it avoids having two versions + * of the code for the soundfolding stuff. + * It might have been done already by spell_reload_one(). + */ + for (slang = first_lang; slang != NULL; slang = slang->sl_next) + if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) + break; + if (slang == NULL) + { + spell_message(spin, (char_u *)_("Reading back spell file...")); + slang = spell_load_file(wfname, NULL, NULL, FALSE); + if (slang == NULL) + return; + /* don't want this language in the list */ + if (first_lang == slang) + first_lang = slang->sl_next; + free_slang = TRUE; + } + + /* + * Clear the info in "spin" that is used. + */ + spin->si_blocks = NULL; + spin->si_blocks_cnt = 0; + spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ + spin->si_free_count = 0; + spin->si_first_free = NULL; + spin->si_foldwcount = 0; + + /* + * Go through the trie of good words, soundfold each word and add it to + * the soundfold trie. + */ + spell_message(spin, (char_u *)_("Performing soundfolding...")); + if (sug_filltree(spin, slang) == FAIL) + goto theend; + + /* + * Create the table which links each soundfold word with a list of the + * good words it may come from. Creates buffer "spin->si_spellbuf". 
+ * This also removes the wordnr from the NUL byte entries to make + * compression possible. + */ + if (sug_maketable(spin) == FAIL) + goto theend; + + smsg((char_u *)_("Number of words after soundfolding: %ld"), + (long)spin->si_spellbuf->b_ml.ml_line_count); + + /* + * Compress the soundfold trie. + */ + spell_message(spin, (char_u *)_(msg_compressing)); + wordtree_compress(spin, spin->si_foldroot); + + /* + * Write the .sug file. + * Make the file name by changing ".spl" to ".sug". + */ + STRCPY(fname, wfname); + len = STRLEN(fname); + fname[len - 2] = 'u'; + fname[len - 1] = 'g'; + sug_write(spin, fname); + +theend: + if (free_slang) + slang_free(slang); + free_blocks(spin->si_blocks); + close_spellbuf(spin->si_spellbuf); +} + +/* + * Build the soundfold trie for language "slang". + */ + static int +sug_filltree(spin, slang) + spellinfo_T *spin; + slang_T *slang; +{ + char_u *byts; + idx_T *idxs; + int depth; + idx_T arridx[MAXWLEN]; + int curi[MAXWLEN]; + char_u tword[MAXWLEN]; + char_u tsalword[MAXWLEN]; + int c; + idx_T n; + unsigned words_done = 0; + int wordcount[MAXWLEN]; + + /* We use si_foldroot for the souldfolded trie. */ + spin->si_foldroot = wordtree_alloc(spin); + if (spin->si_foldroot == NULL) + return FAIL; + + /* let tree_add_word() know we're adding to the soundfolded tree */ + spin->si_sugtree = TRUE; + + /* + * Go through the whole case-folded tree, soundfold each word and put it + * in the trie. + */ + byts = slang->sl_fbyts; + idxs = slang->sl_fidxs; + + arridx[0] = 0; + curi[0] = 1; + wordcount[0] = 0; + + depth = 0; + while (depth >= 0 && !got_int) + { + if (curi[depth] > byts[arridx[depth]]) + { + /* Done all bytes at this node, go up one level. */ + idxs[arridx[depth]] = wordcount[depth]; + if (depth > 0) + wordcount[depth - 1] += wordcount[depth]; + + --depth; + line_breakcheck(); + } + else + { + + /* Do one more byte at this node. 
*/ + n = arridx[depth] + curi[depth]; + ++curi[depth]; + + c = byts[n]; + if (c == 0) + { + /* Sound-fold the word. */ + tword[depth] = NUL; + spell_soundfold(slang, tword, TRUE, tsalword); + + /* We use the "flags" field for the MSB of the wordnr, + * "region" for the LSB of the wordnr. */ + if (tree_add_word(spin, tsalword, spin->si_foldroot, + words_done >> 16, words_done & 0xffff, + 0) == FAIL) + return FAIL; + + ++words_done; + ++wordcount[depth]; + + /* Reset the block count each time to avoid compression + * kicking in. */ + spin->si_blocks_cnt = 0; + + /* Skip over any other NUL bytes (same word with different + * flags). */ + while (byts[n + 1] == 0) + { + ++n; + ++curi[depth]; + } + } + else + { + /* Normal char, go one level deeper. */ + tword[depth++] = c; + arridx[depth] = idxs[n]; + curi[depth] = 1; + wordcount[depth] = 0; + } + } + } + + smsg((char_u *)_("Total number of words: %d"), words_done); + + return OK; +} + +/* + * Make the table that links each word in the soundfold trie to the words it + * can be produced from. + * This is not unlike lines in a file, thus use a memfile to be able to access + * the table efficiently. + * Returns FAIL when out of memory. + */ + static int +sug_maketable(spin) + spellinfo_T *spin; +{ + garray_T ga; + int res = OK; + + /* Allocate a buffer, open a memline for it and create the swap file + * (uses a temp file, not a .swp file). */ + spin->si_spellbuf = open_spellbuf(); + if (spin->si_spellbuf == NULL) + return FAIL; + + /* Use a buffer to store the line info, avoids allocating many small + * pieces of memory. */ + ga_init2(&ga, 1, 100); + + /* recursively go through the tree */ + if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) + res = FAIL; + + ga_clear(&ga); + return res; +} + +/* + * Fill the table for one node and its children. + * Returns the wordnr at the start of the node. + * Returns -1 when out of memory. 
+ */ + static int +sug_filltable(spin, node, startwordnr, gap) + spellinfo_T *spin; + wordnode_T *node; + int startwordnr; + garray_T *gap; /* place to store line of numbers */ +{ + wordnode_T *p, *np; + int wordnr = startwordnr; + int nr; + int prev_nr; + + for (p = node; p != NULL; p = p->wn_sibling) + { + if (p->wn_byte == NUL) + { + gap->ga_len = 0; + prev_nr = 0; + for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) + { + if (ga_grow(gap, 10) == FAIL) + return -1; + + nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); + /* Compute the offset from the previous nr and store the + * offset in a way that it takes a minimum number of bytes. + * It's a bit like utf-8, but without the need to mark + * following bytes. */ + nr -= prev_nr; + prev_nr += nr; + gap->ga_len += offset2bytes(nr, + (char_u *)gap->ga_data + gap->ga_len); + } + + /* add the NUL byte */ + ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; + + if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, + gap->ga_data, gap->ga_len, TRUE) == FAIL) + return -1; + ++wordnr; + + /* Remove extra NUL entries, we no longer need them. We don't + * bother freeing the nodes, the won't be reused anyway. */ + while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) + p->wn_sibling = p->wn_sibling->wn_sibling; + + /* Clear the flags on the remaining NUL node, so that compression + * works a lot better. */ + p->wn_flags = 0; + p->wn_region = 0; + } + else + { + wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); + if (wordnr == -1) + return -1; + } + } + return wordnr; +} + +/* + * Convert an offset into a minimal number of bytes. + * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL + * bytes. + */ + static int +offset2bytes(nr, buf) + int nr; + char_u *buf; +{ + int rem; + int b1, b2, b3, b4; + + /* Split the number in parts of base 255. We need to avoid NUL bytes. 
*/ + b1 = nr % 255 + 1; + rem = nr / 255; + b2 = rem % 255 + 1; + rem = rem / 255; + b3 = rem % 255 + 1; + b4 = rem / 255 + 1; + + if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ + { + buf[0] = 0xe0 + b4; + buf[1] = b3; + buf[2] = b2; + buf[3] = b1; + return 4; + } + if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ + { + buf[0] = 0xc0 + b3; + buf[1] = b2; + buf[2] = b1; + return 3; + } + if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ + { + buf[0] = 0x80 + b2; + buf[1] = b1; + return 2; + } + /* 1 byte */ + buf[0] = b1; + return 1; +} + +/* + * Opposite of offset2bytes(). + * "pp" points to the bytes and is advanced over it. + * Returns the offset. + */ + static int +bytes2offset(pp) + char_u **pp; +{ + char_u *p = *pp; + int nr; + int c; + + c = *p++; + if ((c & 0x80) == 0x00) /* 1 byte */ + { + nr = c - 1; + } + else if ((c & 0xc0) == 0x80) /* 2 bytes */ + { + nr = (c & 0x3f) - 1; + nr = nr * 255 + (*p++ - 1); + } + else if ((c & 0xe0) == 0xc0) /* 3 bytes */ + { + nr = (c & 0x1f) - 1; + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + } + else /* 4 bytes */ + { + nr = (c & 0x0f) - 1; + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + } + + *pp = p; + return nr; +} + +/* + * Write the .sug file in "fname". + */ + static void +sug_write(spin, fname) + spellinfo_T *spin; + char_u *fname; +{ + FILE *fd; + wordnode_T *tree; + int nodecount; + int wcount; + char_u *line; + linenr_T lnum; + int len; + + /* Create the file. Note that an existing file is silently overwritten! 
*/ + fd = mch_fopen((char *)fname, "w"); + if (fd == NULL) + { + EMSG2(_(e_notopen), fname); + return; + } + + vim_snprintf((char *)IObuff, IOSIZE, + _("Writing suggestion file %s ..."), fname); + spell_message(spin, IObuff); + + /* + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + */ + if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ + { + EMSG(_(e_write)); + goto theend; + } + putc(VIMSUGVERSION, fd); /* <versionnr> */ + + /* Write si_sugtime to the file. */ + put_sugtime(spin, fd); /* <timestamp> */ + + /* + * <SUGWORDTREE> + */ + spin->si_memtot = 0; + tree = spin->si_foldroot->wn_sibling; + + /* Clear the index and wnode fields in the tree. */ + clear_node(tree); + + /* Count the number of nodes. Needed to be able to allocate the + * memory when reading the nodes. Also fills in index for shared + * nodes. */ + nodecount = put_node(NULL, tree, 0, 0, FALSE); + + /* number of nodes in 4 bytes */ + put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ + spin->si_memtot += nodecount + nodecount * sizeof(int); + + /* Write the nodes. */ + (void)put_node(fd, tree, 0, 0, FALSE); + + /* + * <SUGTABLE>: <sugwcount> <sugline> ... + */ + wcount = spin->si_spellbuf->b_ml.ml_line_count; + put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ + + for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) + { + /* <sugline>: <sugnr> ... NUL */ + line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); + len = STRLEN(line) + 1; + if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) + { + EMSG(_(e_write)); + goto theend; + } + spin->si_memtot += len; + } + + /* Write another byte to check for errors. */ + if (putc(0, fd) == EOF) + EMSG(_(e_write)); + + vim_snprintf((char *)IObuff, IOSIZE, + _("Estimated runtime memory use: %d bytes"), spin->si_memtot); + spell_message(spin, IObuff); + +theend: + /* close the file */ + fclose(fd); +} + +/* + * Open a spell buffer. This is a nameless buffer that is not in the buffer + * list and only contains text lines. 
Can use a swapfile to reduce memory + * use. + * Most other fields are invalid! Esp. watch out for string options being + * NULL and there is no undo info. + * Returns NULL when out of memory. + */ + static buf_T * +open_spellbuf() +{ + buf_T *buf; + + buf = (buf_T *)alloc_clear(sizeof(buf_T)); + if (buf != NULL) + { + buf->b_spell = TRUE; + buf->b_p_swf = TRUE; /* may create a swap file */ + ml_open(buf); + ml_open_file(buf); /* create swap file now */ + } + return buf; +} + +/* + * Close the buffer used for spell info. + */ + static void +close_spellbuf(buf) + buf_T *buf; +{ + if (buf != NULL) + { + ml_close(buf, TRUE); + vim_free(buf); + } +} + + +/* * Create a Vim spell file from one or more word lists. * "fnames[0]" is the output file name. * "fnames[fcount - 1]" is the last input file name. @@ -7458,9 +8390,11 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) spin.si_followup = TRUE; spin.si_rem_accents = TRUE; ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); + ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_map, (int)sizeof(char_u), 100); ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); + hash_init(&spin.si_commonwords); spin.si_newcompID = 127; /* start compound ID at first maximum */ /* default: fnames[0] is output file, following are input files */ @@ -7613,64 +8547,47 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) if (spin.si_compflags != NULL && spin.si_nobreak) MSG(_("Warning: both compounding and NOBREAK specified")); - if (!error) + if (!error && !got_int) { /* * Combine tails in the tree. 
*/ - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - MSG(_(msg_compressing)); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + spell_message(&spin, (char_u *)_(msg_compressing)); wordtree_compress(&spin, spin.si_foldroot); wordtree_compress(&spin, spin.si_keeproot); wordtree_compress(&spin, spin.si_prefroot); } - if (!error) + if (!error && !got_int) { /* * Write the info in the spell file. */ - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - smsg((char_u *)_("Writing spell file %s ..."), wfname); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, + _("Writing spell file %s ..."), wfname); + spell_message(&spin, IObuff); error = write_vim_spell(&spin, wfname) == FAIL; - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - MSG(_("Done!")); - smsg((char_u *)_("Estimated runtime memory use: %d bytes"), - spin.si_memtot); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + spell_message(&spin, (char_u *)_("Done!")); + vim_snprintf((char *)IObuff, IOSIZE, + _("Estimated runtime memory use: %d bytes"), spin.si_memtot); + spell_message(&spin, IObuff); - /* If the file is loaded need to reload it. */ + /* + * If the file is loaded need to reload it. + */ if (!error) spell_reload_one(wfname, added_word); } /* Free the allocated memory. */ ga_clear(&spin.si_rep); + ga_clear(&spin.si_repsal); ga_clear(&spin.si_sal); ga_clear(&spin.si_map); ga_clear(&spin.si_prefcond); + hash_clear_all(&spin.si_commonwords, 0); /* Free the .aff file structures. */ for (i = 0; i < incount; ++i) @@ -7679,9 +8596,36 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) /* Free all the bits and pieces at once. */ free_blocks(spin.si_blocks); + + /* + * If there is soundfolding info and no NOSUGFILE item create the + * .sug file with the soundfolded word trie. 
+ */ + if (spin.si_sugtime != 0 && !error && !got_int) + spell_make_sugfile(&spin, wfname); + } } +/* + * Display a message for spell file processing when 'verbose' is set or using + * ":mkspell". "str" can be IObuff. + */ + static void +spell_message(spin, str) + spellinfo_T *spin; + char_u *str; +{ + if (spin->si_verbose || p_verbose > 2) + { + if (!spin->si_verbose) + verbose_enter(); + MSG(str); + out_flush(); + if (!spin->si_verbose) + verbose_leave(); + } +} /* * ":[count]spellgood {word}" @@ -8334,12 +9278,13 @@ spell_casefold(str, len, buf, buflen) return OK; } +/* values for sps_flags */ #define SPS_BEST 1 #define SPS_FAST 2 #define SPS_DOUBLE 4 -static int sps_flags = SPS_BEST; -static int sps_limit = 9999; +static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ +static int sps_limit = 9999; /* max nr of suggestions given */ /* * Check the 'spellsuggest' option. Return FAIL if it's wrong. @@ -8461,7 +9406,7 @@ spell_suggest(count) else limit = sps_limit; spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, - TRUE, need_cap); + TRUE, need_cap, TRUE); if (sug.su_ga.ga_len == 0) MSG(_("Sorry, no suggestions")); @@ -8512,7 +9457,7 @@ spell_suggest(count) * the not replaced part. */ STRCPY(wcopy, stp->st_word); if (sug.su_badlen > stp->st_orglen) - vim_strncpy(wcopy + STRLEN(wcopy), + vim_strncpy(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen, sug.su_badlen - stp->st_orglen); vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); @@ -8586,7 +9531,7 @@ spell_suggest(count) } /* Replace the word. 
*/ - p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1); + p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1); if (p != NULL) { c = sug.su_badptr - line; @@ -8601,7 +9546,7 @@ spell_suggest(count) ResetRedobuff(); AppendToRedobuff((char_u *)"ciw"); AppendToRedobuffLit(p + c, - STRLEN(stp->st_word) + sug.su_badlen - stp->st_orglen); + stp->st_wordlen + sug.su_badlen - stp->st_orglen); AppendCharToRedobuff(ESC); } } @@ -8759,18 +9704,19 @@ ex_spellrepall(eap) * a list of allocated strings. */ void -spell_suggest_list(gap, word, maxcount, need_cap) +spell_suggest_list(gap, word, maxcount, need_cap, interactive) garray_T *gap; char_u *word; int maxcount; /* maximum nr of suggestions */ int need_cap; /* 'spellcapcheck' matched */ + int interactive; { suginfo_T sug; int i; suggest_T *stp; char_u *wcopy; - spell_find_suggest(word, &sug, maxcount, FALSE, need_cap); + spell_find_suggest(word, &sug, maxcount, FALSE, need_cap, interactive); /* Make room in "gap". */ ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); @@ -8783,12 +9729,12 @@ spell_suggest_list(gap, word, maxcount, need_cap) /* The suggested word may replace only part of "word", add the not * replaced part. */ - wcopy = alloc(STRLEN(stp->st_word) + wcopy = alloc(stp->st_wordlen + STRLEN(sug.su_badptr + stp->st_orglen) + 1); if (wcopy == NULL) break; STRCPY(wcopy, stp->st_word); - STRCAT(wcopy, sug.su_badptr + stp->st_orglen); + STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; } @@ -8803,12 +9749,13 @@ spell_suggest_list(gap, word, maxcount, need_cap) * This is based on the mechanisms of Aspell, but completely reimplemented. 
*/ static void -spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) +spell_find_suggest(badptr, su, maxcount, banbadword, need_cap, interactive) char_u *badptr; suginfo_T *su; int maxcount; int banbadword; /* don't include badword in suggestions */ int need_cap; /* word should start with capital */ + int interactive; { hlf_T attr = HLF_COUNT; char_u buf[MAXPATHL]; @@ -8833,7 +9780,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) hash_init(&su->su_banned); su->su_badptr = badptr; - su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL); + su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); su->su_maxcount = maxcount; su->su_maxscore = SCORE_MAXINIT; @@ -8876,7 +9823,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) { make_case_word(su->su_badword, buf, WF_ONECAP); add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, - 0, TRUE, su->su_sallang); + 0, TRUE, su->su_sallang, FALSE); } /* Ban the bad word itself. It may appear in another region. */ @@ -8912,7 +9859,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) else { /* Use internal method. */ - spell_suggest_intern(su); + spell_suggest_intern(su, interactive); if (sps_flags & SPS_DOUBLE) do_combine = TRUE; } @@ -8952,14 +9899,15 @@ spell_suggest_expr(su, expr) { /* Get the word and the score from the items. */ score = get_spellword(li->li_tv.vval.v_list, &p); - if (score >= 0) - add_suggestion(su, &su->su_ga, p, - su->su_badlen, score, 0, TRUE, su->su_sallang); + if (score >= 0 && score <= su->su_maxscore) + add_suggestion(su, &su->su_ga, p, su->su_badlen, + score, 0, TRUE, su->su_sallang, FALSE); } list_unref(list); } - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". 
*/ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } #endif @@ -9011,13 +9959,14 @@ spell_suggest_file(su, fname) } add_suggestion(su, &su->su_ga, p, su->su_badlen, - SCORE_FILE, 0, TRUE, su->su_sallang); + SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); } } fclose(fd); - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } @@ -9025,10 +9974,16 @@ spell_suggest_file(su, fname) * Find suggestions for the internal method indicated by "sps_flags". */ static void -spell_suggest_intern(su) +spell_suggest_intern(su, interactive) suginfo_T *su; + int interactive; { /* + * Load the .sug file(s) that are available and not done yet. + */ + suggest_load_files(); + + /* * 1. Try special cases, such as repeating a word: "the the" -> "the". * * Set a maximum score to limit the combination of operations that is @@ -9048,22 +10003,50 @@ spell_suggest_intern(su) /* * 3. Try finding sound-a-like words. - * - * Only do this when we don't have a lot of suggestions yet, because it's - * very slow and often doesn't find new suggestions. */ - if ((sps_flags & SPS_DOUBLE) - || (!(sps_flags & SPS_FAST) - && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) + if ((sps_flags & SPS_FAST) == 0) { - /* Allow a higher score now. */ - su->su_maxscore = SCORE_MAXMAX; + if (sps_flags & SPS_BEST) + /* Adjust the word score for the suggestions found so far for how + * they sounds like. */ + rescore_suggestions(su); + + /* + * While going throught the soundfold tree "su_maxscore" is the score + * for the soundfold word, limits the changes that are being tried, + * and "su_sfmaxscore" the rescored score, which is set by + * cleanup_suggestions(). + * First find words with a small edit distance, because this is much + * faster and often already finds the top-N suggestions. 
If we didn't + * find many suggestions try again with a higher edit distance. + * "sl_sounddone" is used to avoid doing the same word twice. + */ + suggest_try_soundalike_prep(); + su->su_maxscore = SCORE_SFMAX1; + su->su_sfmaxscore = SCORE_MAXINIT * 3; suggest_try_soundalike(su); + if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) + { + /* We didn't find enough matches, try again, allowing more + * changes to the soundfold word. */ + su->su_maxscore = SCORE_SFMAX2; + suggest_try_soundalike(su); + if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) + { + /* Still didn't find enough matches, try again, allowing even + * more changes to the soundfold word. */ + su->su_maxscore = SCORE_SFMAX3; + suggest_try_soundalike(su); + } + } + su->su_maxscore = su->su_sfmaxscore; + suggest_try_soundalike_finish(); } - /* When CTRL-C was hit while searching do show the results. */ + /* When CTRL-C was hit while searching do show the results. Only clear + * got_int when using a command, not for spellsuggest(). */ ui_breakcheck(); - if (got_int) + if (interactive && got_int) { (void)vgetc(); got_int = FALSE; @@ -9075,12 +10058,220 @@ spell_suggest_intern(su) /* Adjust the word score for how it sounds like. */ rescore_suggestions(su); - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } } /* + * Load the .sug files for languages that have one and weren't loaded yet. + */ + static void +suggest_load_files() +{ + langp_T *lp; + int lpi; + slang_T *slang; + char_u *dotp; + FILE *fd; + char_u buf[MAXWLEN]; + int i; + time_t timestamp; + int wcount; + int wordnr; + garray_T ga; + int c; + + /* Do this for all languages that support sound folding. 
*/ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) + { + /* Change ".spl" to ".sug" and open the file. When the file isn't + * found silently skip it. Do set "sl_sugloaded" so that we + * don't try again and again. */ + slang->sl_sugloaded = TRUE; + + dotp = vim_strrchr(slang->sl_fname, '.'); + if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) + continue; + STRCPY(dotp, ".sug"); + fd = fopen((char *)slang->sl_fname, "r"); + if (fd == NULL) + goto nextone; + + /* + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + */ + for (i = 0; i < VIMSUGMAGICL; ++i) + buf[i] = getc(fd); /* <fileID> */ + if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) + { + EMSG2(_("E999: This does not look like a .sug file: %s"), + slang->sl_fname); + goto nextone; + } + c = getc(fd); /* <versionnr> */ + if (c < VIMSUGVERSION) + { + EMSG2(_("E999: Old .sug file, needs to be updated: %s"), + slang->sl_fname); + goto nextone; + } + else if (c > VIMSUGVERSION) + { + EMSG2(_("E999: .sug file is for newer version of Vim: %s"), + slang->sl_fname); + goto nextone; + } + + /* Check the timestamp, it must be exactly the same as the one in + * the .spl file. Otherwise the word numbers won't match. */ + timestamp = 0; + for (i = 7; i >= 0; --i) /* <timestamp> */ + timestamp += getc(fd) << (i * 8); + if (timestamp != slang->sl_sugtime) + { + EMSG2(_("E999: .sug file doesn't match .spl file: %s"), + slang->sl_fname); + goto nextone; + } + + /* + * <SUGWORDTREE>: <wordtree> + * Read the trie with the soundfolded words. + */ + if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, + FALSE, 0) != 0) + { +someerror: + EMSG2(_("E999: error while reading .sug file: %s"), + slang->sl_fname); + slang_clear_sug(slang); + goto nextone; + } + + /* + * <SUGTABLE>: <sugwcount> <sugline> ... + * + * Read the table with word numbers. 
We use a file buffer for + * this, because it's so much like a file with lines. Makes it + * possible to swap the info and save on memory use. + */ + slang->sl_sugbuf = open_spellbuf(); + if (slang->sl_sugbuf == NULL) + goto someerror; + /* <sugwcount> */ + wcount = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + + getc(fd); + if (wcount < 0) + goto someerror; + + /* Read all the wordnr lists into the buffer, one NUL terminated + * list per line. */ + ga_init2(&ga, 1, 100); + for (wordnr = 0; wordnr < wcount; ++wordnr) + { + ga.ga_len = 0; + for (;;) + { + c = getc(fd); /* <sugline> */ + if (c < 0 || ga_grow(&ga, 1) == FAIL) + goto someerror; + ((char_u *)ga.ga_data)[ga.ga_len++] = c; + if (c == NUL) + break; + } + if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, + ga.ga_data, ga.ga_len, TRUE) == FAIL) + goto someerror; + } + ga_clear(&ga); + + /* + * Need to put word counts in the word tries, so that we can find + * a word by its number. + */ + tree_count_words(slang->sl_fbyts, slang->sl_fidxs); + tree_count_words(slang->sl_sbyts, slang->sl_sidxs); + +nextone: + if (fd != NULL) + fclose(fd); + STRCPY(dotp, ".spl"); + } + } +} + + +/* + * Fill in the wordcount fields for a trie. + * Returns the total number of words. + */ + static void +tree_count_words(byts, idxs) + char_u *byts; + idx_T *idxs; +{ + int depth; + idx_T arridx[MAXWLEN]; + int curi[MAXWLEN]; + int c; + idx_T n; + int wordcount[MAXWLEN]; + + arridx[0] = 0; + curi[0] = 1; + wordcount[0] = 0; + depth = 0; + while (depth >= 0 && !got_int) + { + if (curi[depth] > byts[arridx[depth]]) + { + /* Done all bytes at this node, go up one level. */ + idxs[arridx[depth]] = wordcount[depth]; + if (depth > 0) + wordcount[depth - 1] += wordcount[depth]; + + --depth; + fast_breakcheck(); + } + else + { + /* Do one more byte at this node. */ + n = arridx[depth] + curi[depth]; + ++curi[depth]; + + c = byts[n]; + if (c == 0) + { + /* End of word, count it. 
*/ + ++wordcount[depth]; + + /* Skip over any other NUL bytes (same word with different + * flags). */ + while (byts[n + 1] == 0) + { + ++n; + ++curi[depth]; + } + } + else + { + /* Normal char, go one level deeper to count the words. */ + ++depth; + arridx[depth] = idxs[n]; + curi[depth] = 1; + wordcount[depth] = 0; + } + } + } +} + +/* * Free the info put in "*su" by spell_find_suggest(). */ static void @@ -9098,7 +10289,7 @@ spell_find_cleanup(su) ga_clear(&su->su_sga); /* Free the banned words. */ - free_banned(su); + hash_clear_all(&su->su_banned, 0); } /* @@ -9224,31 +10415,87 @@ suggest_try_special(su) /* Give a soundalike score of 0, compute the score as if deleting one * character. */ add_suggestion(su, &su->su_ga, word, su->su_badlen, - RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang); + RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); + } +} + +/* + * Try finding suggestions by adding/removing/swapping letters. + */ + static void +suggest_try_change(su) + suginfo_T *su; +{ + char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ + int n; + char_u *p; + int lpi; + langp_T *lp; + + /* We make a copy of the case-folded bad word, so that we can modify it + * to find matches (esp. REP items). Append some more text, changing + * chars after the bad word may help. */ + STRCPY(fword, su->su_fbadword); + n = STRLEN(fword); + p = su->su_badptr + su->su_badlen; + (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); + + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + + /* If reloading a spell file fails it's still in the list but + * everything has been cleared. */ + if (lp->lp_slang->sl_fbyts == NULL) + continue; + + /* Try it for this language. Will add possible suggestions. */ + suggest_trie_walk(su, lp, fword, FALSE); } } +/* Check the maximum score, if we go over it we won't try this change. 
*/ +#define TRY_DEEPER(su, stack, depth, add) \ + (stack[depth].ts_score + (add) < su->su_maxscore) + /* * Try finding suggestions by adding/removing/swapping letters. * * This uses a state machine. At each node in the tree we try various - * operations. When trying if an operation work "depth" is increased and the + * operations. When trying if an operation works "depth" is increased and the * stack[] is used to store info. This allows combinations, thus insert one * character, replace one and delete another. The number of changes is - * limited by su->su_maxscore, checked in try_deeper(). + * limited by su->su_maxscore. * * After implementing this I noticed an article by Kemal Oflazer that * describes something similar: "Error-tolerant Finite State Recognition with * Applications to Morphological Analysis and Spelling Correction" (1996). * The implementation in the article is simplified and requires a stack of - * unknown depth. The implementation here only needs a stack depth of the - * length of the word. + * unknown depth. The implementation here only needs a stack depth equal to + * the length of the word. + * + * This is also used for the sound-folded word, "soundfold" is TRUE then. + * The mechanism is the same, but we find a match with a sound-folded word + * that comes from one or more original words. Each of these words may be + * added, this is done by add_sound_suggest(). 
+ * Don't use: + * the prefix tree or the keep-case tree + * "su->su_badlen" + * anything to do with upper and lower case + * anything to do with word or non-word characters ("spell_iswordp()") + * banned words + * word flags (rare, region, compounding) + * word splitting for now + * "similar_chars()" + * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" */ static void -suggest_try_change(su) +suggest_trie_walk(su, lp, fword, soundfold) suginfo_T *su; + langp_T *lp; + char_u *fword; + int soundfold; { - char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ char_u tword[MAXWLEN]; /* good word collected so far */ trystate_T stack[MAXWLEN]; char_u preword[MAXWLEN * 3]; /* word found with proper case; @@ -9259,12 +10506,12 @@ suggest_try_change(su) char_u compflags[MAXWLEN]; /* compound flags, one for each word */ trystate_T *sp; int newscore; - langp_T *lp; + int score; char_u *byts, *fbyts, *pbyts; idx_T *idxs, *fidxs, *pidxs; int depth; int c, c2, c3; - int n; + int n = 0; int flags; garray_T *gap; idx_T arridx; @@ -9273,41 +10520,39 @@ suggest_try_change(su) fromto_T *ftp; int fl = 0, tl; int repextra = 0; /* extra bytes in fword[] from REP item */ - slang_T *slang; + slang_T *slang = lp->lp_slang; int fword_ends; - int lpi; - int maysplit; int goodword_ends; +#ifdef DEBUG_TRIEWALK + /* Stores the name of the change made at each level. */ + char_u changename[MAXWLEN][80]; +#endif + int breakcheckcount = 1000; + int compound_ok; - /* We make a copy of the case-folded bad word, so that we can modify it - * to find matches (esp. REP items). Append some more text, changing - * chars after the bad word may help. */ - STRCPY(fword, su->su_fbadword); - n = STRLEN(fword); - p = su->su_badptr + su->su_badlen; - (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); + /* + * Go through the whole case-fold tree, try changes at each node. + * "tword[]" contains the word collected from nodes in the tree. 
+ * "fword[]" the word we are trying to match with (initially the bad + * word). + */ + depth = 0; + sp = &stack[0]; + vim_memset(sp, 0, sizeof(trystate_T)); + sp->ts_curi = 1; - for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + if (soundfold) + { + /* Going through the soundfold tree. */ + byts = fbyts = slang->sl_sbyts; + idxs = fidxs = slang->sl_sidxs; + pbyts = NULL; + pidxs = NULL; + sp->ts_prefixdepth = PFD_NOPREFIX; + sp->ts_state = STATE_START; + } + else { - lp = LANGP_ENTRY(curbuf->b_langp, lpi); - slang = lp->lp_slang; - - /* If reloading a spell file fails it's still in the list but - * everything has been cleared. */ - if (slang->sl_fbyts == NULL) - continue; - - /* - * Go through the whole case-fold tree, try changes at each node. - * "tword[]" contains the word collected from nodes in the tree. - * "fword[]" the word we are trying to match with (initially the bad - * word). - */ - depth = 0; - sp = &stack[0]; - vim_memset(sp, 0, sizeof(trystate_T)); - sp->ts_curi = 1; - /* * When there are postponed prefixes we need to use these first. At * the end of the prefix we continue in the case-fold tree. @@ -9330,232 +10575,243 @@ suggest_try_change(su) sp->ts_prefixdepth = PFD_NOPREFIX; sp->ts_state = STATE_START; } + } - /* - * Loop to find all suggestions. At each round we either: - * - For the current state try one operation, advance "ts_curi", - * increase "depth". - * - When a state is done go to the next, set "ts_state". - * - When all states are tried decrease "depth". - */ - while (depth >= 0 && !got_int) + /* + * Loop to find all suggestions. At each round we either: + * - For the current state try one operation, advance "ts_curi", + * increase "depth". + * - When a state is done go to the next, set "ts_state". + * - When all states are tried decrease "depth". 
+ */ + while (depth >= 0 && !got_int) + { + sp = &stack[depth]; + switch (sp->ts_state) { - sp = &stack[depth]; - switch (sp->ts_state) + case STATE_START: + case STATE_NOPREFIX: + /* + * Start of node: Deal with NUL bytes, which means + * tword[] may end here. + */ + arridx = sp->ts_arridx; /* current node in the tree */ + len = byts[arridx]; /* bytes in this node */ + arridx += sp->ts_curi; /* index of current byte */ + + if (sp->ts_prefixdepth == PFD_PREFIXTREE) { - case STATE_START: - case STATE_NOPREFIX: - /* - * Start of node: Deal with NUL bytes, which means - * tword[] may end here. - */ - arridx = sp->ts_arridx; /* current node in the tree */ - len = byts[arridx]; /* bytes in this node */ - arridx += sp->ts_curi; /* index of current byte */ + /* Skip over the NUL bytes, we use them later. */ + for (n = 0; n < len && byts[arridx + n] == 0; ++n) + ; + sp->ts_curi += n; + + /* Always past NUL bytes now. */ + n = (int)sp->ts_state; + sp->ts_state = STATE_ENDNUL; + sp->ts_save_badflags = su->su_badflags; - if (sp->ts_prefixdepth == PFD_PREFIXTREE) + /* At end of a prefix or at start of prefixtree: check for + * following word. */ + if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) { - /* Skip over the NUL bytes, we use them later. */ - for (n = 0; n < len && byts[arridx + n] == 0; ++n) - ; - sp->ts_curi += n; - - /* Always past NUL bytes now. */ - n = (int)sp->ts_state; - sp->ts_state = STATE_ENDNUL; - sp->ts_save_badflags = su->su_badflags; - - /* At end of a prefix or at start of prefixtree: check for - * following word. */ - if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) - { - /* Set su->su_badflags to the caps type at this - * position. Use the caps type until here for the - * prefix itself. */ + /* Set su->su_badflags to the caps type at this position. + * Use the caps type until here for the prefix itself. 
*/ #ifdef FEAT_MBYTE - if (has_mbyte) - n = nofold_len(fword, sp->ts_fidx, su->su_badptr); - else + if (has_mbyte) + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); + else #endif - n = sp->ts_fidx; - flags = badword_captype(su->su_badptr, - su->su_badptr + n); - su->su_badflags = badword_captype(su->su_badptr + n, + n = sp->ts_fidx; + flags = badword_captype(su->su_badptr, su->su_badptr + n); + su->su_badflags = badword_captype(su->su_badptr + n, su->su_badptr + su->su_badlen); - ++depth; - stack[depth] = stack[depth - 1]; - sp = &stack[depth]; - sp->ts_prefixdepth = depth - 1; - byts = fbyts; - idxs = fidxs; - sp->ts_state = STATE_START; - sp->ts_curi = 1; /* start just after length byte */ - sp->ts_arridx = 0; - - /* Move the prefix to preword[] with the right case - * and make find_keepcap_word() works. */ - tword[sp->ts_twordlen] = NUL; - make_case_word(tword + sp->ts_splitoff, - preword + sp->ts_prewordlen, - flags); - sp->ts_prewordlen = STRLEN(preword); - sp->ts_splitoff = sp->ts_twordlen; - } - break; +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "prefix"); +#endif + go_deeper(stack, depth, 0); + ++depth; + sp = &stack[depth]; + sp->ts_prefixdepth = depth - 1; + byts = fbyts; + idxs = fidxs; + sp->ts_arridx = 0; + + /* Move the prefix to preword[] with the right case + * and make find_keepcap_word() works. */ + tword[sp->ts_twordlen] = NUL; + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, flags); + sp->ts_prewordlen = STRLEN(preword); + sp->ts_splitoff = sp->ts_twordlen; } + break; + } - if (sp->ts_curi > len || byts[arridx] != 0) - { - /* Past bytes in node and/or past NUL bytes. */ - sp->ts_state = STATE_ENDNUL; - sp->ts_save_badflags = su->su_badflags; - break; - } + if (sp->ts_curi > len || byts[arridx] != 0) + { + /* Past bytes in node and/or past NUL bytes. */ + sp->ts_state = STATE_ENDNUL; + sp->ts_save_badflags = su->su_badflags; + break; + } - /* - * End of word in tree. 
- */ - ++sp->ts_curi; /* eat one NUL byte */ + /* + * End of word in tree. + */ + ++sp->ts_curi; /* eat one NUL byte */ - flags = (int)idxs[arridx]; - fword_ends = (fword[sp->ts_fidx] == NUL - || !spell_iswordp(fword + sp->ts_fidx, curbuf)); - tword[sp->ts_twordlen] = NUL; + flags = (int)idxs[arridx]; + fword_ends = (fword[sp->ts_fidx] == NUL + || (soundfold + ? vim_iswhite(fword[sp->ts_fidx]) + : !spell_iswordp(fword + sp->ts_fidx, curbuf))); + tword[sp->ts_twordlen] = NUL; - if (sp->ts_prefixdepth <= PFD_NOTSPECIAL + if (sp->ts_prefixdepth <= PFD_NOTSPECIAL && (sp->ts_flags & TSF_PREFIXOK) == 0) + { + /* There was a prefix before the word. Check that the prefix + * can be used with this word. */ + /* Count the length of the NULs in the prefix. If there are + * none this must be the first try without a prefix. */ + n = stack[sp->ts_prefixdepth].ts_arridx; + len = pbyts[n++]; + for (c = 0; c < len && pbyts[n + c] == 0; ++c) + ; + if (c > 0) { - /* There was a prefix before the word. Check that the - * prefix can be used with this word. */ - /* Count the length of the NULs in the prefix. If there - * are none this must be the first try without a prefix. - */ - n = stack[sp->ts_prefixdepth].ts_arridx; - len = pbyts[n++]; - for (c = 0; c < len && pbyts[n + c] == 0; ++c) - ; - if (c > 0) - { - c = valid_word_prefix(c, n, flags, + c = valid_word_prefix(c, n, flags, tword + sp->ts_splitoff, slang, FALSE); - if (c == 0) - break; + if (c == 0) + break; - /* Use the WF_RARE flag for a rare prefix. */ - if (c & WF_RAREPFX) - flags |= WF_RARE; + /* Use the WF_RARE flag for a rare prefix. */ + if (c & WF_RAREPFX) + flags |= WF_RARE; - /* Tricky: when checking for both prefix and - * compounding we run into the prefix flag first. - * Remember that it's OK, so that we accept the prefix - * when arriving at a compound flag. */ - sp->ts_flags |= TSF_PREFIXOK; - } + /* Tricky: when checking for both prefix and compounding + * we run into the prefix flag first. 
+ * Remember that it's OK, so that we accept the prefix + * when arriving at a compound flag. */ + sp->ts_flags |= TSF_PREFIXOK; } + } - /* Check NEEDCOMPOUND: can't use word without compounding. Do - * try appending another compound word below. */ - if (sp->ts_complen == sp->ts_compsplit && fword_ends + /* Check NEEDCOMPOUND: can't use word without compounding. Do try + * appending another compound word below. */ + if (sp->ts_complen == sp->ts_compsplit && fword_ends && (flags & WF_NEEDCOMP)) - goodword_ends = FALSE; - else - goodword_ends = TRUE; + goodword_ends = FALSE; + else + goodword_ends = TRUE; - if (sp->ts_complen > sp->ts_compsplit) + p = NULL; + compound_ok = TRUE; + if (sp->ts_complen > sp->ts_compsplit) + { + if (slang->sl_nobreak) { - if (slang->sl_nobreak) - { - /* There was a word before this word. When there was - * no change in this word (it was correct) add the - * first word as a suggestion. If this word was - * corrected too, we need to check if a correct word - * follows. */ - if (sp->ts_fidx - sp->ts_splitfidx + /* There was a word before this word. When there was no + * change in this word (it was correct) add the first word + * as a suggestion. If this word was corrected too, we + * need to check if a correct word follows. */ + if (sp->ts_fidx - sp->ts_splitfidx == sp->ts_twordlen - sp->ts_splitoff - && STRNCMP(fword + sp->ts_splitfidx, - tword + sp->ts_splitoff, + && STRNCMP(fword + sp->ts_splitfidx, + tword + sp->ts_splitoff, sp->ts_fidx - sp->ts_splitfidx) == 0) - { - preword[sp->ts_prewordlen] = NUL; + { + preword[sp->ts_prewordlen] = NUL; + newscore = score_wordcount_adj(slang, sp->ts_score, + preword + sp->ts_prewordlen, + sp->ts_prewordlen > 0); + /* Add the suggestion if the score isn't too bad. 
*/ + if (newscore <= su->su_maxscore) add_suggestion(su, &su->su_ga, preword, sp->ts_splitfidx - repextra, - sp->ts_score, 0, FALSE, - lp->lp_sallang); - break; - } + newscore, 0, FALSE, + lp->lp_sallang, FALSE); + break; } - else - { - /* There was a compound word before this word. If - * this word does not support compounding then give up - * (splitting is tried for the word without compound - * flag). */ - if (((unsigned)flags >> 24) == 0 - || sp->ts_twordlen - sp->ts_splitoff + } + else + { + /* There was a compound word before this word. If this + * word does not support compounding then give up + * (splitting is tried for the word without compound + * flag). */ + if (((unsigned)flags >> 24) == 0 + || sp->ts_twordlen - sp->ts_splitoff < slang->sl_compminlen) - break; + break; #ifdef FEAT_MBYTE - /* For multi-byte chars check character length against - * COMPOUNDMIN. */ - if (has_mbyte - && slang->sl_compminlen > 0 - && mb_charlen(tword + sp->ts_splitoff) + /* For multi-byte chars check character length against + * COMPOUNDMIN. */ + if (has_mbyte + && slang->sl_compminlen > 0 + && mb_charlen(tword + sp->ts_splitoff) < slang->sl_compminlen) - break; + break; #endif - compflags[sp->ts_complen] = ((unsigned)flags >> 24); - compflags[sp->ts_complen + 1] = NUL; - vim_strncpy(preword + sp->ts_prewordlen, - tword + sp->ts_splitoff, - sp->ts_twordlen - sp->ts_splitoff); - p = preword; - while (*skiptowhite(p) != NUL) - p = skipwhite(skiptowhite(p)); - if (fword_ends && !can_compound(slang, p, + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + vim_strncpy(preword + sp->ts_prewordlen, + tword + sp->ts_splitoff, + sp->ts_twordlen - sp->ts_splitoff); + p = preword; + while (*skiptowhite(p) != NUL) + p = skipwhite(skiptowhite(p)); + if (fword_ends && !can_compound(slang, p, compflags + sp->ts_compsplit)) - break; + /* Compound is not allowed. But it may still be + * possible if we add another (short) word. 
*/ + compound_ok = FALSE; - /* Get pointer to last char of previous word. */ - p = preword + sp->ts_prewordlen; - mb_ptr_back(preword, p); - } + /* Get pointer to last char of previous word. */ + p = preword + sp->ts_prewordlen; + mb_ptr_back(preword, p); } - else - p = NULL; + } - /* - * Form the word with proper case in preword. - * If there is a word from a previous split, append. - */ - if (flags & WF_KEEPCAP) - /* Must find the word in the keep-case tree. */ - find_keepcap_word(slang, tword + sp->ts_splitoff, + /* + * Form the word with proper case in preword. + * If there is a word from a previous split, append. + * For the soundfold tree don't change the case, simply append. + */ + if (soundfold) + STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); + else if (flags & WF_KEEPCAP) + /* Must find the word in the keep-case tree. */ + find_keepcap_word(slang, tword + sp->ts_splitoff, preword + sp->ts_prewordlen); - else - { - /* Include badflags: if the badword is onecap or allcap - * use that for the goodword too. But if the badword is - * allcap and it's only one char long use onecap. */ - c = su->su_badflags; - if ((c & WF_ALLCAP) + else + { + /* Include badflags: If the badword is onecap or allcap + * use that for the goodword too. But if the badword is + * allcap and it's only one char long use onecap. */ + c = su->su_badflags; + if ((c & WF_ALLCAP) #ifdef FEAT_MBYTE - && su->su_badlen == (*mb_ptr2len)(su->su_badptr) + && su->su_badlen == (*mb_ptr2len)(su->su_badptr) #else - && su->su_badlen == 1 + && su->su_badlen == 1 #endif - ) - c = WF_ONECAP; - c |= flags; - - /* When appending a compound word after a word character - * don't use Onecap. */ - if (p != NULL && spell_iswordp_nmw(p)) - c &= ~WF_ONECAP; - make_case_word(tword + sp->ts_splitoff, + ) + c = WF_ONECAP; + c |= flags; + + /* When appending a compound word after a word character don't + * use Onecap. 
*/ + if (p != NULL && spell_iswordp_nmw(p)) + c &= ~WF_ONECAP; + make_case_word(tword + sp->ts_splitoff, preword + sp->ts_prewordlen, c); - } + } + if (!soundfold) + { /* Don't use a banned word. It may appear again as a good * word, thus remember it. */ if (flags & WF_BANNED) @@ -9564,16 +10820,19 @@ suggest_try_change(su) break; } if ((sp->ts_complen == sp->ts_compsplit - && was_banned(su, preword + sp->ts_prewordlen)) - || was_banned(su, preword)) + && WAS_BANNED(su, preword + sp->ts_prewordlen)) + || WAS_BANNED(su, preword)) { if (slang->sl_compprog == NULL) break; /* the word so far was banned but we may try compounding */ goodword_ends = FALSE; } + } - newscore = 0; + newscore = 0; + if (!soundfold) /* soundfold words don't have flags */ + { if ((flags & WF_REGION) && (((unsigned)flags >> 16) & lp->lp_region) == 0) newscore += SCORE_REGION; @@ -9583,113 +10842,141 @@ suggest_try_change(su) if (!spell_valid_case(su->su_badflags, captype(preword + sp->ts_prewordlen, NULL))) newscore += SCORE_ICASE; + } - maysplit = TRUE; - if (fword_ends && goodword_ends - && sp->ts_fidx >= sp->ts_fidxtry) + /* TODO: how about splitting in the soundfold tree? */ + if (fword_ends + && goodword_ends + && sp->ts_fidx >= sp->ts_fidxtry + && compound_ok) + { + /* The badword also ends: add suggestions. */ +#ifdef DEBUG_TRIEWALK + if (soundfold && STRCMP(preword, "smwrd") == 0) { - /* The badword also ends: add suggestions. Give a penalty - * when changing non-word char to word char, e.g., "thes," - * -> "these". */ - p = fword + sp->ts_fidx; -#ifdef FEAT_MBYTE - if (has_mbyte) - mb_ptr_back(fword, p); - else + int j; + + /* print the stack of changes that brought us here */ + smsg("------ %s -------", fword); + for (j = 0; j < depth; ++j) + smsg("%s", changename[j]); + } #endif - --p; + if (soundfold) + { + /* For soundfolded words we need to find the original + * words, the edit distrance and then add them. 
*/ + add_sound_suggest(su, preword, sp->ts_score, lp); + } + else + { + /* Give a penalty when changing non-word char to word + * char, e.g., "thes," -> "these". */ + p = fword + sp->ts_fidx; + mb_ptr_back(fword, p); if (!spell_iswordp(p, curbuf)) { p = preword + STRLEN(preword); -#ifdef FEAT_MBYTE - if (has_mbyte) - mb_ptr_back(preword, p); - else -#endif - --p; + mb_ptr_back(preword, p); if (spell_iswordp(p, curbuf)) newscore += SCORE_NONWORD; } - add_suggestion(su, &su->su_ga, preword, - sp->ts_fidx - repextra, - sp->ts_score + newscore, 0, FALSE, - lp->lp_sallang); - - /* When the bad word doesn't end yet, try changing the - * next word. E.g., find suggestions for "the the" where - * the second "the" is different. It's done like a split. - */ - if (sp->ts_fidx - repextra >= su->su_badlen) - maysplit = FALSE; + /* Give a bonus to words seen before. */ + score = score_wordcount_adj(slang, + sp->ts_score + newscore, + preword + sp->ts_prewordlen, + sp->ts_prewordlen > 0); + + /* Add the suggestion if the score isn't too bad. */ + if (score <= su->su_maxscore) + add_suggestion(su, &su->su_ga, preword, + sp->ts_fidx - repextra, + score, 0, FALSE, lp->lp_sallang, FALSE); } + } - if (maysplit - && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends) + /* + * Try word split and/or compounding. + */ + if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) #ifdef FEAT_MBYTE - /* Don't split halfway a character. */ - && (!has_mbyte || sp->ts_tcharlen == 0) -#endif - ) - { - int try_compound; - - /* Get here in two situations: - * 1. The word in the tree ends but the badword continues: - * If the word allows compounding try that. Otherwise - * try a split by inserting a space. For both check - * that a valid words starts at fword[sp->ts_fidx]. - * For NOBREAK do like compounding to be able to check - * if the next word is valid. - * 2. The badword does end, but it was due to a change - * (e.g., a swap). No need to split, but do check that - * the following word is valid. 
- */ - try_compound = FALSE; - if ((!fword_ends || !goodword_ends) - && slang->sl_compprog != NULL - && ((unsigned)flags >> 24) != 0 - && sp->ts_twordlen - sp->ts_splitoff - >= slang->sl_compminlen + /* Don't split halfway a character. */ + && (!has_mbyte || sp->ts_tcharlen == 0) +#endif + ) + { + int try_compound; + int try_split; + + /* If past the end of the bad word don't try a split. + * Otherwise try changing the next word. E.g., find + * suggestions for "the the" where the second "the" is + * different. It's done like a split. + * TODO: word split for soundfold words */ + try_split = (sp->ts_fidx - repextra < su->su_badlen) + && !soundfold; + + /* Get here in several situations: + * 1. The word in the tree ends: + * If the word allows compounding try that. Otherwise try + * a split by inserting a space. For both check that a + * valid words starts at fword[sp->ts_fidx]. + * For NOBREAK do like compounding to be able to check if + * the next word is valid. + * 2. The badword does end, but it was due to a change (e.g., + * a swap). No need to split, but do check that the + * following word is valid. + * 3. The badword and the word in the tree end. It may still + * be possible to compound another (short) word. + */ + try_compound = FALSE; + if (!soundfold + && slang->sl_compprog != NULL + && ((unsigned)flags >> 24) != 0 + && sp->ts_twordlen - sp->ts_splitoff + >= slang->sl_compminlen #ifdef FEAT_MBYTE - && (!has_mbyte - || slang->sl_compminlen == 0 - || mb_charlen(tword + sp->ts_splitoff) + && (!has_mbyte + || slang->sl_compminlen == 0 + || mb_charlen(tword + sp->ts_splitoff) >= slang->sl_compminlen) #endif - && (slang->sl_compsylmax < MAXWLEN - || sp->ts_complen + 1 - sp->ts_compsplit - < slang->sl_compmax) - && (byte_in_str(sp->ts_complen == sp->ts_compsplit - ? 
slang->sl_compstartflags - : slang->sl_compallflags, + && (slang->sl_compsylmax < MAXWLEN + || sp->ts_complen + 1 - sp->ts_compsplit + < slang->sl_compmax) + && (byte_in_str(sp->ts_complen == sp->ts_compsplit + ? slang->sl_compstartflags + : slang->sl_compallflags, ((unsigned)flags >> 24)))) - { - try_compound = TRUE; - compflags[sp->ts_complen] = ((unsigned)flags >> 24); - compflags[sp->ts_complen + 1] = NUL; - } + { + try_compound = TRUE; + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + } - /* For NOBREAK we never try splitting, it won't make any - * word valid. */ - if (slang->sl_nobreak) - try_compound = TRUE; - - /* If we could add a compound word, and it's also possible - * to split at this point, do the split first and set - * TSF_DIDSPLIT to avoid doing it again. */ - else if (!fword_ends - && try_compound - && (sp->ts_flags & TSF_DIDSPLIT) == 0) - { - try_compound = FALSE; - sp->ts_flags |= TSF_DIDSPLIT; - --sp->ts_curi; /* do the same NUL again */ - compflags[sp->ts_complen] = NUL; - } - else - sp->ts_flags &= ~TSF_DIDSPLIT; + /* For NOBREAK we never try splitting, it won't make any word + * valid. */ + if (slang->sl_nobreak) + try_compound = TRUE; + + /* If we could add a compound word, and it's also possible to + * split at this point, do the split first and set + * TSF_DIDSPLIT to avoid doing it again. */ + else if (!fword_ends + && try_compound + && (sp->ts_flags & TSF_DIDSPLIT) == 0) + { + try_compound = FALSE; + sp->ts_flags |= TSF_DIDSPLIT; + --sp->ts_curi; /* do the same NUL again */ + compflags[sp->ts_complen] = NUL; + } + else + sp->ts_flags &= ~TSF_DIDSPLIT; + if (try_split || try_compound) + { if (!try_compound && (!fword_ends || !goodword_ends)) { /* If we're going to split need to check that the @@ -9707,10 +10994,23 @@ suggest_try_change(su) compflags + sp->ts_compsplit)) break; newscore += SCORE_SPLIT; + + /* Give a bonus to words seen before. 
*/ + newscore = score_wordcount_adj(slang, newscore, + preword + sp->ts_prewordlen, TRUE); } - if (try_deeper(su, stack, depth, newscore)) + if (TRY_DEEPER(su, stack, depth, newscore)) { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + if (!try_compound && !fword_ends) + sprintf(changename[depth], "%.*s-%s: split", + sp->ts_twordlen, tword, fword + sp->ts_fidx); + else + sprintf(changename[depth], "%.*s-%s: compound", + sp->ts_twordlen, tword, fword + sp->ts_fidx); +#endif /* Save things to be restored at STATE_SPLITUNDO. */ sp->ts_save_badflags = su->su_badflags; sp->ts_state = STATE_SPLITUNDO; @@ -9730,10 +11030,11 @@ suggest_try_change(su) * non-word character with a space. Always skip a * character when the word ends. But only when the * good word can end. */ - if (((!try_compound - && !spell_iswordp_nmw(fword + sp->ts_fidx)) - || fword_ends) - && goodword_ends) + if (((!try_compound && !spell_iswordp_nmw(fword + + sp->ts_fidx)) + || fword_ends) + && fword[sp->ts_fidx] != NUL + && goodword_ends) { int l; @@ -9789,508 +11090,644 @@ suggest_try_change(su) } } } - break; + } + break; - case STATE_SPLITUNDO: - /* Undo the changes done for word split or compound word. */ - su->su_badflags = sp->ts_save_badflags; + case STATE_SPLITUNDO: + /* Undo the changes done for word split or compound word. */ + su->su_badflags = sp->ts_save_badflags; - /* Continue looking for NUL bytes. */ - sp->ts_state = STATE_START; + /* Continue looking for NUL bytes. */ + sp->ts_state = STATE_START; - /* In case we went into the prefix tree. */ - byts = fbyts; - idxs = fidxs; - break; + /* In case we went into the prefix tree. */ + byts = fbyts; + idxs = fidxs; + break; - case STATE_ENDNUL: - /* Past the NUL bytes in the node. */ - su->su_badflags = sp->ts_save_badflags; - if (fword[sp->ts_fidx] == NUL + case STATE_ENDNUL: + /* Past the NUL bytes in the node. 
*/ + su->su_badflags = sp->ts_save_badflags; + if (fword[sp->ts_fidx] == NUL #ifdef FEAT_MBYTE - && sp->ts_tcharlen == 0 + && sp->ts_tcharlen == 0 #endif - ) - { - /* The badword ends, can't use the bytes in this node. */ - sp->ts_state = STATE_DEL; - break; - } - sp->ts_state = STATE_PLAIN; - /*FALLTHROUGH*/ + ) + { + /* The badword ends, can't use STATE_PLAIN. */ + sp->ts_state = STATE_DEL; + break; + } + sp->ts_state = STATE_PLAIN; + /*FALLTHROUGH*/ - case STATE_PLAIN: - /* - * Go over all possible bytes at this node, add each to - * tword[] and use child node. "ts_curi" is the index. - */ - arridx = sp->ts_arridx; - if (sp->ts_curi > byts[arridx]) - { - /* Done all bytes at this node, do next state. When still - * at already changed bytes skip the other tricks. */ - if (sp->ts_fidx >= sp->ts_fidxtry) - sp->ts_state = STATE_DEL; - else - sp->ts_state = STATE_FINAL; - } + case STATE_PLAIN: + /* + * Go over all possible bytes at this node, add each to tword[] + * and use child node. "ts_curi" is the index. + */ + arridx = sp->ts_arridx; + if (sp->ts_curi > byts[arridx]) + { + /* Done all bytes at this node, do next state. When still at + * already changed bytes skip the other tricks. */ + if (sp->ts_fidx >= sp->ts_fidxtry) + sp->ts_state = STATE_DEL; else - { - arridx += sp->ts_curi++; - c = byts[arridx]; + sp->ts_state = STATE_FINAL; + } + else + { + arridx += sp->ts_curi++; + c = byts[arridx]; - /* Normal byte, go one level deeper. If it's not equal to - * the byte in the bad word adjust the score. But don't - * even try when the byte was already changed. */ - if (c == fword[sp->ts_fidx] + /* Normal byte, go one level deeper. If it's not equal to the + * byte in the bad word adjust the score. But don't even try + * when the byte was already changed. And don't try when we + * just deleted this byte, accepting it is always cheaper then + * delete + substitute. 
*/ + if (c == fword[sp->ts_fidx] #ifdef FEAT_MBYTE - || (sp->ts_tcharlen > 0 - && sp->ts_isdiff != DIFF_NONE) + || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) #endif - ) - newscore = 0; + ) + newscore = 0; + else + newscore = SCORE_SUBST; + if ((newscore == 0 + || (sp->ts_fidx >= sp->ts_fidxtry + && ((sp->ts_flags & TSF_DIDDEL) == 0 + || c != fword[sp->ts_delidx]))) + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + if (newscore > 0) + sprintf(changename[depth], "%.*s-%s: subst %c to %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx], c); else - newscore = SCORE_SUBST; - if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) - && try_deeper(su, stack, depth, newscore)) - { - ++depth; - sp = &stack[depth]; - ++sp->ts_fidx; - tword[sp->ts_twordlen++] = c; - sp->ts_arridx = idxs[arridx]; + sprintf(changename[depth], "%.*s-%s: accept %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx]); +#endif + ++depth; + sp = &stack[depth]; + ++sp->ts_fidx; + tword[sp->ts_twordlen++] = c; + sp->ts_arridx = idxs[arridx]; #ifdef FEAT_MBYTE - if (newscore == SCORE_SUBST) - sp->ts_isdiff = DIFF_YES; - if (has_mbyte) + if (newscore == SCORE_SUBST) + sp->ts_isdiff = DIFF_YES; + if (has_mbyte) + { + /* Multi-byte characters are a bit complicated to + * handle: They differ when any of the bytes differ + * and then their length may also differ. */ + if (sp->ts_tcharlen == 0) { - /* Multi-byte characters are a bit complicated to - * handle: They differ when any of the bytes - * differ and then their length may also differ. */ - if (sp->ts_tcharlen == 0) - { - /* First byte. */ - sp->ts_tcharidx = 0; - sp->ts_tcharlen = MB_BYTE2LEN(c); - sp->ts_fcharstart = sp->ts_fidx - 1; - sp->ts_isdiff = (newscore != 0) + /* First byte. */ + sp->ts_tcharidx = 0; + sp->ts_tcharlen = MB_BYTE2LEN(c); + sp->ts_fcharstart = sp->ts_fidx - 1; + sp->ts_isdiff = (newscore != 0) ? 
DIFF_YES : DIFF_NONE; - } - else if (sp->ts_isdiff == DIFF_INSERT) - /* When inserting trail bytes don't advance in - * the bad word. */ - --sp->ts_fidx; - if (++sp->ts_tcharidx == sp->ts_tcharlen) + } + else if (sp->ts_isdiff == DIFF_INSERT) + /* When inserting trail bytes don't advance in the + * bad word. */ + --sp->ts_fidx; + if (++sp->ts_tcharidx == sp->ts_tcharlen) + { + /* Last byte of character. */ + if (sp->ts_isdiff == DIFF_YES) { - /* Last byte of character. */ - if (sp->ts_isdiff == DIFF_YES) - { - /* Correct ts_fidx for the byte length of - * the character (we didn't check that - * before). */ - sp->ts_fidx = sp->ts_fcharstart - + MB_BYTE2LEN( + /* Correct ts_fidx for the byte length of the + * character (we didn't check that before). */ + sp->ts_fidx = sp->ts_fcharstart + + MB_BYTE2LEN( fword[sp->ts_fcharstart]); - /* For changing a composing character - * adjust the score from SCORE_SUBST to - * SCORE_SUBCOMP. */ - if (enc_utf8 - && utf_iscomposing( - mb_ptr2char(tword - + sp->ts_twordlen + /* For changing a composing character adjust + * the score from SCORE_SUBST to + * SCORE_SUBCOMP. */ + if (enc_utf8 + && utf_iscomposing( + mb_ptr2char(tword + + sp->ts_twordlen - sp->ts_tcharlen)) - && utf_iscomposing( - mb_ptr2char(fword + && utf_iscomposing( + mb_ptr2char(fword + sp->ts_fcharstart))) - sp->ts_score -= + sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP; - /* For a similar character adjust score - * from SCORE_SUBST to SCORE_SIMILAR. */ - else if (slang->sl_has_map - && similar_chars(slang, - mb_ptr2char(tword - + sp->ts_twordlen + /* For a similar character adjust score from + * SCORE_SUBST to SCORE_SIMILAR. 
*/ + else if (!soundfold + && slang->sl_has_map + && similar_chars(slang, + mb_ptr2char(tword + + sp->ts_twordlen - sp->ts_tcharlen), - mb_ptr2char(fword + mb_ptr2char(fword + sp->ts_fcharstart))) - sp->ts_score -= + sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; + } + else if (sp->ts_isdiff == DIFF_INSERT + && sp->ts_twordlen > sp->ts_tcharlen) + { + p = tword + sp->ts_twordlen - sp->ts_tcharlen; + c = mb_ptr2char(p); + if (enc_utf8 && utf_iscomposing(c)) + { + /* Inserting a composing char doesn't + * count that much. */ + sp->ts_score -= SCORE_INS - SCORE_INSCOMP; } - else if (sp->ts_isdiff == DIFF_INSERT - && sp->ts_twordlen > sp->ts_tcharlen) + else { - p = tword + sp->ts_twordlen - - sp->ts_tcharlen; - c = mb_ptr2char(p); - if (enc_utf8 && utf_iscomposing(c)) - { - /* Inserting a composing char doesn't - * count that much. */ + /* If the previous character was the same, + * thus doubling a character, give a bonus + * to the score. Also for the soundfold + * tree (might seem illogical but does + * give better scores). */ + mb_ptr_back(tword, p); + if (c == mb_ptr2char(p)) sp->ts_score -= SCORE_INS - - SCORE_INSCOMP; - } - else - { - /* If the previous character was the - * same, thus doubling a character, - * give a bonus to the score. */ - mb_ptr_back(tword, p); - if (c == mb_ptr2char(p)) - sp->ts_score -= SCORE_INS - SCORE_INSDUP; - } } - - /* Starting a new char, reset the length. */ - sp->ts_tcharlen = 0; } + + /* Starting a new char, reset the length. */ + sp->ts_tcharlen = 0; } - else + } + else #endif - { - /* If we found a similar char adjust the score. - * We do this after calling try_deeper() because - * it's slow. */ - if (newscore != 0 - && slang->sl_has_map - && similar_chars(slang, + { + /* If we found a similar char adjust the score. + * We do this after calling go_deeper() because + * it's slow. 
*/ + if (newscore != 0 + && !soundfold + && slang->sl_has_map + && similar_chars(slang, c, fword[sp->ts_fidx - 1])) - sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; - } + sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; } } - break; + } + break; - case STATE_DEL: + case STATE_DEL: #ifdef FEAT_MBYTE - /* When past the first byte of a multi-byte char don't try - * delete/insert/swap a character. */ - if (has_mbyte && sp->ts_tcharlen > 0) - { - sp->ts_state = STATE_FINAL; - break; - } + /* When past the first byte of a multi-byte char don't try + * delete/insert/swap a character. */ + if (has_mbyte && sp->ts_tcharlen > 0) + { + sp->ts_state = STATE_FINAL; + break; + } #endif - /* - * Try skipping one character in the bad word (delete it). - */ - sp->ts_state = STATE_INS; - sp->ts_curi = 1; - if (fword[sp->ts_fidx] != NUL - && try_deeper(su, stack, depth, SCORE_DEL)) - { - ++depth; - - /* Advance over the character in fword[]. Give a bonus to - * the score if the same character is following "nn" -> - * "n". */ -#ifdef FEAT_MBYTE - if (has_mbyte) - { - c = mb_ptr2char(fword + sp->ts_fidx); - stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); - if (enc_utf8 && utf_iscomposing(c)) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; - else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; - } - else + /* + * Try skipping one character in the bad word (delete it). + */ + sp->ts_state = STATE_INS_PREP; + sp->ts_curi = 1; + if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') + /* Deleting a vowel at the start of a word counts less, see + * soundalike_score(). 
*/ + newscore = 2 * SCORE_DEL / 3; + else + newscore = SCORE_DEL; + if (fword[sp->ts_fidx] != NUL + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: delete %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx]); #endif - { - ++stack[depth].ts_fidx; - if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; - } - break; - } - /*FALLTHROUGH*/ + ++depth; - case STATE_INS: - /* Insert one byte. Do this for each possible byte at this - * node. */ - n = sp->ts_arridx; - if (sp->ts_curi > byts[n]) + /* Remember what character we deleted, so that we can avoid + * inserting it again. */ + stack[depth].ts_flags |= TSF_DIDDEL; + stack[depth].ts_delidx = sp->ts_fidx; + + /* Advance over the character in fword[]. Give a bonus to the + * score if the same character is following "nn" -> "n". It's + * a bit illogical for soundfold tree but it does give better + * results. */ +#ifdef FEAT_MBYTE + if (has_mbyte) { - /* Done all bytes at this node, do next state. */ - sp->ts_state = STATE_SWAP; + c = mb_ptr2char(fword + sp->ts_fidx); + stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); + if (enc_utf8 && utf_iscomposing(c)) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; + else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; } else - { - /* Do one more byte at this node. Skip NUL bytes. */ - n += sp->ts_curi++; - c = byts[n]; - if (c != 0 && try_deeper(su, stack, depth, SCORE_INS)) - { - ++depth; - sp = &stack[depth]; - tword[sp->ts_twordlen++] = c; - sp->ts_arridx = idxs[n]; -#ifdef FEAT_MBYTE - if (has_mbyte) - { - fl = MB_BYTE2LEN(c); - if (fl > 1) - { - /* There are following bytes for the same - * character. We must find all bytes before - * trying delete/insert/swap/etc. 
*/ - sp->ts_tcharlen = fl; - sp->ts_tcharidx = 1; - sp->ts_isdiff = DIFF_INSERT; - } - } - else - fl = 1; - if (fl == 1) #endif - { - /* If the previous character was the same, thus - * doubling a character, give a bonus to the - * score. */ - if (sp->ts_twordlen >= 2 - && tword[sp->ts_twordlen - 2] == c) - sp->ts_score -= SCORE_INS - SCORE_INSDUP; - } - } + { + ++stack[depth].ts_fidx; + if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; } break; + } + /*FALLTHROUGH*/ - case STATE_SWAP: - /* - * Swap two bytes in the bad word: "12" -> "21". - * We change "fword" here, it's changed back afterwards. - */ - p = fword + sp->ts_fidx; - c = *p; - if (c == NUL) + case STATE_INS_PREP: + if (sp->ts_flags & TSF_DIDDEL) + { + /* If we just deleted a byte then inserting won't make sense, + * a substitute is always cheaper. */ + sp->ts_state = STATE_SWAP; + break; + } + + /* skip over NUL bytes */ + n = sp->ts_arridx; + for (;;) + { + if (sp->ts_curi > byts[n]) { - /* End of word, can't swap or replace. */ - sp->ts_state = STATE_FINAL; + /* Only NUL bytes at this node, go to next state. */ + sp->ts_state = STATE_SWAP; break; } - - /* Don't swap if the first character is not a word character. - * SWAP3 etc. also don't make sense then. */ - if (!spell_iswordp(p, curbuf)) + if (byts[n + sp->ts_curi] != NUL) { - sp->ts_state = STATE_REP_INI; + /* Found a byte to insert. */ + sp->ts_state = STATE_INS; break; } + ++sp->ts_curi; + } + break; + + /*FALLTHROUGH*/ + + case STATE_INS: + /* Insert one byte. Repeat this for each possible byte at this + * node. */ + n = sp->ts_arridx; + if (sp->ts_curi > byts[n]) + { + /* Done all bytes at this node, go to next state. */ + sp->ts_state = STATE_SWAP; + break; + } + /* Do one more byte at this node, but: + * - Skip NUL bytes. + * - Skip the byte if it's equal to the byte in the word, + * accepting that byte is always better. 
+ */ + n += sp->ts_curi++; + c = byts[n]; + if (soundfold && sp->ts_twordlen == 0 && c == '*') + /* Inserting a vowel at the start of a word counts less, + * see soundalike_score(). */ + newscore = 2 * SCORE_INS / 3; + else + newscore = SCORE_INS; + if (c != fword[sp->ts_fidx] + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: insert %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c); +#endif + ++depth; + sp = &stack[depth]; + tword[sp->ts_twordlen++] = c; + sp->ts_arridx = idxs[n]; #ifdef FEAT_MBYTE if (has_mbyte) { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - if (!spell_iswordp(p + n, curbuf)) - c2 = c; /* don't swap non-word char */ - else - c2 = mb_ptr2char(p + n); + fl = MB_BYTE2LEN(c); + if (fl > 1) + { + /* There are following bytes for the same character. + * We must find all bytes before trying + * delete/insert/swap/etc. */ + sp->ts_tcharlen = fl; + sp->ts_tcharidx = 1; + sp->ts_isdiff = DIFF_INSERT; + } } else + fl = 1; + if (fl == 1) #endif { - if (!spell_iswordp(p + 1, curbuf)) - c2 = c; /* don't swap non-word char */ - else - c2 = p[1]; + /* If the previous character was the same, thus doubling a + * character, give a bonus to the score. Also for + * soundfold words (illogical but does give a better + * score). */ + if (sp->ts_twordlen >= 2 + && tword[sp->ts_twordlen - 2] == c) + sp->ts_score -= SCORE_INS - SCORE_INSDUP; } + } + break; + + case STATE_SWAP: + /* + * Swap two bytes in the bad word: "12" -> "21". + * We change "fword" here, it's changed back afterwards at + * STATE_UNSWAP. + */ + p = fword + sp->ts_fidx; + c = *p; + if (c == NUL) + { + /* End of word, can't swap or replace. */ + sp->ts_state = STATE_FINAL; + break; + } + + /* Don't swap if the first character is not a word character. + * SWAP3 etc. also don't make sense then. 
*/ + if (!soundfold && !spell_iswordp(p, curbuf)) + { + sp->ts_state = STATE_REP_INI; + break; + } - /* When characters are identical, swap won't do anything. - * Also get here if the second char is not a word character. */ - if (c == c2) - { - sp->ts_state = STATE_SWAP3; - break; - } - if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP)) - { - sp->ts_state = STATE_UNSWAP; - ++depth; #ifdef FEAT_MBYTE - if (has_mbyte) - { - fl = mb_char2len(c2); - mch_memmove(p, p + n, fl); - mb_char2bytes(c, p + fl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; - } - else + if (has_mbyte) + { + n = mb_cptr2len(p); + c = mb_ptr2char(p); + if (!soundfold && !spell_iswordp(p + n, curbuf)) + c2 = c; /* don't swap non-word char */ + else + c2 = mb_ptr2char(p + n); + } + else #endif - { - p[0] = c2; - p[1] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 2; - } - } + { + if (!soundfold && !spell_iswordp(p + 1, curbuf)) + c2 = c; /* don't swap non-word char */ else - /* If this swap doesn't work then SWAP3 won't either. */ - sp->ts_state = STATE_REP_INI; - break; + c2 = p[1]; + } - case STATE_UNSWAP: - /* Undo the STATE_SWAP swap: "21" -> "12". */ - p = fword + sp->ts_fidx; + /* When characters are identical, swap won't do anything. + * Also get here if the second char is not a word character. 
*/ + if (c == c2) + { + sp->ts_state = STATE_SWAP3; + break; + } + if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) + { + go_deeper(stack, depth, SCORE_SWAP); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: swap %c and %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c, c2); +#endif + sp->ts_state = STATE_UNSWAP; + ++depth; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - c = mb_ptr2char(p + n); - mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); - mb_char2bytes(c, p); + fl = mb_char2len(c2); + mch_memmove(p, p + n, fl); + mb_char2bytes(c, p + fl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; } else #endif { - c = *p; - *p = p[1]; + p[0] = c2; p[1] = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 2; } - /*FALLTHROUGH*/ + } + else + /* If this swap doesn't work then SWAP3 won't either. */ + sp->ts_state = STATE_REP_INI; + break; - case STATE_SWAP3: - /* Swap two bytes, skipping one: "123" -> "321". We change - * "fword" here, it's changed back afterwards. */ - p = fword + sp->ts_fidx; + case STATE_UNSWAP: + /* Undo the STATE_SWAP swap: "21" -> "12". */ + p = fword + sp->ts_fidx; +#ifdef FEAT_MBYTE + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + c = mb_ptr2char(p + n); + mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); + mb_char2bytes(c, p); + } + else +#endif + { + c = *p; + *p = p[1]; + p[1] = c; + } + /*FALLTHROUGH*/ + + case STATE_SWAP3: + /* Swap two bytes, skipping one: "123" -> "321". We change + * "fword" here, it's changed back afterwards at STATE_UNSWAP3. 
*/ + p = fword + sp->ts_fidx; +#ifdef FEAT_MBYTE + if (has_mbyte) + { + n = mb_cptr2len(p); + c = mb_ptr2char(p); + fl = mb_cptr2len(p + n); + c2 = mb_ptr2char(p + n); + if (!soundfold && !spell_iswordp(p + n + fl, curbuf)) + c3 = c; /* don't swap non-word char */ + else + c3 = mb_ptr2char(p + n + fl); + } + else +#endif + { + c = *p; + c2 = p[1]; + if (!soundfold && !spell_iswordp(p + 2, curbuf)) + c3 = c; /* don't swap non-word char */ + else + c3 = p[2]; + } + + /* When characters are identical: "121" then SWAP3 result is + * identical, ROT3L result is same as SWAP: "211", ROT3L result is + * same as SWAP on next char: "112". Thus skip all swapping. + * Also skip when c3 is NUL. + * Also get here when the third character is not a word character. + * Second character may any char: "a.b" -> "b.a" */ + if (c == c3 || c3 == NUL) + { + sp->ts_state = STATE_REP_INI; + break; + } + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c, c3); +#endif + sp->ts_state = STATE_UNSWAP3; + ++depth; #ifdef FEAT_MBYTE if (has_mbyte) { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); - c2 = mb_ptr2char(p + n); - if (!spell_iswordp(p + n + fl, curbuf)) - c3 = c; /* don't swap non-word char */ - else - c3 = mb_ptr2char(p + n + fl); + tl = mb_char2len(c3); + mch_memmove(p, p + n + fl, tl); + mb_char2bytes(c2, p + tl); + mb_char2bytes(c, p + fl + tl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; } else #endif { - c = *p; - c2 = p[1]; - if (!spell_iswordp(p + 2, curbuf)) - c3 = c; /* don't swap non-word char */ - else - c3 = p[2]; + p[0] = p[2]; + p[2] = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* When characters are identical: "121" then SWAP3 result is - * identical, ROT3L result is same as SWAP: "211", ROT3L - * result is 
same as SWAP on next char: "112". Thus skip all - * swapping. Also skip when c3 is NUL. - * Also get here when the third character is not a word - * character. Second character may any char: "a.b" -> "b.a" */ - if (c == c3 || c3 == NUL) - { - sp->ts_state = STATE_REP_INI; - break; - } - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNSWAP3; - ++depth; + case STATE_UNSWAP3: + /* Undo STATE_SWAP3: "321" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - tl = mb_char2len(c3); - mch_memmove(p, p + n + fl, tl); - mb_char2bytes(c2, p + tl); - mb_char2bytes(c, p + fl + tl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; - } - else + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + c2 = mb_ptr2char(p + n); + fl = MB_BYTE2LEN(p[n]); + c = mb_ptr2char(p + n + fl); + tl = MB_BYTE2LEN(p[n + fl]); + mch_memmove(p + fl + tl, p, n); + mb_char2bytes(c, p); + mb_char2bytes(c2, p + tl); + p = p + tl; + } + else #endif - { - p[0] = p[2]; - p[2] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; + { + c = *p; + *p = p[2]; + p[2] = c; + ++p; + } + + if (!soundfold && !spell_iswordp(p, curbuf)) + { + /* Middle char is not a word char, skip the rotate. First and + * third char were already checked at swap and swap3. */ + sp->ts_state = STATE_REP_INI; break; + } - case STATE_UNSWAP3: - /* Undo STATE_SWAP3: "321" -> "123" */ + /* Rotate three characters left: "123" -> "231". We change + * "fword" here, it's changed back afterwards at STATE_UNROT3L. 
*/ + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + p = fword + sp->ts_fidx; + sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + p[0], p[1], p[2]); +#endif + sp->ts_state = STATE_UNROT3L; + ++depth; p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - c2 = mb_ptr2char(p + n); - fl = MB_BYTE2LEN(p[n]); - c = mb_ptr2char(p + n + fl); - tl = MB_BYTE2LEN(p[n + fl]); - mch_memmove(p + fl + tl, p, n); - mb_char2bytes(c, p); - mb_char2bytes(c2, p + tl); - p = p + tl; + n = mb_cptr2len(p); + c = mb_ptr2char(p); + fl = mb_cptr2len(p + n); + fl += mb_cptr2len(p + n + fl); + mch_memmove(p, p + n, fl); + mb_char2bytes(c, p + fl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; } else #endif { c = *p; - *p = p[2]; + *p = p[1]; + p[1] = p[2]; p[2] = c; - ++p; - } - - if (!spell_iswordp(p, curbuf)) - { - /* Middle char is not a word char, skip the rotate. - * First and third char were already checked at swap - * and swap3. */ - sp->ts_state = STATE_REP_INI; - break; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* Rotate three characters left: "123" -> "231". We change - * "fword" here, it's changed back afterwards. 
*/ - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNROT3L; - ++depth; - p = fword + sp->ts_fidx; + case STATE_UNROT3L: + /* Undo ROT3L: "231" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); - fl += mb_cptr2len(p + n + fl); - mch_memmove(p, p + n, fl); - mb_char2bytes(c, p + fl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; - } - else + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + n += MB_BYTE2LEN(p[n]); + c = mb_ptr2char(p + n); + tl = MB_BYTE2LEN(p[n]); + mch_memmove(p + tl, p, n); + mb_char2bytes(c, p); + } + else #endif - { - c = *p; - *p = p[1]; - p[1] = p[2]; - p[2] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; - break; + { + c = p[2]; + p[2] = p[1]; + p[1] = *p; + *p = c; + } - case STATE_UNROT3L: - /* Undo ROT3L: "231" -> "123" */ + /* Rotate three bytes right: "123" -> "312". We change "fword" + * here, it's changed back afterwards at STATE_UNROT3R. */ + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + p = fword + sp->ts_fidx; + sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + p[0], p[1], p[2]); +#endif + sp->ts_state = STATE_UNROT3R; + ++depth; p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - n += MB_BYTE2LEN(p[n]); + n = mb_cptr2len(p); + n += mb_cptr2len(p + n); c = mb_ptr2char(p + n); - tl = MB_BYTE2LEN(p[n]); + tl = mb_cptr2len(p + n); mch_memmove(p + tl, p, n); mb_char2bytes(c, p); + stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; } else #endif @@ -10299,193 +11736,176 @@ suggest_try_change(su) p[2] = p[1]; p[1] = *p; *p = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* Rotate three bytes right: "123" -> "312". 
We change - * "fword" here, it's changed back afterwards. */ - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNROT3R; - ++depth; - p = fword + sp->ts_fidx; + case STATE_UNROT3R: + /* Undo ROT3R: "312" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - n = mb_cptr2len(p); - n += mb_cptr2len(p + n); - c = mb_ptr2char(p + n); - tl = mb_cptr2len(p + n); - mch_memmove(p + tl, p, n); - mb_char2bytes(c, p); - stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; - } - else + if (has_mbyte) + { + c = mb_ptr2char(p); + tl = MB_BYTE2LEN(*p); + n = MB_BYTE2LEN(p[tl]); + n += MB_BYTE2LEN(p[tl + n]); + mch_memmove(p, p + tl, n); + mb_char2bytes(c, p + n); + } + else #endif - { - c = p[2]; - p[2] = p[1]; - p[1] = *p; - *p = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; + { + c = *p; + *p = p[1]; + p[1] = p[2]; + p[2] = c; + } + /*FALLTHROUGH*/ + + case STATE_REP_INI: + /* Check if matching with REP items from the .aff file would work. + * Quickly skip if: + * - there are no REP items and we are not in the soundfold trie + * - the score is going to be too high anyway + * - already applied a REP item or swapped here */ + if ((lp->lp_replang == NULL && !soundfold) + || sp->ts_score + SCORE_REP >= su->su_maxscore + || sp->ts_fidx < sp->ts_fidxtry) + { + sp->ts_state = STATE_FINAL; break; + } - case STATE_UNROT3R: - /* Undo ROT3R: "312" -> "123" */ - p = fword + sp->ts_fidx; -#ifdef FEAT_MBYTE - if (has_mbyte) - { - c = mb_ptr2char(p); - tl = MB_BYTE2LEN(*p); - n = MB_BYTE2LEN(p[tl]); - n += MB_BYTE2LEN(p[tl + n]); - mch_memmove(p, p + tl, n); - mb_char2bytes(c, p + n); - } - else -#endif - { - c = *p; - *p = p[1]; - p[1] = p[2]; - p[2] = c; - } - /*FALLTHROUGH*/ - - case STATE_REP_INI: - /* Check if matching with REP items from the .aff file would - * work. 
Quickly skip if: - * - there are no REP items - * - the score is going to be too high anyway - * - already applied a REP item or swapped here */ - if (lp->lp_replang == NULL - || sp->ts_score + SCORE_REP >= su->su_maxscore - || sp->ts_fidx < sp->ts_fidxtry) - { - sp->ts_state = STATE_FINAL; - break; - } - gap = &lp->lp_replang->sl_rep; - - /* Use the first byte to quickly find the first entry that - * may match. If the index is -1 there is none. */ + /* Use the first byte to quickly find the first entry that may + * match. If the index is -1 there is none. */ + if (soundfold) + sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; + else sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; - if (sp->ts_curi < 0) - { - sp->ts_state = STATE_FINAL; - break; - } - sp->ts_state = STATE_REP; - /*FALLTHROUGH*/ + if (sp->ts_curi < 0) + { + sp->ts_state = STATE_FINAL; + break; + } - case STATE_REP: - /* Try matching with REP items from the .aff file. For each - * match replace the characters and check if the resulting - * word is valid. */ - p = fword + sp->ts_fidx; + sp->ts_state = STATE_REP; + /*FALLTHROUGH*/ + + case STATE_REP: + /* Try matching with REP items from the .aff file. For each match + * replace the characters and check if the resulting word is + * valid. 
*/ + p = fword + sp->ts_fidx; + if (soundfold) + gap = &slang->sl_repsal; + else gap = &lp->lp_replang->sl_rep; - while (sp->ts_curi < gap->ga_len) + while (sp->ts_curi < gap->ga_len) + { + ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; + if (*ftp->ft_from != *p) + { + /* past possible matching entries */ + sp->ts_curi = gap->ga_len; + break; + } + if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 + && TRY_DEEPER(su, stack, depth, SCORE_REP)) { - ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; - if (*ftp->ft_from != *p) + go_deeper(stack, depth, SCORE_REP); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: replace %s with %s", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + ftp->ft_from, ftp->ft_to); +#endif + /* Need to undo this afterwards. */ + sp->ts_state = STATE_REP_UNDO; + + /* Change the "from" to the "to" string. */ + ++depth; + fl = STRLEN(ftp->ft_from); + tl = STRLEN(ftp->ft_to); + if (fl != tl) { - /* past possible matching entries */ - sp->ts_curi = gap->ga_len; - break; + mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); + repextra += tl - fl; } - if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 - && try_deeper(su, stack, depth, SCORE_REP)) - { - /* Need to undo this afterwards. */ - sp->ts_state = STATE_REP_UNDO; - - /* Change the "from" to the "to" string. */ - ++depth; - fl = STRLEN(ftp->ft_from); - tl = STRLEN(ftp->ft_to); - if (fl != tl) - { - mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); - repextra += tl - fl; - } - mch_memmove(p, ftp->ft_to, tl); - stack[depth].ts_fidxtry = sp->ts_fidx + tl; + mch_memmove(p, ftp->ft_to, tl); + stack[depth].ts_fidxtry = sp->ts_fidx + tl; #ifdef FEAT_MBYTE - stack[depth].ts_tcharlen = 0; + stack[depth].ts_tcharlen = 0; #endif - break; - } + break; } + } - if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) - /* No (more) matches. */ - sp->ts_state = STATE_FINAL; + if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) + /* No (more) matches. 
*/ + sp->ts_state = STATE_FINAL; - break; + break; - case STATE_REP_UNDO: - /* Undo a REP replacement and continue with the next one. */ - ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data - + sp->ts_curi - 1; - fl = STRLEN(ftp->ft_from); - tl = STRLEN(ftp->ft_to); - p = fword + sp->ts_fidx; - if (fl != tl) - { - mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); - repextra -= tl - fl; - } - mch_memmove(p, ftp->ft_from, fl); - sp->ts_state = STATE_REP; - break; + case STATE_REP_UNDO: + /* Undo a REP replacement and continue with the next one. */ + if (soundfold) + gap = &slang->sl_repsal; + else + gap = &lp->lp_replang->sl_rep; + ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; + fl = STRLEN(ftp->ft_from); + tl = STRLEN(ftp->ft_to); + p = fword + sp->ts_fidx; + if (fl != tl) + { + mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); + repextra -= tl - fl; + } + mch_memmove(p, ftp->ft_from, fl); + sp->ts_state = STATE_REP; + break; - default: - /* Did all possible states at this level, go up one level. */ - --depth; + default: + /* Did all possible states at this level, go up one level. */ + --depth; - if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) - { - /* Continue in or go back to the prefix tree. */ - byts = pbyts; - idxs = pidxs; - } + if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) + { + /* Continue in or go back to the prefix tree. */ + byts = pbyts; + idxs = pidxs; + } - /* Don't check for CTRL-C too often, it takes time. */ - line_breakcheck(); + /* Don't check for CTRL-C too often, it takes time. */ + if (--breakcheckcount == 0) + { + ui_breakcheck(); + breakcheckcount = 1000; } } } } + /* - * Try going one level deeper in the tree. + * Go one level deeper in the tree. */ - static int -try_deeper(su, stack, depth, score_add) - suginfo_T *su; + static void +go_deeper(stack, depth, score_add) trystate_T *stack; int depth; int score_add; { - int newscore; - - /* Refuse to go deeper if the scrore is getting too big. 
*/ - newscore = stack[depth].ts_score + score_add; - if (newscore >= su->su_maxscore) - return FALSE; - stack[depth + 1] = stack[depth]; stack[depth + 1].ts_state = STATE_START; - stack[depth + 1].ts_score = newscore; + stack[depth + 1].ts_score = stack[depth].ts_score + score_add; stack[depth + 1].ts_curi = 1; /* start just after length byte */ stack[depth + 1].ts_flags = 0; - return TRUE; } #ifdef FEAT_MBYTE @@ -10713,6 +12133,7 @@ score_comp_sal(su) sstp->st_word = vim_strsave(stp->st_word); if (sstp->st_word != NULL) { + sstp->st_wordlen = stp->st_wordlen; sstp->st_score = score; sstp->st_altscore = 0; sstp->st_orglen = stp->st_orglen; @@ -10743,6 +12164,7 @@ score_combine(su) char_u badsound[MAXWLEN]; int round; int lpi; + slang_T *slang = NULL; /* Add the alternate score to su_ga. */ for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) @@ -10751,13 +12173,13 @@ score_combine(su) if (lp->lp_slang->sl_sal.ga_len > 0) { /* soundfold the bad word */ - spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); + slang = lp->lp_slang; + spell_soundfold(slang, su->su_fbadword, TRUE, badsound); for (i = 0; i < su->su_ga.ga_len; ++i) { stp = &SUG(su->su_ga, i); - stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang, - badsound); + stp->st_altscore = stp_sal_score(stp, su, slang, badsound); if (stp->st_altscore == SCORE_MAXMAX) stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; else @@ -10769,11 +12191,15 @@ score_combine(su) } } + if (slang == NULL) /* just in case */ + return; + /* Add the alternate score to su_sga. 
*/ for (i = 0; i < su->su_sga.ga_len; ++i) { stp = &SUG(su->su_sga, i); - stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word); + stp->st_altscore = spell_edit_score(slang, + su->su_badword, stp->st_word); if (stp->st_score == SCORE_MAXMAX) stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; else @@ -10781,8 +12207,11 @@ score_combine(su) stp->st_salscore = TRUE; } - /* Sort the suggestions and truncate at "maxcount" for both lists. */ + /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" + * for both lists. */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); + check_suggestions(su, &su->su_sga); (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); ga_init2(&ga, (int)sizeof(suginfo_T), 1); @@ -10872,7 +12301,8 @@ stp_sal_score(stp, su, slang, badsound) /* Add part of the bad word to the good word, so that we soundfold * what replaces the bad word. */ STRCPY(goodword, stp->st_word); - STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff); + vim_strncpy(goodword + stp->st_wordlen, + su->su_badptr + su->su_badlen - lendiff, lendiff); pgood = goodword; } else @@ -10884,6 +12314,40 @@ stp_sal_score(stp, su, slang, badsound) return soundalike_score(goodsound, pbad); } +/* structure used to store soundfolded words that add_sound_suggest() has + * handled already. */ +typedef struct +{ + short sft_score; /* lowest score used */ + char_u sft_word[1]; /* soundfolded word, actually longer */ +} sftword_T; + +static sftword_T dumsft; +#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) +#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) + +/* + * Prepare for calling suggest_try_soundalike(). + */ + static void +suggest_try_soundalike_prep() +{ + langp_T *lp; + int lpi; + slang_T *slang; + + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. 
*/ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) + /* prepare the hashtable used by add_sound_suggest() */ + hash_init(&slang->sl_sounddone); + } +} + /* * Find suggestions by comparing the word in a sound-a-like form. * Note: This doesn't support postponed prefixes. @@ -10893,161 +12357,340 @@ suggest_try_soundalike(su) suginfo_T *su; { char_u salword[MAXWLEN]; - char_u tword[MAXWLEN]; - char_u tsalword[MAXWLEN]; - idx_T arridx[MAXWLEN]; - int curi[MAXWLEN]; langp_T *lp; - char_u *byts; - idx_T *idxs; - int depth; - int c; - idx_T n; - int round; - int flags; - int sound_score; - int local_score; int lpi; slang_T *slang; - /* Do this for all languages that support sound folding. */ + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. */ for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) { lp = LANGP_ENTRY(curbuf->b_langp, lpi); slang = lp->lp_slang; - if (slang->sl_sal.ga_len > 0) + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) { /* soundfold the bad word */ spell_soundfold(slang, su->su_fbadword, TRUE, salword); - /* - * Go through the whole tree, soundfold each word and compare. - * round 1: use the case-folded tree. - * round 2: use the keep-case tree. - */ - for (round = 1; round <= 2; ++round) + /* try all kinds of inserts/deletes/swaps/etc. */ + /* TODO: also soundfold the next words, so that we can try joining + * and splitting */ + suggest_trie_walk(su, lp, salword, TRUE); + } + } +} + +/* + * Finish up after calling suggest_try_soundalike(). + */ + static void +suggest_try_soundalike_finish() +{ + langp_T *lp; + int lpi; + slang_T *slang; + int todo; + hashitem_T *hi; + + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. 
*/ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) + { + /* Free the info about handled words. */ + todo = slang->sl_sounddone.ht_used; + for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) + if (!HASHITEM_EMPTY(hi)) + { + vim_free(HI2SFT(hi)); + --todo; + } + hash_clear(&slang->sl_sounddone); + } + } +} + +/* + * A match with a soundfolded word is found. Add the good word(s) that + * produce this soundfolded word. + */ + static void +add_sound_suggest(su, goodword, score, lp) + suginfo_T *su; + char_u *goodword; + int score; /* soundfold score */ + langp_T *lp; +{ + slang_T *slang = lp->lp_slang; /* language for sound folding */ + int sfwordnr; + char_u *nrline; + int orgnr; + char_u theword[MAXWLEN]; + int i; + int wlen; + char_u *byts; + idx_T *idxs; + int n; + int wordcount; + int wc; + int goodscore; + hash_T hash; + hashitem_T *hi; + sftword_T *sft; + int bc, gc; + int limit; + + /* + * It's very well possible that the same soundfold word is found several + * times with different scores. Since the following is quite slow only do + * the words that have a better score than before. Use a hashtable to + * remember the words that have been done. + */ + hash = hash_hash(goodword); + hi = hash_lookup(&slang->sl_sounddone, goodword, hash); + if (HASHITEM_EMPTY(hi)) + { + sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword)); + if (sft != NULL) + { + sft->sft_score = score; + STRCPY(sft->sft_word, goodword); + hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); + } + } + else + { + sft = HI2SFT(hi); + if (score >= sft->sft_score) + return; + sft->sft_score = score; + } + + /* + * Find the word nr in the soundfold tree. 
+ */ + sfwordnr = soundfold_find(slang, goodword); + if (sfwordnr < 0) + { + EMSG2(_(e_intern2), "add_sound_suggest()"); + return; + } + + /* + * go over the list of good words that produce this soundfold word + */ + nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); + orgnr = 0; + while (*nrline != NUL) + { + /* The wordnr was stored in a minimal nr of bytes as an offset to the + * previous wordnr. */ + orgnr += bytes2offset(&nrline); + + byts = slang->sl_fbyts; + idxs = slang->sl_fidxs; + + /* Lookup the word "orgnr" one of the two tries. */ + n = 0; + wlen = 0; + wordcount = 0; + for (;;) + { + i = 1; + if (wordcount == orgnr && byts[n + 1] == NUL) + break; /* found end of word */ + + if (byts[n + 1] == NUL) + ++wordcount; + + /* skip over the NUL bytes */ + for ( ; byts[n + i] == NUL; ++i) + if (i > byts[n]) /* safety check */ + { + STRCPY(theword + wlen, "BAD"); + goto badword; + } + + /* One of the siblings must have the word. */ + for ( ; i < byts[n]; ++i) + { + wc = idxs[idxs[n + i]]; /* nr of words under this byte */ + if (wordcount + wc > orgnr) + break; + wordcount += wc; + } + + theword[wlen++] = byts[n + i]; + n = idxs[n + i]; + } +badword: + theword[wlen] = NUL; + + /* Go over the possible flags and regions. */ + for (; i <= byts[n] && byts[n + i] == NUL; ++i) + { + char_u cword[MAXWLEN]; + char_u *p; + int flags = (int)idxs[n + i]; + + if (flags & WF_KEEPCAP) { - if (round == 1) + /* Must find the word in the keep-case tree. */ + find_keepcap_word(slang, theword, cword); + p = cword; + } + else + { + flags |= su->su_badflags; + if ((flags & WF_CAPMASK) != 0) { - byts = slang->sl_fbyts; - idxs = slang->sl_fidxs; + /* Need to fix case according to "flags". */ + make_case_word(theword, cword, flags); + p = cword; } else + p = theword; + } + + /* Add the suggestion. */ + if (sps_flags & SPS_DOUBLE) + { + /* Add the suggestion if the score isn't too bad. 
*/ + if (score <= su->su_maxscore) + add_suggestion(su, &su->su_sga, p, su->su_badlen, + score, 0, FALSE, slang, FALSE); + } + else + { + /* Add a penalty for words in another region. */ + if ((flags & WF_REGION) + && (((unsigned)flags >> 16) & lp->lp_region) == 0) + goodscore = SCORE_REGION; + else + goodscore = 0; + + /* Add a small penalty for changing the first letter from + * lower to upper case. Helps for "tath" -> "Kath", which is + * less common thatn "tath" -> "path". Don't do it when the + * letter is the same, that has already been counted. */ + gc = PTR2CHAR(p); + if (SPELL_ISUPPER(gc)) { - byts = slang->sl_kbyts; - idxs = slang->sl_kidxs; - if (byts == NULL) /* no keep-case words */ - continue; + bc = PTR2CHAR(su->su_badword); + if (!SPELL_ISUPPER(bc) + && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) + goodscore += SCORE_ICASE / 2; } - depth = 0; - arridx[0] = 0; - curi[0] = 1; - while (depth >= 0 && !got_int) + /* Compute the score for the good word. This only does letter + * insert/delete/swap/replace. REP items are not considered, + * which may make the score a bit higher. + * Use a limit for the score to make it work faster. Use + * MAXSCORE(), because RESCORE() will change the score. + * If the limit is very high then the iterative method is + * inefficient, using an array is quicker. */ + limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); + if (limit > SCORE_LIMITMAX) + goodscore += spell_edit_score(slang, su->su_badword, p); + else + goodscore += spell_edit_score_limit(slang, su->su_badword, + p, limit); + + /* When going over the limit don't bother to do the rest. */ + if (goodscore < SCORE_MAXMAX) { - if (curi[depth] > byts[arridx[depth]]) - { - /* Done all bytes at this node, go up one level. */ - --depth; - line_breakcheck(); - } - else - { - /* Do one more byte at this node. */ - n = arridx[depth] + curi[depth]; - ++curi[depth]; - c = byts[n]; - if (c == 0) - { - /* End of word, deal with the word. 
*/ - flags = (int)idxs[n]; - if (round == 2 || (flags & WF_KEEPCAP) == 0) - { - tword[depth] = NUL; - /* Sound-fold. Only in keep-case tree need to - * case-fold the word. */ - spell_soundfold(slang, tword, - round == 1, tsalword); - - /* Compute the edit distance between the - * sound-a-like words. */ - sound_score = soundalike_score(salword, - tsalword); - - /* Add a penalty for words in another region. */ - if ((flags & WF_REGION) && (((unsigned)flags - >> 16) & lp->lp_region) == 0) - local_score = SCORE_REGION; - else - local_score = 0; - sound_score += local_score; + /* Give a bonus to words seen before. */ + goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); + + /* Add the suggestion if the score isn't too bad. */ + goodscore = RESCORE(goodscore, score); + if (goodscore <= su->su_sfmaxscore) + add_suggestion(su, &su->su_ga, p, su->su_badlen, + goodscore, score, TRUE, slang, TRUE); + } + } + } + /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ + } +} - if (sound_score < SCORE_MAXMAX) - { - char_u cword[MAXWLEN]; - char_u *p; - int score; +/* + * Find word "word" in fold-case tree for "slang" and return the word number. + */ + static int +soundfold_find(slang, word) + slang_T *slang; + char_u *word; +{ + idx_T arridx = 0; + int len; + int wlen = 0; + int c; + char_u *ptr = word; + char_u *byts; + idx_T *idxs; + int wordnr = 0; - flags |= su->su_badflags; - if (round == 1 && (flags & WF_CAPMASK) != 0) - { - /* Need to fix case according to - * "flags". */ - make_case_word(tword, cword, flags); - p = cword; - } - else - p = tword; - - if (sps_flags & SPS_DOUBLE) - add_suggestion(su, &su->su_sga, p, - su->su_badlen, - sound_score, 0, FALSE, - lp->lp_sallang); - else - { - /* Compute the score. 
*/ - score = spell_edit_score( - su->su_badword, p) - + local_score; - if (sps_flags & SPS_BEST) - /* give a bonus for the good word - * sounding the same as the bad - * word */ - add_suggestion(su, &su->su_ga, p, - su->su_badlen, - RESCORE(score, sound_score), - sound_score, TRUE, - lp->lp_sallang); - else - add_suggestion(su, &su->su_ga, p, - su->su_badlen, - score + sound_score, - 0, FALSE, - lp->lp_sallang); - } - } - } + byts = slang->sl_sbyts; + idxs = slang->sl_sidxs; - /* Skip over other NUL bytes. */ - while (byts[n + 1] == 0) - { - ++n; - ++curi[depth]; - } - } - else - { - /* Normal char, go one level deeper. */ - tword[depth++] = c; - arridx[depth] = idxs[n]; - curi[depth] = 1; - } - } - } + for (;;) + { + /* First byte is the number of possible bytes. */ + len = byts[arridx++]; + + /* If the first possible byte is a zero the word could end here. + * If the word ends we found the word. If not skip the NUL bytes. */ + c = ptr[wlen]; + if (byts[arridx] == NUL) + { + if (c == NUL) + break; + + /* Skip over the zeros, there can be several. */ + while (len > 0 && byts[arridx] == NUL) + { + ++arridx; + --len; } + if (len == 0) + return -1; /* no children, word should have ended here */ + ++wordnr; + } + + /* If the word ends we didn't find it. */ + if (c == NUL) + return -1; + + /* Perform a binary search in the list of accepted bytes. */ + if (c == TAB) /* <Tab> is handled like <Space> */ + c = ' '; + while (byts[arridx] < c) + { + /* The word count is in the first idxs[] entry of the child. */ + wordnr += idxs[idxs[arridx]]; + ++arridx; + if (--len == 0) /* end of the bytes, didn't find it */ + return -1; } + if (byts[arridx] != c) /* didn't find the byte */ + return -1; + + /* Continue at the child (if there is one). */ + arridx = idxs[arridx]; + ++wlen; + + /* One space in the good word may stand for several spaces in the + * checked word. 
*/ + if (c == ' ') + while (ptr[wlen] == ' ' || ptr[wlen] == TAB) + ++wlen; } + + return wordnr; } /* @@ -11090,7 +12733,7 @@ set_map_str(lp, map) } lp->sl_has_map = TRUE; - /* Init the array and hash table empty. */ + /* Init the array and hash tables empty. */ for (i = 0; i < 256; ++i) lp->sl_map_array[i] = 0; #ifdef FEAT_MBYTE @@ -11204,45 +12847,39 @@ similar_chars(slang, c1, c2) /* * Add a suggestion to the list of suggestions. - * Do not add a duplicate suggestion or suggestions with a bad score. - * When "use_score" is not zero it's used, otherwise the score is computed - * with spell_edit_score(). + * For a suggestion that is already in the list the lowest score is remembered. */ static void -add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) +add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, + slang, maxsf) suginfo_T *su; - garray_T *gap; + garray_T *gap; /* either su_ga or su_sga */ char_u *goodword; int badlenarg; /* len of bad word replaced with "goodword" */ int score; int altscore; int had_bonus; /* value for st_had_bonus */ slang_T *slang; /* language for sound folding */ + int maxsf; /* su_maxscore applies to soundfold score, + su_sfmaxscore to the total score. */ { - int goodlen = STRLEN(goodword); /* len of goodword changed */ - int badlen = badlenarg; /* len of bad word changed */ + int goodlen; /* len of goodword changed */ + int badlen; /* len of bad word changed */ suggest_T *stp; suggest_T new_sug; int i; - hlf_T attr = HLF_COUNT; - char_u longword[MAXWLEN + 1]; char_u *pgood, *pbad; - /* Check that the word really is valid. Esp. for banned words and for - * split words, such as "the the". Need to append what follows to check - * for that. */ - STRCPY(longword, goodword); - vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); - (void)spell_check(curwin, longword, &attr, NULL); - if (attr != HLF_COUNT) - return; - /* Minimize "badlen" for consistency. 
Avoids that changing "the the" to * "thee the" is added next to changing the first "the" the "thee". */ pgood = goodword + STRLEN(goodword); - pbad = su->su_badptr + badlen; - while (pgood > goodword && pbad > su->su_badptr) + pbad = su->su_badptr + badlenarg; + for (;;) { + goodlen = pgood - goodword; + badlen = pbad - su->su_badptr; + if (goodlen <= 0 || badlen <= 0) + break; mb_ptr_back(goodword, pgood); mb_ptr_back(su->su_badptr, pbad); #ifdef FEAT_MBYTE @@ -11255,143 +12892,152 @@ add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) #endif if (*pgood != *pbad) break; - badlen = pbad - su->su_badptr; - goodlen = pgood - goodword; } + if (badlen == 0 && goodlen == 0) /* goodword doesn't change anything; may happen for "the the" changing * the first "the" to itself. */ return; - if (score <= su->su_maxscore) - { - /* Check if the word is already there. Also check the length that is - * being replaced "thes," -> "these" is a different suggestion from - * "thes" -> "these". */ - stp = &SUG(*gap, 0); - for (i = gap->ga_len - 1; i >= 0; --i) - if ((int)STRLEN(stp[i].st_word) == goodlen - && STRNCMP(stp[i].st_word, goodword, goodlen) == 0 - && stp[i].st_orglen == badlen) - { - /* - * Found it. Remember the word with the lowest score. - */ - if (stp[i].st_slang == NULL) - stp[i].st_slang = slang; + /* Check if the word is already there. Also check the length that is + * being replaced "thes," -> "these" is a different suggestion from + * "thes" -> "these". */ + stp = &SUG(*gap, 0); + for (i = gap->ga_len; --i >= 0; ++stp) + if (stp->st_wordlen == goodlen + && stp->st_orglen == badlen + && STRNCMP(stp->st_word, goodword, goodlen) == 0) + { + /* + * Found it. Remember the word with the lowest score. 
+ */ + if (stp->st_slang == NULL) + stp->st_slang = slang; - new_sug.st_score = score; - new_sug.st_altscore = altscore; - new_sug.st_had_bonus = had_bonus; + new_sug.st_score = score; + new_sug.st_altscore = altscore; + new_sug.st_had_bonus = had_bonus; - if (stp[i].st_had_bonus != had_bonus) + if (stp->st_had_bonus != had_bonus) + { + /* Only one of the two had the soundalike score computed. + * Need to do that for the other one now, otherwise the + * scores can't be compared. This happens because + * suggest_try_change() doesn't compute the soundalike + * word to keep it fast, while some special methods set + * the soundalike score to zero. */ + if (had_bonus) + rescore_one(su, stp); + else { - /* Only one of the two had the soundalike score computed. - * Need to do that for the other one now, otherwise the - * scores can't be compared. This happens because - * suggest_try_change() doesn't compute the soundalike - * word to keep it fast, while some special methods set - * the soundalike score to zero. */ - if (had_bonus) - rescore_one(su, &stp[i]); - else - { - new_sug.st_word = goodword; - new_sug.st_slang = stp[i].st_slang; - new_sug.st_orglen = badlen; - rescore_one(su, &new_sug); - } + new_sug.st_word = stp->st_word; + new_sug.st_wordlen = stp->st_wordlen; + new_sug.st_slang = stp->st_slang; + new_sug.st_orglen = badlen; + rescore_one(su, &new_sug); } + } - if (stp[i].st_score > new_sug.st_score) - { - stp[i].st_score = new_sug.st_score; - stp[i].st_altscore = new_sug.st_altscore; - stp[i].st_had_bonus = new_sug.st_had_bonus; - } - break; + if (stp->st_score > new_sug.st_score) + { + stp->st_score = new_sug.st_score; + stp->st_altscore = new_sug.st_altscore; + stp->st_had_bonus = new_sug.st_had_bonus; } + break; + } - if (i < 0 && ga_grow(gap, 1) == OK) + if (i < 0 && ga_grow(gap, 1) == OK) + { + /* Add a suggestion. */ + stp = &SUG(*gap, gap->ga_len); + stp->st_word = vim_strnsave(goodword, goodlen); + if (stp->st_word != NULL) { - /* Add a suggestion. 
*/ - stp = &SUG(*gap, gap->ga_len); - stp->st_word = vim_strnsave(goodword, goodlen); - if (stp->st_word != NULL) - { - stp->st_score = score; - stp->st_altscore = altscore; - stp->st_had_bonus = had_bonus; - stp->st_orglen = badlen; - stp->st_slang = slang; - ++gap->ga_len; + stp->st_wordlen = goodlen; + stp->st_score = score; + stp->st_altscore = altscore; + stp->st_had_bonus = had_bonus; + stp->st_orglen = badlen; + stp->st_slang = slang; + ++gap->ga_len; - /* If we have too many suggestions now, sort the list and keep - * the best suggestions. */ - if (gap->ga_len > SUG_MAX_COUNT(su)) - su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, - SUG_CLEAN_COUNT(su)); + /* If we have too many suggestions now, sort the list and keep + * the best suggestions. */ + if (gap->ga_len > SUG_MAX_COUNT(su)) + { + if (maxsf) + su->su_sfmaxscore = cleanup_suggestions(gap, + su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); + else + { + i = su->su_maxscore; + su->su_maxscore = cleanup_suggestions(gap, + su->su_maxscore, SUG_CLEAN_COUNT(su)); + } } } } } /* - * Add a word to be banned. + * Suggestions may in fact be flagged as errors. Esp. for banned words and + * for split words, such as "the the". Remove these from the list here. */ static void -add_banned(su, word) +check_suggestions(su, gap) suginfo_T *su; - char_u *word; + garray_T *gap; /* either su_ga or su_sga */ { - char_u *s = vim_strsave(word); - hash_T hash; - hashitem_T *hi; + suggest_T *stp; + int i; + char_u longword[MAXWLEN + 1]; + int len; + hlf_T attr; - if (s != NULL) + stp = &SUG(*gap, 0); + for (i = gap->ga_len - 1; i >= 0; --i) { - hash = hash_hash(s); - hi = hash_lookup(&su->su_banned, s, hash); - if (HASHITEM_EMPTY(hi)) - hash_add_item(&su->su_banned, hi, s, hash); - else - vim_free(s); + /* Need to append what follows to check for "the the". 
*/ + STRCPY(longword, stp[i].st_word); + len = stp[i].st_wordlen; + vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, + MAXWLEN - len); + attr = HLF_COUNT; + (void)spell_check(curwin, longword, &attr, NULL, FALSE); + if (attr != HLF_COUNT) + { + /* Remove this entry. */ + vim_free(stp[i].st_word); + --gap->ga_len; + if (i < gap->ga_len) + mch_memmove(stp + i, stp + i + 1, + sizeof(suggest_T) * (gap->ga_len - i)); + } } } -/* - * Return TRUE if a word appears in the list of banned words. - */ - static int -was_banned(su, word) - suginfo_T *su; - char_u *word; -{ - hashitem_T *hi = hash_find(&su->su_banned, word); - - return !HASHITEM_EMPTY(hi); -} /* - * Free the banned words in "su". + * Add a word to be banned. */ static void -free_banned(su) +add_banned(su, word) suginfo_T *su; + char_u *word; { - int todo; + char_u *s = vim_strsave(word); + hash_T hash; hashitem_T *hi; - todo = su->su_banned.ht_used; - for (hi = su->su_banned.ht_array; todo > 0; ++hi) + hash = hash_hash(word); + hi = hash_lookup(&su->su_banned, word, hash); + if (HASHITEM_EMPTY(hi)) { - if (!HASHITEM_EMPTY(hi)) - { - vim_free(hi->hi_key); - --todo; - } + s = vim_strsave(word); + if (s != NULL) + hash_add_item(&su->su_banned, hi, s, hash); } - hash_clear(&su->su_banned); } /* @@ -12270,11 +13916,21 @@ soundalike_score(goodstart, badstart) * counted so much, vowels halfway the word aren't counted at all. */ if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) { - score = SCORE_DEL / 2; - if (*badsound == '*') - ++badsound; + if (badsound[1] == goodsound[1] + || (badsound[1] != NUL + && goodsound[1] != NUL + && badsound[2] == goodsound[2])) + { + /* handle like a substitute */ + } else - ++goodsound; + { + score = 2 * SCORE_DEL / 3; + if (*badsound == '*') + ++badsound; + else + ++goodsound; + } } goodlen = STRLEN(goodsound); @@ -12470,7 +14126,8 @@ soundalike_score(goodstart, badstart) * support multi-byte characters. 
*/ static int -spell_edit_score(badword, goodword) +spell_edit_score(slang, badword, goodword) + slang_T *slang; char_u *badword; char_u *goodword; { @@ -12512,11 +14169,11 @@ spell_edit_score(badword, goodword) CNT(0, 0) = 0; for (j = 1; j <= goodlen; ++j) - CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; + CNT(0, j) = CNT(0, j - 1) + SCORE_INS; for (i = 1; i <= badlen; ++i) { - CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; + CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; for (j = 1; j <= goodlen; ++j) { #ifdef FEAT_MBYTE @@ -12539,7 +14196,15 @@ spell_edit_score(badword, goodword) if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); else - CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1); + else + CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); + } if (i > 1 && j > 1) { @@ -12577,6 +14242,392 @@ spell_edit_score(badword, goodword) return i; } +typedef struct +{ + int badi; + int goodi; + int score; +} limitscore_T; + +/* + * Like spell_edit_score(), but with a limit on the score to make it faster. + * May return SCORE_MAXMAX when the score is higher than "limit". + * + * This uses a stack for the edits still to be tried. + * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support + * for multi-byte characters. + */ + static int +spell_edit_score_limit(slang, badword, goodword, limit) + slang_T *slang; + char_u *badword; + char_u *goodword; + int limit; +{ + limitscore_T stack[10]; /* allow for over 3 * 2 edits */ + int stackidx; + int bi, gi; + int bi2, gi2; + int bc, gc; + int score; + int score_off; + int minscore; + int round; + +#ifdef FEAT_MBYTE + /* Multi-byte characters require a bit more work, use a different function + * to avoid testing "has_mbyte" quite often. 
*/ + if (has_mbyte) + return spell_edit_score_limit_w(slang, badword, goodword, limit); +#endif + + /* + * The idea is to go from start to end over the words. So long as + * characters are equal just continue, this always gives the lowest score. + * When there is a difference try several alternatives. Each alternative + * increases "score" for the edit distance. Some of the alternatives are + * pushed unto a stack and tried later, some are tried right away. At the + * end of the word the score for one alternative is known. The lowest + * possible score is stored in "minscore". + */ + stackidx = 0; + bi = 0; + gi = 0; + score = 0; + minscore = limit + 1; + + for (;;) + { + /* Skip over an equal part, score remains the same. */ + for (;;) + { + bc = badword[bi]; + gc = goodword[gi]; + if (bc != gc) /* stop at a char that's different */ + break; + if (bc == NUL) /* both words end */ + { + if (score < minscore) + minscore = score; + goto pop; /* do next alternative */ + } + ++bi; + ++gi; + } + + if (gc == NUL) /* goodword ends, delete badword chars */ + { + do + { + if ((score += SCORE_DEL) >= minscore) + goto pop; /* do next alternative */ + } while (badword[++bi] != NUL); + minscore = score; + } + else if (bc == NUL) /* badword ends, insert badword chars */ + { + do + { + if ((score += SCORE_INS) >= minscore) + goto pop; /* do next alternative */ + } while (goodword[++gi] != NUL); + minscore = score; + } + else /* both words continue */ + { + /* If not close to the limit, perform a change. Only try changes + * that may lead to a lower score than "minscore". + * round 0: try deleting a char from badword + * round 1: try inserting a char in badword */ + for (round = 0; round <= 1; ++round) + { + score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); + if (score_off < minscore) + { + if (score_off + SCORE_EDIT_MIN >= minscore) + { + /* Near the limit, rest of the words must match. We + * can check that right now, no need to push an item + * onto the stack. 
*/ + bi2 = bi + 1 - round; + gi2 = gi + round; + while (goodword[gi2] == badword[bi2]) + { + if (goodword[gi2] == NUL) + { + minscore = score_off; + break; + } + ++bi2; + ++gi2; + } + } + else + { + /* try deleting/inserting a character later */ + stack[stackidx].badi = bi + 1 - round; + stack[stackidx].goodi = gi + round; + stack[stackidx].score = score_off; + ++stackidx; + } + } + } + + if (score + SCORE_SWAP < minscore) + { + /* If swapping two characters makes a match then the + * substitution is more expensive, thus there is no need to + * try both. */ + if (gc == badword[bi + 1] && bc == goodword[gi + 1]) + { + /* Swap two characters, that is: skip them. */ + gi += 2; + bi += 2; + score += SCORE_SWAP; + continue; + } + } + + /* Substitute one character for another which is the same + * thing as deleting a character from both goodword and badword. + * Use a better score when there is only a case difference. */ + if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) + score += SCORE_ICASE; + else + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + score += SCORE_SIMILAR; + else + score += SCORE_SUBST; + } + + if (score < minscore) + { + /* Do the substitution. */ + ++gi; + ++bi; + continue; + } + } +pop: + /* + * Get here to try the next alternative, pop it from the stack. + */ + if (stackidx == 0) /* stack is empty, finished */ + break; + + /* pop an item from the stack */ + --stackidx; + gi = stack[stackidx].goodi; + bi = stack[stackidx].badi; + score = stack[stackidx].score; + } + + /* When the score goes over "limit" it may actually be much higher. + * Return a very large number to avoid going below the limit when giving a + * bonus. */ + if (minscore > limit) + return SCORE_MAXMAX; + return minscore; +} + +#ifdef FEAT_MBYTE +/* + * Multi-byte version of spell_edit_score_limit(). + * Keep it in sync with the above! 
+ */ + static int +spell_edit_score_limit_w(slang, badword, goodword, limit) + slang_T *slang; + char_u *badword; + char_u *goodword; + int limit; +{ + limitscore_T stack[10]; /* allow for over 3 * 2 edits */ + int stackidx; + int bi, gi; + int bi2, gi2; + int bc, gc; + int score; + int score_off; + int minscore; + int round; + char_u *p; + int wbadword[MAXWLEN]; + int wgoodword[MAXWLEN]; + + /* Get the characters from the multi-byte strings and put them in an + * int array for easy access. */ + bi = 0; + for (p = badword; *p != NUL; ) + wbadword[bi++] = mb_cptr2char_adv(&p); + wbadword[bi++] = 0; + gi = 0; + for (p = goodword; *p != NUL; ) + wgoodword[gi++] = mb_cptr2char_adv(&p); + wgoodword[gi++] = 0; + + /* + * The idea is to go from start to end over the words. So long as + * characters are equal just continue, this always gives the lowest score. + * When there is a difference try several alternatives. Each alternative + * increases "score" for the edit distance. Some of the alternatives are + * pushed unto a stack and tried later, some are tried right away. At the + * end of the word the score for one alternative is known. The lowest + * possible score is stored in "minscore". + */ + stackidx = 0; + bi = 0; + gi = 0; + score = 0; + minscore = limit + 1; + + for (;;) + { + /* Skip over an equal part, score remains the same. 
*/ + for (;;) + { + bc = wbadword[bi]; + gc = wgoodword[gi]; + + if (bc != gc) /* stop at a char that's different */ + break; + if (bc == NUL) /* both words end */ + { + if (score < minscore) + minscore = score; + goto pop; /* do next alternative */ + } + ++bi; + ++gi; + } + + if (gc == NUL) /* goodword ends, delete badword chars */ + { + do + { + if ((score += SCORE_DEL) >= minscore) + goto pop; /* do next alternative */ + } while (wbadword[++bi] != NUL); + minscore = score; + } + else if (bc == NUL) /* badword ends, insert badword chars */ + { + do + { + if ((score += SCORE_INS) >= minscore) + goto pop; /* do next alternative */ + } while (wgoodword[++gi] != NUL); + minscore = score; + } + else /* both words continue */ + { + /* If not close to the limit, perform a change. Only try changes + * that may lead to a lower score than "minscore". + * round 0: try deleting a char from badword + * round 1: try inserting a char in badword */ + for (round = 0; round <= 1; ++round) + { + score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); + if (score_off < minscore) + { + if (score_off + SCORE_EDIT_MIN >= minscore) + { + /* Near the limit, rest of the words must match. We + * can check that right now, no need to push an item + * onto the stack. */ + bi2 = bi + 1 - round; + gi2 = gi + round; + while (wgoodword[gi2] == wbadword[bi2]) + { + if (wgoodword[gi2] == NUL) + { + minscore = score_off; + break; + } + ++bi2; + ++gi2; + } + } + else + { + /* try deleting a character from badword later */ + stack[stackidx].badi = bi + 1 - round; + stack[stackidx].goodi = gi + round; + stack[stackidx].score = score_off; + ++stackidx; + } + } + } + + if (score + SCORE_SWAP < minscore) + { + /* If swapping two characters makes a match then the + * substitution is more expensive, thus there is no need to + * try both. */ + if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) + { + /* Swap two characters, that is: skip them. 
*/ + gi += 2; + bi += 2; + score += SCORE_SWAP; + continue; + } + } + + /* Substitute one character for another which is the same + * thing as deleting a character from both goodword and badword. + * Use a better score when there is only a case difference. */ + if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) + score += SCORE_ICASE; + else + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + score += SCORE_SIMILAR; + else + score += SCORE_SUBST; + } + + if (score < minscore) + { + /* Do the substitution. */ + ++gi; + ++bi; + continue; + } + } +pop: + /* + * Get here to try the next alternative, pop it from the stack. + */ + if (stackidx == 0) /* stack is empty, finished */ + break; + + /* pop an item from the stack */ + --stackidx; + gi = stack[stackidx].goodi; + bi = stack[stackidx].badi; + score = stack[stackidx].score; + } + + /* When the score goes over "limit" it may actually be much higher. + * Return a very large number to avoid going below the limit when giving a + * bonus. */ + if (minscore > limit) + return SCORE_MAXMAX; + return minscore; +} +#endif + +#define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ +#define DUMPFLAG_COUNT 2 /* include word count */ + /* * ":spelldump" */ @@ -12603,6 +14654,7 @@ ex_spelldump(eap) int do_region = TRUE; /* dump region names and numbers */ char_u *p; int lpi; + int dumpflags; if (no_spell_checking(curwin)) return; @@ -12657,17 +14709,22 @@ ex_spelldump(eap) { if (round == 1) { + dumpflags = 0; byts = slang->sl_fbyts; idxs = slang->sl_fidxs; } else { + dumpflags = DUMPFLAG_KEEPCASE; byts = slang->sl_kbyts; idxs = slang->sl_kidxs; } if (byts == NULL) continue; /* array is empty */ + if (eap->forceit) + dumpflags |= DUMPFLAG_COUNT; + depth = 0; arridx[0] = 0; curi[0] = 1; @@ -12707,11 +14764,12 @@ ex_spelldump(eap) * when it's the first one. 
*/ c = (unsigned)flags >> 24; if (c == 0 || curi[depth] == 2) - dump_word(word, round, flags, lnum++); + dump_word(slang, word, dumpflags, + flags, lnum++); /* Apply the prefix, if there is one. */ if (c != 0) - lnum = dump_prefixes(slang, word, round, + lnum = dump_prefixes(slang, word, dumpflags, flags, lnum); } } @@ -12738,19 +14796,21 @@ ex_spelldump(eap) * Dump one word: apply case modifications and append a line to the buffer. */ static void -dump_word(word, round, flags, lnum) +dump_word(slang, word, dumpflags, flags, lnum) + slang_T *slang; char_u *word; - int round; + int dumpflags; int flags; linenr_T lnum; { int keepcap = FALSE; char_u *p; + char_u *tw; char_u cword[MAXWLEN]; char_u badword[MAXWLEN + 10]; int i; - if (round == 1 && (flags & WF_CAPMASK) != 0) + if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) { /* Need to fix case according to "flags". */ make_case_word(word, cword, flags); @@ -12759,10 +14819,12 @@ dump_word(word, round, flags, lnum) else { p = word; - if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0 + if ((dumpflags & DUMPFLAG_KEEPCASE) + && ((captype(word, NULL) & WF_KEEPCAP) == 0 || (flags & WF_FIXCAP) != 0)) keepcap = TRUE; } + tw = p; /* Add flags and regions after a slash. */ if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) @@ -12782,6 +14844,20 @@ dump_word(word, round, flags, lnum) p = badword; } + if (dumpflags & DUMPFLAG_COUNT) + { + hashitem_T *hi; + + /* Include the word count for ":spelldump!". */ + hi = hash_find(&slang->sl_wordcount, tw); + if (!HASHITEM_EMPTY(hi)) + { + vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", + tw, HI2WC(hi)->wc_count); + p = IObuff; + } + } + ml_append(lnum, p, (colnr_T)0, FALSE); } @@ -12791,10 +14867,10 @@ dump_word(word, round, flags, lnum) * Return the updated line number. 
*/ static linenr_T -dump_prefixes(slang, word, round, flags, startlnum) +dump_prefixes(slang, word, dumpflags, flags, startlnum) slang_T *slang; char_u *word; /* case-folded word */ - int round; + int dumpflags; int flags; /* flags with prefix ID */ linenr_T startlnum; { @@ -12860,7 +14936,7 @@ dump_prefixes(slang, word, round, flags, startlnum) if (c != 0) { vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); - dump_word(prefix, round, + dump_word(slang, prefix, dumpflags, (c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum++); } @@ -12876,7 +14952,7 @@ dump_prefixes(slang, word, round, flags, startlnum) { vim_strncpy(prefix + depth, word_up, MAXWLEN - depth - 1); - dump_word(prefix, round, + dump_word(slang, prefix, dumpflags, (c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum++); } @@ -12981,7 +15057,7 @@ expand_spelling(lnum, col, pat, matchp) { garray_T ga; - spell_suggest_list(&ga, pat, 100, spell_expand_need_cap); + spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); *matchp = ga.ga_data; return ga.ga_len; } diff --git a/src/structs.h b/src/structs.h index 5b91cecb6..5420b4e4b 100644 --- a/src/structs.h +++ b/src/structs.h @@ -1074,6 +1074,13 @@ struct dictvar_S #define SYNSPL_TOP 1 /* spell check toplevel text */ #define SYNSPL_NOTOP 2 /* don't spell check toplevel text */ +/* avoid #ifdefs for when b_spell is not available */ +#ifdef FEAT_SYN_HL +# define B_SPELL(buf) ((buf)->b_spell) +#else +# define B_SPELL(buf) (0) +#endif + /* * buffer: structure that holds information about one file @@ -1407,8 +1414,19 @@ struct file_buffer int b_may_swap; int b_did_warn; /* Set to 1 if user has been warned on first change of a read-only file */ - int b_help; /* buffer for help file (when set b_p_bt is - "help") */ + + /* Two special kinds of buffers: + * help buffer - used for help files, won't use a swap file. + * spell buffer - used for spell info, never displayed and doesn't have a + * file name. 
+ */ + int b_help; /* TRUE for help file buffer (when set b_p_bt + is "help") */ +#ifdef FEAT_SYN_HL + int b_spell; /* TRUE for a spell file buffer, most fields + are not used! Use the B_SPELL macro to + access b_spell without #ifdef. */ +#endif #ifndef SHORT_FNAME int b_shortname; /* this file has an 8.3 file name */ diff --git a/src/testdir/test58.ok b/src/testdir/test58.ok index 75caa7e66..b7ed09466 100644 --- a/src/testdir/test58.ok +++ b/src/testdir/test58.ok @@ -20,11 +20,11 @@ uk wrong ------- bad -['put', 'OK', 'uk'] +['put', 'uk', 'OK'] inputs ['input', 'puts', 'outputs'] comment -['Comment'] +['Comment', 'outtest', 'the end'] ok ['OK', 'uk', 'put'] Ok @@ -34,7 +34,7 @@ test déôl ['deol', 'déôr', 'test'] end -['put', 'OK', 'test'] +['put', 'uk', 'test'] the ['put', 'uk', 'test'] gebletegek @@ -141,7 +141,7 @@ bad wordutilize ['word utilize', 'wordutils', 'wordutil'] pro -['bork', 'end', 'word'] +['bork', 'word', 'end'] borkborkborkborkborkbork ['bork borkborkborkborkbork', 'borkbork borkborkborkbork', 'borkborkbork borkborkbork'] tomatotomatotomato @@ -185,7 +185,7 @@ probarbirk middle [] startmiddle -['startmiddleend'] +['startmiddleend', 'startmiddlebar'] middleend [] endstart @@ -217,7 +217,7 @@ probarbirk middle [] leadmiddle -['leadmiddleend'] +['leadmiddleend', 'leadmiddlebar'] middleend [] endlead @@ -249,7 +249,7 @@ probarmaat middle [] leadmiddle -[] +['leadmiddlebar'] middletail [] taillead diff --git a/src/testdir/test59.ok b/src/testdir/test59.ok index 9c49be4cb..29c9696e8 100644 --- a/src/testdir/test59.ok +++ b/src/testdir/test59.ok @@ -20,11 +20,11 @@ uk wrong ------- bad -['put', 'OK', 'uk'] +['put', 'uk', 'OK'] inputs ['input', 'puts', 'outputs'] comment -['Comment'] +['Comment', 'outtest', 'the end'] ok ['OK', 'uk', 'put'] Ok @@ -34,7 +34,7 @@ test déôl ['deol', 'déôr', 'test'] end -['put', 'OK', 'test'] +['put', 'uk', 'test'] the ['put', 'uk', 'test'] gebletegek @@ -141,7 +141,7 @@ bad wordutilize ['word utilize', 'wordutils', 
'wordutil'] pro -['bork', 'end', 'word'] +['bork', 'word', 'end'] borkborkborkborkborkbork ['bork borkborkborkborkbork', 'borkbork borkborkborkbork', 'borkborkbork borkborkbork'] tomatotomatotomato @@ -185,7 +185,7 @@ probarbirk middle [] startmiddle -['startmiddleend'] +['startmiddleend', 'startmiddlebar'] middleend [] endstart @@ -217,7 +217,7 @@ probarbirk middle [] leadmiddle -['leadmiddleend'] +['leadmiddleend', 'leadmiddlebar'] middleend [] endlead @@ -249,7 +249,7 @@ probarmaat middle [] leadmiddle -[] +['leadmiddlebar'] middletail [] taillead diff --git a/src/testdir/test60.in b/src/testdir/test60.in index 9899a94eb..2c414b38e 100644 --- a/src/testdir/test60.in +++ b/src/testdir/test60.in @@ -51,6 +51,10 @@ endfunction let test_cases += [['&textwidth', 1]] " Existing and working option (short form) let test_cases += [['&tw', 1]] + " Global option + let test_cases += [['&g:errorformat', 1]] + " Local option + let test_cases += [['&l:errorformat', 1]] " Negative form of existing and working option (long form) let test_cases += [['&nojoinspaces', 0]] " Negative form of existing and working option (short form) @@ -212,6 +216,26 @@ endfunction echo "FAILED" endif + " Existing local curly-brace variable + let str = "local" + let curly_{str}_var = 1 + echo 'curly_' . str . '_var: 1' + if exists('curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing local curly-brace variable + unlet curly_{str}_var + echo 'curly_' . str . 
'_var: 0' + if !exists('curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing global variable let g:global_var = 1 echo 'g:global_var: 1' @@ -230,29 +254,46 @@ endfunction echo "FAILED" endif - " Existing local curly-brace variable - let curly_local_var = 1 - let str = "local" - echo 'curly_{str}_var: 1' - if exists('curly_{str}_var') + " Existing global list + let g:global_list = ["blue", "orange"] + echo 'g:global_list: 1' + if exists('g:global_list') echo "OK" else echo "FAILED" endif - " Non-existing local curly-brace variable - unlet curly_local_var - echo 'curly_{str}_var: 0' - if !exists('curly_{str}_var') + " Non-existing global list + unlet g:global_list + echo 'g:global_list: 0' + if !exists('g:global_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing global dictionary + let g:global_dict = {"xcord":100, "ycord":2} + echo 'g:global_dict: 1' + if exists('g:global_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing global dictionary + unlet g:global_dict + echo 'g:global_dict: 0' + if !exists('g:global_dict') echo "OK" else echo "FAILED" endif " Existing global curly-brace variable - let g:curly_global_var = 1 let str = "global" - echo 'g:curly_{str}_var: 1' + let g:curly_{str}_var = 1 + echo 'g:curly_' . str . '_var: 1' if exists('g:curly_{str}_var') echo "OK" else @@ -260,17 +301,212 @@ endfunction endif " Non-existing global curly-brace variable - unlet g:curly_global_var - echo 'g:curly_{str}_var: 0' + unlet g:curly_{str}_var + echo 'g:curly_' . str . 
'_var: 0' if !exists('g:curly_{str}_var') echo "OK" else echo "FAILED" endif + " Existing window variable + echo 'w:window_var: 1' + let w:window_var = 1 + if exists('w:window_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window variable + unlet w:window_var + echo 'w:window_var: 0' + if !exists('w:window_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing window list + let w:window_list = ["blue", "orange"] + echo 'w:window_list: 1' + if exists('w:window_list') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window list + unlet w:window_list + echo 'w:window_list: 0' + if !exists('w:window_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing window dictionary + let w:window_dict = {"xcord":100, "ycord":2} + echo 'w:window_dict: 1' + if exists('w:window_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window dictionary + unlet w:window_dict + echo 'w:window_dict: 0' + if !exists('w:window_dict') + echo "OK" + else + echo "FAILED" + endif + + " Existing window curly-brace variable + let str = "window" + let w:curly_{str}_var = 1 + echo 'w:curly_' . str . '_var: 1' + if exists('w:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window curly-brace variable + unlet w:curly_{str}_var + echo 'w:curly_' . str . 
'_var: 0' + if !exists('w:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer variable + echo 'b:buffer_var: 1' + let b:buffer_var = 1 + if exists('b:buffer_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer variable + unlet b:buffer_var + echo 'b:buffer_var: 0' + if !exists('b:buffer_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer list + let b:buffer_list = ["blue", "orange"] + echo 'b:buffer_list: 1' + if exists('b:buffer_list') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer list + unlet b:buffer_list + echo 'b:buffer_list: 0' + if !exists('b:buffer_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer dictionary + let b:buffer_dict = {"xcord":100, "ycord":2} + echo 'b:buffer_dict: 1' + if exists('b:buffer_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer dictionary + unlet b:buffer_dict + echo 'b:buffer_dict: 0' + if !exists('b:buffer_dict') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer curly-brace variable + let str = "buffer" + let b:curly_{str}_var = 1 + echo 'b:curly_' . str . '_var: 1' + if exists('b:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer curly-brace variable + unlet b:curly_{str}_var + echo 'b:curly_' . str . '_var: 0' + if !exists('b:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + " Script-local tests source test60.vim + " Existing Vim internal variable + echo 'v:version: 1' + if exists('v:version') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing Vim internal variable + echo 'v:non_exists_var: 0' + if !exists('v:non_exists_var') + echo "OK" + else + echo "FAILED" + endif + + " Function arguments + function TestFuncArg(func_arg, ...) 
+ echo 'a:func_arg: 1' + if exists('a:func_arg') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:non_exists_arg: 0' + if !exists('a:non_exists_arg') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:1: 1' + if exists('a:1') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:2: 0' + if !exists('a:2') + echo "OK" + else + echo "FAILED" + endif + endfunction + + call TestFuncArg("arg1", "arg2") + redir END endfunction :call TestExists() diff --git a/src/testdir/test60.ok b/src/testdir/test60.ok index fe6c4b765..46a72e4d2 100644 --- a/src/testdir/test60.ok +++ b/src/testdir/test60.ok @@ -33,6 +33,10 @@ OK OK &tw: 1 OK +&g:errorformat: 1 +OK +&l:errorformat: 1 +OK &nojoinspaces: 0 OK &nojs: 0 @@ -85,27 +89,87 @@ local_dict: 1 OK local_dict: 0 OK +curly_local_var: 1 +OK +curly_local_var: 0 +OK g:global_var: 1 OK g:global_var: 0 OK -curly_{str}_var: 1 +g:global_list: 1 +OK +g:global_list: 0 +OK +g:global_dict: 1 +OK +g:global_dict: 0 +OK +g:curly_global_var: 1 +OK +g:curly_global_var: 0 +OK +w:window_var: 1 +OK +w:window_var: 0 +OK +w:window_list: 1 +OK +w:window_list: 0 +OK +w:window_dict: 1 +OK +w:window_dict: 0 +OK +w:curly_window_var: 1 OK -curly_{str}_var: 0 +w:curly_window_var: 0 OK -g:curly_{str}_var: 1 +b:buffer_var: 1 OK -g:curly_{str}_var: 0 +b:buffer_var: 0 +OK +b:buffer_list: 1 +OK +b:buffer_list: 0 +OK +b:buffer_dict: 1 +OK +b:buffer_dict: 0 +OK +b:curly_buffer_var: 1 +OK +b:curly_buffer_var: 0 OK s:script_var: 1 OK s:script_var: 0 OK -s:curly_{str}_var: 1 +s:script_list: 1 +OK +s:script_list: 0 +OK +s:script_dict: 1 OK -s:curly_{str}_var: 0 +s:script_dict: 0 +OK +s:curly_script_var: 1 +OK +s:curly_script_var: 0 OK *s:my_script_func: 1 OK *s:my_script_func: 0 OK +v:version: 1 +OK +v:non_exists_var: 0 +OK +a:func_arg: 1 +OK +a:non_exists_arg: 0 +OK +a:1: 1 +OK +a:2: 0 +OK diff --git a/src/testdir/test60.vim b/src/testdir/test60.vim new file mode 100644 index 000000000..48eea4279 --- /dev/null +++ b/src/testdir/test60.vim @@ -0,0 +1,97 @@ +" Vim 
script for exists() function test +" Script-local variables are checked here + +" Existing script-local variable +let s:script_var = 1 +echo 's:script_var: 1' +if exists('s:script_var') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local variable +unlet s:script_var +echo 's:script_var: 0' +if !exists('s:script_var') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local list +let s:script_list = ["blue", "orange"] +echo 's:script_list: 1' +if exists('s:script_list') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local list +unlet s:script_list +echo 's:script_list: 0' +if !exists('s:script_list') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local dictionary +let s:script_dict = {"xcord":100, "ycord":2} +echo 's:script_dict: 1' +if exists('s:script_dict') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local dictionary +unlet s:script_dict +echo 's:script_dict: 0' +if !exists('s:script_dict') + echo "OK" +else + echo "FAILED" +endif + +" Existing script curly-brace variable +let str = "script" +let s:curly_{str}_var = 1 +echo 's:curly_' . str . '_var: 1' +if exists('s:curly_{str}_var') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local curly-brace variable +unlet s:curly_{str}_var +echo 's:curly_' . str . '_var: 0' +if !exists('s:curly_{str}_var') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local function +function! 
s:my_script_func() +endfunction + +echo '*s:my_script_func: 1' +if exists('*s:my_script_func') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local function +delfunction s:my_script_func + +echo '*s:my_script_func: 0' +if !exists('*s:my_script_func') + echo "OK" +else + echo "FAILED" +endif + diff --git a/src/version.h b/src/version.h index 253102b52..447f4fa0b 100644 --- a/src/version.h +++ b/src/version.h @@ -36,5 +36,5 @@ #define VIM_VERSION_NODOT "vim70aa" #define VIM_VERSION_SHORT "7.0aa" #define VIM_VERSION_MEDIUM "7.0aa ALPHA" -#define VIM_VERSION_LONG "VIM - Vi IMproved 7.0aa ALPHA (2005 Dec 29)" -#define VIM_VERSION_LONG_DATE "VIM - Vi IMproved 7.0aa ALPHA (2005 Dec 29, compiled " +#define VIM_VERSION_LONG "VIM - Vi IMproved 7.0aa ALPHA (2006 Jan 12)" +#define VIM_VERSION_LONG_DATE "VIM - Vi IMproved 7.0aa ALPHA (2006 Jan 12, compiled " |