diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 3 | ||||
-rw-r--r-- | src/buffer.c | 6 | ||||
-rw-r--r-- | src/edit.c | 9 | ||||
-rw-r--r-- | src/eval.c | 21 | ||||
-rw-r--r-- | src/ex_cmds.c | 4 | ||||
-rw-r--r-- | src/ex_cmds.h | 6 | ||||
-rw-r--r-- | src/ex_docmd.c | 7 | ||||
-rw-r--r-- | src/ex_getln.c | 25 | ||||
-rw-r--r-- | src/feature.h | 9 | ||||
-rw-r--r-- | src/fileio.c | 2 | ||||
-rw-r--r-- | src/globals.h | 7 | ||||
-rw-r--r-- | src/gui_gtk_x11.c | 1 | ||||
-rw-r--r-- | src/hashtable.c | 25 | ||||
-rw-r--r-- | src/memline.c | 146 | ||||
-rw-r--r-- | src/message.c | 2 | ||||
-rw-r--r-- | src/misc1.c | 4 | ||||
-rw-r--r-- | src/misc2.c | 6 | ||||
-rw-r--r-- | src/po/it.po | 150 | ||||
-rw-r--r-- | src/proto/eval.pro | 1 | ||||
-rw-r--r-- | src/proto/hashtable.pro | 1 | ||||
-rw-r--r-- | src/proto/memline.pro | 61 | ||||
-rw-r--r-- | src/proto/spell.pro | 4 | ||||
-rw-r--r-- | src/quickfix.c | 40 | ||||
-rw-r--r-- | src/screen.c | 11 | ||||
-rw-r--r-- | src/spell.c | 4874 | ||||
-rw-r--r-- | src/structs.h | 22 | ||||
-rw-r--r-- | src/testdir/test58.ok | 14 | ||||
-rw-r--r-- | src/testdir/test59.ok | 14 | ||||
-rw-r--r-- | src/testdir/test60.in | 262 | ||||
-rw-r--r-- | src/testdir/test60.ok | 76 | ||||
-rw-r--r-- | src/testdir/test60.vim | 97 | ||||
-rw-r--r-- | src/version.h | 4 |
32 files changed, 4276 insertions, 1638 deletions
diff --git a/src/Makefile b/src/Makefile index 8dd1be3e6..810dd7b78 100644 --- a/src/Makefile +++ b/src/Makefile @@ -532,9 +532,12 @@ LINT_OPTIONS = -beprxzF # PROFILING - Uncomment the next two lines to do profiling with gcc and gprof. # Might not work with GUI or Perl. +# For unknown reasons adding "-lc" fixes a linking problem with GCC. That's +# probably a bug in the "-pg" implementation. # Need to recompile everything after changing this: "make clean" "make". #PROFILE_CFLAGS = -pg -g #PROFILE_LIBS = -pg +#PROFILE_LIBS = -pg -lc # MEMORY LEAK DETECTION # Requires installing the ccmalloc library. diff --git a/src/buffer.c b/src/buffer.c index f252489c2..119011791 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -82,7 +82,7 @@ open_buffer(read_stdin, eap) && (curbuf->b_flags & BF_NEVERLOADED)) curbuf->b_p_ro = TRUE; - if (ml_open() == FAIL) + if (ml_open(curbuf) == FAIL) { /* * There MUST be a memfile, otherwise we can't do anything @@ -1505,6 +1505,8 @@ buflist_new(ffname, sfname, lnum, flags) * buffer. Otherwise: Need to allocate a new buffer structure. * * This is the ONLY place where a new buffer structure is allocated! + * (A spell file buffer is allocated in spell.c, but that's not a normal + * buffer.) */ buf = NULL; if ((flags & BLN_CURBUF) @@ -5191,7 +5193,7 @@ buf_contents_changed(buf) curwin->w_buffer = newbuf; #endif - if (ml_open() == OK + if (ml_open(curbuf) == OK && readfile(buf->b_ffname, buf->b_fname, (linenr_T)0, (linenr_T)0, (linenr_T)MAXLNUM, &ea, READ_NEW | READ_DUMMY) == OK) diff --git a/src/edit.c b/src/edit.c index 23dd57eb9..c037d1f96 100644 --- a/src/edit.c +++ b/src/edit.c @@ -987,6 +987,15 @@ doESCkey: case K_IGNORE: /* Something mapped to nothing */ break; +#ifdef FEAT_GUI_W32 + /* On Win32 ignore <M-F4>, we get it when closing the window was + * cancelled. */ + case K_F4: + if (mod_mask != MOD_MASK_ALT) + goto normalchar; + break; +#endif + #ifdef FEAT_GUI case K_VER_SCROLLBAR: ins_scroll(); diff --git a/src/eval.c b/src/eval.c index eef865a4b..482359331 100644 --- a/src/eval.c +++ b/src/eval.c @@ -654,7 +654,6 @@ static int get_var_tv __ARGS((char_u *name, int len, typval_T *rettv, int verbos static int handle_subscript __ARGS((char_u **arg, typval_T *rettv, int evaluate, int verbose)); static typval_T *alloc_tv __ARGS((void)); static typval_T *alloc_string_tv __ARGS((char_u *string)); -static void free_tv __ARGS((typval_T *varp)); static void init_tv __ARGS((typval_T *varp)); static long get_tv_number __ARGS((typval_T *varp)); static linenr_T get_tv_lnum __ARGS((typval_T *argvars)); @@ -1323,7 +1322,9 @@ get_spellword(list, pp) #endif /* - * Top level evaluation function, + * Top level evaluation function. + * Returns an allocated typval_T with the result. + * Returns NULL when there is an error. */ typval_T * eval_expr(arg, nextcmd) @@ -1333,13 +1334,10 @@ eval_expr(arg, nextcmd) typval_T *tv; tv = (typval_T *)alloc(sizeof(typval_T)); - if (!tv) - return NULL; - - if (eval0(arg, tv, nextcmd, TRUE) == FAIL) + if (tv != NULL && eval0(arg, tv, nextcmd, TRUE) == FAIL) { vim_free(tv); - return NULL; + tv = NULL; } return tv; @@ -13930,7 +13928,7 @@ f_spellbadword(argvars, rettv) /* Check the argument for spelling. */ while (*str != NUL) { - len = spell_check(curwin, str, &attr, &capcol); + len = spell_check(curwin, str, &attr, &capcol, FALSE); if (attr != HLF_COUNT) { word = str; @@ -13996,7 +13994,7 @@ f_spellsuggest(argvars, rettv) else maxcount = 25; - spell_suggest_list(&ga, str, maxcount, need_capital); + spell_suggest_list(&ga, str, maxcount, need_capital, FALSE); for (i = 0; i < ga.ga_len; ++i) { @@ -15904,7 +15902,7 @@ alloc_string_tv(s) /* * Free the memory for a variable type-value. */ - static void + void free_tv(varp) typval_T *varp; { @@ -16910,7 +16908,10 @@ ex_execute(eap) if (ret != FAIL && ga.ga_data != NULL) { if (eap->cmdidx == CMD_echomsg) + { MSG_ATTR(ga.ga_data, echo_attr); + out_flush(); + } else if (eap->cmdidx == CMD_echoerr) { /* We don't want to abort following commands, restore did_emsg. */ diff --git a/src/ex_cmds.c b/src/ex_cmds.c index 006733c4d..c4fd0c4bd 100644 --- a/src/ex_cmds.c +++ b/src/ex_cmds.c @@ -111,9 +111,9 @@ do_ascii(eap) IObuff[len++] = ' '; IObuff[len++] = '<'; if (utf_iscomposing(c) -#ifdef USE_GUI +# ifdef USE_GUI && !gui.in_use -#endif +# endif ) IObuff[len++] = ' '; /* draw composing char on top of a space */ len += (*mb_char2bytes)(c, IObuff + len); diff --git a/src/ex_cmds.h b/src/ex_cmds.h index 4c4ab6ff0..a15ed464d 100644 --- a/src/ex_cmds.h +++ b/src/ex_cmds.h @@ -187,6 +187,8 @@ EX(CMD_cabbrev, "cabbrev", ex_abbreviate, EXTRA|TRLBAR|NOTRLCOM|USECTRLV|CMDWIN), EX(CMD_cabclear, "cabclear", ex_abclear, EXTRA|TRLBAR|CMDWIN), +EX(CMD_caddexpr, "caddexpr", ex_cexpr, + NEEDARG|WORD1|NOTRLCOM|TRLBAR|BANG), EX(CMD_caddfile, "caddfile", ex_cfile, TRLBAR|FILE1), EX(CMD_call, "call", ex_call, @@ -394,7 +396,7 @@ EX(CMD_for, "for", ex_while, EX(CMD_function, "function", ex_function, EXTRA|BANG|CMDWIN), EX(CMD_global, "global", ex_global, - RANGE|WHOLEFOLD|BANG|EXTRA|DFLALL|CMDWIN), + RANGE|WHOLEFOLD|BANG|EXTRA|DFLALL|SBOXOK|CMDWIN), EX(CMD_goto, "goto", ex_goto, RANGE|NOTADR|COUNT|TRLBAR|SBOXOK|CMDWIN), EX(CMD_grep, "grep", ex_make, @@ -768,7 +770,7 @@ EX(CMD_spellgood, "spellgood", ex_spell, EX(CMD_spellwrong, "spellwrong", ex_spell, BANG|RANGE|NOTADR|NEEDARG|EXTRA|TRLBAR), EX(CMD_spelldump, "spelldump", ex_spelldump, - TRLBAR), + BANG|TRLBAR), EX(CMD_spellrepall, "spellrepall", ex_spellrepall, TRLBAR), EX(CMD_sprevious, "sprevious", ex_previous, diff --git a/src/ex_docmd.c b/src/ex_docmd.c index 1eb2bb9a5..e15c6c5d7 100644 --- a/src/ex_docmd.c +++ b/src/ex_docmd.c @@ -8234,8 +8234,11 @@ ex_mkrc(eap) failed = TRUE; if (put_line(fd, "doautoall SessionLoadPost") == FAIL) failed = TRUE; - if (put_line(fd, "unlet SessionLoad") == FAIL) - failed = TRUE; + if (eap->cmdidx == CMD_mksession) + { + if (put_line(fd, "unlet SessionLoad") == FAIL) + failed = TRUE; + } } #endif if (put_line(fd, "\" vim: set ft=vim :") == FAIL) diff --git a/src/ex_getln.c b/src/ex_getln.c index 335f2a411..95109534a 100644 --- a/src/ex_getln.c +++ b/src/ex_getln.c @@ -645,8 +645,8 @@ getcmdline(firstc, count, indent) /* * Replace the command line with the result of an expression. - * Need to save the current command line, to be able to enter - * a new one... + * Need to save and restore the current command line, to be + * able to enter a new one... */ if (ccline.cmdpos == ccline.cmdlen) new_cmdpos = 99999; /* keep it at the end */ @@ -658,8 +658,17 @@ getcmdline(firstc, count, indent) restore_cmdline(&save_ccline); if (c == '=') { + /* Need to save and restore ccline. And go into the + * sandbox to avoid nasty things like going to another + * buffer when evaluating an expression. */ save_cmdline(&save_ccline); +#ifdef HAVE_SANDBOX + ++sandbox; +#endif p = get_expr_line(); +#ifdef HAVE_SANDBOX + --sandbox; +#endif restore_cmdline(&save_ccline); if (p != NULL && realloc_cmdbuff((int)STRLEN(p) + 1) == OK) @@ -1192,6 +1201,18 @@ getcmdline(firstc, count, indent) case K_IGNORE: goto cmdline_not_changed; /* Ignore mouse */ +#ifdef FEAT_GUI_W32 + /* On Win32 ignore <M-F4>, we get it when closing the window was + * cancelled. */ + case K_F4: + if (mod_mask == MOD_MASK_ALT) + { + redrawcmd(); /* somehow the cmdline is cleared */ + goto cmdline_not_changed; + } + break; +#endif + #ifdef FEAT_MOUSE case K_MIDDLEDRAG: case K_MIDDLERELEASE: diff --git a/src/feature.h b/src/feature.h index 69a784e44..efe791567 100644 --- a/src/feature.h +++ b/src/feature.h @@ -127,6 +127,15 @@ #endif /* + * Message history is fixed at 100 message, 20 for the tiny version. + */ +#ifdef FEAT_SMALL +# define MAX_MSG_HIST_LEN 100 +#else +# define MAX_MSG_HIST_LEN 20 +#endif + +/* * +jumplist Jumplist, CTRL-O and CTRL-I commands. */ #ifdef FEAT_SMALL diff --git a/src/fileio.c b/src/fileio.c index e05fb54db..c5c926299 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -6405,7 +6405,7 @@ buf_reload(buf) /* Open the memline. */ curbuf = savebuf; curwin->w_buffer = savebuf; - saved = ml_open(); + saved = ml_open(curbuf); curbuf = buf; curwin->w_buffer = buf; } diff --git a/src/globals.h b/src/globals.h index 8c2110569..c22466c06 100644 --- a/src/globals.h +++ b/src/globals.h @@ -528,7 +528,12 @@ EXTERN int starting INIT(= NO_SCREEN); /* first NO_SCREEN, then NO_BUFFERS and then * set to 0 when starting up finished */ EXTERN int exiting INIT(= FALSE); - /* TRUE when abandoning Vim */ + /* TRUE when planning to exit Vim. Might + * still keep on running if there is a changed + * buffer. */ +EXTERN int really_exiting INIT(= FALSE); + /* TRUE when we are sure to exit, e.g., after + * a deadly signal */ EXTERN int full_screen INIT(= FALSE); /* TRUE when doing full-screen output * otherwise only writing some messages */ diff --git a/src/gui_gtk_x11.c b/src/gui_gtk_x11.c index 1502e0c1c..84c5fe406 100644 --- a/src/gui_gtk_x11.c +++ b/src/gui_gtk_x11.c @@ -2328,6 +2328,7 @@ sm_client_die(GnomeClient *client, gpointer data) vim_strncpy(IObuff, _("Vim: Received \"die\" request from session manager\n"), IOSIZE - 1); + deadly_exit = TRUE; preserve_exit(); } diff --git a/src/hashtable.c b/src/hashtable.c index 904be4704..066f0f3cb 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -86,6 +86,31 @@ hash_clear(ht) } /* + * Free the array of a hash table and all the keys it contains. The keys must + * have been allocated. "off" is the offset from the start of the allocate + * memory to the location of the key (it's always positive). + */ + void +hash_clear_all(ht, off) + hashtab_T *ht; + int off; +{ + int todo; + hashitem_T *hi; + + todo = ht->ht_used; + for (hi = ht->ht_array; todo > 0; ++hi) + { + if (!HASHITEM_EMPTY(hi)) + { + vim_free(hi->hi_key - off); + --todo; + } + } + hash_clear(ht); +} + +/* * Find "key" in hashtable "ht". "key" must not be NULL. * Always returns a pointer to a hashitem. If the item was not found then * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key diff --git a/src/memline.c b/src/memline.c index 3eba98d59..9abf15523 100644 --- a/src/memline.c +++ b/src/memline.c @@ -13,10 +13,11 @@ /* * memline.c: Contains the functions for appending, deleting and changing the - * text lines. The memfile functions are used to store the information in blocks - * of memory, backed up by a file. The structure of the information is a tree. - * The root of the tree is a pointer block. The leaves of the tree are data - * blocks. In between may be several layers of pointer blocks, forming branches. + * text lines. The memfile functions are used to store the information in + * blocks of memory, backed up by a file. The structure of the information is + * a tree. The root of the tree is a pointer block. The leaves of the tree + * are data blocks. In between may be several layers of pointer blocks, + * forming branches. * * Three types of blocks are used: * - Block nr 0 contains information for recovery @@ -169,7 +170,7 @@ struct block0 }; /* - * Note: b0_fname and b0_flags are put at the end of the file name. For very + * Note: b0_dirty and b0_flags are put at the end of the file name. For very * long file names in older versions of Vim they are invalid. * The 'fileencoding' comes before b0_flags, with a NUL in front. But only * when there is room, for very long file names it's omitted. @@ -245,12 +246,13 @@ static void ml_updatechunk __ARGS((buf_T *buf, long line, long len, int updtype) #endif /* - * open a new memline for 'curbuf' + * Open a new memline for "buf". * - * return FAIL for failure, OK otherwise + * Return FAIL for failure, OK otherwise. */ int -ml_open() +ml_open(buf) + buf_T *buf; { memfile_T *mfp; bhdr_T *hp = NULL; @@ -258,36 +260,36 @@ ml_open() PTR_BL *pp; DATA_BL *dp; -/* - * init fields in memline struct - */ - curbuf->b_ml.ml_stack_size = 0; /* no stack yet */ - curbuf->b_ml.ml_stack = NULL; /* no stack yet */ - curbuf->b_ml.ml_stack_top = 0; /* nothing in the stack */ - curbuf->b_ml.ml_locked = NULL; /* no cached block */ - curbuf->b_ml.ml_line_lnum = 0; /* no cached line */ + /* + * init fields in memline struct + */ + buf->b_ml.ml_stack_size = 0; /* no stack yet */ + buf->b_ml.ml_stack = NULL; /* no stack yet */ + buf->b_ml.ml_stack_top = 0; /* nothing in the stack */ + buf->b_ml.ml_locked = NULL; /* no cached block */ + buf->b_ml.ml_line_lnum = 0; /* no cached line */ #ifdef FEAT_BYTEOFF - curbuf->b_ml.ml_chunksize = NULL; + buf->b_ml.ml_chunksize = NULL; #endif -/* - * When 'updatecount' is non-zero, flag that a swap file may be opened later. - */ - if (p_uc && curbuf->b_p_swf) - curbuf->b_may_swap = TRUE; + /* + * When 'updatecount' is non-zero swap file may be opened later. + */ + if (p_uc && buf->b_p_swf) + buf->b_may_swap = TRUE; else - curbuf->b_may_swap = FALSE; + buf->b_may_swap = FALSE; -/* - * Open the memfile. No swap file is created yet. - */ + /* + * Open the memfile. No swap file is created yet. + */ mfp = mf_open(NULL, 0); if (mfp == NULL) goto error; - curbuf->b_ml.ml_mfp = mfp; - curbuf->b_ml.ml_flags = ML_EMPTY; - curbuf->b_ml.ml_line_count = 1; + buf->b_ml.ml_mfp = mfp; + buf->b_ml.ml_flags = ML_EMPTY; + buf->b_ml.ml_line_count = 1; #ifdef FEAT_LINEBREAK curwin->w_nrwidth_line_count = 0; #endif @@ -296,7 +298,7 @@ ml_open() /* for 16 bit MS-DOS create a swapfile now, because we run out of * memory very quickly */ if (p_uc != 0) - ml_open_file(curbuf); + ml_open_file(buf); #endif /* @@ -313,36 +315,40 @@ ml_open() b0p->b0_id[0] = BLOCK0_ID0; b0p->b0_id[1] = BLOCK0_ID1; - b0p->b0_dirty = curbuf->b_changed ? B0_DIRTY : 0; - b0p->b0_flags = get_fileformat(curbuf) + 1; b0p->b0_magic_long = (long)B0_MAGIC_LONG; b0p->b0_magic_int = (int)B0_MAGIC_INT; b0p->b0_magic_short = (short)B0_MAGIC_SHORT; b0p->b0_magic_char = B0_MAGIC_CHAR; - STRNCPY(b0p->b0_version, "VIM ", 4); STRNCPY(b0p->b0_version + 4, Version, 6); - set_b0_fname(b0p, curbuf); long_to_char((long)mfp->mf_page_size, b0p->b0_page_size); - (void)get_user_name(b0p->b0_uname, B0_UNAME_SIZE); - b0p->b0_uname[B0_UNAME_SIZE - 1] = NUL; - mch_get_host_name(b0p->b0_hname, B0_HNAME_SIZE); - b0p->b0_hname[B0_HNAME_SIZE - 1] = NUL; - long_to_char(mch_get_pid(), b0p->b0_pid); + + if (!B_SPELL(buf)) + { + b0p->b0_dirty = buf->b_changed ? B0_DIRTY : 0; + b0p->b0_flags = get_fileformat(buf) + 1; + set_b0_fname(b0p, buf); + (void)get_user_name(b0p->b0_uname, B0_UNAME_SIZE); + b0p->b0_uname[B0_UNAME_SIZE - 1] = NUL; + mch_get_host_name(b0p->b0_hname, B0_HNAME_SIZE); + b0p->b0_hname[B0_HNAME_SIZE - 1] = NUL; + long_to_char(mch_get_pid(), b0p->b0_pid); + } /* * Always sync block number 0 to disk, so we can check the file name in - * the swap file in findswapname(). Don't do this for help files though. + * the swap file in findswapname(). Don't do this for help files though + * and spell buffer though. * Only works when there's a swapfile, otherwise it's done when the file * is created. */ mf_put(mfp, hp, TRUE, FALSE); - if (!curbuf->b_help) + if (!buf->b_help && !B_SPELL(buf)) (void)mf_sync(mfp, 0); -/* - * fill in root pointer block and write page 1 - */ + /* + * Fill in root pointer block and write page 1. + */ if ((hp = ml_new_ptr(mfp)) == NULL) goto error; if (hp->bh_bnum != 1) @@ -358,9 +364,9 @@ ml_open() pp->pb_pointer[0].pe_line_count = 1; /* line count after insertion */ mf_put(mfp, hp, TRUE, FALSE); -/* - * allocate first data block and create an empty line 1. - */ + /* + * Allocate first data block and create an empty line 1. + */ if ((hp = ml_new_data(mfp, FALSE, 1)) == NULL) goto error; if (hp->bh_bnum != 2) @@ -384,7 +390,7 @@ error: mf_put(mfp, hp, FALSE, FALSE); mf_close(mfp, TRUE); /* will also free(mfp->mf_fname) */ } - curbuf->b_ml.ml_mfp = NULL; + buf->b_ml.ml_mfp = NULL; return FAIL; } @@ -518,6 +524,18 @@ ml_open_file(buf) if (mfp == NULL || mfp->mf_fd >= 0 || !buf->b_p_swf) return; /* nothing to do */ +#ifdef FEAT_SYN_HL + /* For a spell buffer use a temp file name. */ + if (buf->b_spell) + { + fname = vim_tempname('s'); + if (fname != NULL) + (void)mf_open_file(mfp, fname); /* consumes fname! */ + buf->b_may_swap = FALSE; + return; + } +#endif + /* * Try all directories in 'directory' option. */ @@ -886,7 +904,7 @@ ml_recover() goto theend; /* out of memory */ /* When called from main() still need to initialize storage structure */ - if (called_from_main && ml_open() == FAIL) + if (called_from_main && ml_open(curbuf) == FAIL) getout(1); /* @@ -2100,6 +2118,28 @@ ml_append(lnum, line, len, newfile) return ml_append_int(curbuf, lnum, line, len, newfile, FALSE); } +#if defined(FEAT_SYN_HL) || defined(PROTO) +/* + * Like ml_append() but for an arbitrary buffer. The buffer must already have + * a memline. + */ + int +ml_append_buf(buf, lnum, line, len, newfile) + buf_T *buf; + linenr_T lnum; /* append after this line (can be 0) */ + char_u *line; /* text of the new line */ + colnr_T len; /* length of new line, including NUL, or 0 */ + int newfile; /* flag, see above */ +{ + if (buf->b_ml.ml_mfp == NULL) + return FAIL; + + if (buf->b_ml.ml_line_lnum != 0) + ml_flush_line(buf); + return ml_append_int(buf, lnum, line, len, newfile, FALSE); +} +#endif + static int ml_append_int(buf, lnum, line, len, newfile, mark) buf_T *buf; @@ -2599,7 +2639,7 @@ ml_append_int(buf, lnum, line, len, newfile, mark) } /* - * replace line lnum, with buffering, in current buffer + * Replace line lnum, with buffering, in current buffer. * * If copy is TRUE, make a copy of the line, otherwise the line has been * copied to allocated memory already. @@ -2643,7 +2683,7 @@ ml_replace(lnum, line, copy) } /* - * delete line 'lnum' + * Delete line 'lnum' in the current buffer. * * Check: The caller of this function should probably also call * deleted_lines() after this. @@ -4114,7 +4154,9 @@ findswapname(buf, dirp, old_fname) #endif { MSG_PUTS("\n"); - need_wait_return = TRUE; /* call wait_return later */ + if (msg_silent == 0) + /* call wait_return() later */ + need_wait_return = TRUE; } #ifdef CREATE_DUMMY_FILE diff --git a/src/message.c b/src/message.c index 8ea49ae20..465b25e7e 100644 --- a/src/message.c +++ b/src/message.c @@ -752,7 +752,7 @@ add_msg_hist(s, len, attr) return; /* Don't let the message history get too big */ - while (msg_hist_len > 20) + while (msg_hist_len > MAX_MSG_HIST_LEN) (void)delete_first_msg(); /* allocate an entry and add the message at the end of the history */ diff --git a/src/misc1.c b/src/misc1.c index a5a1805c2..08cd20c91 100644 --- a/src/misc1.c +++ b/src/misc1.c @@ -7952,6 +7952,10 @@ preserve_exit() prepare_to_exit(); + /* Setting this will prevent free() calls. That avoids calling free() + * recursively when free() was invoked with a bad pointer. */ + really_exiting = TRUE; + out_str(IObuff); screen_start(); /* don't know where cursor is now */ out_flush(); diff --git a/src/misc2.c b/src/misc2.c index c3f2b3798..f5b13e762 100644 --- a/src/misc2.c +++ b/src/misc2.c @@ -1426,13 +1426,15 @@ copy_option_part(option, buf, maxlen, sep_chars) } /* - * replacement for free() that ignores NULL pointers + * Replacement for free() that ignores NULL pointers. + * Also skip free() when exiting for sure, this helps when we caught a deadly + * signal that was caused by a crash in free(). */ void vim_free(x) void *x; { - if (x != NULL) + if (x != NULL && !really_exiting) { #ifdef MEM_PROFILE mem_pre_free(&x); diff --git a/src/po/it.po b/src/po/it.po index ed25f47fc..dba546c95 100644 --- a/src/po/it.po +++ b/src/po/it.po @@ -12,9 +12,8 @@ msgid "" msgstr "" "Project-Id-Version: vim 7.0\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2005-08-25 09:30+0200\n" -"PO-Revision-Date: 2005-08-29 21:30+0200\n" +"POT-Creation-Date: 2006-01-03 16:07+0100\n" +"PO-Revision-Date: 2006-01-06 13:50+0100\n" "Last-Translator: Vlad Sandrini <marco@sandrini.biz>\n" "Language-Team: Italian" " Antonio Colombo <azc10@yahoo.com>" @@ -22,6 +21,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=ISO_8859-1\n" "Content-Transfer-Encoding: 8-bit\n" +"Report-Msgid-Bugs-To: \n" msgid "E82: Cannot allocate any buffer, exiting..." msgstr "E82: Non riesco ad allocare alcun buffer, esco..." @@ -254,8 +254,8 @@ msgstr " Completamento linea comandi (^V^N^P)" msgid " User defined completion (^U^N^P)" msgstr " Completamento definito dall'utente (^U^N^P)" -msgid " Occult completion (^O^N^P)" -msgstr " Completamento nascosto (^O^N^P)" +msgid " Omni completion (^O^N^P)" +msgstr " Completamento globale (^O^N^P)" msgid " Spelling suggestion (^S^N^P)" msgstr " Suggerimento ortografico (^S^N^P)" @@ -267,10 +267,10 @@ msgid "Hit end of paragraph" msgstr "Giunto alla fine del paragrafo" msgid "'dictionary' option is empty" -msgstr "l'opzione 'dictionary' è vuota" +msgstr "l'opzione 'dictionary' non è impostata" msgid "'thesaurus' option is empty" -msgstr "l'opzione 'thesaurus' è vuota" +msgstr "l'opzione 'thesaurus' non è impostata" #, c-format msgid "Scanning dictionary: %s" @@ -1131,21 +1131,8 @@ msgstr "E177: Non si può specificare due volte il contatore" msgid "E178: Invalid default value for count" msgstr "E178: Valore predefinito del contatore non valido" -msgid "E179: argument required for complete" -msgstr "E179: argomento necessario per complete" - -#, c-format -msgid "E180: Invalid complete value: %s" -msgstr "E180: Valore %s non valido per 'complete'" - -msgid "E468: Completion argument only allowed for custom completion" -msgstr "" -"E468: Argomento di completamento permesso solo per completamento " -"personalizzato" - -msgid "E467: Custom completion requires a function argument" -msgstr "" -"E467: Il completamento personalizzato richiede un argomento di funzione" +msgid "E179: argument required for -complete" +msgstr "E179: argomento necessario per -complete" #, c-format msgid "E181: Invalid attribute: %s" @@ -1163,6 +1150,19 @@ msgid "E184: No such user-defined command: %s" msgstr "E184: Comando definito dall'utente %s inesistente" #, c-format +msgid "E180: Invalid complete value: %s" +msgstr "E180: Valore %s non valido per 'complete'" + +msgid "E468: Completion argument only allowed for custom completion" +msgstr "" +"E468: Argomento di completamento permesso solo per completamento " +"personalizzato" + +msgid "E467: Custom completion requires a function argument" +msgstr "" +"E467: Il completamento personalizzato richiede un argomento di funzione" + +#, c-format msgid "E185: Cannot find color scheme %s" msgstr "E185: Non riesco a trovare schema colore %s" @@ -1420,6 +1420,12 @@ msgstr "non è un file" msgid "[New File]" msgstr "[File nuovo]" +msgid "[New DIRECTORY]" +msgstr "[Nuova DIRECTORY]" + +msgid "[File too big]" +msgstr "[File troppo grande]" + msgid "[Permission Denied]" msgstr "[Tipo di accesso non consentito]" @@ -1469,8 +1475,9 @@ msgstr "[convertito]" msgid "[crypted]" msgstr "[cifrato]" -msgid "[CONVERSION ERROR]" -msgstr "[ERRORE DI CONVERSIONE]" +#, c-format +msgid "[CONVERSION ERROR in line %ld]" +msgstr "[ERRORE DI CONVERSIONE alla linea %ld]" #, c-format msgid "[ILLEGAL BYTE in line %ld]" @@ -3024,25 +3031,6 @@ msgstr "" msgid "--socketid <xid>\tOpen Vim inside another GTK widget" msgstr "--socketid <xid>\tApri Vim dentro un altro 'widget' GTK" -msgid "" -"\n" -"Arguments recognised by kvim (KDE version):\n" -msgstr "" -"\n" -"Argomenti accettati da kvim (versione KDE):\n" - -msgid "-black\t\tUse reverse video" -msgstr "-black\t\tUsa colori invertiti" - -msgid "-tip\t\t\tDisplay the tip dialog on startup" -msgstr "-tip\t\t\tMostra la finestra consigli all'avvio" - -msgid "-notip\t\tDisable the tip dialog" -msgstr "-notip\t\tDisabilita la finestra consigli" - -msgid "--display <display>\tRun vim on <display>" -msgstr "--display <schermo>\tEsegui vim su <schermo>" - msgid "-P <parent title>\tOpen Vim inside parent application" msgstr "-P <titolo padre>\tApri Vim in un'applicazione padre" @@ -3468,6 +3456,10 @@ msgstr "Dimensione 'stack' aumentata" msgid "E317: pointer block id wrong 2" msgstr "E317: ID blocco puntatori errato 2" +#, c-format +msgid "E773: Symlink loop for \"%s\"" +msgstr "E773: Collegamento ricorsivo per \"%s\"" + msgid "E325: ATTENTION" msgstr "E325: ATTENZIONE" @@ -3641,6 +3633,10 @@ msgstr "Interruzione: " msgid "Press ENTER or type command to continue" msgstr "Premi INVIO o un comando per proseguire" +#, c-format +msgid "%s line %ld" +msgstr "%s linea %ld" + msgid "-- More --" msgstr "-- Ancora --" @@ -3830,6 +3826,12 @@ msgstr "E658: Connessione NetBeans persa per il buffer %ld" msgid "E505: " msgstr "E505: " +msgid "E774: 'operatorfunc' is empty" +msgstr "E774: opzione 'operatorfunc' non impostata" + +msgid "E775: Eval feature not available" +msgstr "E775: Tipo di valorizzazione [eval] non disponibile" + msgid "Warning: terminal cannot highlight" msgstr "Attenzione: il terminale non è in grado di evidenziare" @@ -3843,7 +3845,7 @@ msgid "E352: Cannot erase folds with current 'foldmethod'" msgstr "E352: Non posso togliere piegature con il 'foldmethod' in uso" msgid "E664: changelist is empty" -msgstr "E664: lista modifiche vuota" +msgstr "E664: lista modifiche non impostata" msgid "E662: At start of changelist" msgstr "E662: All'inizio della lista modifiche" @@ -4225,14 +4227,6 @@ msgstr "Vim: Intercettato segnale fatale\n" msgid "Opening the X display took %ld msec" msgstr "Attivazione visualizzazione X ha richiesto %ld msec" -#. KDE sometimes produces X error that we want to ignore -msgid "" -"\n" -"Vim: Got X error but we continue...\n" -msgstr "" -"\n" -"Vim: Preso errore X ma andiamo avanti...\n" - msgid "" "\n" "Vim: Got X error\n" @@ -4562,12 +4556,6 @@ msgstr " SELEZIONA BLOCCO" msgid "recording" msgstr "registrazione" -msgid "search hit TOP, continuing at BOTTOM" -msgstr "raggiunta la CIMA nella ricerca, continuo dal FONDO" - -msgid "search hit BOTTOM, continuing at TOP" -msgstr "raggiunto il FONDO nella ricerca, continuo dalla CIMA" - #, c-format msgid "E383: Invalid search string: %s" msgstr "E383: Stringa di ricerca non valida: %s" @@ -4645,7 +4633,7 @@ msgid "Compressing word tree..." msgstr "Comprimo albero di parole..." msgid "E756: Spell checking is not enabled" -msgstr "E756: Il controllo ortografico non è abilitato" +msgstr "E756: Controllo ortografico non abilitato" #, c-format msgid "Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\"" @@ -4659,7 +4647,7 @@ msgid "E757: This does not look like a spell file" msgstr "E757: Questo non sembra un file ortografico" msgid "E771: Old spell file, needs to be updated" -msgstr "E771: File ortografico obsoleto, necessario aggiornamento" +msgstr "E771: File ortografico obsoleto, è necessario aggiornarlo" msgid "E772: Spell file is for newer version of Vim" msgstr "E772: Il file ortografico è per versioni di Vim più recenti" @@ -4712,14 +4700,26 @@ msgid "Wrong COMPOUNDSYLMAX value in %s line %d: %s" msgstr "Valore errato per COMPOUNDSYLMAX in %s linea %d: %s" #, c-format -msgid "Expected Y or N in %s line %d: %s" -msgstr "Y o N deve essere presente in %s linea %d: %s" +msgid "Different combining flag in continued affix block in %s line %d: %s" +msgstr "" +"Flag combinazione diverso in blocco affissi continuo in %s linea %d: %s" #, c-format msgid "Duplicate affix in %s line %d: %s" msgstr "Affisso duplicato in %s linea %d: %s" #, c-format +msgid "" +"Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %" +"s" +msgstr "" +"Affisso usato anche per BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND in %s linea %d: %s" + +#, c-format +msgid "Expected Y or N in %s line %d: %s" +msgstr "Y o N deve essere presente in %s linea %d: %s" + +#, c-format msgid "Broken condition in %s line %d: %s" msgstr "Condizione non rispettata in %s linea %d: %s" @@ -4747,13 +4747,13 @@ msgid "COMPOUNDSYLMAX used without SYLLABLE" msgstr "COMPOUNDSYLMAX usato senza SYLLABLE" msgid "Too many postponed prefixes" -msgstr "Troppi prefissi posposti" +msgstr "Troppi suffissi" msgid "Too many compound flags" msgstr "Troppi flag composti" msgid "Too many posponed prefixes and/or compound flags" -msgstr "Troppi prefissi posposti e/o flag composti" +msgstr "Troppi suffissi e/o flag composti" #, c-format msgid "Missing SOFO%s line in %s" @@ -4867,11 +4867,8 @@ msgstr "Fatto!" msgid "Estimated runtime memory use: %d bytes" msgstr "Uso stimato di memoria durante esecuzione: %d bytes" -msgid "E764: 'spellfile' is not set" -msgstr "E764: opzione 'spellfile' vuota" - #, c-format -msgid "E765: 'spellfile' does not have %ld enties" +msgid "E765: 'spellfile' does not have %ld entries" msgstr "E765: 'spellfile' non ha %ld elementi" msgid "E763: Word characters differ between spell files" @@ -5406,9 +5403,6 @@ msgstr "con GUI Cocoa." msgid "with (classic) GUI." msgstr "con GUI (classica)." -msgid "with KDE GUI." -msgstr "con GUI KDE." - msgid " Features included (+) or not (-):\n" msgstr " Opzioni incluse (+) o escluse (-):\n" @@ -5862,7 +5856,7 @@ msgid "E49: Invalid scroll size" msgstr "E49: Quantità di 'scroll' non valida" msgid "E91: 'shell' option is empty" -msgstr "E91: opzione 'shell' vuota" +msgstr "E91: opzione 'shell' non impostata" msgid "E255: Couldn't read in sign data!" msgstr "E255: Errore -- non sono riuscito a leggere i dati del 'sign'!" @@ -5871,7 +5865,7 @@ msgid "E72: Close error on swap file" msgstr "E72: Errore durante chiusura swap file" msgid "E73: tag stack empty" -msgstr "E73: tag stack vuota" +msgstr "E73: tag stack non impostata" msgid "E74: Command too complex" msgstr "E74: Comando troppo complesso" @@ -5933,3 +5927,13 @@ msgstr "E682: Espressione o delimitatore di ricerca non validi" msgid "E139: File is loaded in another buffer" msgstr "E139: File già caricato in un altro buffer" + +#, c-format +msgid "E764: Option '%s' is not set" +msgstr "E764: opzione '%s' non impostata" + +msgid "search hit TOP, continuing at BOTTOM" +msgstr "raggiunta la CIMA nella ricerca, continuo dal FONDO" + +msgid "search hit BOTTOM, continuing at TOP" +msgstr "raggiunto il FONDO nella ricerca, continuo dalla CIMA" diff --git a/src/proto/eval.pro b/src/proto/eval.pro index 7c0ca8402..0b76ff6af 100644 --- a/src/proto/eval.pro +++ b/src/proto/eval.pro @@ -61,6 +61,7 @@ void set_reg_var __ARGS((int c)); char_u *v_exception __ARGS((char_u *oldval)); char_u *v_throwpoint __ARGS((char_u *oldval)); char_u *set_cmdarg __ARGS((exarg_T *eap, char_u *oldarg)); +void free_tv __ARGS((typval_T *varp)); void clear_tv __ARGS((typval_T *varp)); long get_tv_number_chk __ARGS((typval_T *varp, int *denote)); char_u *get_tv_string_chk __ARGS((typval_T *varp)); diff --git a/src/proto/hashtable.pro b/src/proto/hashtable.pro index aebb458d6..90723765c 100644 --- a/src/proto/hashtable.pro +++ b/src/proto/hashtable.pro @@ -1,6 +1,7 @@ /* hashtable.c */ void hash_init __ARGS((hashtab_T *ht)); void hash_clear __ARGS((hashtab_T *ht)); +void hash_clear_all __ARGS((hashtab_T *ht, int off)); hashitem_T *hash_find __ARGS((hashtab_T *ht, char_u *key)); hashitem_T *hash_lookup __ARGS((hashtab_T *ht, char_u *key, hash_T hash)); void hash_debug_results __ARGS((void)); diff --git a/src/proto/memline.pro b/src/proto/memline.pro index 90d328559..de75a7dcf 100644 --- a/src/proto/memline.pro +++ b/src/proto/memline.pro @@ -1,32 +1,33 @@ /* memline.c */ -extern int ml_open __ARGS((void)); -extern void ml_setname __ARGS((buf_T *buf)); -extern void ml_open_files __ARGS((void)); -extern void ml_open_file __ARGS((buf_T *buf)); -extern void check_need_swap __ARGS((int newfile)); -extern void ml_close __ARGS((buf_T *buf, int del_file)); -extern void ml_close_all __ARGS((int del_file)); -extern void ml_close_notmod __ARGS((void)); -extern void ml_timestamp __ARGS((buf_T *buf)); -extern void ml_recover __ARGS((void)); -extern int recover_names __ARGS((char_u **fname, int list, int nr)); -extern void ml_sync_all __ARGS((int check_file, int check_char)); -extern void ml_preserve __ARGS((buf_T *buf, int message)); -extern char_u *ml_get __ARGS((linenr_T lnum)); -extern char_u *ml_get_pos __ARGS((pos_T *pos)); -extern char_u *ml_get_curline __ARGS((void)); -extern char_u *ml_get_cursor __ARGS((void)); -extern char_u *ml_get_buf __ARGS((buf_T *buf, linenr_T lnum, int will_change)); -extern int ml_line_alloced __ARGS((void)); -extern int ml_append __ARGS((linenr_T lnum, char_u *line, colnr_T len, int newfile)); -extern int ml_replace __ARGS((linenr_T lnum, char_u *line, int copy)); -extern int ml_delete __ARGS((linenr_T lnum, int message)); -extern void ml_setmarked __ARGS((linenr_T lnum)); -extern linenr_T ml_firstmarked __ARGS((void)); -extern void ml_clearmarked __ARGS((void)); -extern char_u *makeswapname __ARGS((char_u *fname, char_u *ffname, buf_T *buf, char_u *dir_name)); -extern char_u *get_file_in_dir __ARGS((char_u *fname, char_u *dname)); -extern void ml_setflags __ARGS((buf_T *buf)); -extern long ml_find_line_or_offset __ARGS((buf_T *buf, linenr_T lnum, long *offp)); -extern void goto_byte __ARGS((long cnt)); +int ml_open __ARGS((buf_T *buf)); +void ml_setname __ARGS((buf_T *buf)); +void ml_open_files __ARGS((void)); +void ml_open_file __ARGS((buf_T *buf)); +void check_need_swap __ARGS((int newfile)); +void ml_close __ARGS((buf_T *buf, int del_file)); +void ml_close_all __ARGS((int del_file)); +void ml_close_notmod __ARGS((void)); +void ml_timestamp __ARGS((buf_T *buf)); +void ml_recover __ARGS((void)); +int recover_names __ARGS((char_u **fname, int list, int nr)); +void ml_sync_all __ARGS((int check_file, int check_char)); +void ml_preserve __ARGS((buf_T *buf, int message)); +char_u *ml_get __ARGS((linenr_T lnum)); +char_u *ml_get_pos __ARGS((pos_T *pos)); +char_u *ml_get_curline __ARGS((void)); +char_u *ml_get_cursor __ARGS((void)); +char_u *ml_get_buf __ARGS((buf_T *buf, linenr_T lnum, int will_change)); +int ml_line_alloced __ARGS((void)); +int ml_append __ARGS((linenr_T lnum, char_u *line, colnr_T len, int newfile)); +int ml_append_buf __ARGS((buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, int newfile)); +int ml_replace __ARGS((linenr_T lnum, char_u *line, int copy)); +int ml_delete __ARGS((linenr_T lnum, int message)); +void ml_setmarked __ARGS((linenr_T lnum)); +linenr_T ml_firstmarked __ARGS((void)); +void ml_clearmarked __ARGS((void)); +char_u *makeswapname __ARGS((char_u *fname, char_u *ffname, buf_T *buf, char_u *dir_name)); +char_u *get_file_in_dir __ARGS((char_u *fname, char_u *dname)); +void ml_setflags __ARGS((buf_T *buf)); +long ml_find_line_or_offset __ARGS((buf_T *buf, linenr_T lnum, long *offp)); +void goto_byte __ARGS((long cnt)); /* vim: set ft=c : */ diff --git a/src/proto/spell.pro b/src/proto/spell.pro index 87261a244..791ec240b 100644 --- a/src/proto/spell.pro +++ b/src/proto/spell.pro @@ -1,5 +1,5 @@ /* spell.c */ -int spell_check __ARGS((win_T *wp, char_u *ptr, hlf_T *attrp, int *capcol)); +int spell_check __ARGS((win_T *wp, char_u *ptr, hlf_T *attrp, int *capcol, int docount)); int spell_move_to __ARGS((win_T *wp, int dir, int allwords, int curline, hlf_T *attrp)); void spell_cat_line __ARGS((char_u *buf, char_u *line, int maxlen)); char_u *did_set_spelllang __ARGS((buf_T *buf)); @@ -14,7 +14,7 @@ void init_spell_chartab __ARGS((void)); int spell_check_sps __ARGS((void)); void spell_suggest __ARGS((int count)); void ex_spellrepall __ARGS((exarg_T *eap)); -void spell_suggest_list __ARGS((garray_T *gap, char_u *word, int maxcount, int need_cap)); +void spell_suggest_list __ARGS((garray_T *gap, char_u *word, int maxcount, int need_cap, int interactive)); char_u *eval_soundfold __ARGS((char_u *word)); void ex_spelldump __ARGS((exarg_T *eap)); char_u *spell_to_word_end __ARGS((char_u *start, buf_T *buf)); diff --git a/src/quickfix.c b/src/quickfix.c index d0a2e153e..f550f116a 100644 --- a/src/quickfix.c +++ b/src/quickfix.c @@ -86,6 +86,7 @@ struct eformat /* 'O' overread (partial) message */ char_u flags; /* additional flags given in prefix */ /* '-' do not include this line */ + /* '+' include whole line in message */ }; static int qf_init_ext __ARGS((char_u *efile, buf_T *buf, typval_T *tv, char_u *errorformat, int newlist, linenr_T lnumfirst, linenr_T lnumlast)); @@ -578,7 +579,7 @@ restofline: col = (int)atol((char *)regmatch.startp[i]); if ((i = (int)fmt_ptr->addr[4]) > 0) /* %t */ type = *regmatch.startp[i]; - if (fmt_ptr->flags == '+' && !multiscan) /* %+ */ + if (fmt_ptr->flags == '+' && !multiscan) /* %+ */ STRCPY(errmsg, IObuff); else if ((i = (int)fmt_ptr->addr[5]) > 0) /* %m */ { @@ -613,9 +614,9 @@ restofline: } } multiscan = FALSE; - if (!fmt_ptr || idx == 'D' || idx == 'X') + if (fmt_ptr == NULL || idx == 'D' || idx == 'X') { - if (fmt_ptr) + if (fmt_ptr != NULL) { if (idx == 'D') /* enter directory */ { @@ -634,10 +635,10 @@ restofline: lnum = 0; /* don't jump to this line */ valid = FALSE; STRCPY(errmsg, IObuff); /* copy whole line to error message */ - if (!fmt_ptr) + if (fmt_ptr == NULL) multiline = multiignore = FALSE; } - else if (fmt_ptr) + else if (fmt_ptr != NULL) { if (vim_strchr((char_u *)"AEWI", idx) != NULL) multiline = TRUE; /* start of a multi-line message */ @@ -2747,7 +2748,7 @@ load_dummy_buffer(fname) /* Need to set the filename for autocommands. */ (void)setfname(curbuf, fname, NULL, FALSE); - if (ml_open() == OK) + if (ml_open(curbuf) == OK) { /* Create swap file now to avoid the ATTENTION message. */ check_need_swap(TRUE); @@ -2978,7 +2979,7 @@ ex_cbuffer(eap) #if defined(FEAT_EVAL) || defined(PROTO) /* - * ":cexpr {expr}" command. + * ":cexpr {expr}" and ":caddexpr {expr}" command. */ void ex_cexpr(eap) @@ -2986,16 +2987,23 @@ ex_cexpr(eap) { typval_T *tv; + /* Evaluate the expression. When the result is a string or a list we can + * use it to fill the errorlist. */ tv = eval_expr(eap->arg, NULL); - if (!tv || (tv->v_type != VAR_STRING && tv->v_type != VAR_LIST) || - (tv->v_type == VAR_STRING && !tv->vval.v_string) || - (tv->v_type == VAR_LIST && !tv->vval.v_list)) - return; - - if (qf_init_ext(NULL, NULL, tv, p_efm, TRUE, (linenr_T)0, (linenr_T)0) > 0) - qf_jump(0, 0, eap->forceit); /* display first error */ - - clear_tv(tv); + if (tv != NULL) + { + if ((tv->v_type == VAR_STRING && tv->vval.v_string != NULL) + || (tv->v_type == VAR_LIST && tv->vval.v_list != NULL)) + { + if (qf_init_ext(NULL, NULL, tv, p_efm, eap->cmdidx == CMD_cexpr, + (linenr_T)0, (linenr_T)0) > 0 + && eap->cmdidx == CMD_cexpr) + qf_jump(0, 0, eap->forceit); /* display first error */ + } + else + EMSG(_("E999: String or List expected")); + free_tv(tv); + } } #endif diff --git a/src/screen.c b/src/screen.c index 09821bd1a..fb5593447 100644 --- a/src/screen.c +++ b/src/screen.c @@ -134,7 +134,7 @@ static void fold_line __ARGS((win_T *wp, long fold_count, foldinfo_T *foldinfo, static void fill_foldcolumn __ARGS((char_u *p, win_T *wp, int closed, linenr_T lnum)); static void copy_text_attr __ARGS((int off, char_u *buf, int len, int attr)); #endif -static int win_line __ARGS((win_T *, linenr_T, int, int)); +static int win_line __ARGS((win_T *, linenr_T, int, int, int nochange)); static int char_needs_redraw __ARGS((int off_from, int off_to, int cols)); #ifdef FEAT_RIGHTLEFT static void screen_line __ARGS((int row, int coloff, int endcol, int clear_width, int rlflag)); @@ -1681,7 +1681,7 @@ win_update(wp) /* * Display one line. */ - row = win_line(wp, lnum, srow, wp->w_height); + row = win_line(wp, lnum, srow, wp->w_height, mod_top == 0); #ifdef FEAT_FOLDING wp->w_lines[idx].wl_folded = FALSE; @@ -2446,12 +2446,14 @@ fill_foldcolumn(p, wp, closed, lnum) * * Return the number of last row the line occupies. */ +/* ARGSUSED */ static int -win_line(wp, lnum, startrow, endrow) +win_line(wp, lnum, startrow, endrow, nochange) win_T *wp; linenr_T lnum; int startrow; int endrow; + int nochange; /* not updating for changed text */ { int col; /* visual column on screen */ unsigned off; /* offset in ScreenLines/ScreenAttrs */ @@ -3744,7 +3746,8 @@ win_line(wp, lnum, startrow, endrow) else p = prev_ptr; cap_col -= (prev_ptr - line); - len = spell_check(wp, p, &spell_hlf, &cap_col); + len = spell_check(wp, p, &spell_hlf, &cap_col, + nochange); word_end = v + len; /* In Insert mode only highlight a word that diff --git a/src/spell.c b/src/spell.c index ffe5a207c..a1ab89c82 100644 --- a/src/spell.c +++ b/src/spell.c @@ -43,6 +43,9 @@ * * Thanks to Olaf Seibert for providing an example implementation of this tree * and the compression mechanism. + * LZ trie ideas: + * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf + * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html * * Matching involves checking the caps type: Onecap ALLCAP KeepCap. * @@ -56,17 +59,28 @@ # define SPELL_PRINTTREE #endif +/* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk(). */ +#if 0 +# define DEBUG_TRIEWALK +#endif + /* * Use this to adjust the score after finding suggestions, based on the * suggested word sounding like the bad word. This is much faster than doing * it for every possible suggestion. - * Disadvantage: When "the" is typed as "hte" it sounds different and goes - * down in the list. + * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" + * vs "ht") and goes down in the list. * Used when 'spellsuggest' is set to "best". */ #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) /* + * Do the opposite: based on a maximum end score and a known sound score, + * compute the the maximum word score that can be used. + */ +#define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) + +/* * Vim spell file format: <HEADER> * <SECTIONS> * <LWORDTREE> @@ -133,6 +147,9 @@ * <reptolen> 1 byte length of <repto> * <repto> N bytes "to" part of replacement * + * sectionID == SN_REPSAL: <repcount> <rep> ... + * just like SN_REP but for soundfolded words + * * sectionID == SN_SAL: <salflags> <salcount> <sal> ... * <salflags> 1 byte flags for soundsalike conversion: * SAL_F0LLOWUP @@ -151,6 +168,12 @@ * <sofotolen> 2 bytes length of <sofoto> * <sofoto> N bytes "to" part of soundfold * + * sectionID == SN_SUGFILE: <timestamp> + * <timestamp> 8 bytes time in seconds that must match with .sug file + * + * sectionID == SN_WORDS: <word> ... + * <word> N bytes NUL terminated common word + * * sectionID == SN_MAP: <mapstr> * <mapstr> N bytes String with sequences of similar characters, * separated by slashes. @@ -236,6 +259,32 @@ * All text characters are in 'encoding', but stored as single bytes. */ +/* + * Vim .sug file format: <SUGHEADER> + * <SUGWORDTREE> + * <SUGTABLE> + * + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + * + * <fileID> 6 bytes "VIMsug" + * <versionnr> 1 byte VIMSUGVERSION + * <timestamp> 8 bytes timestamp that must match with .spl file + * + * + * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) + * + * + * <SUGTABLE>: <sugwcount> <sugline> ... + * + * <sugwcount> 4 bytes number of <sugline> following + * + * <sugline>: <sugnr> ... NUL + * + * <sugnr>: X bytes word number that results in this soundfolded word, + * stored as an offset to the previous number in as + * few bytes as possible, see offset2bytes()) + */ + #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) # include <io.h> /* for lseek(), must be before vim.h */ #endif @@ -248,6 +297,10 @@ # include <fcntl.h> #endif +#ifndef UNIX /* it's in os_unix.h for Unix */ +# include <time.h> /* for time_t */ +#endif + #define MAXWLEN 250 /* Assume max. word len is this many bytes. Some places assume a word length fits in a byte, thus it can't be above 255. */ @@ -302,8 +355,8 @@ typedef long idx_T; * follow; never used in prefix tree */ #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ -/* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep, - * and si_sal. Not for sl_sal! +/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, + * si_repsal, sl_rep, and si_sal. Not for sl_sal! * One replacement: from "ft_from" to "ft_to". */ typedef struct fromto_S { @@ -374,6 +427,8 @@ struct slang_S char_u *sl_midword; /* MIDWORD string or NULL */ + hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */ + int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */ int sl_compminlen; /* COMPOUNDMIN (default: 0) */ int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ @@ -394,12 +449,23 @@ struct slang_S garray_T sl_sal; /* list of salitem_T entries from SAL lines */ salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if there is none */ - int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: - * "sl_sal_first" maps chars, when has_mbyte - * "sl_sal" is a list of wide char lists. */ int sl_followup; /* SAL followup */ int sl_collapse; /* SAL collapse_result */ int sl_rem_accents; /* SAL remove_accents */ + int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: + * "sl_sal_first" maps chars, when has_mbyte + * "sl_sal" is a list of wide char lists. */ + garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ + short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ + + /* Info from the .sug file. Loaded on demand. */ + time_t sl_sugtime; /* timestamp for .sug file */ + char_u *sl_sbyts; /* soundfolded word bytes */ + idx_T *sl_sidxs; /* soundfolded word indexes */ + buf_T *sl_sugbuf; /* buffer with word number table */ + int sl_sugloaded; /* TRUE when .sug file was loaded or failed to + load */ + int sl_has_map; /* TRUE if there is a MAP line */ #ifdef FEAT_MBYTE hashtab_T sl_map_hash; /* MAP for multi-byte chars */ @@ -407,6 +473,8 @@ struct slang_S #else char_u sl_map_array[256]; /* MAP for first 256 chars */ #endif + hashtab_T sl_sounddone; /* table with soundfolded words that have + handled, see add_sound_suggest() */ }; /* First language that is loaded, start of the linked list of loaded @@ -437,6 +505,10 @@ typedef struct langp_S #define VIMSPELLMAGICL 8 #define VIMSPELLVERSION 50 +#define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ +#define VIMSUGMAGICL 6 +#define VIMSUGVERSION 1 + /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */ #define SN_REGION 0 /* <regionname> section */ #define SN_CHARFLAGS 1 /* charflags section */ @@ -449,6 +521,9 @@ typedef struct langp_S #define SN_COMPOUND 8 /* compound words section */ #define SN_SYLLABLE 9 /* syllable section */ #define SN_NOBREAK 10 /* NOBREAK section */ +#define SN_SUGFILE 11 /* timestamp for .sug file */ +#define SN_REPSAL 12 /* REPSAL items section */ +#define SN_WORDS 13 /* common words */ #define SN_END 255 /* end of sections */ #define SNF_REQUIRED 1 /* <sectionflags>: required section */ @@ -463,6 +538,17 @@ typedef struct langp_S /* file used for "zG" and "zW" */ static char_u *int_wordlist = NULL; +typedef struct wordcount_S +{ + short_u wc_count; /* nr of times word was seen */ + char_u wc_word[1]; /* word, actually longer */ +} wordcount_T; + +static wordcount_T dumwc; +#define WC_KEY_OFF (dumwc.wc_word - (char_u *)&dumwc) +#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) +#define MAXWORDCOUNT 0xffff + /* * Information used when looking for suggestions. */ @@ -471,6 +557,7 @@ typedef struct suginfo_S garray_T su_ga; /* suggestions, contains "suggest_T" */ int su_maxcount; /* max. number of suggestions displayed */ int su_maxscore; /* maximum score for adding to su_ga */ + int su_sfmaxscore; /* idem, for when doing soundfold words */ garray_T su_sga; /* like su_ga, sound-folded scoring */ char_u *su_badptr; /* start of bad word in line */ int su_badlen; /* length of detected bad word in line */ @@ -478,7 +565,6 @@ typedef struct suginfo_S char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ - slang_T *su_slang_first; /* slang_T used for su_sal_badword */ hashtab_T su_banned; /* table with banned words */ slang_T *su_sallang; /* default language for sound folding */ } suginfo_T; @@ -487,6 +573,7 @@ typedef struct suginfo_S typedef struct suggest_S { char_u *st_word; /* suggested word, allocated string */ + int st_wordlen; /* STRLEN(st_word) */ int st_orglen; /* length of replaced text */ int st_score; /* lower is better */ int st_altscore; /* used when st_score compares equal */ @@ -497,21 +584,24 @@ typedef struct suggest_S #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) -/* Number of suggestions kept when cleaning up. When rescore_suggestions() is - * called the score may change, thus we need to keep more than what is - * displayed. */ -#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount) +/* TRUE if a word appears in the list of banned words. */ +#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) + +/* Number of suggestions kept when cleaning up. we need to keep more than + * what is displayed, because when rescore_suggestions() is called the score + * may change and wrong suggestions may be removed later. */ +#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots * of suggestions that are not going to be displayed. */ -#define SUG_MAX_COUNT(su) ((su)->su_maxcount + 50) +#define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50) /* score for various changes */ #define SCORE_SPLIT 149 /* split bad word */ #define SCORE_ICASE 52 /* slightly different case */ #define SCORE_REGION 200 /* word is for different region */ #define SCORE_RARE 180 /* rare word */ -#define SCORE_SWAP 90 /* swap two characters */ +#define SCORE_SWAP 75 /* swap two characters */ #define SCORE_SWAP3 110 /* swap two characters in three */ #define SCORE_REP 65 /* REP replacement */ #define SCORE_SUBST 93 /* substitute a character */ @@ -529,8 +619,27 @@ typedef struct suggest_S #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. * 350 allows for about three changes. */ +#define SCORE_COMMON1 30 /* subtracted for words seen before */ +#define SCORE_COMMON2 40 /* subtracted for words often seen */ +#define SCORE_COMMON3 50 /* subtracted for words very often seen */ +#define SCORE_THRES2 10 /* word count threshold for COMMON2 */ +#define SCORE_THRES3 100 /* word count threshold for COMMON3 */ + +/* When trying changed soundfold words it becomes slow when trying more than + * two changes. With less then two changes it's slightly faster but we miss a + * few good suggestions. In rare cases we need to try three of four changes. + */ +#define SCORE_SFMAX1 200 /* maximum score for first try */ +#define SCORE_SFMAX2 300 /* maximum score for second try */ +#define SCORE_SFMAX3 400 /* maximum score for third try */ + #define SCORE_BIG SCORE_INS * 3 /* big difference */ -#define SCORE_MAXMAX 999999 /* accept any score */ +#define SCORE_MAXMAX 999999 /* accept any score */ +#define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ + +/* for spell_edit_score_limit() we need to know the minimum value of + * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ +#define SCORE_EDIT_MIN SCORE_SIMILAR /* * Structure to store info for word matching. @@ -617,6 +726,7 @@ typedef enum STATE_ENDNUL, /* Past NUL bytes at start of the node. */ STATE_PLAIN, /* Use each byte of the node. */ STATE_DEL, /* Delete a byte from the bad word. */ + STATE_INS_PREP, /* Prepare for inserting bytes. */ STATE_INS, /* Insert a byte in the bad word. */ STATE_SWAP, /* Swap two bytes. */ STATE_UNSWAP, /* Undo swap two characters. */ @@ -657,6 +767,8 @@ typedef struct trystate_S char_u ts_complen; /* nr of compound words used */ char_u ts_compsplit; /* index for "compflags" where word was spit */ char_u ts_save_badflags; /* su_badflags saved here */ + char_u ts_delidx; /* index in fword for char that was deleted, + valid when "ts_flags" has TSF_DIDDEL */ } trystate_T; /* values for ts_isdiff */ @@ -667,11 +779,12 @@ typedef struct trystate_S /* values for ts_flags */ #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ #define TSF_DIDSPLIT 2 /* tried split at this point */ +#define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ /* special values ts_prefixdepth */ #define PFD_NOPREFIX 0xff /* not using prefixes */ #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ -#define PFD_NOTSPECIAL 0xfd /* first value that's not special */ +#define PFD_NOTSPECIAL 0xfd /* highest value that's not special */ /* mode values for find_word */ #define FIND_FOLDWORD 0 /* find word case-folded */ @@ -683,6 +796,7 @@ typedef struct trystate_S static slang_T *slang_alloc __ARGS((char_u *lang)); static void slang_free __ARGS((slang_T *lp)); static void slang_clear __ARGS((slang_T *lp)); +static void slang_clear_sug __ARGS((slang_T *lp)); static void find_word __ARGS((matchinf_T *mip, int mode)); static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); @@ -700,8 +814,11 @@ static char_u *read_string __ARGS((FILE *fd, int cnt)); static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); static int read_charflags_section __ARGS((FILE *fd)); static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); -static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); +static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first)); static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); +static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len)); +static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count)); +static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split)); static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); static int byte_in_str __ARGS((char_u *str, int byte)); @@ -712,7 +829,8 @@ static void set_sal_first __ARGS((slang_T *lp)); #ifdef FEAT_MBYTE static int *mb_str2wide __ARGS((char_u *s)); #endif -static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); +static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt)); +static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); static void clear_midword __ARGS((buf_T *buf)); static void use_midword __ARGS((slang_T *lp, buf_T *buf)); static int find_region __ARGS((char_u *rp, char_u *region)); @@ -723,18 +841,21 @@ static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); -static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); +static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive)); #ifdef FEAT_EVAL static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); #endif static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); -static void spell_suggest_intern __ARGS((suginfo_T *su)); +static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive)); +static void suggest_load_files __ARGS((void)); +static void tree_count_words __ARGS((char_u *byts, idx_T *idxs)); static void spell_find_cleanup __ARGS((suginfo_T *su)); static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); static void suggest_try_special __ARGS((suginfo_T *su)); static void suggest_try_change __ARGS((suginfo_T *su)); -static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); +static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold)); +static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add)); #ifdef FEAT_MBYTE static int nofold_len __ARGS((char_u *fword, int flen, char_u *word)); #endif @@ -742,14 +863,17 @@ static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kwo static void score_comp_sal __ARGS((suginfo_T *su)); static void score_combine __ARGS((suginfo_T *su)); static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); +static void suggest_try_soundalike_prep __ARGS((void)); static void suggest_try_soundalike __ARGS((suginfo_T *su)); +static void suggest_try_soundalike_finish __ARGS((void)); +static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp)); +static int soundfold_find __ARGS((slang_T *slang, char_u *word)); static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); static void set_map_str __ARGS((slang_T *lp, char_u *map)); static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); -static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); +static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf)); +static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap)); static void add_banned __ARGS((suginfo_T *su, char_u *word)); -static int was_banned __ARGS((suginfo_T *su, char_u *word)); -static void free_banned __ARGS((suginfo_T *su)); static void rescore_suggestions __ARGS((suginfo_T *su)); static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); @@ -760,9 +884,15 @@ static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u * static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); #endif static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); -static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); -static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum)); +static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword)); +static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); +#ifdef FEAT_MBYTE +static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); +#endif +static void dump_word __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T lnum)); static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum)); +static buf_T *open_spellbuf __ARGS((void)); +static void close_spellbuf __ARGS((buf_T *buf)); /* * Use our own character-case definitions, because the current locale may @@ -831,11 +961,12 @@ static char *msg_compressing = N_("Compressing word tree..."); * caller can skip over the word. */ int -spell_check(wp, ptr, attrp, capcol) +spell_check(wp, ptr, attrp, capcol, docount) win_T *wp; /* current window */ char_u *ptr; hlf_T *attrp; int *capcol; /* column to check for Capital */ + int docount; /* count good words */ { matchinf_T mi; /* Most things are put in "mi" so that it can be passed to functions quickly. */ @@ -843,6 +974,7 @@ spell_check(wp, ptr, attrp, capcol) int c; int wrongcaplen = 0; int lpi; + int count_word = docount; /* A word never starts at a space or a control character. Return quickly * then, skipping over the character. */ @@ -905,8 +1037,8 @@ spell_check(wp, ptr, attrp, capcol) /* * Loop over the languages specified in 'spelllang'. - * We check them all, because a matching word may be longer than an - * already found matching word. + * We check them all, because a word may be matched longer in another + * language. */ for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) { @@ -934,6 +1066,14 @@ spell_check(wp, ptr, attrp, capcol) mi.mi_result = mi.mi_result2; mi.mi_end = mi.mi_end2; } + + /* Count the word in the first language where it's found to be OK. */ + if (count_word && mi.mi_result == SP_OK) + { + count_common_word(mi.mi_lp->lp_slang, ptr, + (int)(mi.mi_end - ptr), 1); + count_word = FALSE; + } } if (mi.mi_result != SP_OK) @@ -1897,7 +2037,7 @@ spell_move_to(wp, dir, allwords, curline, attrp) /* start of word */ attr = HLF_COUNT; - len = spell_check(wp, p, &attr, &capcol); + len = spell_check(wp, p, &attr, &capcol, FALSE); if (attr != HLF_COUNT) { @@ -2140,7 +2280,7 @@ int_wordlist_spl(fname) } /* - * Allocate a new slang_T. + * Allocate a new slang_T for language "lang". "lang" can be NULL. * Caller must fill "sl_next". */ static slang_T * @@ -2152,11 +2292,15 @@ slang_alloc(lang) lp = (slang_T *)alloc_clear(sizeof(slang_T)); if (lp != NULL) { - lp->sl_name = vim_strsave(lang); + if (lang != NULL) + lp->sl_name = vim_strsave(lang); ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); + ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); lp->sl_compmax = MAXWLEN; lp->sl_compsylmax = MAXWLEN; + hash_init(&lp->sl_wordcount); } + return lp; } @@ -2184,6 +2328,7 @@ slang_clear(lp) fromto_T *ftp; salitem_T *smp; int i; + int round; vim_free(lp->sl_fbyts); lp->sl_fbyts = NULL; @@ -2199,14 +2344,17 @@ slang_clear(lp) vim_free(lp->sl_pidxs); lp->sl_pidxs = NULL; - gap = &lp->sl_rep; - while (gap->ga_len > 0) + for (round = 1; round <= 2; ++round) { - ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; - vim_free(ftp->ft_from); - vim_free(ftp->ft_to); + gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; + while (gap->ga_len > 0) + { + ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; + vim_free(ftp->ft_from); + vim_free(ftp->ft_to); + } + ga_clear(gap); } - ga_clear(gap); gap = &lp->sl_sal; if (lp->sl_sofo) @@ -2253,21 +2401,16 @@ slang_clear(lp) lp->sl_syllable = NULL; ga_clear(&lp->sl_syl_items); -#ifdef FEAT_MBYTE - { - int todo = lp->sl_map_hash.ht_used; - hashitem_T *hi; + hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); + hash_init(&lp->sl_wordcount); - for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi) - if (!HASHITEM_EMPTY(hi)) - { - --todo; - vim_free(hi->hi_key); - } - } - hash_clear(&lp->sl_map_hash); +#ifdef FEAT_MBYTE + hash_clear_all(&lp->sl_map_hash, 0); #endif + /* Clear info from .sug file. */ + slang_clear_sug(lp); + lp->sl_compmax = MAXWLEN; lp->sl_compminlen = 0; lp->sl_compsylmax = MAXWLEN; @@ -2275,6 +2418,23 @@ slang_clear(lp) } /* + * Clear the info from the .sug file in "lp". + */ + static void +slang_clear_sug(lp) + slang_T *lp; +{ + vim_free(lp->sl_sbyts); + lp->sl_sbyts = NULL; + vim_free(lp->sl_sidxs); + lp->sl_sidxs = NULL; + close_spellbuf(lp->sl_sugbuf); + lp->sl_sugbuf = NULL; + lp->sl_sugloaded = FALSE; + lp->sl_sugtime = 0; +} + +/* * Load one spell file and store the info into a slang_T. * Invoked through do_in_runtimepath(). */ @@ -2303,11 +2463,13 @@ spell_load_cb(fname, cookie) /* * Load one spell file and store the info into a slang_T. * - * This is invoked in two ways: + * This is invoked in three ways: * - From spell_load_cb() to load a spell file for the first time. "lang" is * the language name, "old_lp" is NULL. Will allocate an slang_T. * - To reload a spell file that was changed. "lang" is NULL and "old_lp" * points to the existing slang_T. + * - Just after writing a .spl file; it's read back to produce the .sug file. + * "old_lp" is NULL and "lang" is a dummy name. Will allocate an slang_T. * Returns the slang_T the spell file was loaded into. NULL for error. */ static slang_T * @@ -2320,16 +2482,12 @@ spell_load_file(fname, lang, old_lp, silent) FILE *fd; char_u buf[VIMSPELLMAGICL]; char_u *p; - char_u *bp; - idx_T *ip; int i; int n; int len; - int round; char_u *save_sourcing_name = sourcing_name; linenr_T save_sourcing_lnum = sourcing_lnum; slang_T *lp = NULL; - idx_T idx; int c = 0; int res; @@ -2374,7 +2532,8 @@ spell_load_file(fname, lang, old_lp, silent) sourcing_name = fname; sourcing_lnum = 0; - /* <HEADER>: <fileID> + /* + * <HEADER>: <fileID> */ for (i = 0; i < VIMSPELLMAGICL; ++i) buf[i] = getc(fd); /* <fileID> */ @@ -2433,7 +2592,11 @@ spell_load_file(fname, lang, old_lp, silent) break; case SN_REP: - res = read_rep_section(fd, lp); + res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); + break; + + case SN_REPSAL: + res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); break; case SN_SAL: @@ -2452,6 +2615,15 @@ spell_load_file(fname, lang, old_lp, silent) vim_free(p); break; + case SN_WORDS: + res = read_words_section(fd, lp, len); + break; + + case SN_SUGFILE: + for (i = 7; i >= 0; --i) /* <timestamp> */ + lp->sl_sugtime += getc(fd) << (i * 8); + break; + case SN_COMPOUND: res = read_compound(fd, lp, len); break; @@ -2481,9 +2653,9 @@ spell_load_file(fname, lang, old_lp, silent) goto truncerr; break; } +someerror: if (res == SP_FORMERROR) { -formerr: EMSG(_(e_format)); goto endFAIL; } @@ -2497,48 +2669,21 @@ truncerr: goto endFAIL; } - /* round 1: <LWORDTREE> - * round 2: <KWORDTREE> - * round 3: <PREFIXTREE> */ - for (round = 1; round <= 3; ++round) - { - /* The tree size was computed when writing the file, so that we can - * allocate it as one long block. <nodecount> */ - len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); - if (len < 0) - goto truncerr; - if (len > 0) - { - /* Allocate the byte array. */ - bp = lalloc((long_u)len, TRUE); - if (bp == NULL) - goto endFAIL; - if (round == 1) - lp->sl_fbyts = bp; - else if (round == 2) - lp->sl_kbyts = bp; - else - lp->sl_pbyts = bp; + /* <LWORDTREE> */ + res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); + if (res != 0) + goto someerror; - /* Allocate the index array. */ - ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); - if (ip == NULL) - goto endFAIL; - if (round == 1) - lp->sl_fidxs = ip; - else if (round == 2) - lp->sl_kidxs = ip; - else - lp->sl_pidxs = ip; + /* <KWORDTREE> */ + res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); + if (res != 0) + goto someerror; - /* Read the tree and store it in the array. */ - idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt); - if (idx == -1) - goto truncerr; - if (idx < 0) - goto formerr; - } - } + /* <PREFIXTREE> */ + res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, + lp->sl_prefixcnt); + if (res != 0) + goto someerror; /* For a new file link it in the list of spell files. */ if (old_lp == NULL) @@ -2733,25 +2878,23 @@ read_prefcond_section(fd, lp) } /* - * Read REP items section from "fd": <repcount> <rep> ... + * Read REP or REPSAL items section from "fd": <repcount> <rep> ... * Return SP_*ERROR flags. */ static int -read_rep_section(fd, slang) +read_rep_section(fd, gap, first) FILE *fd; - slang_T *slang; + garray_T *gap; + short *first; { int cnt; - garray_T *gap; fromto_T *ftp; - short *first; int i; cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ if (cnt < 0) return SP_TRUNCERROR; - gap = &slang->sl_rep; if (ga_grow(gap, cnt) == FAIL) return SP_OTHERERROR; @@ -2775,7 +2918,6 @@ read_rep_section(fd, slang) } /* Fill the first-index table. */ - first = slang->sl_rep_first; for (i = 0; i < 256; ++i) first[i] = -1; for (i = 0; i < gap->ga_len; ++i) @@ -2941,6 +3083,119 @@ read_sal_section(fd, slang) } /* + * Read SN_WORDS: <word> ... + * Return SP_*ERROR flags. + */ + static int +read_words_section(fd, lp, len) + FILE *fd; + slang_T *lp; + int len; +{ + int done = 0; + int i; + char_u word[MAXWLEN]; + + while (done < len) + { + /* Read one word at a time. */ + for (i = 0; ; ++i) + { + word[i] = getc(fd); + if (word[i] == NUL) + break; + if (i == MAXWLEN - 1) + return SP_FORMERROR; + } + + /* Init the count to 10. */ + count_common_word(lp, word, -1, 10); + done += i + 1; + } + return 0; +} + +/* + * Add a word to the hashtable of common words. + * If it's already there then the counter is increased. + */ + static void +count_common_word(lp, word, len, count) + slang_T *lp; + char_u *word; + int len; /* word length, -1 for upto NUL */ + int count; /* 1 to count once, 10 to init */ +{ + hash_T hash; + hashitem_T *hi; + wordcount_T *wc; + char_u buf[MAXWLEN]; + char_u *p; + + if (len == -1) + p = word; + else + { + vim_strncpy(buf, word, len); + p = buf; + } + + hash = hash_hash(p); + hi = hash_lookup(&lp->sl_wordcount, p, hash); + if (HASHITEM_EMPTY(hi)) + { + wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p)); + if (wc == NULL) + return; + STRCPY(wc->wc_word, p); + wc->wc_count = count; + hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); + } + else + { + wc = HI2WC(hi); + if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ + wc->wc_count = MAXWORDCOUNT; + } +} + +/* + * Adjust the score of common words. + */ + static int +score_wordcount_adj(slang, score, word, split) + slang_T *slang; + int score; + char_u *word; + int split; /* word was split, less bonus */ +{ + hashitem_T *hi; + wordcount_T *wc; + int bonus; + int newscore; + + hi = hash_find(&slang->sl_wordcount, word); + if (!HASHITEM_EMPTY(hi)) + { + wc = HI2WC(hi); + if (wc->wc_count < SCORE_THRES2) + bonus = SCORE_COMMON1; + else if (wc->wc_count < SCORE_THRES3) + bonus = SCORE_COMMON2; + else + bonus = SCORE_COMMON3; + if (split) + newscore = score - bonus / 2; + else + newscore = score - bonus; + if (newscore < 0) + return 0; + return newscore; + } + return score; +} + +/* * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> * Return SP_*ERROR flags. */ @@ -3434,17 +3689,63 @@ mb_str2wide(s) #endif /* + * Read a tree from the .spl or .sug file. + * Allocates the memory and stores pointers in "bytsp" and "idxsp". + * This is skipped when the tree has zero length. + * Returns zero when OK, SP_ value for an error. + */ + static int +spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt) + FILE *fd; + char_u **bytsp; + idx_T **idxsp; + int prefixtree; /* TRUE for the prefix tree */ + int prefixcnt; /* when "prefixtree" is TRUE: prefix count */ +{ + int len; + int idx; + char_u *bp; + idx_T *ip; + + /* The tree size was computed when writing the file, so that we can + * allocate it as one long block. <nodecount> */ + len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); + if (len < 0) + return SP_TRUNCERROR; + if (len > 0) + { + /* Allocate the byte array. */ + bp = lalloc((long_u)len, TRUE); + if (bp == NULL) + return SP_OTHERERROR; + *bytsp = bp; + + /* Allocate the index array. */ + ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); + if (ip == NULL) + return SP_OTHERERROR; + *idxsp = ip; + + /* Recursively read the tree and store it in the array. */ + idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); + if (idx < 0) + return idx; + } + return 0; +} + +/* * Read one row of siblings from the spell file and store it in the byte array * "byts" and index array "idxs". Recursively read the children. * - * NOTE: The code here must match put_node(). + * NOTE: The code here must match put_node()! * - * Returns the index follosing the siblings. - * Returns -1 if the file is shorter than expected. - * Returns -2 if there is a format error. + * Returns the index (>= 0) following the siblings. + * Returns SP_TRUNCERROR if the file is shorter than expected. + * Returns SP_FORMERROR if there is a format error. */ static idx_T -read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) +read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) FILE *fd; char_u *byts; idx_T *idxs; @@ -3463,10 +3764,10 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) len = getc(fd); /* <siblingcount> */ if (len <= 0) - return -1; + return SP_TRUNCERROR; if (startidx + len >= maxidx) - return -2; + return SP_FORMERROR; byts[idx++] = len; /* Read the byte values, flag/region bytes and shared indexes. */ @@ -3474,7 +3775,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) { c = getc(fd); /* <byte> */ if (c < 0) - return -1; + return SP_TRUNCERROR; if (c <= BY_SPECIAL) { if (c == BY_NOFLAGS && !prefixtree) @@ -3500,7 +3801,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ if (n >= maxprefcondnr) - return -2; + return SP_FORMERROR; c |= (n << 8); } else /* c must be BY_FLAGS or BY_FLAGS2 */ @@ -3526,7 +3827,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) /* <nodeidx> */ n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); if (n < 0 || n >= maxidx) - return -2; + return SP_FORMERROR; idxs[idx] = n + SHARED_MASK; c = getc(fd); /* <xbyte> */ } @@ -3545,7 +3846,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) else { idxs[startidx + i] = idx; - idx = read_tree(fd, byts, idxs, maxidx, idx, + idx = read_tree_node(fd, byts, idxs, maxidx, idx, prefixtree, maxprefcondnr); if (idx < 0) break; @@ -3820,7 +4121,7 @@ did_set_spelllang(buf) /* language has REP items itself */ lp->lp_replang = lp->lp_slang; else - /* find first similar language that does sound folding */ + /* find first similar language that has REP items */ for (j = 0; j < ga.ga_len; ++j) { lp2 = LANGP_ENTRY(ga, j); @@ -4239,11 +4540,15 @@ struct wordnode_S siblings, in following siblings it is always one. */ char_u wn_byte; /* Byte for this node. NUL for word end */ - char_u wn_affixID; /* when "wn_byte" is NUL: supported/required - prefix ID or 0 */ - short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */ - short wn_region; /* when "wn_byte" is NUL: region mask; for - PREFIXTREE it's the prefcondnr */ + + /* Info for when "wn_byte" is NUL. + * In PREFIXTREE "wn_region" is used for the prefcondnr. + * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and + * "wn_region" the LSW of the wordnr. */ + char_u wn_affixID; /* supported/required prefix ID or 0 */ + short_u wn_flags; /* WF_ flags */ + short wn_region; /* region mask */ + #ifdef SPELL_PRINTTREE int wn_nr; /* sequence nr for printing */ #endif @@ -4266,6 +4571,8 @@ typedef struct spellinfo_S wordnode_T *si_prefroot; /* tree with postponed prefixes */ + long si_sugtree; /* creating the soundfolding trie */ + sblock_T *si_blocks; /* memory blocks used */ long si_blocks_cnt; /* memory blocks allocated */ long si_compress_cnt; /* words to add before lowering @@ -4276,7 +4583,7 @@ typedef struct spellinfo_S #ifdef SPELL_PRINTTREE int si_wordnode_nr; /* sequence nr for nodes */ #endif - + buf_T *si_spellbuf; /* buffer used to store soundfold word table */ int si_ascii; /* handling only ASCII words */ int si_add; /* addition file */ @@ -4292,11 +4599,15 @@ typedef struct spellinfo_S * si_region_count > 1) */ garray_T si_rep; /* list of fromto_T entries from REP lines */ + garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ garray_T si_sal; /* list of fromto_T entries from SAL lines */ char_u *si_sofofr; /* SOFOFROM text */ char_u *si_sofoto; /* SOFOTO text */ + int si_nosugfile; /* NOSUGFILE item found */ int si_followup; /* soundsalike: ? */ int si_collapse; /* soundsalike: ? */ + hashtab_T si_commonwords; /* hashtable for common words */ + time_t si_sugtime; /* timestamp for .sug file */ int si_rem_accents; /* soundsalike: remove accents */ garray_T si_map; /* MAP info concatenated */ char_u *si_midword; /* MIDWORD chars or NULL */ @@ -4337,15 +4648,24 @@ static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin)); static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); -static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); +static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n)); static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root)); static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot)); static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); +static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd)); static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname)); static void clear_node __ARGS((wordnode_T *node)); static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); +static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname)); +static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang)); +static int sug_maketable __ARGS((spellinfo_T *spin)); +static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap)); +static int offset2bytes __ARGS((int nr, char_u *buf)); +static int bytes2offset __ARGS((char_u **pp)); +static void sug_write __ARGS((spellinfo_T *spin, char_u *fname)); static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); +static void spell_message __ARGS((spellinfo_T *spin, char_u *str)); static void init_spellfile __ARGS((void)); /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, @@ -4475,7 +4795,7 @@ spell_read_aff(spin, fname) char_u rline[MAXLINELEN]; char_u *line; char_u *pc = NULL; -#define MAXITEMCNT 7 +#define MAXITEMCNT 30 char_u *(items[MAXITEMCNT]); int itemcnt; char_u *p; @@ -4488,6 +4808,7 @@ spell_read_aff(spin, fname) char_u *fol = NULL; char_u *upp = NULL; int do_rep; + int do_repsal; int do_sal; int do_map; int found_map = FALSE; @@ -4513,19 +4834,15 @@ spell_read_aff(spin, fname) return NULL; } - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading affix file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); + spell_message(spin, IObuff); /* Only do REP lines when not done in another .aff file already. */ do_rep = spin->si_rep.ga_len == 0; + /* Only do REPSAL lines when not done in another .aff file already. */ + do_repsal = spin->si_repsal.ga_len == 0; + /* Only do SAL lines when not done in another .aff file already. */ do_sal = spin->si_sal.ga_len == 0; @@ -4756,6 +5073,10 @@ spell_read_aff(spin, fname) { spin->si_nobreak = TRUE; } + else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1) + { + spin->si_nosugfile = TRUE; + } else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) { aff->af_pfxpostpone = TRUE; @@ -5061,21 +5382,25 @@ spell_read_aff(spin, fname) { upp = vim_strsave(items[1]); } - else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) + else if ((STRCMP(items[0], "REP") == 0 + || STRCMP(items[0], "REPSAL") == 0) + && itemcnt == 2) { - /* Ignore REP count */; + /* Ignore REP/REPSAL count */; if (!isdigit(*items[1])) - smsg((char_u *)_("Expected REP count in %s line %d"), + smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), fname, lnum); } - else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3) + else if ((STRCMP(items[0], "REP") == 0 + || STRCMP(items[0], "REPSAL") == 0) + && itemcnt >= 3) { - /* REP item */ + /* REP/REPSAL item */ /* Myspell ignores extra arguments, we require it starts with * # to detect mistakes. */ if (itemcnt > 3 && items[3][0] != '#') smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); - if (do_rep) + if (items[0][3] == 'S' ? do_repsal : do_rep) { /* Replace underscore with space (can't include a space * directly). */ @@ -5085,7 +5410,9 @@ spell_read_aff(spin, fname) for (p = items[2]; *p != NUL; mb_ptr_adv(p)) if (*p == '_') *p = ' '; - add_fromto(spin, &spin->si_rep, items[1], items[2]); + add_fromto(spin, items[0][3] == 'S' + ? &spin->si_repsal + : &spin->si_rep, items[1], items[2]); } } else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) @@ -5156,6 +5483,22 @@ spell_read_aff(spin, fname) { sofoto = getroom_save(spin, items[1]); } + else if (STRCMP(items[0], "COMMON") == 0) + { + int i; + + for (i = 1; i < itemcnt; ++i) + { + if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, + items[i]))) + { + p = vim_strsave(items[i]); + if (p == NULL) + break; + hash_add(&spin->si_commonwords, p); + } + } + } else smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), fname, lnum, items[0]); @@ -5665,15 +6008,9 @@ spell_read_dic(spin, fname, affile) /* The hashtable is only used to detect duplicated words. */ hash_init(&ht); - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading dictionary file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, + _("Reading dictionary file %s ..."), fname); + spell_message(spin, IObuff); /* start with a message for the first line */ spin->si_msg_count = 999999; @@ -6122,15 +6459,8 @@ spell_read_wordfile(spin, fname) return FAIL; } - if (spin->si_verbose || p_verbose > 2) - { - if (!spin->si_verbose) - verbose_enter(); - smsg((char_u *)_("Reading word file %s ..."), fname); - out_flush(); - if (!spin->si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); + spell_message(spin, IObuff); /* * Read all the lines in the file one by one. @@ -6294,15 +6624,13 @@ spell_read_wordfile(spin, fname) vim_free(pc); fclose(fd); - if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2)) + if (spin->si_ascii && non_ascii > 0) { - if (p_verbose > 2) - verbose_enter(); - smsg((char_u *)_("Ignored %d words with non-ASCII characters"), - non_ascii); - if (p_verbose > 2) - verbose_leave(); + vim_snprintf((char *)IObuff, IOSIZE, + _("Ignored %d words with non-ASCII characters"), non_ascii); + spell_message(spin, IObuff); } + return retval; } @@ -6442,7 +6770,7 @@ store_word(spin, word, flags, region, pfxlist, need_affix) /* * Add word "word" to a word tree at "root". - * When "flags" < 0 we are adding to the prefix tree where flags is used for + * When "flags" < 0 we are adding to the prefix tree where "flags" is used for * "rare" and "region" is the condition nr. * Returns FAIL when out of memory. */ @@ -6507,10 +6835,13 @@ tree_add_word(spin, word, root, flags, region, affixID) && (node->wn_byte < word[i] || (node->wn_byte == NUL && (flags < 0 - ? node->wn_affixID < affixID - : node->wn_flags < (flags & WN_MASK) + ? node->wn_affixID < (unsigned)affixID + : (node->wn_flags < (unsigned)(flags & WN_MASK) || (node->wn_flags == (flags & WN_MASK) - && node->wn_affixID < affixID))))) + && (spin->si_sugtree + ? (node->wn_region & 0xffff) < region + : node->wn_affixID + < (unsigned)affixID))))))) { prev = &node->wn_sibling; node = *prev; @@ -6519,6 +6850,7 @@ tree_add_word(spin, word, root, flags, region, affixID) || node->wn_byte != word[i] || (word[i] == NUL && (flags < 0 + || spin->si_sugtree || node->wn_flags != (flags & WN_MASK) || node->wn_affixID != affixID))) { @@ -6606,9 +6938,11 @@ tree_add_word(spin, word, root, flags, region, affixID) /* Compress both trees. Either they both have many nodes, which makes * compression useful, or one of them is small, which means - * compression goes fast. */ + * compression goes fast. But when filling the souldfold word tree + * there is no keep-case tree. */ wordtree_compress(spin, spin->si_foldroot); - wordtree_compress(spin, spin->si_keeproot); + if (affixID >= 0) + wordtree_compress(spin, spin->si_keeproot); } return OK; @@ -6684,21 +7018,28 @@ get_wordnode(spin) * Decrement the reference count on a node (which is the head of a list of * siblings). If the reference count becomes zero free the node and its * siblings. + * Returns the number of nodes actually freed. */ - static void + static int deref_wordnode(spin, node) spellinfo_T *spin; wordnode_T *node; { - wordnode_T *np; + wordnode_T *np; + int cnt = 0; if (--node->wn_refs == 0) + { for (np = node; np != NULL; np = np->wn_sibling) { if (np->wn_child != NULL) - deref_wordnode(spin, np->wn_child); + cnt += deref_wordnode(spin, np->wn_child); free_wordnode(spin, np); + ++cnt; } + ++cnt; /* length field */ + } + return cnt; } /* @@ -6739,18 +7080,16 @@ wordtree_compress(spin, root) if (spin->si_verbose || p_verbose > 2) #endif { - if (!spin->si_verbose) - verbose_enter(); if (tot > 1000000) perc = (tot - n) / (tot / 100); else if (tot == 0) perc = 0; else perc = (tot - n) * 100 / tot; - smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), - n, tot, perc); - if (p_verbose > 2) - verbose_leave(); + vim_snprintf((char *)IObuff, IOSIZE, + _("Compressed %d of %d nodes; %d (%d%%) remaining"), + n, tot, tot - n, perc); + spell_message(spin, IObuff); } #ifdef SPELL_PRINTTREE spell_print_tree(root->wn_sibling); @@ -6784,24 +7123,24 @@ node_compress(spin, node, ht, tot) * Go through the list of siblings. Compress each child and then try * finding an identical child to replace it. * Note that with "child" we mean not just the node that is pointed to, - * but the whole list of siblings, of which the node is the first. + * but the whole list of siblings of which the child node is the first. */ for (np = node; np != NULL && !got_int; np = np->wn_sibling) { ++len; if ((child = np->wn_child) != NULL) { - /* Compress the child. This fills hashkey. */ + /* Compress the child first. This fills hashkey. */ compressed += node_compress(spin, child, ht, tot); /* Try to find an identical child. */ hash = hash_hash(child->wn_u1.hashkey); hi = hash_lookup(ht, child->wn_u1.hashkey, hash); - tp = NULL; if (!HASHITEM_EMPTY(hi)) { - /* There are children with an identical hash value. Now check - * if there is one that is really identical. */ + /* There are children we encountered before with a hash value + * identical to the current child. Now check if there is one + * that is really identical. */ for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) if (node_equal(child, tp)) { @@ -6809,9 +7148,8 @@ node_compress(spin, node, ht, tot) * current one. This means the current child and all * its siblings is unlinked from the tree. */ ++tp->wn_refs; - deref_wordnode(spin, child); + compressed += deref_wordnode(spin, child); np->wn_child = tp; - ++compressed; break; } if (tp == NULL) @@ -6830,7 +7168,7 @@ node_compress(spin, node, ht, tot) hash_add_item(ht, hi, child->wn_u1.hashkey, hash); } } - *tot += len; + *tot += len + 1; /* add one for the node that stores the length */ /* * Make a hash key for the node and its siblings, so that we can quickly @@ -6906,6 +7244,30 @@ put_bytes(fd, nr, len) putc((int)(nr >> (i * 8)), fd); } +/* + * Write spin->si_sugtime to file "fd". + */ + static void +put_sugtime(spin, fd) + spellinfo_T *spin; + FILE *fd; +{ + int c; + int i; + + /* time_t can be up to 8 bytes in size, more than long_u, thus we + * can't use put_bytes() here. */ + for (i = 7; i >= 0; --i) + if (i + 1 > sizeof(time_t)) + /* ">>" doesn't work well when shifting more bits than avail */ + putc(0, fd); + else + { + c = (unsigned)spin->si_sugtime >> (i * 8); + putc(c, fd); + } +} + static int #ifdef __BORLANDC__ _RTLENTRYF @@ -7056,29 +7418,37 @@ write_vim_spell(spin, fname) } /* SN_REP: <repcount> <rep> ... - * SN_SAL: <salflags> <salcount> <sal> ... */ - - /* Sort the REP items. */ - qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, - sizeof(fromto_T), rep_compare); + * SN_SAL: <salflags> <salcount> <sal> ... + * SN_REPSAL: <repcount> <rep> ... */ /* round 1: SN_REP section - * round 2: SN_SAL section (unless SN_SOFO is used) */ - for (round = 1; round <= 2; ++round) + * round 2: SN_SAL section (unless SN_SOFO is used) + * round 3: SN_REPSAL section */ + for (round = 1; round <= 3; ++round) { if (round == 1) - { gap = &spin->si_rep; - putc(SN_REP, fd); /* <sectionID> */ - } - else + else if (round == 2) { + /* Don't write SN_SAL when using a SN_SOFO section */ if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) - /* using SN_SOFO section instead of SN_SAL */ - break; + continue; gap = &spin->si_sal; - putc(SN_SAL, fd); /* <sectionID> */ } + else + gap = &spin->si_repsal; + + /* Don't write the section if there are no items. */ + if (gap->ga_len == 0) + continue; + + /* Sort the REP/REPSAL items. */ + if (round != 2) + qsort(gap->ga_data, (size_t)gap->ga_len, + sizeof(fromto_T), rep_compare); + + i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); + putc(i, fd); /* <sectionID> */ /* This is for making suggestions, section is not required. */ putc(0, fd); /* <sectionflags> */ @@ -7143,6 +7513,36 @@ write_vim_spell(spin, fname) fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ } + /* SN_WORDS: <word> ... + * This is for making suggestions, section is not required. */ + if (spin->si_commonwords.ht_used > 0) + { + putc(SN_WORDS, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + + /* round 1: count the bytes + * round 2: write the bytes */ + for (round = 1; round <= 2; ++round) + { + int todo; + int len = 0; + hashitem_T *hi; + + todo = spin->si_commonwords.ht_used; + for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) + if (!HASHITEM_EMPTY(hi)) + { + l = STRLEN(hi->hi_key) + 1; + len += l; + if (round == 2) /* <word> */ + fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); + --todo; + } + if (round == 1) + put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ + } + } + /* SN_MAP: <mapstr> * This is for making suggestions, section is not required. */ if (spin->si_map.ga_len > 0) @@ -7155,6 +7555,24 @@ write_vim_spell(spin, fname) /* <mapstr> */ } + /* SN_SUGFILE: <timestamp> + * This is used to notify that a .sug file may be available and at the + * same time allows for checking that a .sug file that is found matches + * with this .spl file. That's because the word numbers must be exactly + * right. */ + if (!spin->si_nosugfile + && (spin->si_sal.ga_len > 0 + || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) + { + putc(SN_SUGFILE, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ + + /* Set si_sugtime and write it to the file. */ + spin->si_sugtime = time(NULL); + put_sugtime(spin, fd); /* <timestamp> */ + } + /* SN_COMPOUND: compound info. * We don't mark it required, when not supported all compound words will * be bad words. */ @@ -7267,9 +7685,9 @@ clear_node(node) * This first writes the list of possible bytes (siblings). Then for each * byte recursively write the children. * - * NOTE: The code here must match the code in read_tree(), since assumptions - * are made about the indexes (so that we don't have to write them in the - * file). + * NOTE: The code here must match the code in read_tree_node(), since + * assumptions are made about the indexes (so that we don't have to write them + * in the file). * * Returns the number of nodes used. */ @@ -7427,6 +7845,520 @@ ex_mkspell(eap) } /* + * Create the .sug file. + * Uses the soundfold info in "spin". + * Writes the file with the name "wfname", with ".spl" changed to ".sug". + */ + static void +spell_make_sugfile(spin, wfname) + spellinfo_T *spin; + char_u *wfname; +{ + char_u fname[MAXPATHL]; + int len; + slang_T *slang; + int free_slang = FALSE; + + /* + * Read back the .spl file that was written. This fills the required + * info for soundfolding. This also uses less memory than the + * pointer-linked version of the trie. And it avoids having two versions + * of the code for the soundfolding stuff. + * It might have been done already by spell_reload_one(). + */ + for (slang = first_lang; slang != NULL; slang = slang->sl_next) + if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) + break; + if (slang == NULL) + { + spell_message(spin, (char_u *)_("Reading back spell file...")); + slang = spell_load_file(wfname, NULL, NULL, FALSE); + if (slang == NULL) + return; + /* don't want this language in the list */ + if (first_lang == slang) + first_lang = slang->sl_next; + free_slang = TRUE; + } + + /* + * Clear the info in "spin" that is used. + */ + spin->si_blocks = NULL; + spin->si_blocks_cnt = 0; + spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ + spin->si_free_count = 0; + spin->si_first_free = NULL; + spin->si_foldwcount = 0; + + /* + * Go through the trie of good words, soundfold each word and add it to + * the soundfold trie. + */ + spell_message(spin, (char_u *)_("Performing soundfolding...")); + if (sug_filltree(spin, slang) == FAIL) + goto theend; + + /* + * Create the table which links each soundfold word with a list of the + * good words it may come from. Creates buffer "spin->si_spellbuf". + * This also removes the wordnr from the NUL byte entries to make + * compression possible. + */ + if (sug_maketable(spin) == FAIL) + goto theend; + + smsg((char_u *)_("Number of words after soundfolding: %ld"), + (long)spin->si_spellbuf->b_ml.ml_line_count); + + /* + * Compress the soundfold trie. + */ + spell_message(spin, (char_u *)_(msg_compressing)); + wordtree_compress(spin, spin->si_foldroot); + + /* + * Write the .sug file. + * Make the file name by changing ".spl" to ".sug". + */ + STRCPY(fname, wfname); + len = STRLEN(fname); + fname[len - 2] = 'u'; + fname[len - 1] = 'g'; + sug_write(spin, fname); + +theend: + if (free_slang) + slang_free(slang); + free_blocks(spin->si_blocks); + close_spellbuf(spin->si_spellbuf); +} + +/* + * Build the soundfold trie for language "slang". + */ + static int +sug_filltree(spin, slang) + spellinfo_T *spin; + slang_T *slang; +{ + char_u *byts; + idx_T *idxs; + int depth; + idx_T arridx[MAXWLEN]; + int curi[MAXWLEN]; + char_u tword[MAXWLEN]; + char_u tsalword[MAXWLEN]; + int c; + idx_T n; + unsigned words_done = 0; + int wordcount[MAXWLEN]; + + /* We use si_foldroot for the souldfolded trie. */ + spin->si_foldroot = wordtree_alloc(spin); + if (spin->si_foldroot == NULL) + return FAIL; + + /* let tree_add_word() know we're adding to the soundfolded tree */ + spin->si_sugtree = TRUE; + + /* + * Go through the whole case-folded tree, soundfold each word and put it + * in the trie. + */ + byts = slang->sl_fbyts; + idxs = slang->sl_fidxs; + + arridx[0] = 0; + curi[0] = 1; + wordcount[0] = 0; + + depth = 0; + while (depth >= 0 && !got_int) + { + if (curi[depth] > byts[arridx[depth]]) + { + /* Done all bytes at this node, go up one level. */ + idxs[arridx[depth]] = wordcount[depth]; + if (depth > 0) + wordcount[depth - 1] += wordcount[depth]; + + --depth; + line_breakcheck(); + } + else + { + + /* Do one more byte at this node. */ + n = arridx[depth] + curi[depth]; + ++curi[depth]; + + c = byts[n]; + if (c == 0) + { + /* Sound-fold the word. */ + tword[depth] = NUL; + spell_soundfold(slang, tword, TRUE, tsalword); + + /* We use the "flags" field for the MSB of the wordnr, + * "region" for the LSB of the wordnr. */ + if (tree_add_word(spin, tsalword, spin->si_foldroot, + words_done >> 16, words_done & 0xffff, + 0) == FAIL) + return FAIL; + + ++words_done; + ++wordcount[depth]; + + /* Reset the block count each time to avoid compression + * kicking in. */ + spin->si_blocks_cnt = 0; + + /* Skip over any other NUL bytes (same word with different + * flags). */ + while (byts[n + 1] == 0) + { + ++n; + ++curi[depth]; + } + } + else + { + /* Normal char, go one level deeper. */ + tword[depth++] = c; + arridx[depth] = idxs[n]; + curi[depth] = 1; + wordcount[depth] = 0; + } + } + } + + smsg((char_u *)_("Total number of words: %d"), words_done); + + return OK; +} + +/* + * Make the table that links each word in the soundfold trie to the words it + * can be produced from. + * This is not unlike lines in a file, thus use a memfile to be able to access + * the table efficiently. + * Returns FAIL when out of memory. + */ + static int +sug_maketable(spin) + spellinfo_T *spin; +{ + garray_T ga; + int res = OK; + + /* Allocate a buffer, open a memline for it and create the swap file + * (uses a temp file, not a .swp file). */ + spin->si_spellbuf = open_spellbuf(); + if (spin->si_spellbuf == NULL) + return FAIL; + + /* Use a buffer to store the line info, avoids allocating many small + * pieces of memory. */ + ga_init2(&ga, 1, 100); + + /* recursively go through the tree */ + if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) + res = FAIL; + + ga_clear(&ga); + return res; +} + +/* + * Fill the table for one node and its children. + * Returns the wordnr at the start of the node. + * Returns -1 when out of memory. + */ + static int +sug_filltable(spin, node, startwordnr, gap) + spellinfo_T *spin; + wordnode_T *node; + int startwordnr; + garray_T *gap; /* place to store line of numbers */ +{ + wordnode_T *p, *np; + int wordnr = startwordnr; + int nr; + int prev_nr; + + for (p = node; p != NULL; p = p->wn_sibling) + { + if (p->wn_byte == NUL) + { + gap->ga_len = 0; + prev_nr = 0; + for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) + { + if (ga_grow(gap, 10) == FAIL) + return -1; + + nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); + /* Compute the offset from the previous nr and store the + * offset in a way that it takes a minimum number of bytes. + * It's a bit like utf-8, but without the need to mark + * following bytes. */ + nr -= prev_nr; + prev_nr += nr; + gap->ga_len += offset2bytes(nr, + (char_u *)gap->ga_data + gap->ga_len); + } + + /* add the NUL byte */ + ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; + + if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, + gap->ga_data, gap->ga_len, TRUE) == FAIL) + return -1; + ++wordnr; + + /* Remove extra NUL entries, we no longer need them. We don't + * bother freeing the nodes, the won't be reused anyway. */ + while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) + p->wn_sibling = p->wn_sibling->wn_sibling; + + /* Clear the flags on the remaining NUL node, so that compression + * works a lot better. */ + p->wn_flags = 0; + p->wn_region = 0; + } + else + { + wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); + if (wordnr == -1) + return -1; + } + } + return wordnr; +} + +/* + * Convert an offset into a minimal number of bytes. + * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL + * bytes. + */ + static int +offset2bytes(nr, buf) + int nr; + char_u *buf; +{ + int rem; + int b1, b2, b3, b4; + + /* Split the number in parts of base 255. We need to avoid NUL bytes. */ + b1 = nr % 255 + 1; + rem = nr / 255; + b2 = rem % 255 + 1; + rem = rem / 255; + b3 = rem % 255 + 1; + b4 = rem / 255 + 1; + + if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ + { + buf[0] = 0xe0 + b4; + buf[1] = b3; + buf[2] = b2; + buf[3] = b1; + return 4; + } + if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ + { + buf[0] = 0xc0 + b3; + buf[1] = b2; + buf[2] = b1; + return 3; + } + if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ + { + buf[0] = 0x80 + b2; + buf[1] = b1; + return 2; + } + /* 1 byte */ + buf[0] = b1; + return 1; +} + +/* + * Opposite of offset2bytes(). + * "pp" points to the bytes and is advanced over it. + * Returns the offset. + */ + static int +bytes2offset(pp) + char_u **pp; +{ + char_u *p = *pp; + int nr; + int c; + + c = *p++; + if ((c & 0x80) == 0x00) /* 1 byte */ + { + nr = c - 1; + } + else if ((c & 0xc0) == 0x80) /* 2 bytes */ + { + nr = (c & 0x3f) - 1; + nr = nr * 255 + (*p++ - 1); + } + else if ((c & 0xe0) == 0xc0) /* 3 bytes */ + { + nr = (c & 0x1f) - 1; + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + } + else /* 4 bytes */ + { + nr = (c & 0x0f) - 1; + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + nr = nr * 255 + (*p++ - 1); + } + + *pp = p; + return nr; +} + +/* + * Write the .sug file in "fname". + */ + static void +sug_write(spin, fname) + spellinfo_T *spin; + char_u *fname; +{ + FILE *fd; + wordnode_T *tree; + int nodecount; + int wcount; + char_u *line; + linenr_T lnum; + int len; + + /* Create the file. Note that an existing file is silently overwritten! */ + fd = mch_fopen((char *)fname, "w"); + if (fd == NULL) + { + EMSG2(_(e_notopen), fname); + return; + } + + vim_snprintf((char *)IObuff, IOSIZE, + _("Writing suggestion file %s ..."), fname); + spell_message(spin, IObuff); + + /* + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + */ + if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ + { + EMSG(_(e_write)); + goto theend; + } + putc(VIMSUGVERSION, fd); /* <versionnr> */ + + /* Write si_sugtime to the file. */ + put_sugtime(spin, fd); /* <timestamp> */ + + /* + * <SUGWORDTREE> + */ + spin->si_memtot = 0; + tree = spin->si_foldroot->wn_sibling; + + /* Clear the index and wnode fields in the tree. */ + clear_node(tree); + + /* Count the number of nodes. Needed to be able to allocate the + * memory when reading the nodes. Also fills in index for shared + * nodes. */ + nodecount = put_node(NULL, tree, 0, 0, FALSE); + + /* number of nodes in 4 bytes */ + put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ + spin->si_memtot += nodecount + nodecount * sizeof(int); + + /* Write the nodes. */ + (void)put_node(fd, tree, 0, 0, FALSE); + + /* + * <SUGTABLE>: <sugwcount> <sugline> ... + */ + wcount = spin->si_spellbuf->b_ml.ml_line_count; + put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ + + for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) + { + /* <sugline>: <sugnr> ... NUL */ + line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); + len = STRLEN(line) + 1; + if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) + { + EMSG(_(e_write)); + goto theend; + } + spin->si_memtot += len; + } + + /* Write another byte to check for errors. */ + if (putc(0, fd) == EOF) + EMSG(_(e_write)); + + vim_snprintf((char *)IObuff, IOSIZE, + _("Estimated runtime memory use: %d bytes"), spin->si_memtot); + spell_message(spin, IObuff); + +theend: + /* close the file */ + fclose(fd); +} + +/* + * Open a spell buffer. This is a nameless buffer that is not in the buffer + * list and only contains text lines. Can use a swapfile to reduce memory + * use. + * Most other fields are invalid! Esp. watch out for string options being + * NULL and there is no undo info. + * Returns NULL when out of memory. + */ + static buf_T * +open_spellbuf() +{ + buf_T *buf; + + buf = (buf_T *)alloc_clear(sizeof(buf_T)); + if (buf != NULL) + { + buf->b_spell = TRUE; + buf->b_p_swf = TRUE; /* may create a swap file */ + ml_open(buf); + ml_open_file(buf); /* create swap file now */ + } + return buf; +} + +/* + * Close the buffer used for spell info. + */ + static void +close_spellbuf(buf) + buf_T *buf; +{ + if (buf != NULL) + { + ml_close(buf, TRUE); + vim_free(buf); + } +} + + +/* * Create a Vim spell file from one or more word lists. * "fnames[0]" is the output file name. * "fnames[fcount - 1]" is the last input file name. @@ -7458,9 +8390,11 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) spin.si_followup = TRUE; spin.si_rem_accents = TRUE; ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); + ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_map, (int)sizeof(char_u), 100); ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); + hash_init(&spin.si_commonwords); spin.si_newcompID = 127; /* start compound ID at first maximum */ /* default: fnames[0] is output file, following are input files */ @@ -7613,64 +8547,47 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) if (spin.si_compflags != NULL && spin.si_nobreak) MSG(_("Warning: both compounding and NOBREAK specified")); - if (!error) + if (!error && !got_int) { /* * Combine tails in the tree. */ - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - MSG(_(msg_compressing)); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + spell_message(&spin, (char_u *)_(msg_compressing)); wordtree_compress(&spin, spin.si_foldroot); wordtree_compress(&spin, spin.si_keeproot); wordtree_compress(&spin, spin.si_prefroot); } - if (!error) + if (!error && !got_int) { /* * Write the info in the spell file. */ - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - smsg((char_u *)_("Writing spell file %s ..."), wfname); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + vim_snprintf((char *)IObuff, IOSIZE, + _("Writing spell file %s ..."), wfname); + spell_message(&spin, IObuff); error = write_vim_spell(&spin, wfname) == FAIL; - if (spin.si_verbose || p_verbose > 2) - { - if (!spin.si_verbose) - verbose_enter(); - MSG(_("Done!")); - smsg((char_u *)_("Estimated runtime memory use: %d bytes"), - spin.si_memtot); - out_flush(); - if (!spin.si_verbose) - verbose_leave(); - } + spell_message(&spin, (char_u *)_("Done!")); + vim_snprintf((char *)IObuff, IOSIZE, + _("Estimated runtime memory use: %d bytes"), spin.si_memtot); + spell_message(&spin, IObuff); - /* If the file is loaded need to reload it. */ + /* + * If the file is loaded need to reload it. + */ if (!error) spell_reload_one(wfname, added_word); } /* Free the allocated memory. */ ga_clear(&spin.si_rep); + ga_clear(&spin.si_repsal); ga_clear(&spin.si_sal); ga_clear(&spin.si_map); ga_clear(&spin.si_prefcond); + hash_clear_all(&spin.si_commonwords, 0); /* Free the .aff file structures. */ for (i = 0; i < incount; ++i) @@ -7679,9 +8596,36 @@ mkspell(fcount, fnames, ascii, overwrite, added_word) /* Free all the bits and pieces at once. */ free_blocks(spin.si_blocks); + + /* + * If there is soundfolding info and no NOSUGFILE item create the + * .sug file with the soundfolded word trie. + */ + if (spin.si_sugtime != 0 && !error && !got_int) + spell_make_sugfile(&spin, wfname); + } } +/* + * Display a message for spell file processing when 'verbose' is set or using + * ":mkspell". "str" can be IObuff. + */ + static void +spell_message(spin, str) + spellinfo_T *spin; + char_u *str; +{ + if (spin->si_verbose || p_verbose > 2) + { + if (!spin->si_verbose) + verbose_enter(); + MSG(str); + out_flush(); + if (!spin->si_verbose) + verbose_leave(); + } +} /* * ":[count]spellgood {word}" @@ -8334,12 +9278,13 @@ spell_casefold(str, len, buf, buflen) return OK; } +/* values for sps_flags */ #define SPS_BEST 1 #define SPS_FAST 2 #define SPS_DOUBLE 4 -static int sps_flags = SPS_BEST; -static int sps_limit = 9999; +static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ +static int sps_limit = 9999; /* max nr of suggestions given */ /* * Check the 'spellsuggest' option. Return FAIL if it's wrong. @@ -8461,7 +9406,7 @@ spell_suggest(count) else limit = sps_limit; spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, - TRUE, need_cap); + TRUE, need_cap, TRUE); if (sug.su_ga.ga_len == 0) MSG(_("Sorry, no suggestions")); @@ -8512,7 +9457,7 @@ spell_suggest(count) * the not replaced part. */ STRCPY(wcopy, stp->st_word); if (sug.su_badlen > stp->st_orglen) - vim_strncpy(wcopy + STRLEN(wcopy), + vim_strncpy(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen, sug.su_badlen - stp->st_orglen); vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); @@ -8586,7 +9531,7 @@ spell_suggest(count) } /* Replace the word. */ - p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1); + p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1); if (p != NULL) { c = sug.su_badptr - line; @@ -8601,7 +9546,7 @@ spell_suggest(count) ResetRedobuff(); AppendToRedobuff((char_u *)"ciw"); AppendToRedobuffLit(p + c, - STRLEN(stp->st_word) + sug.su_badlen - stp->st_orglen); + stp->st_wordlen + sug.su_badlen - stp->st_orglen); AppendCharToRedobuff(ESC); } } @@ -8759,18 +9704,19 @@ ex_spellrepall(eap) * a list of allocated strings. */ void -spell_suggest_list(gap, word, maxcount, need_cap) +spell_suggest_list(gap, word, maxcount, need_cap, interactive) garray_T *gap; char_u *word; int maxcount; /* maximum nr of suggestions */ int need_cap; /* 'spellcapcheck' matched */ + int interactive; { suginfo_T sug; int i; suggest_T *stp; char_u *wcopy; - spell_find_suggest(word, &sug, maxcount, FALSE, need_cap); + spell_find_suggest(word, &sug, maxcount, FALSE, need_cap, interactive); /* Make room in "gap". */ ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); @@ -8783,12 +9729,12 @@ spell_suggest_list(gap, word, maxcount, need_cap) /* The suggested word may replace only part of "word", add the not * replaced part. */ - wcopy = alloc(STRLEN(stp->st_word) + wcopy = alloc(stp->st_wordlen + STRLEN(sug.su_badptr + stp->st_orglen) + 1); if (wcopy == NULL) break; STRCPY(wcopy, stp->st_word); - STRCAT(wcopy, sug.su_badptr + stp->st_orglen); + STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; } @@ -8803,12 +9749,13 @@ spell_suggest_list(gap, word, maxcount, need_cap) * This is based on the mechanisms of Aspell, but completely reimplemented. */ static void -spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) +spell_find_suggest(badptr, su, maxcount, banbadword, need_cap, interactive) char_u *badptr; suginfo_T *su; int maxcount; int banbadword; /* don't include badword in suggestions */ int need_cap; /* word should start with capital */ + int interactive; { hlf_T attr = HLF_COUNT; char_u buf[MAXPATHL]; @@ -8833,7 +9780,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) hash_init(&su->su_banned); su->su_badptr = badptr; - su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL); + su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); su->su_maxcount = maxcount; su->su_maxscore = SCORE_MAXINIT; @@ -8876,7 +9823,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) { make_case_word(su->su_badword, buf, WF_ONECAP); add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, - 0, TRUE, su->su_sallang); + 0, TRUE, su->su_sallang, FALSE); } /* Ban the bad word itself. It may appear in another region. */ @@ -8912,7 +9859,7 @@ spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) else { /* Use internal method. */ - spell_suggest_intern(su); + spell_suggest_intern(su, interactive); if (sps_flags & SPS_DOUBLE) do_combine = TRUE; } @@ -8952,14 +9899,15 @@ spell_suggest_expr(su, expr) { /* Get the word and the score from the items. */ score = get_spellword(li->li_tv.vval.v_list, &p); - if (score >= 0) - add_suggestion(su, &su->su_ga, p, - su->su_badlen, score, 0, TRUE, su->su_sallang); + if (score >= 0 && score <= su->su_maxscore) + add_suggestion(su, &su->su_ga, p, su->su_badlen, + score, 0, TRUE, su->su_sallang, FALSE); } list_unref(list); } - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } #endif @@ -9011,13 +9959,14 @@ spell_suggest_file(su, fname) } add_suggestion(su, &su->su_ga, p, su->su_badlen, - SCORE_FILE, 0, TRUE, su->su_sallang); + SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); } } fclose(fd); - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } @@ -9025,10 +9974,16 @@ spell_suggest_file(su, fname) * Find suggestions for the internal method indicated by "sps_flags". */ static void -spell_suggest_intern(su) +spell_suggest_intern(su, interactive) suginfo_T *su; + int interactive; { /* + * Load the .sug file(s) that are available and not done yet. + */ + suggest_load_files(); + + /* * 1. Try special cases, such as repeating a word: "the the" -> "the". * * Set a maximum score to limit the combination of operations that is @@ -9048,22 +10003,50 @@ spell_suggest_intern(su) /* * 3. Try finding sound-a-like words. - * - * Only do this when we don't have a lot of suggestions yet, because it's - * very slow and often doesn't find new suggestions. */ - if ((sps_flags & SPS_DOUBLE) - || (!(sps_flags & SPS_FAST) - && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) + if ((sps_flags & SPS_FAST) == 0) { - /* Allow a higher score now. */ - su->su_maxscore = SCORE_MAXMAX; + if (sps_flags & SPS_BEST) + /* Adjust the word score for the suggestions found so far for how + * they sounds like. */ + rescore_suggestions(su); + + /* + * While going throught the soundfold tree "su_maxscore" is the score + * for the soundfold word, limits the changes that are being tried, + * and "su_sfmaxscore" the rescored score, which is set by + * cleanup_suggestions(). + * First find words with a small edit distance, because this is much + * faster and often already finds the top-N suggestions. If we didn't + * find many suggestions try again with a higher edit distance. + * "sl_sounddone" is used to avoid doing the same word twice. + */ + suggest_try_soundalike_prep(); + su->su_maxscore = SCORE_SFMAX1; + su->su_sfmaxscore = SCORE_MAXINIT * 3; suggest_try_soundalike(su); + if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) + { + /* We didn't find enough matches, try again, allowing more + * changes to the soundfold word. */ + su->su_maxscore = SCORE_SFMAX2; + suggest_try_soundalike(su); + if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) + { + /* Still didn't find enough matches, try again, allowing even + * more changes to the soundfold word. */ + su->su_maxscore = SCORE_SFMAX3; + suggest_try_soundalike(su); + } + } + su->su_maxscore = su->su_sfmaxscore; + suggest_try_soundalike_finish(); } - /* When CTRL-C was hit while searching do show the results. */ + /* When CTRL-C was hit while searching do show the results. Only clear + * got_int when using a command, not for spellsuggest(). */ ui_breakcheck(); - if (got_int) + if (interactive && got_int) { (void)vgetc(); got_int = FALSE; @@ -9075,12 +10058,220 @@ spell_suggest_intern(su) /* Adjust the word score for how it sounds like. */ rescore_suggestions(su); - /* Sort the suggestions and truncate at "maxcount". */ + /* Remove bogus suggestions, sort and truncate at "maxcount". */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); } } /* + * Load the .sug files for languages that have one and weren't loaded yet. + */ + static void +suggest_load_files() +{ + langp_T *lp; + int lpi; + slang_T *slang; + char_u *dotp; + FILE *fd; + char_u buf[MAXWLEN]; + int i; + time_t timestamp; + int wcount; + int wordnr; + garray_T ga; + int c; + + /* Do this for all languages that support sound folding. */ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) + { + /* Change ".spl" to ".sug" and open the file. When the file isn't + * found silently skip it. Do set "sl_sugloaded" so that we + * don't try again and again. */ + slang->sl_sugloaded = TRUE; + + dotp = vim_strrchr(slang->sl_fname, '.'); + if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) + continue; + STRCPY(dotp, ".sug"); + fd = fopen((char *)slang->sl_fname, "r"); + if (fd == NULL) + goto nextone; + + /* + * <SUGHEADER>: <fileID> <versionnr> <timestamp> + */ + for (i = 0; i < VIMSUGMAGICL; ++i) + buf[i] = getc(fd); /* <fileID> */ + if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) + { + EMSG2(_("E999: This does not look like a .sug file: %s"), + slang->sl_fname); + goto nextone; + } + c = getc(fd); /* <versionnr> */ + if (c < VIMSUGVERSION) + { + EMSG2(_("E999: Old .sug file, needs to be updated: %s"), + slang->sl_fname); + goto nextone; + } + else if (c > VIMSUGVERSION) + { + EMSG2(_("E999: .sug file is for newer version of Vim: %s"), + slang->sl_fname); + goto nextone; + } + + /* Check the timestamp, it must be exactly the same as the one in + * the .spl file. Otherwise the word numbers won't match. */ + timestamp = 0; + for (i = 7; i >= 0; --i) /* <timestamp> */ + timestamp += getc(fd) << (i * 8); + if (timestamp != slang->sl_sugtime) + { + EMSG2(_("E999: .sug file doesn't match .spl file: %s"), + slang->sl_fname); + goto nextone; + } + + /* + * <SUGWORDTREE>: <wordtree> + * Read the trie with the soundfolded words. + */ + if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, + FALSE, 0) != 0) + { +someerror: + EMSG2(_("E999: error while reading .sug file: %s"), + slang->sl_fname); + slang_clear_sug(slang); + goto nextone; + } + + /* + * <SUGTABLE>: <sugwcount> <sugline> ... + * + * Read the table with word numbers. We use a file buffer for + * this, because it's so much like a file with lines. Makes it + * possible to swap the info and save on memory use. + */ + slang->sl_sugbuf = open_spellbuf(); + if (slang->sl_sugbuf == NULL) + goto someerror; + /* <sugwcount> */ + wcount = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + + getc(fd); + if (wcount < 0) + goto someerror; + + /* Read all the wordnr lists into the buffer, one NUL terminated + * list per line. */ + ga_init2(&ga, 1, 100); + for (wordnr = 0; wordnr < wcount; ++wordnr) + { + ga.ga_len = 0; + for (;;) + { + c = getc(fd); /* <sugline> */ + if (c < 0 || ga_grow(&ga, 1) == FAIL) + goto someerror; + ((char_u *)ga.ga_data)[ga.ga_len++] = c; + if (c == NUL) + break; + } + if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, + ga.ga_data, ga.ga_len, TRUE) == FAIL) + goto someerror; + } + ga_clear(&ga); + + /* + * Need to put word counts in the word tries, so that we can find + * a word by its number. + */ + tree_count_words(slang->sl_fbyts, slang->sl_fidxs); + tree_count_words(slang->sl_sbyts, slang->sl_sidxs); + +nextone: + if (fd != NULL) + fclose(fd); + STRCPY(dotp, ".spl"); + } + } +} + + +/* + * Fill in the wordcount fields for a trie. + * Returns the total number of words. + */ + static void +tree_count_words(byts, idxs) + char_u *byts; + idx_T *idxs; +{ + int depth; + idx_T arridx[MAXWLEN]; + int curi[MAXWLEN]; + int c; + idx_T n; + int wordcount[MAXWLEN]; + + arridx[0] = 0; + curi[0] = 1; + wordcount[0] = 0; + depth = 0; + while (depth >= 0 && !got_int) + { + if (curi[depth] > byts[arridx[depth]]) + { + /* Done all bytes at this node, go up one level. */ + idxs[arridx[depth]] = wordcount[depth]; + if (depth > 0) + wordcount[depth - 1] += wordcount[depth]; + + --depth; + fast_breakcheck(); + } + else + { + /* Do one more byte at this node. */ + n = arridx[depth] + curi[depth]; + ++curi[depth]; + + c = byts[n]; + if (c == 0) + { + /* End of word, count it. */ + ++wordcount[depth]; + + /* Skip over any other NUL bytes (same word with different + * flags). */ + while (byts[n + 1] == 0) + { + ++n; + ++curi[depth]; + } + } + else + { + /* Normal char, go one level deeper to count the words. */ + ++depth; + arridx[depth] = idxs[n]; + curi[depth] = 1; + wordcount[depth] = 0; + } + } + } +} + +/* * Free the info put in "*su" by spell_find_suggest(). */ static void @@ -9098,7 +10289,7 @@ spell_find_cleanup(su) ga_clear(&su->su_sga); /* Free the banned words. */ - free_banned(su); + hash_clear_all(&su->su_banned, 0); } /* @@ -9224,31 +10415,87 @@ suggest_try_special(su) /* Give a soundalike score of 0, compute the score as if deleting one * character. */ add_suggestion(su, &su->su_ga, word, su->su_badlen, - RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang); + RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); + } +} + +/* + * Try finding suggestions by adding/removing/swapping letters. + */ + static void +suggest_try_change(su) + suginfo_T *su; +{ + char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ + int n; + char_u *p; + int lpi; + langp_T *lp; + + /* We make a copy of the case-folded bad word, so that we can modify it + * to find matches (esp. REP items). Append some more text, changing + * chars after the bad word may help. */ + STRCPY(fword, su->su_fbadword); + n = STRLEN(fword); + p = su->su_badptr + su->su_badlen; + (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); + + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + + /* If reloading a spell file fails it's still in the list but + * everything has been cleared. */ + if (lp->lp_slang->sl_fbyts == NULL) + continue; + + /* Try it for this language. Will add possible suggestions. */ + suggest_trie_walk(su, lp, fword, FALSE); } } +/* Check the maximum score, if we go over it we won't try this change. */ +#define TRY_DEEPER(su, stack, depth, add) \ + (stack[depth].ts_score + (add) < su->su_maxscore) + /* * Try finding suggestions by adding/removing/swapping letters. * * This uses a state machine. At each node in the tree we try various - * operations. When trying if an operation work "depth" is increased and the + * operations. When trying if an operation works "depth" is increased and the * stack[] is used to store info. This allows combinations, thus insert one * character, replace one and delete another. The number of changes is - * limited by su->su_maxscore, checked in try_deeper(). + * limited by su->su_maxscore. * * After implementing this I noticed an article by Kemal Oflazer that * describes something similar: "Error-tolerant Finite State Recognition with * Applications to Morphological Analysis and Spelling Correction" (1996). * The implementation in the article is simplified and requires a stack of - * unknown depth. The implementation here only needs a stack depth of the - * length of the word. + * unknown depth. The implementation here only needs a stack depth equal to + * the length of the word. + * + * This is also used for the sound-folded word, "soundfold" is TRUE then. + * The mechanism is the same, but we find a match with a sound-folded word + * that comes from one or more original words. Each of these words may be + * added, this is done by add_sound_suggest(). + * Don't use: + * the prefix tree or the keep-case tree + * "su->su_badlen" + * anything to do with upper and lower case + * anything to do with word or non-word characters ("spell_iswordp()") + * banned words + * word flags (rare, region, compounding) + * word splitting for now + * "similar_chars()" + * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" */ static void -suggest_try_change(su) +suggest_trie_walk(su, lp, fword, soundfold) suginfo_T *su; + langp_T *lp; + char_u *fword; + int soundfold; { - char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ char_u tword[MAXWLEN]; /* good word collected so far */ trystate_T stack[MAXWLEN]; char_u preword[MAXWLEN * 3]; /* word found with proper case; @@ -9259,12 +10506,12 @@ suggest_try_change(su) char_u compflags[MAXWLEN]; /* compound flags, one for each word */ trystate_T *sp; int newscore; - langp_T *lp; + int score; char_u *byts, *fbyts, *pbyts; idx_T *idxs, *fidxs, *pidxs; int depth; int c, c2, c3; - int n; + int n = 0; int flags; garray_T *gap; idx_T arridx; @@ -9273,41 +10520,39 @@ suggest_try_change(su) fromto_T *ftp; int fl = 0, tl; int repextra = 0; /* extra bytes in fword[] from REP item */ - slang_T *slang; + slang_T *slang = lp->lp_slang; int fword_ends; - int lpi; - int maysplit; int goodword_ends; +#ifdef DEBUG_TRIEWALK + /* Stores the name of the change made at each level. */ + char_u changename[MAXWLEN][80]; +#endif + int breakcheckcount = 1000; + int compound_ok; - /* We make a copy of the case-folded bad word, so that we can modify it - * to find matches (esp. REP items). Append some more text, changing - * chars after the bad word may help. */ - STRCPY(fword, su->su_fbadword); - n = STRLEN(fword); - p = su->su_badptr + su->su_badlen; - (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); + /* + * Go through the whole case-fold tree, try changes at each node. + * "tword[]" contains the word collected from nodes in the tree. + * "fword[]" the word we are trying to match with (initially the bad + * word). + */ + depth = 0; + sp = &stack[0]; + vim_memset(sp, 0, sizeof(trystate_T)); + sp->ts_curi = 1; - for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + if (soundfold) + { + /* Going through the soundfold tree. */ + byts = fbyts = slang->sl_sbyts; + idxs = fidxs = slang->sl_sidxs; + pbyts = NULL; + pidxs = NULL; + sp->ts_prefixdepth = PFD_NOPREFIX; + sp->ts_state = STATE_START; + } + else { - lp = LANGP_ENTRY(curbuf->b_langp, lpi); - slang = lp->lp_slang; - - /* If reloading a spell file fails it's still in the list but - * everything has been cleared. */ - if (slang->sl_fbyts == NULL) - continue; - - /* - * Go through the whole case-fold tree, try changes at each node. - * "tword[]" contains the word collected from nodes in the tree. - * "fword[]" the word we are trying to match with (initially the bad - * word). - */ - depth = 0; - sp = &stack[0]; - vim_memset(sp, 0, sizeof(trystate_T)); - sp->ts_curi = 1; - /* * When there are postponed prefixes we need to use these first. At * the end of the prefix we continue in the case-fold tree. @@ -9330,232 +10575,243 @@ suggest_try_change(su) sp->ts_prefixdepth = PFD_NOPREFIX; sp->ts_state = STATE_START; } + } - /* - * Loop to find all suggestions. At each round we either: - * - For the current state try one operation, advance "ts_curi", - * increase "depth". - * - When a state is done go to the next, set "ts_state". - * - When all states are tried decrease "depth". - */ - while (depth >= 0 && !got_int) + /* + * Loop to find all suggestions. At each round we either: + * - For the current state try one operation, advance "ts_curi", + * increase "depth". + * - When a state is done go to the next, set "ts_state". + * - When all states are tried decrease "depth". + */ + while (depth >= 0 && !got_int) + { + sp = &stack[depth]; + switch (sp->ts_state) { - sp = &stack[depth]; - switch (sp->ts_state) + case STATE_START: + case STATE_NOPREFIX: + /* + * Start of node: Deal with NUL bytes, which means + * tword[] may end here. + */ + arridx = sp->ts_arridx; /* current node in the tree */ + len = byts[arridx]; /* bytes in this node */ + arridx += sp->ts_curi; /* index of current byte */ + + if (sp->ts_prefixdepth == PFD_PREFIXTREE) { - case STATE_START: - case STATE_NOPREFIX: - /* - * Start of node: Deal with NUL bytes, which means - * tword[] may end here. - */ - arridx = sp->ts_arridx; /* current node in the tree */ - len = byts[arridx]; /* bytes in this node */ - arridx += sp->ts_curi; /* index of current byte */ + /* Skip over the NUL bytes, we use them later. */ + for (n = 0; n < len && byts[arridx + n] == 0; ++n) + ; + sp->ts_curi += n; + + /* Always past NUL bytes now. */ + n = (int)sp->ts_state; + sp->ts_state = STATE_ENDNUL; + sp->ts_save_badflags = su->su_badflags; - if (sp->ts_prefixdepth == PFD_PREFIXTREE) + /* At end of a prefix or at start of prefixtree: check for + * following word. */ + if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) { - /* Skip over the NUL bytes, we use them later. */ - for (n = 0; n < len && byts[arridx + n] == 0; ++n) - ; - sp->ts_curi += n; - - /* Always past NUL bytes now. */ - n = (int)sp->ts_state; - sp->ts_state = STATE_ENDNUL; - sp->ts_save_badflags = su->su_badflags; - - /* At end of a prefix or at start of prefixtree: check for - * following word. */ - if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) - { - /* Set su->su_badflags to the caps type at this - * position. Use the caps type until here for the - * prefix itself. */ + /* Set su->su_badflags to the caps type at this position. + * Use the caps type until here for the prefix itself. */ #ifdef FEAT_MBYTE - if (has_mbyte) - n = nofold_len(fword, sp->ts_fidx, su->su_badptr); - else + if (has_mbyte) + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); + else #endif - n = sp->ts_fidx; - flags = badword_captype(su->su_badptr, - su->su_badptr + n); - su->su_badflags = badword_captype(su->su_badptr + n, + n = sp->ts_fidx; + flags = badword_captype(su->su_badptr, su->su_badptr + n); + su->su_badflags = badword_captype(su->su_badptr + n, su->su_badptr + su->su_badlen); - ++depth; - stack[depth] = stack[depth - 1]; - sp = &stack[depth]; - sp->ts_prefixdepth = depth - 1; - byts = fbyts; - idxs = fidxs; - sp->ts_state = STATE_START; - sp->ts_curi = 1; /* start just after length byte */ - sp->ts_arridx = 0; - - /* Move the prefix to preword[] with the right case - * and make find_keepcap_word() works. */ - tword[sp->ts_twordlen] = NUL; - make_case_word(tword + sp->ts_splitoff, - preword + sp->ts_prewordlen, - flags); - sp->ts_prewordlen = STRLEN(preword); - sp->ts_splitoff = sp->ts_twordlen; - } - break; +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "prefix"); +#endif + go_deeper(stack, depth, 0); + ++depth; + sp = &stack[depth]; + sp->ts_prefixdepth = depth - 1; + byts = fbyts; + idxs = fidxs; + sp->ts_arridx = 0; + + /* Move the prefix to preword[] with the right case + * and make find_keepcap_word() works. */ + tword[sp->ts_twordlen] = NUL; + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, flags); + sp->ts_prewordlen = STRLEN(preword); + sp->ts_splitoff = sp->ts_twordlen; } + break; + } - if (sp->ts_curi > len || byts[arridx] != 0) - { - /* Past bytes in node and/or past NUL bytes. */ - sp->ts_state = STATE_ENDNUL; - sp->ts_save_badflags = su->su_badflags; - break; - } + if (sp->ts_curi > len || byts[arridx] != 0) + { + /* Past bytes in node and/or past NUL bytes. */ + sp->ts_state = STATE_ENDNUL; + sp->ts_save_badflags = su->su_badflags; + break; + } - /* - * End of word in tree. - */ - ++sp->ts_curi; /* eat one NUL byte */ + /* + * End of word in tree. + */ + ++sp->ts_curi; /* eat one NUL byte */ - flags = (int)idxs[arridx]; - fword_ends = (fword[sp->ts_fidx] == NUL - || !spell_iswordp(fword + sp->ts_fidx, curbuf)); - tword[sp->ts_twordlen] = NUL; + flags = (int)idxs[arridx]; + fword_ends = (fword[sp->ts_fidx] == NUL + || (soundfold + ? vim_iswhite(fword[sp->ts_fidx]) + : !spell_iswordp(fword + sp->ts_fidx, curbuf))); + tword[sp->ts_twordlen] = NUL; - if (sp->ts_prefixdepth <= PFD_NOTSPECIAL + if (sp->ts_prefixdepth <= PFD_NOTSPECIAL && (sp->ts_flags & TSF_PREFIXOK) == 0) + { + /* There was a prefix before the word. Check that the prefix + * can be used with this word. */ + /* Count the length of the NULs in the prefix. If there are + * none this must be the first try without a prefix. */ + n = stack[sp->ts_prefixdepth].ts_arridx; + len = pbyts[n++]; + for (c = 0; c < len && pbyts[n + c] == 0; ++c) + ; + if (c > 0) { - /* There was a prefix before the word. Check that the - * prefix can be used with this word. */ - /* Count the length of the NULs in the prefix. If there - * are none this must be the first try without a prefix. - */ - n = stack[sp->ts_prefixdepth].ts_arridx; - len = pbyts[n++]; - for (c = 0; c < len && pbyts[n + c] == 0; ++c) - ; - if (c > 0) - { - c = valid_word_prefix(c, n, flags, + c = valid_word_prefix(c, n, flags, tword + sp->ts_splitoff, slang, FALSE); - if (c == 0) - break; + if (c == 0) + break; - /* Use the WF_RARE flag for a rare prefix. */ - if (c & WF_RAREPFX) - flags |= WF_RARE; + /* Use the WF_RARE flag for a rare prefix. */ + if (c & WF_RAREPFX) + flags |= WF_RARE; - /* Tricky: when checking for both prefix and - * compounding we run into the prefix flag first. - * Remember that it's OK, so that we accept the prefix - * when arriving at a compound flag. */ - sp->ts_flags |= TSF_PREFIXOK; - } + /* Tricky: when checking for both prefix and compounding + * we run into the prefix flag first. + * Remember that it's OK, so that we accept the prefix + * when arriving at a compound flag. */ + sp->ts_flags |= TSF_PREFIXOK; } + } - /* Check NEEDCOMPOUND: can't use word without compounding. Do - * try appending another compound word below. */ - if (sp->ts_complen == sp->ts_compsplit && fword_ends + /* Check NEEDCOMPOUND: can't use word without compounding. Do try + * appending another compound word below. */ + if (sp->ts_complen == sp->ts_compsplit && fword_ends && (flags & WF_NEEDCOMP)) - goodword_ends = FALSE; - else - goodword_ends = TRUE; + goodword_ends = FALSE; + else + goodword_ends = TRUE; - if (sp->ts_complen > sp->ts_compsplit) + p = NULL; + compound_ok = TRUE; + if (sp->ts_complen > sp->ts_compsplit) + { + if (slang->sl_nobreak) { - if (slang->sl_nobreak) - { - /* There was a word before this word. When there was - * no change in this word (it was correct) add the - * first word as a suggestion. If this word was - * corrected too, we need to check if a correct word - * follows. */ - if (sp->ts_fidx - sp->ts_splitfidx + /* There was a word before this word. When there was no + * change in this word (it was correct) add the first word + * as a suggestion. If this word was corrected too, we + * need to check if a correct word follows. */ + if (sp->ts_fidx - sp->ts_splitfidx == sp->ts_twordlen - sp->ts_splitoff - && STRNCMP(fword + sp->ts_splitfidx, - tword + sp->ts_splitoff, + && STRNCMP(fword + sp->ts_splitfidx, + tword + sp->ts_splitoff, sp->ts_fidx - sp->ts_splitfidx) == 0) - { - preword[sp->ts_prewordlen] = NUL; + { + preword[sp->ts_prewordlen] = NUL; + newscore = score_wordcount_adj(slang, sp->ts_score, + preword + sp->ts_prewordlen, + sp->ts_prewordlen > 0); + /* Add the suggestion if the score isn't too bad. */ + if (newscore <= su->su_maxscore) add_suggestion(su, &su->su_ga, preword, sp->ts_splitfidx - repextra, - sp->ts_score, 0, FALSE, - lp->lp_sallang); - break; - } + newscore, 0, FALSE, + lp->lp_sallang, FALSE); + break; } - else - { - /* There was a compound word before this word. If - * this word does not support compounding then give up - * (splitting is tried for the word without compound - * flag). */ - if (((unsigned)flags >> 24) == 0 - || sp->ts_twordlen - sp->ts_splitoff + } + else + { + /* There was a compound word before this word. If this + * word does not support compounding then give up + * (splitting is tried for the word without compound + * flag). */ + if (((unsigned)flags >> 24) == 0 + || sp->ts_twordlen - sp->ts_splitoff < slang->sl_compminlen) - break; + break; #ifdef FEAT_MBYTE - /* For multi-byte chars check character length against - * COMPOUNDMIN. */ - if (has_mbyte - && slang->sl_compminlen > 0 - && mb_charlen(tword + sp->ts_splitoff) + /* For multi-byte chars check character length against + * COMPOUNDMIN. */ + if (has_mbyte + && slang->sl_compminlen > 0 + && mb_charlen(tword + sp->ts_splitoff) < slang->sl_compminlen) - break; + break; #endif - compflags[sp->ts_complen] = ((unsigned)flags >> 24); - compflags[sp->ts_complen + 1] = NUL; - vim_strncpy(preword + sp->ts_prewordlen, - tword + sp->ts_splitoff, - sp->ts_twordlen - sp->ts_splitoff); - p = preword; - while (*skiptowhite(p) != NUL) - p = skipwhite(skiptowhite(p)); - if (fword_ends && !can_compound(slang, p, + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + vim_strncpy(preword + sp->ts_prewordlen, + tword + sp->ts_splitoff, + sp->ts_twordlen - sp->ts_splitoff); + p = preword; + while (*skiptowhite(p) != NUL) + p = skipwhite(skiptowhite(p)); + if (fword_ends && !can_compound(slang, p, compflags + sp->ts_compsplit)) - break; + /* Compound is not allowed. But it may still be + * possible if we add another (short) word. */ + compound_ok = FALSE; - /* Get pointer to last char of previous word. */ - p = preword + sp->ts_prewordlen; - mb_ptr_back(preword, p); - } + /* Get pointer to last char of previous word. */ + p = preword + sp->ts_prewordlen; + mb_ptr_back(preword, p); } - else - p = NULL; + } - /* - * Form the word with proper case in preword. - * If there is a word from a previous split, append. - */ - if (flags & WF_KEEPCAP) - /* Must find the word in the keep-case tree. */ - find_keepcap_word(slang, tword + sp->ts_splitoff, + /* + * Form the word with proper case in preword. + * If there is a word from a previous split, append. + * For the soundfold tree don't change the case, simply append. + */ + if (soundfold) + STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); + else if (flags & WF_KEEPCAP) + /* Must find the word in the keep-case tree. */ + find_keepcap_word(slang, tword + sp->ts_splitoff, preword + sp->ts_prewordlen); - else - { - /* Include badflags: if the badword is onecap or allcap - * use that for the goodword too. But if the badword is - * allcap and it's only one char long use onecap. */ - c = su->su_badflags; - if ((c & WF_ALLCAP) + else + { + /* Include badflags: If the badword is onecap or allcap + * use that for the goodword too. But if the badword is + * allcap and it's only one char long use onecap. */ + c = su->su_badflags; + if ((c & WF_ALLCAP) #ifdef FEAT_MBYTE - && su->su_badlen == (*mb_ptr2len)(su->su_badptr) + && su->su_badlen == (*mb_ptr2len)(su->su_badptr) #else - && su->su_badlen == 1 + && su->su_badlen == 1 #endif - ) - c = WF_ONECAP; - c |= flags; - - /* When appending a compound word after a word character - * don't use Onecap. */ - if (p != NULL && spell_iswordp_nmw(p)) - c &= ~WF_ONECAP; - make_case_word(tword + sp->ts_splitoff, + ) + c = WF_ONECAP; + c |= flags; + + /* When appending a compound word after a word character don't + * use Onecap. */ + if (p != NULL && spell_iswordp_nmw(p)) + c &= ~WF_ONECAP; + make_case_word(tword + sp->ts_splitoff, preword + sp->ts_prewordlen, c); - } + } + if (!soundfold) + { /* Don't use a banned word. It may appear again as a good * word, thus remember it. */ if (flags & WF_BANNED) @@ -9564,16 +10820,19 @@ suggest_try_change(su) break; } if ((sp->ts_complen == sp->ts_compsplit - && was_banned(su, preword + sp->ts_prewordlen)) - || was_banned(su, preword)) + && WAS_BANNED(su, preword + sp->ts_prewordlen)) + || WAS_BANNED(su, preword)) { if (slang->sl_compprog == NULL) break; /* the word so far was banned but we may try compounding */ goodword_ends = FALSE; } + } - newscore = 0; + newscore = 0; + if (!soundfold) /* soundfold words don't have flags */ + { if ((flags & WF_REGION) && (((unsigned)flags >> 16) & lp->lp_region) == 0) newscore += SCORE_REGION; @@ -9583,113 +10842,141 @@ suggest_try_change(su) if (!spell_valid_case(su->su_badflags, captype(preword + sp->ts_prewordlen, NULL))) newscore += SCORE_ICASE; + } - maysplit = TRUE; - if (fword_ends && goodword_ends - && sp->ts_fidx >= sp->ts_fidxtry) + /* TODO: how about splitting in the soundfold tree? */ + if (fword_ends + && goodword_ends + && sp->ts_fidx >= sp->ts_fidxtry + && compound_ok) + { + /* The badword also ends: add suggestions. */ +#ifdef DEBUG_TRIEWALK + if (soundfold && STRCMP(preword, "smwrd") == 0) { - /* The badword also ends: add suggestions. Give a penalty - * when changing non-word char to word char, e.g., "thes," - * -> "these". */ - p = fword + sp->ts_fidx; -#ifdef FEAT_MBYTE - if (has_mbyte) - mb_ptr_back(fword, p); - else + int j; + + /* print the stack of changes that brought us here */ + smsg("------ %s -------", fword); + for (j = 0; j < depth; ++j) + smsg("%s", changename[j]); + } #endif - --p; + if (soundfold) + { + /* For soundfolded words we need to find the original + * words, the edit distrance and then add them. */ + add_sound_suggest(su, preword, sp->ts_score, lp); + } + else + { + /* Give a penalty when changing non-word char to word + * char, e.g., "thes," -> "these". */ + p = fword + sp->ts_fidx; + mb_ptr_back(fword, p); if (!spell_iswordp(p, curbuf)) { p = preword + STRLEN(preword); -#ifdef FEAT_MBYTE - if (has_mbyte) - mb_ptr_back(preword, p); - else -#endif - --p; + mb_ptr_back(preword, p); if (spell_iswordp(p, curbuf)) newscore += SCORE_NONWORD; } - add_suggestion(su, &su->su_ga, preword, - sp->ts_fidx - repextra, - sp->ts_score + newscore, 0, FALSE, - lp->lp_sallang); - - /* When the bad word doesn't end yet, try changing the - * next word. E.g., find suggestions for "the the" where - * the second "the" is different. It's done like a split. - */ - if (sp->ts_fidx - repextra >= su->su_badlen) - maysplit = FALSE; + /* Give a bonus to words seen before. */ + score = score_wordcount_adj(slang, + sp->ts_score + newscore, + preword + sp->ts_prewordlen, + sp->ts_prewordlen > 0); + + /* Add the suggestion if the score isn't too bad. */ + if (score <= su->su_maxscore) + add_suggestion(su, &su->su_ga, preword, + sp->ts_fidx - repextra, + score, 0, FALSE, lp->lp_sallang, FALSE); } + } - if (maysplit - && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends) + /* + * Try word split and/or compounding. + */ + if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) #ifdef FEAT_MBYTE - /* Don't split halfway a character. */ - && (!has_mbyte || sp->ts_tcharlen == 0) -#endif - ) - { - int try_compound; - - /* Get here in two situations: - * 1. The word in the tree ends but the badword continues: - * If the word allows compounding try that. Otherwise - * try a split by inserting a space. For both check - * that a valid words starts at fword[sp->ts_fidx]. - * For NOBREAK do like compounding to be able to check - * if the next word is valid. - * 2. The badword does end, but it was due to a change - * (e.g., a swap). No need to split, but do check that - * the following word is valid. - */ - try_compound = FALSE; - if ((!fword_ends || !goodword_ends) - && slang->sl_compprog != NULL - && ((unsigned)flags >> 24) != 0 - && sp->ts_twordlen - sp->ts_splitoff - >= slang->sl_compminlen + /* Don't split halfway a character. */ + && (!has_mbyte || sp->ts_tcharlen == 0) +#endif + ) + { + int try_compound; + int try_split; + + /* If past the end of the bad word don't try a split. + * Otherwise try changing the next word. E.g., find + * suggestions for "the the" where the second "the" is + * different. It's done like a split. + * TODO: word split for soundfold words */ + try_split = (sp->ts_fidx - repextra < su->su_badlen) + && !soundfold; + + /* Get here in several situations: + * 1. The word in the tree ends: + * If the word allows compounding try that. Otherwise try + * a split by inserting a space. For both check that a + * valid words starts at fword[sp->ts_fidx]. + * For NOBREAK do like compounding to be able to check if + * the next word is valid. + * 2. The badword does end, but it was due to a change (e.g., + * a swap). No need to split, but do check that the + * following word is valid. + * 3. The badword and the word in the tree end. It may still + * be possible to compound another (short) word. + */ + try_compound = FALSE; + if (!soundfold + && slang->sl_compprog != NULL + && ((unsigned)flags >> 24) != 0 + && sp->ts_twordlen - sp->ts_splitoff + >= slang->sl_compminlen #ifdef FEAT_MBYTE - && (!has_mbyte - || slang->sl_compminlen == 0 - || mb_charlen(tword + sp->ts_splitoff) + && (!has_mbyte + || slang->sl_compminlen == 0 + || mb_charlen(tword + sp->ts_splitoff) >= slang->sl_compminlen) #endif - && (slang->sl_compsylmax < MAXWLEN - || sp->ts_complen + 1 - sp->ts_compsplit - < slang->sl_compmax) - && (byte_in_str(sp->ts_complen == sp->ts_compsplit - ? slang->sl_compstartflags - : slang->sl_compallflags, + && (slang->sl_compsylmax < MAXWLEN + || sp->ts_complen + 1 - sp->ts_compsplit + < slang->sl_compmax) + && (byte_in_str(sp->ts_complen == sp->ts_compsplit + ? slang->sl_compstartflags + : slang->sl_compallflags, ((unsigned)flags >> 24)))) - { - try_compound = TRUE; - compflags[sp->ts_complen] = ((unsigned)flags >> 24); - compflags[sp->ts_complen + 1] = NUL; - } + { + try_compound = TRUE; + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + } - /* For NOBREAK we never try splitting, it won't make any - * word valid. */ - if (slang->sl_nobreak) - try_compound = TRUE; - - /* If we could add a compound word, and it's also possible - * to split at this point, do the split first and set - * TSF_DIDSPLIT to avoid doing it again. */ - else if (!fword_ends - && try_compound - && (sp->ts_flags & TSF_DIDSPLIT) == 0) - { - try_compound = FALSE; - sp->ts_flags |= TSF_DIDSPLIT; - --sp->ts_curi; /* do the same NUL again */ - compflags[sp->ts_complen] = NUL; - } - else - sp->ts_flags &= ~TSF_DIDSPLIT; + /* For NOBREAK we never try splitting, it won't make any word + * valid. */ + if (slang->sl_nobreak) + try_compound = TRUE; + + /* If we could add a compound word, and it's also possible to + * split at this point, do the split first and set + * TSF_DIDSPLIT to avoid doing it again. */ + else if (!fword_ends + && try_compound + && (sp->ts_flags & TSF_DIDSPLIT) == 0) + { + try_compound = FALSE; + sp->ts_flags |= TSF_DIDSPLIT; + --sp->ts_curi; /* do the same NUL again */ + compflags[sp->ts_complen] = NUL; + } + else + sp->ts_flags &= ~TSF_DIDSPLIT; + if (try_split || try_compound) + { if (!try_compound && (!fword_ends || !goodword_ends)) { /* If we're going to split need to check that the @@ -9707,10 +10994,23 @@ suggest_try_change(su) compflags + sp->ts_compsplit)) break; newscore += SCORE_SPLIT; + + /* Give a bonus to words seen before. */ + newscore = score_wordcount_adj(slang, newscore, + preword + sp->ts_prewordlen, TRUE); } - if (try_deeper(su, stack, depth, newscore)) + if (TRY_DEEPER(su, stack, depth, newscore)) { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + if (!try_compound && !fword_ends) + sprintf(changename[depth], "%.*s-%s: split", + sp->ts_twordlen, tword, fword + sp->ts_fidx); + else + sprintf(changename[depth], "%.*s-%s: compound", + sp->ts_twordlen, tword, fword + sp->ts_fidx); +#endif /* Save things to be restored at STATE_SPLITUNDO. */ sp->ts_save_badflags = su->su_badflags; sp->ts_state = STATE_SPLITUNDO; @@ -9730,10 +11030,11 @@ suggest_try_change(su) * non-word character with a space. Always skip a * character when the word ends. But only when the * good word can end. */ - if (((!try_compound - && !spell_iswordp_nmw(fword + sp->ts_fidx)) - || fword_ends) - && goodword_ends) + if (((!try_compound && !spell_iswordp_nmw(fword + + sp->ts_fidx)) + || fword_ends) + && fword[sp->ts_fidx] != NUL + && goodword_ends) { int l; @@ -9789,508 +11090,644 @@ suggest_try_change(su) } } } - break; + } + break; - case STATE_SPLITUNDO: - /* Undo the changes done for word split or compound word. */ - su->su_badflags = sp->ts_save_badflags; + case STATE_SPLITUNDO: + /* Undo the changes done for word split or compound word. */ + su->su_badflags = sp->ts_save_badflags; - /* Continue looking for NUL bytes. */ - sp->ts_state = STATE_START; + /* Continue looking for NUL bytes. */ + sp->ts_state = STATE_START; - /* In case we went into the prefix tree. */ - byts = fbyts; - idxs = fidxs; - break; + /* In case we went into the prefix tree. */ + byts = fbyts; + idxs = fidxs; + break; - case STATE_ENDNUL: - /* Past the NUL bytes in the node. */ - su->su_badflags = sp->ts_save_badflags; - if (fword[sp->ts_fidx] == NUL + case STATE_ENDNUL: + /* Past the NUL bytes in the node. */ + su->su_badflags = sp->ts_save_badflags; + if (fword[sp->ts_fidx] == NUL #ifdef FEAT_MBYTE - && sp->ts_tcharlen == 0 + && sp->ts_tcharlen == 0 #endif - ) - { - /* The badword ends, can't use the bytes in this node. */ - sp->ts_state = STATE_DEL; - break; - } - sp->ts_state = STATE_PLAIN; - /*FALLTHROUGH*/ + ) + { + /* The badword ends, can't use STATE_PLAIN. */ + sp->ts_state = STATE_DEL; + break; + } + sp->ts_state = STATE_PLAIN; + /*FALLTHROUGH*/ - case STATE_PLAIN: - /* - * Go over all possible bytes at this node, add each to - * tword[] and use child node. "ts_curi" is the index. - */ - arridx = sp->ts_arridx; - if (sp->ts_curi > byts[arridx]) - { - /* Done all bytes at this node, do next state. When still - * at already changed bytes skip the other tricks. */ - if (sp->ts_fidx >= sp->ts_fidxtry) - sp->ts_state = STATE_DEL; - else - sp->ts_state = STATE_FINAL; - } + case STATE_PLAIN: + /* + * Go over all possible bytes at this node, add each to tword[] + * and use child node. "ts_curi" is the index. + */ + arridx = sp->ts_arridx; + if (sp->ts_curi > byts[arridx]) + { + /* Done all bytes at this node, do next state. When still at + * already changed bytes skip the other tricks. */ + if (sp->ts_fidx >= sp->ts_fidxtry) + sp->ts_state = STATE_DEL; else - { - arridx += sp->ts_curi++; - c = byts[arridx]; + sp->ts_state = STATE_FINAL; + } + else + { + arridx += sp->ts_curi++; + c = byts[arridx]; - /* Normal byte, go one level deeper. If it's not equal to - * the byte in the bad word adjust the score. But don't - * even try when the byte was already changed. */ - if (c == fword[sp->ts_fidx] + /* Normal byte, go one level deeper. If it's not equal to the + * byte in the bad word adjust the score. But don't even try + * when the byte was already changed. And don't try when we + * just deleted this byte, accepting it is always cheaper then + * delete + substitute. */ + if (c == fword[sp->ts_fidx] #ifdef FEAT_MBYTE - || (sp->ts_tcharlen > 0 - && sp->ts_isdiff != DIFF_NONE) + || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) #endif - ) - newscore = 0; + ) + newscore = 0; + else + newscore = SCORE_SUBST; + if ((newscore == 0 + || (sp->ts_fidx >= sp->ts_fidxtry + && ((sp->ts_flags & TSF_DIDDEL) == 0 + || c != fword[sp->ts_delidx]))) + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + if (newscore > 0) + sprintf(changename[depth], "%.*s-%s: subst %c to %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx], c); else - newscore = SCORE_SUBST; - if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) - && try_deeper(su, stack, depth, newscore)) - { - ++depth; - sp = &stack[depth]; - ++sp->ts_fidx; - tword[sp->ts_twordlen++] = c; - sp->ts_arridx = idxs[arridx]; + sprintf(changename[depth], "%.*s-%s: accept %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx]); +#endif + ++depth; + sp = &stack[depth]; + ++sp->ts_fidx; + tword[sp->ts_twordlen++] = c; + sp->ts_arridx = idxs[arridx]; #ifdef FEAT_MBYTE - if (newscore == SCORE_SUBST) - sp->ts_isdiff = DIFF_YES; - if (has_mbyte) + if (newscore == SCORE_SUBST) + sp->ts_isdiff = DIFF_YES; + if (has_mbyte) + { + /* Multi-byte characters are a bit complicated to + * handle: They differ when any of the bytes differ + * and then their length may also differ. */ + if (sp->ts_tcharlen == 0) { - /* Multi-byte characters are a bit complicated to - * handle: They differ when any of the bytes - * differ and then their length may also differ. */ - if (sp->ts_tcharlen == 0) - { - /* First byte. */ - sp->ts_tcharidx = 0; - sp->ts_tcharlen = MB_BYTE2LEN(c); - sp->ts_fcharstart = sp->ts_fidx - 1; - sp->ts_isdiff = (newscore != 0) + /* First byte. */ + sp->ts_tcharidx = 0; + sp->ts_tcharlen = MB_BYTE2LEN(c); + sp->ts_fcharstart = sp->ts_fidx - 1; + sp->ts_isdiff = (newscore != 0) ? DIFF_YES : DIFF_NONE; - } - else if (sp->ts_isdiff == DIFF_INSERT) - /* When inserting trail bytes don't advance in - * the bad word. */ - --sp->ts_fidx; - if (++sp->ts_tcharidx == sp->ts_tcharlen) + } + else if (sp->ts_isdiff == DIFF_INSERT) + /* When inserting trail bytes don't advance in the + * bad word. */ + --sp->ts_fidx; + if (++sp->ts_tcharidx == sp->ts_tcharlen) + { + /* Last byte of character. */ + if (sp->ts_isdiff == DIFF_YES) { - /* Last byte of character. */ - if (sp->ts_isdiff == DIFF_YES) - { - /* Correct ts_fidx for the byte length of - * the character (we didn't check that - * before). */ - sp->ts_fidx = sp->ts_fcharstart - + MB_BYTE2LEN( + /* Correct ts_fidx for the byte length of the + * character (we didn't check that before). */ + sp->ts_fidx = sp->ts_fcharstart + + MB_BYTE2LEN( fword[sp->ts_fcharstart]); - /* For changing a composing character - * adjust the score from SCORE_SUBST to - * SCORE_SUBCOMP. */ - if (enc_utf8 - && utf_iscomposing( - mb_ptr2char(tword - + sp->ts_twordlen + /* For changing a composing character adjust + * the score from SCORE_SUBST to + * SCORE_SUBCOMP. */ + if (enc_utf8 + && utf_iscomposing( + mb_ptr2char(tword + + sp->ts_twordlen - sp->ts_tcharlen)) - && utf_iscomposing( - mb_ptr2char(fword + && utf_iscomposing( + mb_ptr2char(fword + sp->ts_fcharstart))) - sp->ts_score -= + sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP; - /* For a similar character adjust score - * from SCORE_SUBST to SCORE_SIMILAR. */ - else if (slang->sl_has_map - && similar_chars(slang, - mb_ptr2char(tword - + sp->ts_twordlen + /* For a similar character adjust score from + * SCORE_SUBST to SCORE_SIMILAR. */ + else if (!soundfold + && slang->sl_has_map + && similar_chars(slang, + mb_ptr2char(tword + + sp->ts_twordlen - sp->ts_tcharlen), - mb_ptr2char(fword + mb_ptr2char(fword + sp->ts_fcharstart))) - sp->ts_score -= + sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; + } + else if (sp->ts_isdiff == DIFF_INSERT + && sp->ts_twordlen > sp->ts_tcharlen) + { + p = tword + sp->ts_twordlen - sp->ts_tcharlen; + c = mb_ptr2char(p); + if (enc_utf8 && utf_iscomposing(c)) + { + /* Inserting a composing char doesn't + * count that much. */ + sp->ts_score -= SCORE_INS - SCORE_INSCOMP; } - else if (sp->ts_isdiff == DIFF_INSERT - && sp->ts_twordlen > sp->ts_tcharlen) + else { - p = tword + sp->ts_twordlen - - sp->ts_tcharlen; - c = mb_ptr2char(p); - if (enc_utf8 && utf_iscomposing(c)) - { - /* Inserting a composing char doesn't - * count that much. */ + /* If the previous character was the same, + * thus doubling a character, give a bonus + * to the score. Also for the soundfold + * tree (might seem illogical but does + * give better scores). */ + mb_ptr_back(tword, p); + if (c == mb_ptr2char(p)) sp->ts_score -= SCORE_INS - - SCORE_INSCOMP; - } - else - { - /* If the previous character was the - * same, thus doubling a character, - * give a bonus to the score. */ - mb_ptr_back(tword, p); - if (c == mb_ptr2char(p)) - sp->ts_score -= SCORE_INS - SCORE_INSDUP; - } } - - /* Starting a new char, reset the length. */ - sp->ts_tcharlen = 0; } + + /* Starting a new char, reset the length. */ + sp->ts_tcharlen = 0; } - else + } + else #endif - { - /* If we found a similar char adjust the score. - * We do this after calling try_deeper() because - * it's slow. */ - if (newscore != 0 - && slang->sl_has_map - && similar_chars(slang, + { + /* If we found a similar char adjust the score. + * We do this after calling go_deeper() because + * it's slow. */ + if (newscore != 0 + && !soundfold + && slang->sl_has_map + && similar_chars(slang, c, fword[sp->ts_fidx - 1])) - sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; - } + sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; } } - break; + } + break; - case STATE_DEL: + case STATE_DEL: #ifdef FEAT_MBYTE - /* When past the first byte of a multi-byte char don't try - * delete/insert/swap a character. */ - if (has_mbyte && sp->ts_tcharlen > 0) - { - sp->ts_state = STATE_FINAL; - break; - } + /* When past the first byte of a multi-byte char don't try + * delete/insert/swap a character. */ + if (has_mbyte && sp->ts_tcharlen > 0) + { + sp->ts_state = STATE_FINAL; + break; + } #endif - /* - * Try skipping one character in the bad word (delete it). - */ - sp->ts_state = STATE_INS; - sp->ts_curi = 1; - if (fword[sp->ts_fidx] != NUL - && try_deeper(su, stack, depth, SCORE_DEL)) - { - ++depth; - - /* Advance over the character in fword[]. Give a bonus to - * the score if the same character is following "nn" -> - * "n". */ -#ifdef FEAT_MBYTE - if (has_mbyte) - { - c = mb_ptr2char(fword + sp->ts_fidx); - stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); - if (enc_utf8 && utf_iscomposing(c)) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; - else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; - } - else + /* + * Try skipping one character in the bad word (delete it). + */ + sp->ts_state = STATE_INS_PREP; + sp->ts_curi = 1; + if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') + /* Deleting a vowel at the start of a word counts less, see + * soundalike_score(). */ + newscore = 2 * SCORE_DEL / 3; + else + newscore = SCORE_DEL; + if (fword[sp->ts_fidx] != NUL + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: delete %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + fword[sp->ts_fidx]); #endif - { - ++stack[depth].ts_fidx; - if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) - stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; - } - break; - } - /*FALLTHROUGH*/ + ++depth; - case STATE_INS: - /* Insert one byte. Do this for each possible byte at this - * node. */ - n = sp->ts_arridx; - if (sp->ts_curi > byts[n]) + /* Remember what character we deleted, so that we can avoid + * inserting it again. */ + stack[depth].ts_flags |= TSF_DIDDEL; + stack[depth].ts_delidx = sp->ts_fidx; + + /* Advance over the character in fword[]. Give a bonus to the + * score if the same character is following "nn" -> "n". It's + * a bit illogical for soundfold tree but it does give better + * results. */ +#ifdef FEAT_MBYTE + if (has_mbyte) { - /* Done all bytes at this node, do next state. */ - sp->ts_state = STATE_SWAP; + c = mb_ptr2char(fword + sp->ts_fidx); + stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); + if (enc_utf8 && utf_iscomposing(c)) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; + else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; } else - { - /* Do one more byte at this node. Skip NUL bytes. */ - n += sp->ts_curi++; - c = byts[n]; - if (c != 0 && try_deeper(su, stack, depth, SCORE_INS)) - { - ++depth; - sp = &stack[depth]; - tword[sp->ts_twordlen++] = c; - sp->ts_arridx = idxs[n]; -#ifdef FEAT_MBYTE - if (has_mbyte) - { - fl = MB_BYTE2LEN(c); - if (fl > 1) - { - /* There are following bytes for the same - * character. We must find all bytes before - * trying delete/insert/swap/etc. */ - sp->ts_tcharlen = fl; - sp->ts_tcharidx = 1; - sp->ts_isdiff = DIFF_INSERT; - } - } - else - fl = 1; - if (fl == 1) #endif - { - /* If the previous character was the same, thus - * doubling a character, give a bonus to the - * score. */ - if (sp->ts_twordlen >= 2 - && tword[sp->ts_twordlen - 2] == c) - sp->ts_score -= SCORE_INS - SCORE_INSDUP; - } - } + { + ++stack[depth].ts_fidx; + if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) + stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; } break; + } + /*FALLTHROUGH*/ - case STATE_SWAP: - /* - * Swap two bytes in the bad word: "12" -> "21". - * We change "fword" here, it's changed back afterwards. - */ - p = fword + sp->ts_fidx; - c = *p; - if (c == NUL) + case STATE_INS_PREP: + if (sp->ts_flags & TSF_DIDDEL) + { + /* If we just deleted a byte then inserting won't make sense, + * a substitute is always cheaper. */ + sp->ts_state = STATE_SWAP; + break; + } + + /* skip over NUL bytes */ + n = sp->ts_arridx; + for (;;) + { + if (sp->ts_curi > byts[n]) { - /* End of word, can't swap or replace. */ - sp->ts_state = STATE_FINAL; + /* Only NUL bytes at this node, go to next state. */ + sp->ts_state = STATE_SWAP; break; } - - /* Don't swap if the first character is not a word character. - * SWAP3 etc. also don't make sense then. */ - if (!spell_iswordp(p, curbuf)) + if (byts[n + sp->ts_curi] != NUL) { - sp->ts_state = STATE_REP_INI; + /* Found a byte to insert. */ + sp->ts_state = STATE_INS; break; } + ++sp->ts_curi; + } + break; + + /*FALLTHROUGH*/ + + case STATE_INS: + /* Insert one byte. Repeat this for each possible byte at this + * node. */ + n = sp->ts_arridx; + if (sp->ts_curi > byts[n]) + { + /* Done all bytes at this node, go to next state. */ + sp->ts_state = STATE_SWAP; + break; + } + /* Do one more byte at this node, but: + * - Skip NUL bytes. + * - Skip the byte if it's equal to the byte in the word, + * accepting that byte is always better. + */ + n += sp->ts_curi++; + c = byts[n]; + if (soundfold && sp->ts_twordlen == 0 && c == '*') + /* Inserting a vowel at the start of a word counts less, + * see soundalike_score(). */ + newscore = 2 * SCORE_INS / 3; + else + newscore = SCORE_INS; + if (c != fword[sp->ts_fidx] + && TRY_DEEPER(su, stack, depth, newscore)) + { + go_deeper(stack, depth, newscore); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: insert %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c); +#endif + ++depth; + sp = &stack[depth]; + tword[sp->ts_twordlen++] = c; + sp->ts_arridx = idxs[n]; #ifdef FEAT_MBYTE if (has_mbyte) { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - if (!spell_iswordp(p + n, curbuf)) - c2 = c; /* don't swap non-word char */ - else - c2 = mb_ptr2char(p + n); + fl = MB_BYTE2LEN(c); + if (fl > 1) + { + /* There are following bytes for the same character. + * We must find all bytes before trying + * delete/insert/swap/etc. */ + sp->ts_tcharlen = fl; + sp->ts_tcharidx = 1; + sp->ts_isdiff = DIFF_INSERT; + } } else + fl = 1; + if (fl == 1) #endif { - if (!spell_iswordp(p + 1, curbuf)) - c2 = c; /* don't swap non-word char */ - else - c2 = p[1]; + /* If the previous character was the same, thus doubling a + * character, give a bonus to the score. Also for + * soundfold words (illogical but does give a better + * score). */ + if (sp->ts_twordlen >= 2 + && tword[sp->ts_twordlen - 2] == c) + sp->ts_score -= SCORE_INS - SCORE_INSDUP; } + } + break; + + case STATE_SWAP: + /* + * Swap two bytes in the bad word: "12" -> "21". + * We change "fword" here, it's changed back afterwards at + * STATE_UNSWAP. + */ + p = fword + sp->ts_fidx; + c = *p; + if (c == NUL) + { + /* End of word, can't swap or replace. */ + sp->ts_state = STATE_FINAL; + break; + } + + /* Don't swap if the first character is not a word character. + * SWAP3 etc. also don't make sense then. */ + if (!soundfold && !spell_iswordp(p, curbuf)) + { + sp->ts_state = STATE_REP_INI; + break; + } - /* When characters are identical, swap won't do anything. - * Also get here if the second char is not a word character. */ - if (c == c2) - { - sp->ts_state = STATE_SWAP3; - break; - } - if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP)) - { - sp->ts_state = STATE_UNSWAP; - ++depth; #ifdef FEAT_MBYTE - if (has_mbyte) - { - fl = mb_char2len(c2); - mch_memmove(p, p + n, fl); - mb_char2bytes(c, p + fl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; - } - else + if (has_mbyte) + { + n = mb_cptr2len(p); + c = mb_ptr2char(p); + if (!soundfold && !spell_iswordp(p + n, curbuf)) + c2 = c; /* don't swap non-word char */ + else + c2 = mb_ptr2char(p + n); + } + else #endif - { - p[0] = c2; - p[1] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 2; - } - } + { + if (!soundfold && !spell_iswordp(p + 1, curbuf)) + c2 = c; /* don't swap non-word char */ else - /* If this swap doesn't work then SWAP3 won't either. */ - sp->ts_state = STATE_REP_INI; - break; + c2 = p[1]; + } - case STATE_UNSWAP: - /* Undo the STATE_SWAP swap: "21" -> "12". */ - p = fword + sp->ts_fidx; + /* When characters are identical, swap won't do anything. + * Also get here if the second char is not a word character. */ + if (c == c2) + { + sp->ts_state = STATE_SWAP3; + break; + } + if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) + { + go_deeper(stack, depth, SCORE_SWAP); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: swap %c and %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c, c2); +#endif + sp->ts_state = STATE_UNSWAP; + ++depth; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - c = mb_ptr2char(p + n); - mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); - mb_char2bytes(c, p); + fl = mb_char2len(c2); + mch_memmove(p, p + n, fl); + mb_char2bytes(c, p + fl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; } else #endif { - c = *p; - *p = p[1]; + p[0] = c2; p[1] = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 2; } - /*FALLTHROUGH*/ + } + else + /* If this swap doesn't work then SWAP3 won't either. */ + sp->ts_state = STATE_REP_INI; + break; - case STATE_SWAP3: - /* Swap two bytes, skipping one: "123" -> "321". We change - * "fword" here, it's changed back afterwards. */ - p = fword + sp->ts_fidx; + case STATE_UNSWAP: + /* Undo the STATE_SWAP swap: "21" -> "12". */ + p = fword + sp->ts_fidx; +#ifdef FEAT_MBYTE + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + c = mb_ptr2char(p + n); + mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); + mb_char2bytes(c, p); + } + else +#endif + { + c = *p; + *p = p[1]; + p[1] = c; + } + /*FALLTHROUGH*/ + + case STATE_SWAP3: + /* Swap two bytes, skipping one: "123" -> "321". We change + * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ + p = fword + sp->ts_fidx; +#ifdef FEAT_MBYTE + if (has_mbyte) + { + n = mb_cptr2len(p); + c = mb_ptr2char(p); + fl = mb_cptr2len(p + n); + c2 = mb_ptr2char(p + n); + if (!soundfold && !spell_iswordp(p + n + fl, curbuf)) + c3 = c; /* don't swap non-word char */ + else + c3 = mb_ptr2char(p + n + fl); + } + else +#endif + { + c = *p; + c2 = p[1]; + if (!soundfold && !spell_iswordp(p + 2, curbuf)) + c3 = c; /* don't swap non-word char */ + else + c3 = p[2]; + } + + /* When characters are identical: "121" then SWAP3 result is + * identical, ROT3L result is same as SWAP: "211", ROT3L result is + * same as SWAP on next char: "112". Thus skip all swapping. + * Also skip when c3 is NUL. + * Also get here when the third character is not a word character. + * Second character may any char: "a.b" -> "b.a" */ + if (c == c3 || c3 == NUL) + { + sp->ts_state = STATE_REP_INI; + break; + } + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + c, c3); +#endif + sp->ts_state = STATE_UNSWAP3; + ++depth; #ifdef FEAT_MBYTE if (has_mbyte) { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); - c2 = mb_ptr2char(p + n); - if (!spell_iswordp(p + n + fl, curbuf)) - c3 = c; /* don't swap non-word char */ - else - c3 = mb_ptr2char(p + n + fl); + tl = mb_char2len(c3); + mch_memmove(p, p + n + fl, tl); + mb_char2bytes(c2, p + tl); + mb_char2bytes(c, p + fl + tl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; } else #endif { - c = *p; - c2 = p[1]; - if (!spell_iswordp(p + 2, curbuf)) - c3 = c; /* don't swap non-word char */ - else - c3 = p[2]; + p[0] = p[2]; + p[2] = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* When characters are identical: "121" then SWAP3 result is - * identical, ROT3L result is same as SWAP: "211", ROT3L - * result is same as SWAP on next char: "112". Thus skip all - * swapping. Also skip when c3 is NUL. - * Also get here when the third character is not a word - * character. Second character may any char: "a.b" -> "b.a" */ - if (c == c3 || c3 == NUL) - { - sp->ts_state = STATE_REP_INI; - break; - } - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNSWAP3; - ++depth; + case STATE_UNSWAP3: + /* Undo STATE_SWAP3: "321" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - tl = mb_char2len(c3); - mch_memmove(p, p + n + fl, tl); - mb_char2bytes(c2, p + tl); - mb_char2bytes(c, p + fl + tl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; - } - else + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + c2 = mb_ptr2char(p + n); + fl = MB_BYTE2LEN(p[n]); + c = mb_ptr2char(p + n + fl); + tl = MB_BYTE2LEN(p[n + fl]); + mch_memmove(p + fl + tl, p, n); + mb_char2bytes(c, p); + mb_char2bytes(c2, p + tl); + p = p + tl; + } + else #endif - { - p[0] = p[2]; - p[2] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; + { + c = *p; + *p = p[2]; + p[2] = c; + ++p; + } + + if (!soundfold && !spell_iswordp(p, curbuf)) + { + /* Middle char is not a word char, skip the rotate. First and + * third char were already checked at swap and swap3. */ + sp->ts_state = STATE_REP_INI; break; + } - case STATE_UNSWAP3: - /* Undo STATE_SWAP3: "321" -> "123" */ + /* Rotate three characters left: "123" -> "231". We change + * "fword" here, it's changed back afterwards at STATE_UNROT3L. */ + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + p = fword + sp->ts_fidx; + sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + p[0], p[1], p[2]); +#endif + sp->ts_state = STATE_UNROT3L; + ++depth; p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - c2 = mb_ptr2char(p + n); - fl = MB_BYTE2LEN(p[n]); - c = mb_ptr2char(p + n + fl); - tl = MB_BYTE2LEN(p[n + fl]); - mch_memmove(p + fl + tl, p, n); - mb_char2bytes(c, p); - mb_char2bytes(c2, p + tl); - p = p + tl; + n = mb_cptr2len(p); + c = mb_ptr2char(p); + fl = mb_cptr2len(p + n); + fl += mb_cptr2len(p + n + fl); + mch_memmove(p, p + n, fl); + mb_char2bytes(c, p + fl); + stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; } else #endif { c = *p; - *p = p[2]; + *p = p[1]; + p[1] = p[2]; p[2] = c; - ++p; - } - - if (!spell_iswordp(p, curbuf)) - { - /* Middle char is not a word char, skip the rotate. - * First and third char were already checked at swap - * and swap3. */ - sp->ts_state = STATE_REP_INI; - break; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* Rotate three characters left: "123" -> "231". We change - * "fword" here, it's changed back afterwards. */ - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNROT3L; - ++depth; - p = fword + sp->ts_fidx; + case STATE_UNROT3L: + /* Undo ROT3L: "231" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - n = mb_cptr2len(p); - c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); - fl += mb_cptr2len(p + n + fl); - mch_memmove(p, p + n, fl); - mb_char2bytes(c, p + fl); - stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; - } - else + if (has_mbyte) + { + n = MB_BYTE2LEN(*p); + n += MB_BYTE2LEN(p[n]); + c = mb_ptr2char(p + n); + tl = MB_BYTE2LEN(p[n]); + mch_memmove(p + tl, p, n); + mb_char2bytes(c, p); + } + else #endif - { - c = *p; - *p = p[1]; - p[1] = p[2]; - p[2] = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; - break; + { + c = p[2]; + p[2] = p[1]; + p[1] = *p; + *p = c; + } - case STATE_UNROT3L: - /* Undo ROT3L: "231" -> "123" */ + /* Rotate three bytes right: "123" -> "312". We change "fword" + * here, it's changed back afterwards at STATE_UNROT3R. */ + if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) + { + go_deeper(stack, depth, SCORE_SWAP3); +#ifdef DEBUG_TRIEWALK + p = fword + sp->ts_fidx; + sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + p[0], p[1], p[2]); +#endif + sp->ts_state = STATE_UNROT3R; + ++depth; p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) { - n = MB_BYTE2LEN(*p); - n += MB_BYTE2LEN(p[n]); + n = mb_cptr2len(p); + n += mb_cptr2len(p + n); c = mb_ptr2char(p + n); - tl = MB_BYTE2LEN(p[n]); + tl = mb_cptr2len(p + n); mch_memmove(p + tl, p, n); mb_char2bytes(c, p); + stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; } else #endif @@ -10299,193 +11736,176 @@ suggest_try_change(su) p[2] = p[1]; p[1] = *p; *p = c; + stack[depth].ts_fidxtry = sp->ts_fidx + 3; } + } + else + sp->ts_state = STATE_REP_INI; + break; - /* Rotate three bytes right: "123" -> "312". We change - * "fword" here, it's changed back afterwards. */ - if (try_deeper(su, stack, depth, SCORE_SWAP3)) - { - sp->ts_state = STATE_UNROT3R; - ++depth; - p = fword + sp->ts_fidx; + case STATE_UNROT3R: + /* Undo ROT3R: "312" -> "123" */ + p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE - if (has_mbyte) - { - n = mb_cptr2len(p); - n += mb_cptr2len(p + n); - c = mb_ptr2char(p + n); - tl = mb_cptr2len(p + n); - mch_memmove(p + tl, p, n); - mb_char2bytes(c, p); - stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; - } - else + if (has_mbyte) + { + c = mb_ptr2char(p); + tl = MB_BYTE2LEN(*p); + n = MB_BYTE2LEN(p[tl]); + n += MB_BYTE2LEN(p[tl + n]); + mch_memmove(p, p + tl, n); + mb_char2bytes(c, p + n); + } + else #endif - { - c = p[2]; - p[2] = p[1]; - p[1] = *p; - *p = c; - stack[depth].ts_fidxtry = sp->ts_fidx + 3; - } - } - else - sp->ts_state = STATE_REP_INI; + { + c = *p; + *p = p[1]; + p[1] = p[2]; + p[2] = c; + } + /*FALLTHROUGH*/ + + case STATE_REP_INI: + /* Check if matching with REP items from the .aff file would work. + * Quickly skip if: + * - there are no REP items and we are not in the soundfold trie + * - the score is going to be too high anyway + * - already applied a REP item or swapped here */ + if ((lp->lp_replang == NULL && !soundfold) + || sp->ts_score + SCORE_REP >= su->su_maxscore + || sp->ts_fidx < sp->ts_fidxtry) + { + sp->ts_state = STATE_FINAL; break; + } - case STATE_UNROT3R: - /* Undo ROT3R: "312" -> "123" */ - p = fword + sp->ts_fidx; -#ifdef FEAT_MBYTE - if (has_mbyte) - { - c = mb_ptr2char(p); - tl = MB_BYTE2LEN(*p); - n = MB_BYTE2LEN(p[tl]); - n += MB_BYTE2LEN(p[tl + n]); - mch_memmove(p, p + tl, n); - mb_char2bytes(c, p + n); - } - else -#endif - { - c = *p; - *p = p[1]; - p[1] = p[2]; - p[2] = c; - } - /*FALLTHROUGH*/ - - case STATE_REP_INI: - /* Check if matching with REP items from the .aff file would - * work. Quickly skip if: - * - there are no REP items - * - the score is going to be too high anyway - * - already applied a REP item or swapped here */ - if (lp->lp_replang == NULL - || sp->ts_score + SCORE_REP >= su->su_maxscore - || sp->ts_fidx < sp->ts_fidxtry) - { - sp->ts_state = STATE_FINAL; - break; - } - gap = &lp->lp_replang->sl_rep; - - /* Use the first byte to quickly find the first entry that - * may match. If the index is -1 there is none. */ + /* Use the first byte to quickly find the first entry that may + * match. If the index is -1 there is none. */ + if (soundfold) + sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; + else sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; - if (sp->ts_curi < 0) - { - sp->ts_state = STATE_FINAL; - break; - } - sp->ts_state = STATE_REP; - /*FALLTHROUGH*/ + if (sp->ts_curi < 0) + { + sp->ts_state = STATE_FINAL; + break; + } - case STATE_REP: - /* Try matching with REP items from the .aff file. For each - * match replace the characters and check if the resulting - * word is valid. */ - p = fword + sp->ts_fidx; + sp->ts_state = STATE_REP; + /*FALLTHROUGH*/ + + case STATE_REP: + /* Try matching with REP items from the .aff file. For each match + * replace the characters and check if the resulting word is + * valid. */ + p = fword + sp->ts_fidx; + if (soundfold) + gap = &slang->sl_repsal; + else gap = &lp->lp_replang->sl_rep; - while (sp->ts_curi < gap->ga_len) + while (sp->ts_curi < gap->ga_len) + { + ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; + if (*ftp->ft_from != *p) + { + /* past possible matching entries */ + sp->ts_curi = gap->ga_len; + break; + } + if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 + && TRY_DEEPER(su, stack, depth, SCORE_REP)) { - ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; - if (*ftp->ft_from != *p) + go_deeper(stack, depth, SCORE_REP); +#ifdef DEBUG_TRIEWALK + sprintf(changename[depth], "%.*s-%s: replace %s with %s", + sp->ts_twordlen, tword, fword + sp->ts_fidx, + ftp->ft_from, ftp->ft_to); +#endif + /* Need to undo this afterwards. */ + sp->ts_state = STATE_REP_UNDO; + + /* Change the "from" to the "to" string. */ + ++depth; + fl = STRLEN(ftp->ft_from); + tl = STRLEN(ftp->ft_to); + if (fl != tl) { - /* past possible matching entries */ - sp->ts_curi = gap->ga_len; - break; + mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); + repextra += tl - fl; } - if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 - && try_deeper(su, stack, depth, SCORE_REP)) - { - /* Need to undo this afterwards. */ - sp->ts_state = STATE_REP_UNDO; - - /* Change the "from" to the "to" string. */ - ++depth; - fl = STRLEN(ftp->ft_from); - tl = STRLEN(ftp->ft_to); - if (fl != tl) - { - mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); - repextra += tl - fl; - } - mch_memmove(p, ftp->ft_to, tl); - stack[depth].ts_fidxtry = sp->ts_fidx + tl; + mch_memmove(p, ftp->ft_to, tl); + stack[depth].ts_fidxtry = sp->ts_fidx + tl; #ifdef FEAT_MBYTE - stack[depth].ts_tcharlen = 0; + stack[depth].ts_tcharlen = 0; #endif - break; - } + break; } + } - if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) - /* No (more) matches. */ - sp->ts_state = STATE_FINAL; + if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) + /* No (more) matches. */ + sp->ts_state = STATE_FINAL; - break; + break; - case STATE_REP_UNDO: - /* Undo a REP replacement and continue with the next one. */ - ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data - + sp->ts_curi - 1; - fl = STRLEN(ftp->ft_from); - tl = STRLEN(ftp->ft_to); - p = fword + sp->ts_fidx; - if (fl != tl) - { - mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); - repextra -= tl - fl; - } - mch_memmove(p, ftp->ft_from, fl); - sp->ts_state = STATE_REP; - break; + case STATE_REP_UNDO: + /* Undo a REP replacement and continue with the next one. */ + if (soundfold) + gap = &slang->sl_repsal; + else + gap = &lp->lp_replang->sl_rep; + ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; + fl = STRLEN(ftp->ft_from); + tl = STRLEN(ftp->ft_to); + p = fword + sp->ts_fidx; + if (fl != tl) + { + mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); + repextra -= tl - fl; + } + mch_memmove(p, ftp->ft_from, fl); + sp->ts_state = STATE_REP; + break; - default: - /* Did all possible states at this level, go up one level. */ - --depth; + default: + /* Did all possible states at this level, go up one level. */ + --depth; - if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) - { - /* Continue in or go back to the prefix tree. */ - byts = pbyts; - idxs = pidxs; - } + if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) + { + /* Continue in or go back to the prefix tree. */ + byts = pbyts; + idxs = pidxs; + } - /* Don't check for CTRL-C too often, it takes time. */ - line_breakcheck(); + /* Don't check for CTRL-C too often, it takes time. */ + if (--breakcheckcount == 0) + { + ui_breakcheck(); + breakcheckcount = 1000; } } } } + /* - * Try going one level deeper in the tree. + * Go one level deeper in the tree. */ - static int -try_deeper(su, stack, depth, score_add) - suginfo_T *su; + static void +go_deeper(stack, depth, score_add) trystate_T *stack; int depth; int score_add; { - int newscore; - - /* Refuse to go deeper if the scrore is getting too big. */ - newscore = stack[depth].ts_score + score_add; - if (newscore >= su->su_maxscore) - return FALSE; - stack[depth + 1] = stack[depth]; stack[depth + 1].ts_state = STATE_START; - stack[depth + 1].ts_score = newscore; + stack[depth + 1].ts_score = stack[depth].ts_score + score_add; stack[depth + 1].ts_curi = 1; /* start just after length byte */ stack[depth + 1].ts_flags = 0; - return TRUE; } #ifdef FEAT_MBYTE @@ -10713,6 +12133,7 @@ score_comp_sal(su) sstp->st_word = vim_strsave(stp->st_word); if (sstp->st_word != NULL) { + sstp->st_wordlen = stp->st_wordlen; sstp->st_score = score; sstp->st_altscore = 0; sstp->st_orglen = stp->st_orglen; @@ -10743,6 +12164,7 @@ score_combine(su) char_u badsound[MAXWLEN]; int round; int lpi; + slang_T *slang = NULL; /* Add the alternate score to su_ga. */ for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) @@ -10751,13 +12173,13 @@ score_combine(su) if (lp->lp_slang->sl_sal.ga_len > 0) { /* soundfold the bad word */ - spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); + slang = lp->lp_slang; + spell_soundfold(slang, su->su_fbadword, TRUE, badsound); for (i = 0; i < su->su_ga.ga_len; ++i) { stp = &SUG(su->su_ga, i); - stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang, - badsound); + stp->st_altscore = stp_sal_score(stp, su, slang, badsound); if (stp->st_altscore == SCORE_MAXMAX) stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; else @@ -10769,11 +12191,15 @@ score_combine(su) } } + if (slang == NULL) /* just in case */ + return; + /* Add the alternate score to su_sga. */ for (i = 0; i < su->su_sga.ga_len; ++i) { stp = &SUG(su->su_sga, i); - stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word); + stp->st_altscore = spell_edit_score(slang, + su->su_badword, stp->st_word); if (stp->st_score == SCORE_MAXMAX) stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; else @@ -10781,8 +12207,11 @@ score_combine(su) stp->st_salscore = TRUE; } - /* Sort the suggestions and truncate at "maxcount" for both lists. */ + /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" + * for both lists. */ + check_suggestions(su, &su->su_ga); (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); + check_suggestions(su, &su->su_sga); (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); ga_init2(&ga, (int)sizeof(suginfo_T), 1); @@ -10872,7 +12301,8 @@ stp_sal_score(stp, su, slang, badsound) /* Add part of the bad word to the good word, so that we soundfold * what replaces the bad word. */ STRCPY(goodword, stp->st_word); - STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff); + vim_strncpy(goodword + stp->st_wordlen, + su->su_badptr + su->su_badlen - lendiff, lendiff); pgood = goodword; } else @@ -10884,6 +12314,40 @@ stp_sal_score(stp, su, slang, badsound) return soundalike_score(goodsound, pbad); } +/* structure used to store soundfolded words that add_sound_suggest() has + * handled already. */ +typedef struct +{ + short sft_score; /* lowest score used */ + char_u sft_word[1]; /* soundfolded word, actually longer */ +} sftword_T; + +static sftword_T dumsft; +#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) +#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) + +/* + * Prepare for calling suggest_try_soundalike(). + */ + static void +suggest_try_soundalike_prep() +{ + langp_T *lp; + int lpi; + slang_T *slang; + + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. */ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) + /* prepare the hashtable used by add_sound_suggest() */ + hash_init(&slang->sl_sounddone); + } +} + /* * Find suggestions by comparing the word in a sound-a-like form. * Note: This doesn't support postponed prefixes. @@ -10893,161 +12357,340 @@ suggest_try_soundalike(su) suginfo_T *su; { char_u salword[MAXWLEN]; - char_u tword[MAXWLEN]; - char_u tsalword[MAXWLEN]; - idx_T arridx[MAXWLEN]; - int curi[MAXWLEN]; langp_T *lp; - char_u *byts; - idx_T *idxs; - int depth; - int c; - idx_T n; - int round; - int flags; - int sound_score; - int local_score; int lpi; slang_T *slang; - /* Do this for all languages that support sound folding. */ + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. */ for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) { lp = LANGP_ENTRY(curbuf->b_langp, lpi); slang = lp->lp_slang; - if (slang->sl_sal.ga_len > 0) + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) { /* soundfold the bad word */ spell_soundfold(slang, su->su_fbadword, TRUE, salword); - /* - * Go through the whole tree, soundfold each word and compare. - * round 1: use the case-folded tree. - * round 2: use the keep-case tree. - */ - for (round = 1; round <= 2; ++round) + /* try all kinds of inserts/deletes/swaps/etc. */ + /* TODO: also soundfold the next words, so that we can try joining + * and splitting */ + suggest_trie_walk(su, lp, salword, TRUE); + } + } +} + +/* + * Finish up after calling suggest_try_soundalike(). + */ + static void +suggest_try_soundalike_finish() +{ + langp_T *lp; + int lpi; + slang_T *slang; + int todo; + hashitem_T *hi; + + /* Do this for all languages that support sound folding and for which a + * .sug file has been loaded. */ + for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) + { + lp = LANGP_ENTRY(curbuf->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) + { + /* Free the info about handled words. */ + todo = slang->sl_sounddone.ht_used; + for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) + if (!HASHITEM_EMPTY(hi)) + { + vim_free(HI2SFT(hi)); + --todo; + } + hash_clear(&slang->sl_sounddone); + } + } +} + +/* + * A match with a soundfolded word is found. Add the good word(s) that + * produce this soundfolded word. + */ + static void +add_sound_suggest(su, goodword, score, lp) + suginfo_T *su; + char_u *goodword; + int score; /* soundfold score */ + langp_T *lp; +{ + slang_T *slang = lp->lp_slang; /* language for sound folding */ + int sfwordnr; + char_u *nrline; + int orgnr; + char_u theword[MAXWLEN]; + int i; + int wlen; + char_u *byts; + idx_T *idxs; + int n; + int wordcount; + int wc; + int goodscore; + hash_T hash; + hashitem_T *hi; + sftword_T *sft; + int bc, gc; + int limit; + + /* + * It's very well possible that the same soundfold word is found several + * times with different scores. Since the following is quite slow only do + * the words that have a better score than before. Use a hashtable to + * remember the words that have been done. + */ + hash = hash_hash(goodword); + hi = hash_lookup(&slang->sl_sounddone, goodword, hash); + if (HASHITEM_EMPTY(hi)) + { + sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword)); + if (sft != NULL) + { + sft->sft_score = score; + STRCPY(sft->sft_word, goodword); + hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); + } + } + else + { + sft = HI2SFT(hi); + if (score >= sft->sft_score) + return; + sft->sft_score = score; + } + + /* + * Find the word nr in the soundfold tree. + */ + sfwordnr = soundfold_find(slang, goodword); + if (sfwordnr < 0) + { + EMSG2(_(e_intern2), "add_sound_suggest()"); + return; + } + + /* + * go over the list of good words that produce this soundfold word + */ + nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); + orgnr = 0; + while (*nrline != NUL) + { + /* The wordnr was stored in a minimal nr of bytes as an offset to the + * previous wordnr. */ + orgnr += bytes2offset(&nrline); + + byts = slang->sl_fbyts; + idxs = slang->sl_fidxs; + + /* Lookup the word "orgnr" one of the two tries. */ + n = 0; + wlen = 0; + wordcount = 0; + for (;;) + { + i = 1; + if (wordcount == orgnr && byts[n + 1] == NUL) + break; /* found end of word */ + + if (byts[n + 1] == NUL) + ++wordcount; + + /* skip over the NUL bytes */ + for ( ; byts[n + i] == NUL; ++i) + if (i > byts[n]) /* safety check */ + { + STRCPY(theword + wlen, "BAD"); + goto badword; + } + + /* One of the siblings must have the word. */ + for ( ; i < byts[n]; ++i) + { + wc = idxs[idxs[n + i]]; /* nr of words under this byte */ + if (wordcount + wc > orgnr) + break; + wordcount += wc; + } + + theword[wlen++] = byts[n + i]; + n = idxs[n + i]; + } +badword: + theword[wlen] = NUL; + + /* Go over the possible flags and regions. */ + for (; i <= byts[n] && byts[n + i] == NUL; ++i) + { + char_u cword[MAXWLEN]; + char_u *p; + int flags = (int)idxs[n + i]; + + if (flags & WF_KEEPCAP) { - if (round == 1) + /* Must find the word in the keep-case tree. */ + find_keepcap_word(slang, theword, cword); + p = cword; + } + else + { + flags |= su->su_badflags; + if ((flags & WF_CAPMASK) != 0) { - byts = slang->sl_fbyts; - idxs = slang->sl_fidxs; + /* Need to fix case according to "flags". */ + make_case_word(theword, cword, flags); + p = cword; } else + p = theword; + } + + /* Add the suggestion. */ + if (sps_flags & SPS_DOUBLE) + { + /* Add the suggestion if the score isn't too bad. */ + if (score <= su->su_maxscore) + add_suggestion(su, &su->su_sga, p, su->su_badlen, + score, 0, FALSE, slang, FALSE); + } + else + { + /* Add a penalty for words in another region. */ + if ((flags & WF_REGION) + && (((unsigned)flags >> 16) & lp->lp_region) == 0) + goodscore = SCORE_REGION; + else + goodscore = 0; + + /* Add a small penalty for changing the first letter from + * lower to upper case. Helps for "tath" -> "Kath", which is + * less common thatn "tath" -> "path". Don't do it when the + * letter is the same, that has already been counted. */ + gc = PTR2CHAR(p); + if (SPELL_ISUPPER(gc)) { - byts = slang->sl_kbyts; - idxs = slang->sl_kidxs; - if (byts == NULL) /* no keep-case words */ - continue; + bc = PTR2CHAR(su->su_badword); + if (!SPELL_ISUPPER(bc) + && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) + goodscore += SCORE_ICASE / 2; } - depth = 0; - arridx[0] = 0; - curi[0] = 1; - while (depth >= 0 && !got_int) + /* Compute the score for the good word. This only does letter + * insert/delete/swap/replace. REP items are not considered, + * which may make the score a bit higher. + * Use a limit for the score to make it work faster. Use + * MAXSCORE(), because RESCORE() will change the score. + * If the limit is very high then the iterative method is + * inefficient, using an array is quicker. */ + limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); + if (limit > SCORE_LIMITMAX) + goodscore += spell_edit_score(slang, su->su_badword, p); + else + goodscore += spell_edit_score_limit(slang, su->su_badword, + p, limit); + + /* When going over the limit don't bother to do the rest. */ + if (goodscore < SCORE_MAXMAX) { - if (curi[depth] > byts[arridx[depth]]) - { - /* Done all bytes at this node, go up one level. */ - --depth; - line_breakcheck(); - } - else - { - /* Do one more byte at this node. */ - n = arridx[depth] + curi[depth]; - ++curi[depth]; - c = byts[n]; - if (c == 0) - { - /* End of word, deal with the word. */ - flags = (int)idxs[n]; - if (round == 2 || (flags & WF_KEEPCAP) == 0) - { - tword[depth] = NUL; - /* Sound-fold. Only in keep-case tree need to - * case-fold the word. */ - spell_soundfold(slang, tword, - round == 1, tsalword); - - /* Compute the edit distance between the - * sound-a-like words. */ - sound_score = soundalike_score(salword, - tsalword); - - /* Add a penalty for words in another region. */ - if ((flags & WF_REGION) && (((unsigned)flags - >> 16) & lp->lp_region) == 0) - local_score = SCORE_REGION; - else - local_score = 0; - sound_score += local_score; + /* Give a bonus to words seen before. */ + goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); + + /* Add the suggestion if the score isn't too bad. */ + goodscore = RESCORE(goodscore, score); + if (goodscore <= su->su_sfmaxscore) + add_suggestion(su, &su->su_ga, p, su->su_badlen, + goodscore, score, TRUE, slang, TRUE); + } + } + } + /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ + } +} - if (sound_score < SCORE_MAXMAX) - { - char_u cword[MAXWLEN]; - char_u *p; - int score; +/* + * Find word "word" in fold-case tree for "slang" and return the word number. + */ + static int +soundfold_find(slang, word) + slang_T *slang; + char_u *word; +{ + idx_T arridx = 0; + int len; + int wlen = 0; + int c; + char_u *ptr = word; + char_u *byts; + idx_T *idxs; + int wordnr = 0; - flags |= su->su_badflags; - if (round == 1 && (flags & WF_CAPMASK) != 0) - { - /* Need to fix case according to - * "flags". */ - make_case_word(tword, cword, flags); - p = cword; - } - else - p = tword; - - if (sps_flags & SPS_DOUBLE) - add_suggestion(su, &su->su_sga, p, - su->su_badlen, - sound_score, 0, FALSE, - lp->lp_sallang); - else - { - /* Compute the score. */ - score = spell_edit_score( - su->su_badword, p) - + local_score; - if (sps_flags & SPS_BEST) - /* give a bonus for the good word - * sounding the same as the bad - * word */ - add_suggestion(su, &su->su_ga, p, - su->su_badlen, - RESCORE(score, sound_score), - sound_score, TRUE, - lp->lp_sallang); - else - add_suggestion(su, &su->su_ga, p, - su->su_badlen, - score + sound_score, - 0, FALSE, - lp->lp_sallang); - } - } - } + byts = slang->sl_sbyts; + idxs = slang->sl_sidxs; - /* Skip over other NUL bytes. */ - while (byts[n + 1] == 0) - { - ++n; - ++curi[depth]; - } - } - else - { - /* Normal char, go one level deeper. */ - tword[depth++] = c; - arridx[depth] = idxs[n]; - curi[depth] = 1; - } - } - } + for (;;) + { + /* First byte is the number of possible bytes. */ + len = byts[arridx++]; + + /* If the first possible byte is a zero the word could end here. + * If the word ends we found the word. If not skip the NUL bytes. */ + c = ptr[wlen]; + if (byts[arridx] == NUL) + { + if (c == NUL) + break; + + /* Skip over the zeros, there can be several. */ + while (len > 0 && byts[arridx] == NUL) + { + ++arridx; + --len; } + if (len == 0) + return -1; /* no children, word should have ended here */ + ++wordnr; + } + + /* If the word ends we didn't find it. */ + if (c == NUL) + return -1; + + /* Perform a binary search in the list of accepted bytes. */ + if (c == TAB) /* <Tab> is handled like <Space> */ + c = ' '; + while (byts[arridx] < c) + { + /* The word count is in the first idxs[] entry of the child. */ + wordnr += idxs[idxs[arridx]]; + ++arridx; + if (--len == 0) /* end of the bytes, didn't find it */ + return -1; } + if (byts[arridx] != c) /* didn't find the byte */ + return -1; + + /* Continue at the child (if there is one). */ + arridx = idxs[arridx]; + ++wlen; + + /* One space in the good word may stand for several spaces in the + * checked word. */ + if (c == ' ') + while (ptr[wlen] == ' ' || ptr[wlen] == TAB) + ++wlen; } + + return wordnr; } /* @@ -11090,7 +12733,7 @@ set_map_str(lp, map) } lp->sl_has_map = TRUE; - /* Init the array and hash table empty. */ + /* Init the array and hash tables empty. */ for (i = 0; i < 256; ++i) lp->sl_map_array[i] = 0; #ifdef FEAT_MBYTE @@ -11204,45 +12847,39 @@ similar_chars(slang, c1, c2) /* * Add a suggestion to the list of suggestions. - * Do not add a duplicate suggestion or suggestions with a bad score. - * When "use_score" is not zero it's used, otherwise the score is computed - * with spell_edit_score(). + * For a suggestion that is already in the list the lowest score is remembered. */ static void -add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) +add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, + slang, maxsf) suginfo_T *su; - garray_T *gap; + garray_T *gap; /* either su_ga or su_sga */ char_u *goodword; int badlenarg; /* len of bad word replaced with "goodword" */ int score; int altscore; int had_bonus; /* value for st_had_bonus */ slang_T *slang; /* language for sound folding */ + int maxsf; /* su_maxscore applies to soundfold score, + su_sfmaxscore to the total score. */ { - int goodlen = STRLEN(goodword); /* len of goodword changed */ - int badlen = badlenarg; /* len of bad word changed */ + int goodlen; /* len of goodword changed */ + int badlen; /* len of bad word changed */ suggest_T *stp; suggest_T new_sug; int i; - hlf_T attr = HLF_COUNT; - char_u longword[MAXWLEN + 1]; char_u *pgood, *pbad; - /* Check that the word really is valid. Esp. for banned words and for - * split words, such as "the the". Need to append what follows to check - * for that. */ - STRCPY(longword, goodword); - vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); - (void)spell_check(curwin, longword, &attr, NULL); - if (attr != HLF_COUNT) - return; - /* Minimize "badlen" for consistency. Avoids that changing "the the" to * "thee the" is added next to changing the first "the" the "thee". */ pgood = goodword + STRLEN(goodword); - pbad = su->su_badptr + badlen; - while (pgood > goodword && pbad > su->su_badptr) + pbad = su->su_badptr + badlenarg; + for (;;) { + goodlen = pgood - goodword; + badlen = pbad - su->su_badptr; + if (goodlen <= 0 || badlen <= 0) + break; mb_ptr_back(goodword, pgood); mb_ptr_back(su->su_badptr, pbad); #ifdef FEAT_MBYTE @@ -11255,143 +12892,152 @@ add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) #endif if (*pgood != *pbad) break; - badlen = pbad - su->su_badptr; - goodlen = pgood - goodword; } + if (badlen == 0 && goodlen == 0) /* goodword doesn't change anything; may happen for "the the" changing * the first "the" to itself. */ return; - if (score <= su->su_maxscore) - { - /* Check if the word is already there. Also check the length that is - * being replaced "thes," -> "these" is a different suggestion from - * "thes" -> "these". */ - stp = &SUG(*gap, 0); - for (i = gap->ga_len - 1; i >= 0; --i) - if ((int)STRLEN(stp[i].st_word) == goodlen - && STRNCMP(stp[i].st_word, goodword, goodlen) == 0 - && stp[i].st_orglen == badlen) - { - /* - * Found it. Remember the word with the lowest score. - */ - if (stp[i].st_slang == NULL) - stp[i].st_slang = slang; + /* Check if the word is already there. Also check the length that is + * being replaced "thes," -> "these" is a different suggestion from + * "thes" -> "these". */ + stp = &SUG(*gap, 0); + for (i = gap->ga_len; --i >= 0; ++stp) + if (stp->st_wordlen == goodlen + && stp->st_orglen == badlen + && STRNCMP(stp->st_word, goodword, goodlen) == 0) + { + /* + * Found it. Remember the word with the lowest score. + */ + if (stp->st_slang == NULL) + stp->st_slang = slang; - new_sug.st_score = score; - new_sug.st_altscore = altscore; - new_sug.st_had_bonus = had_bonus; + new_sug.st_score = score; + new_sug.st_altscore = altscore; + new_sug.st_had_bonus = had_bonus; - if (stp[i].st_had_bonus != had_bonus) + if (stp->st_had_bonus != had_bonus) + { + /* Only one of the two had the soundalike score computed. + * Need to do that for the other one now, otherwise the + * scores can't be compared. This happens because + * suggest_try_change() doesn't compute the soundalike + * word to keep it fast, while some special methods set + * the soundalike score to zero. */ + if (had_bonus) + rescore_one(su, stp); + else { - /* Only one of the two had the soundalike score computed. - * Need to do that for the other one now, otherwise the - * scores can't be compared. This happens because - * suggest_try_change() doesn't compute the soundalike - * word to keep it fast, while some special methods set - * the soundalike score to zero. */ - if (had_bonus) - rescore_one(su, &stp[i]); - else - { - new_sug.st_word = goodword; - new_sug.st_slang = stp[i].st_slang; - new_sug.st_orglen = badlen; - rescore_one(su, &new_sug); - } + new_sug.st_word = stp->st_word; + new_sug.st_wordlen = stp->st_wordlen; + new_sug.st_slang = stp->st_slang; + new_sug.st_orglen = badlen; + rescore_one(su, &new_sug); } + } - if (stp[i].st_score > new_sug.st_score) - { - stp[i].st_score = new_sug.st_score; - stp[i].st_altscore = new_sug.st_altscore; - stp[i].st_had_bonus = new_sug.st_had_bonus; - } - break; + if (stp->st_score > new_sug.st_score) + { + stp->st_score = new_sug.st_score; + stp->st_altscore = new_sug.st_altscore; + stp->st_had_bonus = new_sug.st_had_bonus; } + break; + } - if (i < 0 && ga_grow(gap, 1) == OK) + if (i < 0 && ga_grow(gap, 1) == OK) + { + /* Add a suggestion. */ + stp = &SUG(*gap, gap->ga_len); + stp->st_word = vim_strnsave(goodword, goodlen); + if (stp->st_word != NULL) { - /* Add a suggestion. */ - stp = &SUG(*gap, gap->ga_len); - stp->st_word = vim_strnsave(goodword, goodlen); - if (stp->st_word != NULL) - { - stp->st_score = score; - stp->st_altscore = altscore; - stp->st_had_bonus = had_bonus; - stp->st_orglen = badlen; - stp->st_slang = slang; - ++gap->ga_len; + stp->st_wordlen = goodlen; + stp->st_score = score; + stp->st_altscore = altscore; + stp->st_had_bonus = had_bonus; + stp->st_orglen = badlen; + stp->st_slang = slang; + ++gap->ga_len; - /* If we have too many suggestions now, sort the list and keep - * the best suggestions. */ - if (gap->ga_len > SUG_MAX_COUNT(su)) - su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, - SUG_CLEAN_COUNT(su)); + /* If we have too many suggestions now, sort the list and keep + * the best suggestions. */ + if (gap->ga_len > SUG_MAX_COUNT(su)) + { + if (maxsf) + su->su_sfmaxscore = cleanup_suggestions(gap, + su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); + else + { + i = su->su_maxscore; + su->su_maxscore = cleanup_suggestions(gap, + su->su_maxscore, SUG_CLEAN_COUNT(su)); + } } } } } /* - * Add a word to be banned. + * Suggestions may in fact be flagged as errors. Esp. for banned words and + * for split words, such as "the the". Remove these from the list here. */ static void -add_banned(su, word) +check_suggestions(su, gap) suginfo_T *su; - char_u *word; + garray_T *gap; /* either su_ga or su_sga */ { - char_u *s = vim_strsave(word); - hash_T hash; - hashitem_T *hi; + suggest_T *stp; + int i; + char_u longword[MAXWLEN + 1]; + int len; + hlf_T attr; - if (s != NULL) + stp = &SUG(*gap, 0); + for (i = gap->ga_len - 1; i >= 0; --i) { - hash = hash_hash(s); - hi = hash_lookup(&su->su_banned, s, hash); - if (HASHITEM_EMPTY(hi)) - hash_add_item(&su->su_banned, hi, s, hash); - else - vim_free(s); + /* Need to append what follows to check for "the the". */ + STRCPY(longword, stp[i].st_word); + len = stp[i].st_wordlen; + vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, + MAXWLEN - len); + attr = HLF_COUNT; + (void)spell_check(curwin, longword, &attr, NULL, FALSE); + if (attr != HLF_COUNT) + { + /* Remove this entry. */ + vim_free(stp[i].st_word); + --gap->ga_len; + if (i < gap->ga_len) + mch_memmove(stp + i, stp + i + 1, + sizeof(suggest_T) * (gap->ga_len - i)); + } } } -/* - * Return TRUE if a word appears in the list of banned words. - */ - static int -was_banned(su, word) - suginfo_T *su; - char_u *word; -{ - hashitem_T *hi = hash_find(&su->su_banned, word); - - return !HASHITEM_EMPTY(hi); -} /* - * Free the banned words in "su". + * Add a word to be banned. */ static void -free_banned(su) +add_banned(su, word) suginfo_T *su; + char_u *word; { - int todo; + char_u *s = vim_strsave(word); + hash_T hash; hashitem_T *hi; - todo = su->su_banned.ht_used; - for (hi = su->su_banned.ht_array; todo > 0; ++hi) + hash = hash_hash(word); + hi = hash_lookup(&su->su_banned, word, hash); + if (HASHITEM_EMPTY(hi)) { - if (!HASHITEM_EMPTY(hi)) - { - vim_free(hi->hi_key); - --todo; - } + s = vim_strsave(word); + if (s != NULL) + hash_add_item(&su->su_banned, hi, s, hash); } - hash_clear(&su->su_banned); } /* @@ -12270,11 +13916,21 @@ soundalike_score(goodstart, badstart) * counted so much, vowels halfway the word aren't counted at all. */ if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) { - score = SCORE_DEL / 2; - if (*badsound == '*') - ++badsound; + if (badsound[1] == goodsound[1] + || (badsound[1] != NUL + && goodsound[1] != NUL + && badsound[2] == goodsound[2])) + { + /* handle like a substitute */ + } else - ++goodsound; + { + score = 2 * SCORE_DEL / 3; + if (*badsound == '*') + ++badsound; + else + ++goodsound; + } } goodlen = STRLEN(goodsound); @@ -12470,7 +14126,8 @@ soundalike_score(goodstart, badstart) * support multi-byte characters. */ static int -spell_edit_score(badword, goodword) +spell_edit_score(slang, badword, goodword) + slang_T *slang; char_u *badword; char_u *goodword; { @@ -12512,11 +14169,11 @@ spell_edit_score(badword, goodword) CNT(0, 0) = 0; for (j = 1; j <= goodlen; ++j) - CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; + CNT(0, j) = CNT(0, j - 1) + SCORE_INS; for (i = 1; i <= badlen; ++i) { - CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; + CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; for (j = 1; j <= goodlen; ++j) { #ifdef FEAT_MBYTE @@ -12539,7 +14196,15 @@ spell_edit_score(badword, goodword) if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); else - CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1); + else + CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); + } if (i > 1 && j > 1) { @@ -12577,6 +14242,392 @@ spell_edit_score(badword, goodword) return i; } +typedef struct +{ + int badi; + int goodi; + int score; +} limitscore_T; + +/* + * Like spell_edit_score(), but with a limit on the score to make it faster. + * May return SCORE_MAXMAX when the score is higher than "limit". + * + * This uses a stack for the edits still to be tried. + * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support + * for multi-byte characters. + */ + static int +spell_edit_score_limit(slang, badword, goodword, limit) + slang_T *slang; + char_u *badword; + char_u *goodword; + int limit; +{ + limitscore_T stack[10]; /* allow for over 3 * 2 edits */ + int stackidx; + int bi, gi; + int bi2, gi2; + int bc, gc; + int score; + int score_off; + int minscore; + int round; + +#ifdef FEAT_MBYTE + /* Multi-byte characters require a bit more work, use a different function + * to avoid testing "has_mbyte" quite often. */ + if (has_mbyte) + return spell_edit_score_limit_w(slang, badword, goodword, limit); +#endif + + /* + * The idea is to go from start to end over the words. So long as + * characters are equal just continue, this always gives the lowest score. + * When there is a difference try several alternatives. Each alternative + * increases "score" for the edit distance. Some of the alternatives are + * pushed unto a stack and tried later, some are tried right away. At the + * end of the word the score for one alternative is known. The lowest + * possible score is stored in "minscore". + */ + stackidx = 0; + bi = 0; + gi = 0; + score = 0; + minscore = limit + 1; + + for (;;) + { + /* Skip over an equal part, score remains the same. */ + for (;;) + { + bc = badword[bi]; + gc = goodword[gi]; + if (bc != gc) /* stop at a char that's different */ + break; + if (bc == NUL) /* both words end */ + { + if (score < minscore) + minscore = score; + goto pop; /* do next alternative */ + } + ++bi; + ++gi; + } + + if (gc == NUL) /* goodword ends, delete badword chars */ + { + do + { + if ((score += SCORE_DEL) >= minscore) + goto pop; /* do next alternative */ + } while (badword[++bi] != NUL); + minscore = score; + } + else if (bc == NUL) /* badword ends, insert badword chars */ + { + do + { + if ((score += SCORE_INS) >= minscore) + goto pop; /* do next alternative */ + } while (goodword[++gi] != NUL); + minscore = score; + } + else /* both words continue */ + { + /* If not close to the limit, perform a change. Only try changes + * that may lead to a lower score than "minscore". + * round 0: try deleting a char from badword + * round 1: try inserting a char in badword */ + for (round = 0; round <= 1; ++round) + { + score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); + if (score_off < minscore) + { + if (score_off + SCORE_EDIT_MIN >= minscore) + { + /* Near the limit, rest of the words must match. We + * can check that right now, no need to push an item + * onto the stack. */ + bi2 = bi + 1 - round; + gi2 = gi + round; + while (goodword[gi2] == badword[bi2]) + { + if (goodword[gi2] == NUL) + { + minscore = score_off; + break; + } + ++bi2; + ++gi2; + } + } + else + { + /* try deleting/inserting a character later */ + stack[stackidx].badi = bi + 1 - round; + stack[stackidx].goodi = gi + round; + stack[stackidx].score = score_off; + ++stackidx; + } + } + } + + if (score + SCORE_SWAP < minscore) + { + /* If swapping two characters makes a match then the + * substitution is more expensive, thus there is no need to + * try both. */ + if (gc == badword[bi + 1] && bc == goodword[gi + 1]) + { + /* Swap two characters, that is: skip them. */ + gi += 2; + bi += 2; + score += SCORE_SWAP; + continue; + } + } + + /* Substitute one character for another which is the same + * thing as deleting a character from both goodword and badword. + * Use a better score when there is only a case difference. */ + if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) + score += SCORE_ICASE; + else + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + score += SCORE_SIMILAR; + else + score += SCORE_SUBST; + } + + if (score < minscore) + { + /* Do the substitution. */ + ++gi; + ++bi; + continue; + } + } +pop: + /* + * Get here to try the next alternative, pop it from the stack. + */ + if (stackidx == 0) /* stack is empty, finished */ + break; + + /* pop an item from the stack */ + --stackidx; + gi = stack[stackidx].goodi; + bi = stack[stackidx].badi; + score = stack[stackidx].score; + } + + /* When the score goes over "limit" it may actually be much higher. + * Return a very large number to avoid going below the limit when giving a + * bonus. */ + if (minscore > limit) + return SCORE_MAXMAX; + return minscore; +} + +#ifdef FEAT_MBYTE +/* + * Multi-byte version of spell_edit_score_limit(). + * Keep it in sync with the above! + */ + static int +spell_edit_score_limit_w(slang, badword, goodword, limit) + slang_T *slang; + char_u *badword; + char_u *goodword; + int limit; +{ + limitscore_T stack[10]; /* allow for over 3 * 2 edits */ + int stackidx; + int bi, gi; + int bi2, gi2; + int bc, gc; + int score; + int score_off; + int minscore; + int round; + char_u *p; + int wbadword[MAXWLEN]; + int wgoodword[MAXWLEN]; + + /* Get the characters from the multi-byte strings and put them in an + * int array for easy access. */ + bi = 0; + for (p = badword; *p != NUL; ) + wbadword[bi++] = mb_cptr2char_adv(&p); + wbadword[bi++] = 0; + gi = 0; + for (p = goodword; *p != NUL; ) + wgoodword[gi++] = mb_cptr2char_adv(&p); + wgoodword[gi++] = 0; + + /* + * The idea is to go from start to end over the words. So long as + * characters are equal just continue, this always gives the lowest score. + * When there is a difference try several alternatives. Each alternative + * increases "score" for the edit distance. Some of the alternatives are + * pushed unto a stack and tried later, some are tried right away. At the + * end of the word the score for one alternative is known. The lowest + * possible score is stored in "minscore". + */ + stackidx = 0; + bi = 0; + gi = 0; + score = 0; + minscore = limit + 1; + + for (;;) + { + /* Skip over an equal part, score remains the same. */ + for (;;) + { + bc = wbadword[bi]; + gc = wgoodword[gi]; + + if (bc != gc) /* stop at a char that's different */ + break; + if (bc == NUL) /* both words end */ + { + if (score < minscore) + minscore = score; + goto pop; /* do next alternative */ + } + ++bi; + ++gi; + } + + if (gc == NUL) /* goodword ends, delete badword chars */ + { + do + { + if ((score += SCORE_DEL) >= minscore) + goto pop; /* do next alternative */ + } while (wbadword[++bi] != NUL); + minscore = score; + } + else if (bc == NUL) /* badword ends, insert badword chars */ + { + do + { + if ((score += SCORE_INS) >= minscore) + goto pop; /* do next alternative */ + } while (wgoodword[++gi] != NUL); + minscore = score; + } + else /* both words continue */ + { + /* If not close to the limit, perform a change. Only try changes + * that may lead to a lower score than "minscore". + * round 0: try deleting a char from badword + * round 1: try inserting a char in badword */ + for (round = 0; round <= 1; ++round) + { + score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); + if (score_off < minscore) + { + if (score_off + SCORE_EDIT_MIN >= minscore) + { + /* Near the limit, rest of the words must match. We + * can check that right now, no need to push an item + * onto the stack. */ + bi2 = bi + 1 - round; + gi2 = gi + round; + while (wgoodword[gi2] == wbadword[bi2]) + { + if (wgoodword[gi2] == NUL) + { + minscore = score_off; + break; + } + ++bi2; + ++gi2; + } + } + else + { + /* try deleting a character from badword later */ + stack[stackidx].badi = bi + 1 - round; + stack[stackidx].goodi = gi + round; + stack[stackidx].score = score_off; + ++stackidx; + } + } + } + + if (score + SCORE_SWAP < minscore) + { + /* If swapping two characters makes a match then the + * substitution is more expensive, thus there is no need to + * try both. */ + if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) + { + /* Swap two characters, that is: skip them. */ + gi += 2; + bi += 2; + score += SCORE_SWAP; + continue; + } + } + + /* Substitute one character for another which is the same + * thing as deleting a character from both goodword and badword. + * Use a better score when there is only a case difference. */ + if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) + score += SCORE_ICASE; + else + { + /* For a similar character use SCORE_SIMILAR. */ + if (slang != NULL + && slang->sl_has_map + && similar_chars(slang, gc, bc)) + score += SCORE_SIMILAR; + else + score += SCORE_SUBST; + } + + if (score < minscore) + { + /* Do the substitution. */ + ++gi; + ++bi; + continue; + } + } +pop: + /* + * Get here to try the next alternative, pop it from the stack. + */ + if (stackidx == 0) /* stack is empty, finished */ + break; + + /* pop an item from the stack */ + --stackidx; + gi = stack[stackidx].goodi; + bi = stack[stackidx].badi; + score = stack[stackidx].score; + } + + /* When the score goes over "limit" it may actually be much higher. + * Return a very large number to avoid going below the limit when giving a + * bonus. */ + if (minscore > limit) + return SCORE_MAXMAX; + return minscore; +} +#endif + +#define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ +#define DUMPFLAG_COUNT 2 /* include word count */ + /* * ":spelldump" */ @@ -12603,6 +14654,7 @@ ex_spelldump(eap) int do_region = TRUE; /* dump region names and numbers */ char_u *p; int lpi; + int dumpflags; if (no_spell_checking(curwin)) return; @@ -12657,17 +14709,22 @@ ex_spelldump(eap) { if (round == 1) { + dumpflags = 0; byts = slang->sl_fbyts; idxs = slang->sl_fidxs; } else { + dumpflags = DUMPFLAG_KEEPCASE; byts = slang->sl_kbyts; idxs = slang->sl_kidxs; } if (byts == NULL) continue; /* array is empty */ + if (eap->forceit) + dumpflags |= DUMPFLAG_COUNT; + depth = 0; arridx[0] = 0; curi[0] = 1; @@ -12707,11 +14764,12 @@ ex_spelldump(eap) * when it's the first one. */ c = (unsigned)flags >> 24; if (c == 0 || curi[depth] == 2) - dump_word(word, round, flags, lnum++); + dump_word(slang, word, dumpflags, + flags, lnum++); /* Apply the prefix, if there is one. */ if (c != 0) - lnum = dump_prefixes(slang, word, round, + lnum = dump_prefixes(slang, word, dumpflags, flags, lnum); } } @@ -12738,19 +14796,21 @@ ex_spelldump(eap) * Dump one word: apply case modifications and append a line to the buffer. */ static void -dump_word(word, round, flags, lnum) +dump_word(slang, word, dumpflags, flags, lnum) + slang_T *slang; char_u *word; - int round; + int dumpflags; int flags; linenr_T lnum; { int keepcap = FALSE; char_u *p; + char_u *tw; char_u cword[MAXWLEN]; char_u badword[MAXWLEN + 10]; int i; - if (round == 1 && (flags & WF_CAPMASK) != 0) + if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) { /* Need to fix case according to "flags". */ make_case_word(word, cword, flags); @@ -12759,10 +14819,12 @@ dump_word(word, round, flags, lnum) else { p = word; - if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0 + if ((dumpflags & DUMPFLAG_KEEPCASE) + && ((captype(word, NULL) & WF_KEEPCAP) == 0 || (flags & WF_FIXCAP) != 0)) keepcap = TRUE; } + tw = p; /* Add flags and regions after a slash. */ if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) @@ -12782,6 +14844,20 @@ dump_word(word, round, flags, lnum) p = badword; } + if (dumpflags & DUMPFLAG_COUNT) + { + hashitem_T *hi; + + /* Include the word count for ":spelldump!". */ + hi = hash_find(&slang->sl_wordcount, tw); + if (!HASHITEM_EMPTY(hi)) + { + vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", + tw, HI2WC(hi)->wc_count); + p = IObuff; + } + } + ml_append(lnum, p, (colnr_T)0, FALSE); } @@ -12791,10 +14867,10 @@ dump_word(word, round, flags, lnum) * Return the updated line number. */ static linenr_T -dump_prefixes(slang, word, round, flags, startlnum) +dump_prefixes(slang, word, dumpflags, flags, startlnum) slang_T *slang; char_u *word; /* case-folded word */ - int round; + int dumpflags; int flags; /* flags with prefix ID */ linenr_T startlnum; { @@ -12860,7 +14936,7 @@ dump_prefixes(slang, word, round, flags, startlnum) if (c != 0) { vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); - dump_word(prefix, round, + dump_word(slang, prefix, dumpflags, (c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum++); } @@ -12876,7 +14952,7 @@ dump_prefixes(slang, word, round, flags, startlnum) { vim_strncpy(prefix + depth, word_up, MAXWLEN - depth - 1); - dump_word(prefix, round, + dump_word(slang, prefix, dumpflags, (c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum++); } @@ -12981,7 +15057,7 @@ expand_spelling(lnum, col, pat, matchp) { garray_T ga; - spell_suggest_list(&ga, pat, 100, spell_expand_need_cap); + spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); *matchp = ga.ga_data; return ga.ga_len; } diff --git a/src/structs.h b/src/structs.h index 5b91cecb6..5420b4e4b 100644 --- a/src/structs.h +++ b/src/structs.h @@ -1074,6 +1074,13 @@ struct dictvar_S #define SYNSPL_TOP 1 /* spell check toplevel text */ #define SYNSPL_NOTOP 2 /* don't spell check toplevel text */ +/* avoid #ifdefs for when b_spell is not available */ +#ifdef FEAT_SYN_HL +# define B_SPELL(buf) ((buf)->b_spell) +#else +# define B_SPELL(buf) (0) +#endif + /* * buffer: structure that holds information about one file @@ -1407,8 +1414,19 @@ struct file_buffer int b_may_swap; int b_did_warn; /* Set to 1 if user has been warned on first change of a read-only file */ - int b_help; /* buffer for help file (when set b_p_bt is - "help") */ + + /* Two special kinds of buffers: + * help buffer - used for help files, won't use a swap file. + * spell buffer - used for spell info, never displayed and doesn't have a + * file name. + */ + int b_help; /* TRUE for help file buffer (when set b_p_bt + is "help") */ +#ifdef FEAT_SYN_HL + int b_spell; /* TRUE for a spell file buffer, most fields + are not used! Use the B_SPELL macro to + access b_spell without #ifdef. */ +#endif #ifndef SHORT_FNAME int b_shortname; /* this file has an 8.3 file name */ diff --git a/src/testdir/test58.ok b/src/testdir/test58.ok index 75caa7e66..b7ed09466 100644 --- a/src/testdir/test58.ok +++ b/src/testdir/test58.ok @@ -20,11 +20,11 @@ uk wrong ------- bad -['put', 'OK', 'uk'] +['put', 'uk', 'OK'] inputs ['input', 'puts', 'outputs'] comment -['Comment'] +['Comment', 'outtest', 'the end'] ok ['OK', 'uk', 'put'] Ok @@ -34,7 +34,7 @@ test déôl ['deol', 'déôr', 'test'] end -['put', 'OK', 'test'] +['put', 'uk', 'test'] the ['put', 'uk', 'test'] gebletegek @@ -141,7 +141,7 @@ bad wordutilize ['word utilize', 'wordutils', 'wordutil'] pro -['bork', 'end', 'word'] +['bork', 'word', 'end'] borkborkborkborkborkbork ['bork borkborkborkborkbork', 'borkbork borkborkborkbork', 'borkborkbork borkborkbork'] tomatotomatotomato @@ -185,7 +185,7 @@ probarbirk middle [] startmiddle -['startmiddleend'] +['startmiddleend', 'startmiddlebar'] middleend [] endstart @@ -217,7 +217,7 @@ probarbirk middle [] leadmiddle -['leadmiddleend'] +['leadmiddleend', 'leadmiddlebar'] middleend [] endlead @@ -249,7 +249,7 @@ probarmaat middle [] leadmiddle -[] +['leadmiddlebar'] middletail [] taillead diff --git a/src/testdir/test59.ok b/src/testdir/test59.ok index 9c49be4cb..29c9696e8 100644 --- a/src/testdir/test59.ok +++ b/src/testdir/test59.ok @@ -20,11 +20,11 @@ uk wrong ------- bad -['put', 'OK', 'uk'] +['put', 'uk', 'OK'] inputs ['input', 'puts', 'outputs'] comment -['Comment'] +['Comment', 'outtest', 'the end'] ok ['OK', 'uk', 'put'] Ok @@ -34,7 +34,7 @@ test déôl ['deol', 'déôr', 'test'] end -['put', 'OK', 'test'] +['put', 'uk', 'test'] the ['put', 'uk', 'test'] gebletegek @@ -141,7 +141,7 @@ bad wordutilize ['word utilize', 'wordutils', 'wordutil'] pro -['bork', 'end', 'word'] +['bork', 'word', 'end'] borkborkborkborkborkbork ['bork borkborkborkborkbork', 'borkbork borkborkborkbork', 'borkborkbork borkborkbork'] tomatotomatotomato @@ -185,7 +185,7 @@ probarbirk middle [] startmiddle -['startmiddleend'] +['startmiddleend', 'startmiddlebar'] middleend [] endstart @@ -217,7 +217,7 @@ probarbirk middle [] leadmiddle -['leadmiddleend'] +['leadmiddleend', 'leadmiddlebar'] middleend [] endlead @@ -249,7 +249,7 @@ probarmaat middle [] leadmiddle -[] +['leadmiddlebar'] middletail [] taillead diff --git a/src/testdir/test60.in b/src/testdir/test60.in index 9899a94eb..2c414b38e 100644 --- a/src/testdir/test60.in +++ b/src/testdir/test60.in @@ -51,6 +51,10 @@ endfunction let test_cases += [['&textwidth', 1]] " Existing and working option (short form) let test_cases += [['&tw', 1]] + " Global option + let test_cases += [['&g:errorformat', 1]] + " Local option + let test_cases += [['&l:errorformat', 1]] " Negative form of existing and working option (long form) let test_cases += [['&nojoinspaces', 0]] " Negative form of existing and working option (short form) @@ -212,6 +216,26 @@ endfunction echo "FAILED" endif + " Existing local curly-brace variable + let str = "local" + let curly_{str}_var = 1 + echo 'curly_' . str . '_var: 1' + if exists('curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing local curly-brace variable + unlet curly_{str}_var + echo 'curly_' . str . '_var: 0' + if !exists('curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing global variable let g:global_var = 1 echo 'g:global_var: 1' @@ -230,29 +254,46 @@ endfunction echo "FAILED" endif - " Existing local curly-brace variable - let curly_local_var = 1 - let str = "local" - echo 'curly_{str}_var: 1' - if exists('curly_{str}_var') + " Existing global list + let g:global_list = ["blue", "orange"] + echo 'g:global_list: 1' + if exists('g:global_list') echo "OK" else echo "FAILED" endif - " Non-existing local curly-brace variable - unlet curly_local_var - echo 'curly_{str}_var: 0' - if !exists('curly_{str}_var') + " Non-existing global list + unlet g:global_list + echo 'g:global_list: 0' + if !exists('g:global_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing global dictionary + let g:global_dict = {"xcord":100, "ycord":2} + echo 'g:global_dict: 1' + if exists('g:global_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing global dictionary + unlet g:global_dict + echo 'g:global_dict: 0' + if !exists('g:global_dict') echo "OK" else echo "FAILED" endif " Existing global curly-brace variable - let g:curly_global_var = 1 let str = "global" - echo 'g:curly_{str}_var: 1' + let g:curly_{str}_var = 1 + echo 'g:curly_' . str . '_var: 1' if exists('g:curly_{str}_var') echo "OK" else @@ -260,17 +301,212 @@ endfunction endif " Non-existing global curly-brace variable - unlet g:curly_global_var - echo 'g:curly_{str}_var: 0' + unlet g:curly_{str}_var + echo 'g:curly_' . str . '_var: 0' if !exists('g:curly_{str}_var') echo "OK" else echo "FAILED" endif + " Existing window variable + echo 'w:window_var: 1' + let w:window_var = 1 + if exists('w:window_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window variable + unlet w:window_var + echo 'w:window_var: 0' + if !exists('w:window_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing window list + let w:window_list = ["blue", "orange"] + echo 'w:window_list: 1' + if exists('w:window_list') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window list + unlet w:window_list + echo 'w:window_list: 0' + if !exists('w:window_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing window dictionary + let w:window_dict = {"xcord":100, "ycord":2} + echo 'w:window_dict: 1' + if exists('w:window_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window dictionary + unlet w:window_dict + echo 'w:window_dict: 0' + if !exists('w:window_dict') + echo "OK" + else + echo "FAILED" + endif + + " Existing window curly-brace variable + let str = "window" + let w:curly_{str}_var = 1 + echo 'w:curly_' . str . '_var: 1' + if exists('w:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing window curly-brace variable + unlet w:curly_{str}_var + echo 'w:curly_' . str . '_var: 0' + if !exists('w:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer variable + echo 'b:buffer_var: 1' + let b:buffer_var = 1 + if exists('b:buffer_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer variable + unlet b:buffer_var + echo 'b:buffer_var: 0' + if !exists('b:buffer_var') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer list + let b:buffer_list = ["blue", "orange"] + echo 'b:buffer_list: 1' + if exists('b:buffer_list') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer list + unlet b:buffer_list + echo 'b:buffer_list: 0' + if !exists('b:buffer_list') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer dictionary + let b:buffer_dict = {"xcord":100, "ycord":2} + echo 'b:buffer_dict: 1' + if exists('b:buffer_dict') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer dictionary + unlet b:buffer_dict + echo 'b:buffer_dict: 0' + if !exists('b:buffer_dict') + echo "OK" + else + echo "FAILED" + endif + + " Existing buffer curly-brace variable + let str = "buffer" + let b:curly_{str}_var = 1 + echo 'b:curly_' . str . '_var: 1' + if exists('b:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing buffer curly-brace variable + unlet b:curly_{str}_var + echo 'b:curly_' . str . '_var: 0' + if !exists('b:curly_{str}_var') + echo "OK" + else + echo "FAILED" + endif + " Script-local tests source test60.vim + " Existing Vim internal variable + echo 'v:version: 1' + if exists('v:version') + echo "OK" + else + echo "FAILED" + endif + + " Non-existing Vim internal variable + echo 'v:non_exists_var: 0' + if !exists('v:non_exists_var') + echo "OK" + else + echo "FAILED" + endif + + " Function arguments + function TestFuncArg(func_arg, ...) + echo 'a:func_arg: 1' + if exists('a:func_arg') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:non_exists_arg: 0' + if !exists('a:non_exists_arg') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:1: 1' + if exists('a:1') + echo "OK" + else + echo "FAILED" + endif + + echo 'a:2: 0' + if !exists('a:2') + echo "OK" + else + echo "FAILED" + endif + endfunction + + call TestFuncArg("arg1", "arg2") + redir END endfunction :call TestExists() diff --git a/src/testdir/test60.ok b/src/testdir/test60.ok index fe6c4b765..46a72e4d2 100644 --- a/src/testdir/test60.ok +++ b/src/testdir/test60.ok @@ -33,6 +33,10 @@ OK OK &tw: 1 OK +&g:errorformat: 1 +OK +&l:errorformat: 1 +OK &nojoinspaces: 0 OK &nojs: 0 @@ -85,27 +89,87 @@ local_dict: 1 OK local_dict: 0 OK +curly_local_var: 1 +OK +curly_local_var: 0 +OK g:global_var: 1 OK g:global_var: 0 OK -curly_{str}_var: 1 +g:global_list: 1 +OK +g:global_list: 0 +OK +g:global_dict: 1 +OK +g:global_dict: 0 +OK +g:curly_global_var: 1 +OK +g:curly_global_var: 0 +OK +w:window_var: 1 +OK +w:window_var: 0 +OK +w:window_list: 1 +OK +w:window_list: 0 +OK +w:window_dict: 1 +OK +w:window_dict: 0 +OK +w:curly_window_var: 1 OK -curly_{str}_var: 0 +w:curly_window_var: 0 OK -g:curly_{str}_var: 1 +b:buffer_var: 1 OK -g:curly_{str}_var: 0 +b:buffer_var: 0 +OK +b:buffer_list: 1 +OK +b:buffer_list: 0 +OK +b:buffer_dict: 1 +OK +b:buffer_dict: 0 +OK +b:curly_buffer_var: 1 +OK +b:curly_buffer_var: 0 OK s:script_var: 1 OK s:script_var: 0 OK -s:curly_{str}_var: 1 +s:script_list: 1 +OK +s:script_list: 0 +OK +s:script_dict: 1 OK -s:curly_{str}_var: 0 +s:script_dict: 0 +OK +s:curly_script_var: 1 +OK +s:curly_script_var: 0 OK *s:my_script_func: 1 OK *s:my_script_func: 0 OK +v:version: 1 +OK +v:non_exists_var: 0 +OK +a:func_arg: 1 +OK +a:non_exists_arg: 0 +OK +a:1: 1 +OK +a:2: 0 +OK diff --git a/src/testdir/test60.vim b/src/testdir/test60.vim new file mode 100644 index 000000000..48eea4279 --- /dev/null +++ b/src/testdir/test60.vim @@ -0,0 +1,97 @@ +" Vim script for exists() function test +" Script-local variables are checked here + +" Existing script-local variable +let s:script_var = 1 +echo 's:script_var: 1' +if exists('s:script_var') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local variable +unlet s:script_var +echo 's:script_var: 0' +if !exists('s:script_var') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local list +let s:script_list = ["blue", "orange"] +echo 's:script_list: 1' +if exists('s:script_list') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local list +unlet s:script_list +echo 's:script_list: 0' +if !exists('s:script_list') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local dictionary +let s:script_dict = {"xcord":100, "ycord":2} +echo 's:script_dict: 1' +if exists('s:script_dict') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local dictionary +unlet s:script_dict +echo 's:script_dict: 0' +if !exists('s:script_dict') + echo "OK" +else + echo "FAILED" +endif + +" Existing script curly-brace variable +let str = "script" +let s:curly_{str}_var = 1 +echo 's:curly_' . str . '_var: 1' +if exists('s:curly_{str}_var') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local curly-brace variable +unlet s:curly_{str}_var +echo 's:curly_' . str . '_var: 0' +if !exists('s:curly_{str}_var') + echo "OK" +else + echo "FAILED" +endif + +" Existing script-local function +function! s:my_script_func() +endfunction + +echo '*s:my_script_func: 1' +if exists('*s:my_script_func') + echo "OK" +else + echo "FAILED" +endif + +" Non-existing script-local function +delfunction s:my_script_func + +echo '*s:my_script_func: 0' +if !exists('*s:my_script_func') + echo "OK" +else + echo "FAILED" +endif + diff --git a/src/version.h b/src/version.h index 253102b52..447f4fa0b 100644 --- a/src/version.h +++ b/src/version.h @@ -36,5 +36,5 @@ #define VIM_VERSION_NODOT "vim70aa" #define VIM_VERSION_SHORT "7.0aa" #define VIM_VERSION_MEDIUM "7.0aa ALPHA" -#define VIM_VERSION_LONG "VIM - Vi IMproved 7.0aa ALPHA (2005 Dec 29)" -#define VIM_VERSION_LONG_DATE "VIM - Vi IMproved 7.0aa ALPHA (2005 Dec 29, compiled " +#define VIM_VERSION_LONG "VIM - Vi IMproved 7.0aa ALPHA (2006 Jan 12)" +#define VIM_VERSION_LONG_DATE "VIM - Vi IMproved 7.0aa ALPHA (2006 Jan 12, compiled " |