updated for version 7.0086

author: Bram Moolenaar <Bram@vim.org> 2005-06-16 21:51:00 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-06-16 21:51:00 +0000
commit: ea424166e2a53649eea8d8899fc9294ca023964c (patch)
tree: c35c0b8daf356f341979d346c440336bcd13c899
parent: 78599adb5219f98053673ca27683f922a9ca338b (diff)
download: vim-git-ea424166e2a53649eea8d8899fc9294ca023964c.tar.gz
4 files changed, 563 insertions, 191 deletions
diff --git a/src/ex_docmd.c b/src/ex_docmd.c
index fbb0f5a78..20c58f1d1 100644
--- a/src/ex_docmd.c
+++ b/src/ex_docmd.c
@@ -4146,7 +4146,7 @@ expand_filename(eap, cmdlinep, errormsgp)
 
 	/* For a shell command a '!' must be escaped. */
 	if ((eap->usefilter || eap->cmdidx == CMD_bang)
-					&& vim_strpbrk(repl, "!&;()") != NULL)
+			      && vim_strpbrk(repl, (char_u *)"!&;()") != NULL)
 	{
 	    char_u	*l;
 
diff --git a/src/message.c b/src/message.c
index ff7964989..79dbc2e17 100644
--- a/src/message.c
+++ b/src/message.c
@@ -657,40 +657,7 @@ emsg2(s, a1)
     return emsg3(s, a1, NULL);
 }
 
-/*
- * Print an error message with one or two "%s" and one or two string arguments.
- */
-    int
-emsg3(s, a1, a2)
-    char_u *s, *a1, *a2;
-{
-    if ((emsg_off > 0 && vim_strchr(p_debug, 'm') == NULL)
-#ifdef FEAT_EVAL
-	    || emsg_skip > 0
-#endif
-	    )
-	return TRUE;		/* no error messages at the moment */
-    vim_snprintf((char *)IObuff, IOSIZE, (char *)s, (char *)a1, (char *)a2);
-    return emsg(IObuff);
-}
-
-/*
- * Print an error message with one "%ld" and one long int argument.
- */
-    int
-emsgn(s, n)
-    char_u	*s;
-    long	n;
-{
-    if ((emsg_off > 0 && vim_strchr(p_debug, 'm') == NULL)
-#ifdef FEAT_EVAL
-	    || emsg_skip > 0
-#endif
-	    )
-	return TRUE;		/* no error messages at the moment */
-    vim_snprintf((char *)IObuff, IOSIZE, (char *)s, n);
-    return emsg(IObuff);
-}
+/* emsg3() and emsgn() are in misc2.c to avoid warnings for the prototypes. */
 
     void
 emsg_invreg(name)
@@ -3965,7 +3932,7 @@ vim_snprintf(str, str_m, fmt, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10)
 
 	    /* zero padding as requested by the precision or by the minimal
 	     * field width for numeric conversions required? */
-	    if (number_of_zeros_to_pad <= 0)
+	    if (number_of_zeros_to_pad == 0)
 	    {
 		/* will not copy first part of numeric right now, *
 		 * force it to be copied later in its entirety    */
diff --git a/src/os_vms.c b/src/os_vms.c
index c26937d8a..6fd05d503 100644
--- a/src/os_vms.c
+++ b/src/os_vms.c
@@ -300,7 +300,7 @@ vms_read(char *inbuf, size_t nbytes)
 {
     int		status, function, len;
     TT_MODE	tt_mode;
-    ITEM	itmlst[2];
+    ITEM	itmlst[3];
     static long trm_mask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
 
     /* whatever happened earlier we need an iochan here */
@@ -308,10 +308,11 @@ vms_read(char *inbuf, size_t nbytes)
 	tt_mode = get_tty();
 
     vul_item(&itmlst[0], 0, TRM$_MODIFIERS,
-	     (char *)( TRM$M_TM_ESCAPE  | TRM$M_TM_TIMED    | TRM$M_TM_NOECHO |
-                       TRM$M_TM_NOEDIT  | TRM$M_TM_NOFILTR  |
-                       TRM$M_TM_NORECALL| TRM$M_TM_TRMNOECHO), 0);
-    vul_item(&itmlst[1], sizeof(trm_mask), TRM$_TERM, (char *)&trm_mask, 0);
+	     (char *)( TRM$M_TM_ESCAPE	| TRM$M_TM_TIMED    | TRM$M_TM_NOECHO |
+		       TRM$M_TM_NOEDIT	| TRM$M_TM_NOFILTR  |
+		       TRM$M_TM_NORECALL| TRM$M_TM_TRMNOECHO), 0);
+    vul_item(&itmlst[1], 0, TRM$_TIMEOUT, (char *) 1, 0 );
+    vul_item(&itmlst[2], sizeof(trm_mask), TRM$_TERM, (char *)&trm_mask, 0);
 
     function = (IO$_READLBLK | IO$M_EXTEND);
     memset(inbuf, 0, nbytes);
diff --git a/src/spell.c b/src/spell.c
index 54ca1036b..9c00a269f 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -189,9 +189,6 @@ typedef long idx_T;
 
 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)
 
-#define WF_USED	    0x10000	/* Word was found in text. Must be in separate
-				   byte before region and flags. */
-
 #define BY_NOFLAGS  0		/* end of word without flags or region */
 #define BY_FLAGS    1		/* end of word, flag byte follows */
 #define BY_INDEX    2		/* child is shared, index follows */
@@ -243,7 +240,13 @@ struct slang_S
     int		sl_followup;	/* SAL followup */
     int		sl_collapse;	/* SAL collapse_result */
     int		sl_rem_accents;	/* SAL remove_accents */
-    char_u	*sl_map;	/* string with similar chars from MAP lines */
+    int		sl_has_map;	/* TRUE if there is a MAP line */
+#ifdef FEAT_MBYTE
+    hashtab_T	sl_map_hash;	/* MAP for multi-byte chars */
+    int		sl_map_array[256]; /* MAP for first 256 chars */
+#else
+    char_u	sl_map_array[256]; /* MAP for first 256 chars */
+#endif
 };
 
 /* First language that is loaded, start of the linked list of loaded
@@ -329,7 +332,6 @@ typedef struct suggest_S
 #define SCORE_ALLCAP	120	/* need all-cap case */
 #define SCORE_REGION	70	/* word is for different region */
 #define SCORE_RARE	180	/* rare word */
-#define SCORE_NOTUSED	11	/* word not found in text yet */
 
 /* score for edit distance */
 #define SCORE_SWAP	90	/* swap two characters */
@@ -402,22 +404,59 @@ static int set_spell_finish __ARGS((spelltab_T	*new_st));
 #endif
 
 /*
+ * For finding suggestions: At each node in the tree these states are tried:
+ */
+typedef enum
+{
+    STATE_START = 0,	/* At start of node, check if word may end or
+			 * split word. */
+    STATE_SPLITUNDO,	/* Undo word split. */
+    STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
+    STATE_PLAIN,	/* Use each byte of the node. */
+    STATE_DEL,		/* Delete a byte from the bad word. */
+    STATE_INS,		/* Insert a byte in the bad word. */
+    STATE_SWAP,		/* Swap two bytes. */
+    STATE_UNSWAP,	/* Undo swap two bytes. */
+    STATE_SWAP3,	/* Swap two bytes over three. */
+    STATE_UNSWAP3,	/* Undo Swap two bytes over three. */
+    STATE_ROT3L,	/* Rotate three bytes left */
+    STATE_UNROT3L,	/* Undo rotate three bytes left */
+    STATE_ROT3R,	/* Rotate three bytes right */
+    STATE_UNROT3R,	/* Undo rotate three bytes right */
+    STATE_REP_INI,	/* Prepare for using REP items. */
+    STATE_REP,		/* Use matching REP items from the .aff file. */
+    STATE_REP_UNDO,	/* Undo a REP item replacement. */
+    STATE_FINAL		/* End of this node. */
+} state_T;
+
+/*
  * Struct to keep the state at each level in spell_try_change().
  */
 typedef struct trystate_S
 {
-    int		ts_state;	/* state at this level, STATE_ */
+    state_T	ts_state;	/* state at this level, STATE_ */
     int		ts_score;	/* score */
-    int		ts_curi;	/* index in list of child nodes */
-    int		ts_fidx;	/* index in fword[], case-folded bad word */
-    int		ts_fidxtry;	/* ts_fidx at which bytes may be changed */
-    int		ts_twordlen;	/* valid length of tword[] */
+    short	ts_curi;	/* index in list of child nodes */
+    char_u	ts_fidx;	/* index in fword[], case-folded bad word */
+    char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
+    char_u	ts_twordlen;	/* valid length of tword[] */
+#ifdef FEAT_MBYTE
+    char_u	ts_tcharlen;	/* number of bytes in tword character */
+    char_u	ts_tcharidx;	/* current byte index in tword character */
+    char_u	ts_isdiff;	/* DIFF_ values */
+    char_u	ts_fcharstart;	/* index in fword where badword char started */
+#endif
     idx_T	ts_arridx;	/* index in tree array, start of node */
     char_u	ts_save_prewordlen; /* saved "prewordlen" */
-    int		ts_save_splitoff;   /* su_splitoff saved here */
-    int		ts_save_badflags;   /* badflags saved here */
+    char_u	ts_save_splitoff;   /* su_splitoff saved here */
+    char_u	ts_save_badflags;   /* badflags saved here */
 } trystate_T;
 
+/* values for ts_isdiff */
+#define DIFF_NONE	0	/* no different byte (yet) */
+#define DIFF_YES	1	/* different byte found */
+#define DIFF_INSERT	2	/* inserting character */
+
 static slang_T *slang_alloc __ARGS((char_u *lang));
 static void slang_free __ARGS((slang_T *lp));
 static void slang_clear __ARGS((slang_T *lp));
@@ -441,9 +480,8 @@ static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int s
 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
 static void spell_try_soundalike __ARGS((suginfo_T *su));
 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
-#if 0
+static void set_map_str __ARGS((slang_T *lp, char_u *map));
 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
-#endif
 #ifdef RESCORE
 static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus));
 #else
@@ -792,10 +830,6 @@ find_word(mip, keepcap)
 	{
 	    flags = idxs[arridx];
 
-	    /* Set a flag for words that were used.  The region and case
-	     * doesn't matter here, it's only used to rate the suggestions. */
-	    idxs[arridx] = flags | WF_USED;
-
 	    if (keepcap)
 	    {
 		/* For "keepcap" tree the case is always right. */
@@ -1128,8 +1162,20 @@ slang_clear(lp)
 	ga_clear(gap);
     }
 
-    vim_free(lp->sl_map);
-    lp->sl_map = NULL;
+#ifdef FEAT_MBYTE
+    {
+	int	    todo = lp->sl_map_hash.ht_used;
+	hashitem_T  *hi;
+
+	for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi)
+	    if (!HASHITEM_EMPTY(hi))
+	    {
+		--todo;
+		vim_free(hi->hi_key);
+	    }
+    }
+    hash_clear(&lp->sl_map_hash);
+#endif
 }
 
 /*
@@ -1369,7 +1415,8 @@ formerr:
     for (i = 0; i < cnt; ++i)
 	p[i] = getc(fd);			/* <mapstr> */
     p[i] = NUL;
-    lp->sl_map = p;
+    set_map_str(lp, p);
+    vim_free(p);
 
 
     /* round 1: <LWORDTREE>
@@ -4414,6 +4461,12 @@ allcap_copy(word, wcopy)
 
 /*
  * Try finding suggestions by adding/removing/swapping letters.
+ *
+ * This uses a state machine.  At each node in the tree we try various
+ * operations.  When trying if an operation works "depth" is increased and the
+ * stack[] is used to store info.  This allows combinations, thus insert one
+ * character, replace one and delete another.  The number of changes is
+ * limited by su->su_maxscore, checked in try_deeper().
  */
     static void
 spell_try_change(su)
@@ -4432,8 +4485,8 @@ spell_try_change(su)
     char_u	*byts;
     idx_T	*idxs;
     int		depth;
-    int		c;
-    int		n;
+    int		c, c2, c3;
+    int		n = 0;
     int		flags;
     int		badflags;
     garray_T	*gap;
@@ -4441,7 +4494,7 @@ spell_try_change(su)
     int		len;
     char_u	*p;
     fromto_T	*ftp;
-    int		fl, tl;
+    int		fl = 0, tl;
 
     /* get caps flags for bad word */
     badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
@@ -4450,26 +4503,6 @@ spell_try_change(su)
      * to find matches (esp. REP items). */
     STRCPY(fword, su->su_fbadword);
 
-    /*
-     * At each node in the tree these states are tried:
-     */
-#define STATE_START	0	/* At start of node, check if word may end or
-				 * split word. */
-#define STATE_SPLITUNDO	1	/* Undo word split. */
-#define STATE_ENDNUL	2	/* Past NUL bytes at start of the node. */
-#define STATE_PLAIN	3	/* Use each byte of the node. */
-#define STATE_DEL	4	/* Delete a byte from the bad word. */
-#define STATE_INS	5	/* Insert a byte in the bad word. */
-#define STATE_SWAP	6	/* Swap two bytes. */
-#define STATE_SWAP3A	7	/* Swap two bytes over three. */
-#define STATE_ROT3L	8	/* Rotate three bytes left */
-#define STATE_ROT3R	9	/* Rotate three bytes right */
-#define STATE_ROT_UNDO	10	/* undo rotating */
-#define STATE_REP_INI	11	/* Prepare for using REP items. */
-#define STATE_REP	12	/* Use matching REP items from the .aff file. */
-#define STATE_REP_UNDO	13	/* Undo a REP item replacement. */
-#define STATE_FINAL	99	/* End of this node. */
-
 
     for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
 						   lp->lp_slang != NULL; ++lp)
@@ -4498,7 +4531,17 @@ spell_try_change(su)
 	stack[0].ts_fidxtry = 0;
 	stack[0].ts_twordlen = 0;
 	stack[0].ts_arridx = 0;
+#ifdef FEAT_MBYTE
+	stack[0].ts_tcharlen = 0;
+#endif
 
+	/*
+	 * Loop to find all suggestions.  At each round we either:
+	 * - For the current state try one operation, advance "ts_curi",
+	 *   increase "depth".
+	 * - When a state is done go to the next, set "ts_state".
+	 * - When all states are tried decrease "depth".
+	 */
 	while (depth >= 0 && !got_int)
 	{
 	    sp = &stack[depth];
@@ -4559,10 +4602,6 @@ spell_try_change(su)
 		if (flags & WF_RARE)
 		    newscore += SCORE_RARE;
 
-		/* Words that were not found in the text get a penalty. */
-		if ((flags & WF_USED) == 0)
-		    newscore += SCORE_NOTUSED;
-
 		if (!spell_valid_case(badflags,
 					 captype(preword + prewordlen, NULL)))
 		    newscore += SCORE_ICASE;
@@ -4576,7 +4615,12 @@ spell_try_change(su)
 #endif
 			    );
 		}
-		else if (sp->ts_fidx >= sp->ts_fidxtry)
+		else if (sp->ts_fidx >= sp->ts_fidxtry
+#ifdef FEAT_MBYTE
+			/* Don't split halfway through a character. */
+			&& (!has_mbyte || sp->ts_tcharlen == 0)
+#endif
+			)
 		{
 		    /* The word in the tree ends but the badword
 		     * continues: try inserting a space and check that a valid
@@ -4663,165 +4707,420 @@ spell_try_change(su)
 		    /* Normal byte, go one level deeper.  If it's not equal to
 		     * the byte in the bad word adjust the score.  But don't
 		     * even try when the byte was already changed. */
-		    if (c == fword[sp->ts_fidx])
-			newscore = 0;
-
-		    /* TODO: this is too slow and comparing bytes isn't right
-		     * for multi-byte characters. */
-#if 0
-		    else if (lp->lp_slang->sl_map != NULL
-					&& similar_chars(lp->lp_slang,
-						       c, fword[sp->ts_fidx]))
-			newscore = SCORE_SIMILAR;
+		    if (c == fword[sp->ts_fidx]
+#ifdef FEAT_MBYTE
+			    || (sp->ts_tcharlen > 0
+						&& sp->ts_isdiff != DIFF_NONE)
 #endif
+			    )
+			newscore = 0;
 		    else
 			newscore = SCORE_SUBST;
 		    if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
 				    && try_deeper(su, stack, depth, newscore))
 		    {
 			++depth;
-			++stack[depth].ts_fidx;
-			tword[stack[depth].ts_twordlen++] = c;
-			stack[depth].ts_arridx = idxs[arridx];
+			sp = &stack[depth];
+			++sp->ts_fidx;
+			tword[sp->ts_twordlen++] = c;
+			sp->ts_arridx = idxs[arridx];
+#ifdef FEAT_MBYTE
+			if (newscore == SCORE_SUBST)
+			    sp->ts_isdiff = DIFF_YES;
+			if (has_mbyte)
+			{
+			    /* Multi-byte characters are a bit complicated to
+			     * handle: They differ when any of the bytes
+			     * differ and then their length may also differ. */
+			    if (sp->ts_tcharlen == 0)
+			    {
+				/* First byte. */
+				sp->ts_tcharidx = 0;
+				sp->ts_tcharlen = MB_BYTE2LEN(c);
+				sp->ts_fcharstart = sp->ts_fidx - 1;
+				sp->ts_isdiff = (newscore != 0)
+						       ? DIFF_YES : DIFF_NONE;
+			    }
+			    else if (sp->ts_isdiff == DIFF_INSERT)
+				/* When inserting trail bytes don't advance in
+				 * the bad word. */
+				--sp->ts_fidx;
+			    if (++sp->ts_tcharidx == sp->ts_tcharlen)
+			    {
+				/* Last byte of character. */
+				if (sp->ts_isdiff == DIFF_YES)
+				{
+				    /* Correct ts_fidx for the byte length of
+				     * the character (we didn't check that
+				     * before). */
+				    sp->ts_fidx = sp->ts_fcharstart
+						+ MB_BYTE2LEN(
+						    fword[sp->ts_fcharstart]);
+
+				    /* For a similar character adjust score
+				     * from SCORE_SUBST to SCORE_SIMILAR. */
+				    if (lp->lp_slang->sl_has_map
+					    && similar_chars(lp->lp_slang,
+						mb_ptr2char(tword
+						    + sp->ts_twordlen
+							   - sp->ts_tcharlen),
+						mb_ptr2char(fword
+							+ sp->ts_fcharstart)))
+					sp->ts_score -=
+						  SCORE_SUBST - SCORE_SIMILAR;
+				}
+
+				/* Starting a new char, reset the length. */
+				sp->ts_tcharlen = 0;
+			    }
+			}
+			else
+#endif
+			{
+			    /* If we found a similar char adjust the score.
+			     * We do this after calling try_deeper() because
+			     * it's slow. */
+			    if (newscore != 0
+				    && lp->lp_slang->sl_has_map
+				    && similar_chars(lp->lp_slang,
+						   c, fword[sp->ts_fidx - 1]))
+				sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
+			}
 		    }
 		}
 		break;
 
 	    case STATE_DEL:
-		/* Try skipping one byte in the bad word (delete it). */
+#ifdef FEAT_MBYTE
+		/* When past the first byte of a multi-byte char don't try
+		 * delete/insert/swap a character. */
+		if (has_mbyte && sp->ts_tcharlen > 0)
+		{
+		    sp->ts_state = STATE_FINAL;
+		    break;
+		}
+#endif
+		/*
+		 * Try skipping one character in the bad word (delete it).
+		 */
 		sp->ts_state = STATE_INS;
 		sp->ts_curi = 1;
 		if (fword[sp->ts_fidx] != NUL
 			&& try_deeper(su, stack, depth, SCORE_DEL))
 		{
 		    ++depth;
-		    ++stack[depth].ts_fidx;
+#ifdef FEAT_MBYTE
+		    if (has_mbyte)
+			stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
+		    else
+#endif
+			++stack[depth].ts_fidx;
 		    break;
 		}
 		/*FALLTHROUGH*/
 
 	    case STATE_INS:
-		/* Insert one byte.  Do this for each possible bytes at this
+		/* Insert one byte.  Do this for each possible byte at this
 		 * node. */
 		n = sp->ts_arridx;
 		if (sp->ts_curi > byts[n])
 		{
 		    /* Done all bytes at this node, do next state. */
 		    sp->ts_state = STATE_SWAP;
-		    sp->ts_curi = 1;
 		}
 		else
 		{
-		    /* Do one more byte at this node. */
+		    /* Do one more byte at this node.  Skip NUL bytes. */
 		    n += sp->ts_curi++;
 		    c = byts[n];
 		    if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
 		    {
 			++depth;
-			tword[stack[depth].ts_twordlen++] = c;
-			stack[depth].ts_arridx = idxs[n];
+			sp = &stack[depth];
+			tword[sp->ts_twordlen++] = c;
+			sp->ts_arridx = idxs[n];
+#ifdef FEAT_MBYTE
+			if (has_mbyte)
+			{
+			    fl = MB_BYTE2LEN(c);
+			    if (fl > 1)
+			    {
+				/* There are following bytes for the same
+				 * character.  We must find all bytes before
+				 * trying delete/insert/swap/etc. */
+				sp->ts_tcharlen = fl;
+				sp->ts_tcharidx = 1;
+				sp->ts_isdiff = DIFF_INSERT;
+			    }
+			}
+#endif
 		    }
 		}
 		break;
 
 	    case STATE_SWAP:
-		/* Swap two bytes: "12" -> "21".  This means looking for the
-		 * following byte at the current node and the current byte at
-		 * its child node.  We change "fword" here, it's changed back
-		 * afterwards.  TODO: should swap characters instead of bytes.
-		 * */
-		c = fword[sp->ts_fidx];
-		if (c != NUL && fword[sp->ts_fidx + 1] != NUL
-				  && try_deeper(su, stack, depth, SCORE_SWAP))
+		/*
+		 * Swap two bytes in the bad word: "12" -> "21".
+		 * We change "fword" here, it's changed back afterwards.
+		 */
+		p = fword + sp->ts_fidx;
+		c = *p;
+		if (c == NUL)
+		{
+		    /* End of word, can't swap or replace. */
+		    sp->ts_state = STATE_FINAL;
+		    break;
+		}
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
 		{
-		    sp->ts_state = STATE_SWAP3A;
+		    n = mb_ptr2len_check(p);
+		    c = mb_ptr2char(p);
+		    c2 = mb_ptr2char(p + n);
+		}
+		else
+#endif
+		    c2 = p[1];
+		if (c == c2)
+		{
+		    /* Characters are identical, swap won't do anything. */
+		    sp->ts_state = STATE_SWAP3;
+		    break;
+		}
+		if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP))
+		{
+		    sp->ts_state = STATE_UNSWAP;
 		    ++depth;
-		    fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
-		    fword[sp->ts_fidx + 1] = c;
-		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
+#ifdef FEAT_MBYTE
+		    if (has_mbyte)
+		    {
+			fl = mb_char2len(c2);
+			mch_memmove(p, p + n, fl);
+			mb_char2bytes(c, p + fl);
+			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
+		    }
+		    else
+#endif
+		    {
+			p[0] = c2;
+			p[1] = c;
+			stack[depth].ts_fidxtry = sp->ts_fidx + 2;
+		    }
 		}
 		else
 		    /* If this swap doesn't work then SWAP3 won't either. */
 		    sp->ts_state = STATE_REP_INI;
 		break;
 
-	    case STATE_SWAP3A:
-		/* First undo the STATE_SWAP swap: "21" -> "12". */
-		c = fword[sp->ts_fidx];
-		fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
-		fword[sp->ts_fidx + 1] = c;
+	    case STATE_UNSWAP:
+		/* Undo the STATE_SWAP swap: "21" -> "12". */
+		p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
+		{
+		    n = MB_BYTE2LEN(*p);
+		    c = mb_ptr2char(p + n);
+		    mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
+		    mb_char2bytes(c, p);
+		}
+		else
+#endif
+		{
+		    c = *p;
+		    *p = p[1];
+		    p[1] = c;
+		}
+		/*FALLTHROUGH*/
 
+	    case STATE_SWAP3:
 		/* Swap two bytes, skipping one: "123" -> "321".  We change
-		 * "fword" here, it's changed back afterwards.  TODO: should
-		 * swap characters instead of bytes. */
-		c = fword[sp->ts_fidx];
-		if (c != NUL && fword[sp->ts_fidx + 1] != NUL
-			&& fword[sp->ts_fidx + 2] != NUL
-				  && try_deeper(su, stack, depth, SCORE_SWAP3))
+		 * "fword" here, it's changed back afterwards. */
+		p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
 		{
-		    sp->ts_state = STATE_ROT3L;
+		    n = mb_ptr2len_check(p);
+		    c = mb_ptr2char(p);
+		    fl = mb_ptr2len_check(p + n);
+		    c2 = mb_ptr2char(p + n);
+		    c3 = mb_ptr2char(p + n + fl);
+		}
+		else
+#endif
+		{
+		    c = *p;
+		    c2 = p[1];
+		    c3 = p[2];
+		}
+
+		/* When characters are identical: "121" then SWAP3 result is
+		 * identical, ROT3L result is same as SWAP: "211", ROT3R
+		 * result is same as SWAP on next char: "112".  Thus skip all
+		 * swapping.  Also skip when c3 is NUL.  */
+		if (c == c3 || c3 == NUL)
+		{
+		    sp->ts_state = STATE_REP_INI;
+		    break;
+		}
+		if (try_deeper(su, stack, depth, SCORE_SWAP3))
+		{
+		    sp->ts_state = STATE_UNSWAP3;
 		    ++depth;
-		    fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
-		    fword[sp->ts_fidx + 2] = c;
-		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+#ifdef FEAT_MBYTE
+		    if (has_mbyte)
+		    {
+			tl = mb_char2len(c3);
+			mch_memmove(p, p + n + fl, tl);
+			mb_char2bytes(c2, p + tl);
+			mb_char2bytes(c, p + fl + tl);
+			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
+		    }
+		    else
+#endif
+		    {
+			p[0] = p[2];
+			p[2] = c;
+			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+		    }
 		}
 		else
 		    sp->ts_state = STATE_REP_INI;
 		break;
 
+	    case STATE_UNSWAP3:
+		/* Undo STATE_SWAP3: "321" -> "123" */
+		p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
+		{
+		    n = MB_BYTE2LEN(*p);
+		    c2 = mb_ptr2char(p + n);
+		    fl = MB_BYTE2LEN(p[n]);
+		    c = mb_ptr2char(p + n + fl);
+		    tl = MB_BYTE2LEN(p[n + fl]);
+		    mch_memmove(p + fl + tl, p, n);
+		    mb_char2bytes(c, p);
+		    mb_char2bytes(c2, p + tl);
+		}
+		else
+#endif
+		{
+		    c = *p;
+		    *p = p[2];
+		    p[2] = c;
+		}
+		/*FALLTHROUGH*/
+
 	    case STATE_ROT3L:
-		/* First undo STATE_SWAP3A: "321" -> "123" */
-		c = fword[sp->ts_fidx];
-		fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
-		fword[sp->ts_fidx + 2] = c;
-
-		/* Rotate three bytes left: "123" -> "231".  We change
-		 * "fword" here, it's changed back afterwards.  TODO: should
-		 * swap characters instead of bytes. */
+		/* Rotate three characters left: "123" -> "231".  We change
+		 * "fword" here, it's changed back afterwards. */
 		if (try_deeper(su, stack, depth, SCORE_SWAP3))
 		{
-		    sp->ts_state = STATE_ROT3R;
+		    sp->ts_state = STATE_UNROT3L;
 		    ++depth;
-		    c = fword[sp->ts_fidx];
-		    fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
-		    fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
-		    fword[sp->ts_fidx + 2] = c;
-		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+		    p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		    if (has_mbyte)
+		    {
+			n = mb_ptr2len_check(p);
+			c = mb_ptr2char(p);
+			fl = mb_ptr2len_check(p + n);
+			fl += mb_ptr2len_check(p + n + fl);
+			mch_memmove(p, p + n, fl);
+			mb_char2bytes(c, p + fl);
+			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
+		    }
+		    else
+#endif
+		    {
+			c = *p;
+			*p = p[1];
+			p[1] = p[2];
+			p[2] = c;
+			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+		    }
 		}
 		else
 		    sp->ts_state = STATE_REP_INI;
 		break;
 
-	    case STATE_ROT3R:
-		/* First undo STATE_ROT3L: "231" -> "123" */
-		c = fword[sp->ts_fidx + 2];
-		fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
-		fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
-		fword[sp->ts_fidx] = c;
+	    case STATE_UNROT3L:
+		/* Undo STATE_ROT3L: "231" -> "123" */
+		p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
+		{
+		    n = MB_BYTE2LEN(*p);
+		    n += MB_BYTE2LEN(p[n]);
+		    c = mb_ptr2char(p + n);
+		    tl = MB_BYTE2LEN(p[n]);
+		    mch_memmove(p + tl, p, n);
+		    mb_char2bytes(c, p);
+		}
+		else
+#endif
+		{
+		    c = p[2];
+		    p[2] = p[1];
+		    p[1] = *p;
+		    *p = c;
+		}
+		/*FALLTHROUGH*/
 
+	    case STATE_ROT3R:
 		/* Rotate three bytes right: "123" -> "312".  We change
-		 * "fword" here, it's changed back afterwards.  TODO: should
-		 * swap characters instead of bytes. */
+		 * "fword" here, it's changed back afterwards. */
 		if (try_deeper(su, stack, depth, SCORE_SWAP3))
 		{
-		    sp->ts_state = STATE_ROT_UNDO;
+		    sp->ts_state = STATE_UNROT3R;
 		    ++depth;
-		    c = fword[sp->ts_fidx + 2];
-		    fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
-		    fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
-		    fword[sp->ts_fidx] = c;
-		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+		    p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		    if (has_mbyte)
+		    {
+			n = mb_ptr2len_check(p);
+			n += mb_ptr2len_check(p + n);
+			c = mb_ptr2char(p + n);
+			tl = mb_ptr2len_check(p + n);
+			mch_memmove(p + tl, p, n);
+			mb_char2bytes(c, p);
+			stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
+		    }
+		    else
+#endif
+		    {
+			c = p[2];
+			p[2] = p[1];
+			p[1] = *p;
+			*p = c;
+			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+		    }
 		}
 		else
 		    sp->ts_state = STATE_REP_INI;
 		break;
 
-	    case STATE_ROT_UNDO:
+	    case STATE_UNROT3R:
 		/* Undo STATE_ROT3R: "312" -> "123" */
-		c = fword[sp->ts_fidx];
-		fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
-		fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
-		fword[sp->ts_fidx + 2] = c;
+		p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+		if (has_mbyte)
+		{
+		    c = mb_ptr2char(p);
+		    tl = MB_BYTE2LEN(*p);
+		    n = MB_BYTE2LEN(p[tl]);
+		    n += MB_BYTE2LEN(p[tl + n]);
+		    mch_memmove(p, p + tl, n);
+		    mb_char2bytes(c, p + n);
+		}
+		else
+#endif
+		{
+		    c = *p;
+		    *p = p[1];
+		    p[1] = p[2];
+		    p[2] = c;
+		}
 		/*FALLTHROUGH*/
 
 	    case STATE_REP_INI:
@@ -4837,7 +5136,7 @@ spell_try_change(su)
 		}
 
 		/* Use the first byte to quickly find the first entry that
-		 * matches.  If the index is -1 there is none. */
+		 * may match.  If the index is -1 there is none. */
 		sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
 		if (sp->ts_curi < 0)
 		{
@@ -4850,8 +5149,8 @@ spell_try_change(su)
 
 	    case STATE_REP:
 		/* Try matching with REP items from the .aff file.  For each
-		 * match replace the charactes and check if the resulting word
-		 * is valid. */
+		 * match replace the characters and check if the resulting
+		 * word is valid. */
 		p = fword + sp->ts_fidx;
 
 		gap = &lp->lp_slang->sl_rep;
@@ -4878,6 +5177,9 @@ spell_try_change(su)
 			    mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
 			mch_memmove(p, ftp->ft_to, tl);
 			stack[depth].ts_fidxtry = sp->ts_fidx + tl;
+#ifdef FEAT_MBYTE
+			stack[depth].ts_tcharlen = 0;
+#endif
 			break;
 		    }
 		}
@@ -4928,13 +5230,10 @@ try_deeper(su, stack, depth, score_add)
     if (newscore >= su->su_maxscore)
 	return FALSE;
 
+    stack[depth + 1] = stack[depth];
     stack[depth + 1].ts_state = STATE_START;
     stack[depth + 1].ts_score = newscore;
     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
-    stack[depth + 1].ts_fidx = stack[depth].ts_fidx;
-    stack[depth + 1].ts_fidxtry = stack[depth].ts_fidxtry;
-    stack[depth + 1].ts_twordlen = stack[depth].ts_twordlen;
-    stack[depth + 1].ts_arridx = stack[depth].ts_arridx;
     return TRUE;
 }
 
@@ -5286,7 +5585,90 @@ make_case_word(fword, cword, flags)
 	STRCPY(cword, fword);
 }
 
-#if 0
+/*
+ * Use map string "map" for language "lp".
+ */
+    static void
+set_map_str(lp, map)
+    slang_T	*lp;
+    char_u	*map;
+{
+    char_u	*p;
+    int		headc = 0;
+    int		c;
+    int		i;
+
+    if (*map == NUL)
+    {
+	lp->sl_has_map = FALSE;
+	return;
+    }
+    lp->sl_has_map = TRUE;
+
+    /* Init the array and hash table empty. */
+    for (i = 0; i < 256; ++i)
+	lp->sl_map_array[i] = 0;
+#ifdef FEAT_MBYTE
+    hash_init(&lp->sl_map_hash);
+#endif
+
+    /*
+     * The similar characters are stored separated with slashes:
+     * "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
+     * before the same slash.  For characters above 255 sl_map_hash is used.
+     */
+    for (p = map; *p != NUL; )
+    {
+#ifdef FEAT_MBYTE
+	c = mb_ptr2char_adv(&p);
+#else
+	c = *p++;
+#endif
+	if (c == '/')
+	    headc = 0;
+	else
+	{
+	    if (headc == 0)
+		 headc = c;
+
+#ifdef FEAT_MBYTE
+	    /* Characters above 255 don't fit in sl_map_array[], put them in
+	     * the hash table.  Each entry is the char, a NUL, the headchar and
+	     * a NUL. */
+	    if (c >= 256)
+	    {
+		int	    cl = mb_char2len(c);
+		int	    headcl = mb_char2len(headc);
+		char_u	    *b;
+		hash_T	    hash;
+		hashitem_T  *hi;
+
+		b = alloc((unsigned)(cl + headcl + 2));
+		if (b == NULL)
+		    return;
+		mb_char2bytes(c, b);
+		b[cl] = NUL;
+		mb_char2bytes(headc, b + cl + 1);
+		b[cl + 1 + headcl] = NUL;
+		hash = hash_hash(b);
+		hi = hash_lookup(&lp->sl_map_hash, b, hash);
+		if (HASHITEM_EMPTY(hi))
+		    hash_add_item(&lp->sl_map_hash, hi, b, hash);
+		else
+		{
+		    /* This should have been checked when generating the .spl
+		     * file. */
+		    EMSG(_("E999: duplicate char in MAP entry"));
+		    vim_free(b);
+		}
+	    }
+	    else
+#endif
+		lp->sl_map_array[c] = headc;
+	}
+    }
+}
+
 /*
  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
  * lines in the .aff file.
@@ -5297,21 +5679,43 @@ similar_chars(slang, c1, c2)
     int		c1;
     int		c2;
 {
-    char_u	*p1;
-    char_u	*p2;
-
-    /* The similar characters are stored separated with slashes:
-     * "aaa/bbb/ccc/".  Search for each character and if the next slash is the
-     * same one they are in the same MAP entry. */
-    p1 = vim_strchr(slang->sl_map, c1);
-    if (p1 == NULL)
-	return FALSE;
-    p2 = vim_strchr(slang->sl_map, c2);
-    if (p2 == NULL)
+    int		m1, m2;
+#ifdef FEAT_MBYTE
+    char_u	buf[MB_MAXBYTES];
+    hashitem_T  *hi;
+
+    if (c1 >= 256)
+    {
+	buf[mb_char2bytes(c1, buf)] = 0;
+	hi = hash_find(&slang->sl_map_hash, buf);
+	if (HASHITEM_EMPTY(hi))
+	    m1 = 0;
+	else
+	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
+    }
+    else
+#endif
+	m1 = slang->sl_map_array[c1];
+    if (m1 == 0)
 	return FALSE;
-    return vim_strchr(p1, '/') == vim_strchr(p2, '/');
-}
+
+
+#ifdef FEAT_MBYTE
+    if (c2 >= 256)
+    {
+	buf[mb_char2bytes(c2, buf)] = 0;
+	hi = hash_find(&slang->sl_map_hash, buf);
+	if (HASHITEM_EMPTY(hi))
+	    m2 = 0;
+	else
+	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
+    }
+    else
 #endif
+	m2 = slang->sl_map_array[c2];
+
+    return m1 == m2;
+}
 
 /*
  * Add a suggestion to the list of suggestions.
author	Bram Moolenaar <Bram@vim.org>	2005-06-16 21:51:00 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-06-16 21:51:00 +0000
commit	ea424166e2a53649eea8d8899fc9294ca023964c (patch)
tree	c35c0b8daf356f341979d346c440336bcd13c899
parent	78599adb5219f98053673ca27683f922a9ca338b (diff)
download	vim-git-ea424166e2a53649eea8d8899fc9294ca023964c.tar.gz