summary | refs | log | tree | commit | diff
path: root/src/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/charset.c')
-rw-r--r-- src/charset.c 103
1 files changed, 97 insertions, 6 deletions
diff --git a/src/charset.c b/src/charset.c
index b51d07208..bd319ffe7 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -130,12 +130,13 @@ buf_init_chartab(buf, global)
*/
vim_memset(buf->b_chartab, 0, (size_t)32);
#ifdef FEAT_MBYTE
- for (c = 0; c < 256; ++c)
- {
- /* double-byte characters are probably word characters */
- if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
- SET_CHARTAB(buf, c);
- }
+ if (enc_dbcs != 0)
+ for (c = 0; c < 256; ++c)
+ {
+ /* double-byte characters are probably word characters */
+ if (MB_BYTE2LEN(c) == 2)
+ SET_CHARTAB(buf, c);
+ }
#endif
#ifdef FEAT_LISP
@@ -913,6 +914,96 @@ vim_iswordc_buf(p, buf)
# endif
return (GET_CHARTAB(buf, *p) != 0);
}
+
+static char spell_chartab[256]; /* indexed by byte value: TRUE when it is a word character for spelling */
+
+/*
+ * Fill spell_chartab[] for all 256 byte values. Only depends on 'encoding'.
+ * Called once while starting up and when 'encoding' was changed.
+ * Unfortunately, we can't use isalpha() here, since the current locale may
+ * differ from 'encoding'.
+ */
+ void
+init_spell_chartab()
+{
+ int i; /* byte value; each loop below continues where the previous one stopped */
+
+ /* ASCII is always the same, no matter what 'encoding' is used.
+ * EBCDIC is not supported! */
+ for (i = 0; i < '0'; ++i)
+ spell_chartab[i] = FALSE;
+ /* We include digits. A word shouldn't start with a digit, but handling
+ * that is done separately. */
+ for ( ; i <= '9'; ++i)
+ spell_chartab[i] = TRUE;
+ for ( ; i < 'A'; ++i) /* ':' .. '@' */
+ spell_chartab[i] = FALSE;
+ for ( ; i <= 'Z'; ++i)
+ spell_chartab[i] = TRUE;
+ for ( ; i < 'a'; ++i) /* '[' .. '`' */
+ spell_chartab[i] = FALSE;
+ for ( ; i <= 'z'; ++i)
+ spell_chartab[i] = TRUE;
+#ifdef FEAT_MBYTE
+ if (enc_dbcs)
+ {
+ /* DBCS: assume double-byte characters are word characters. */
+ for ( ; i <= 255; ++i) /* continues at 'z' + 1 */
+ if (MB_BYTE2LEN(i) == 2)
+ spell_chartab[i] = TRUE;
+ else
+ spell_chartab[i] = FALSE;
+ }
+ else if (STRCMP(p_enc, "cp850") == 0)
+#endif /* FEAT_MBYTE */
+#if defined(MSDOS) || defined(FEAT_MBYTE)
+ {
+ /* cp850; on MS-DOS without FEAT_MBYTE this block is unconditional */
+ for ( ; i < 128; ++i) /* '{' .. 0x7f */
+ spell_chartab[i] = FALSE;
+ for ( ; i <= 0x9a; ++i)
+ spell_chartab[i] = TRUE;
+ for ( ; i < 0xa0; ++i)
+ spell_chartab[i] = FALSE;
+ for ( ; i <= 0xa5; ++i)
+ spell_chartab[i] = TRUE;
+ for ( ; i <= 255; ++i) /* NOTE(review): cp850 seems to have letters above 0xa5 too (e.g. 0xb5); confirm */
+ spell_chartab[i] = FALSE;
+ }
+#endif /* MSDOS || FEAT_MBYTE */
+#ifdef FEAT_MBYTE
+ else
+#endif /* FEAT_MBYTE */
+#if defined(FEAT_MBYTE) || !defined(MSDOS)
+ {
+ /* Rough guess: for any encoding we don't recognize assume the word
+ * characters are like latin1: 0xc0 - 0xff. */
+ for ( ; i < 0xc0; ++i)
+ spell_chartab[i] = FALSE;
+ for ( ; i <= 255; ++i) /* NOTE(review): includes 0xd7, the latin1 multiplication sign; confirm */
+ spell_chartab[i] = TRUE;
+# ifdef FEAT_MBYTE
+ if (STRCMP(p_enc, "latin1") == 0)
+# endif
+ spell_chartab[0xf7] = FALSE; /* division sign; cleared unconditionally without FEAT_MBYTE */
+ }
+#endif /* FEAT_MBYTE || !MSDOS */
+}
+
+/*
+ * Return TRUE if "p" points to a word character for spelling.
+ * This only depends on 'encoding', not on 'iskeyword'.
+ */
+ int
+spell_iswordc(p)
+ char_u *p; /* first byte of the character to test */
+{
+# ifdef FEAT_MBYTE
+ if (has_mbyte && MB_BYTE2LEN(*p) > 1) /* multi-byte character: table can't be used */
+ return mb_get_class(p) >= 2; /* assumes classes >= 2 are word characters; confirm in mb_get_class() */
+# endif /* FEAT_MBYTE */
+ return spell_chartab[*p]; /* single byte: look up the table filled by init_spell_chartab() */
+}
#endif
/*