diff options
Diffstat (limited to 'src/charset.c')
-rw-r--r-- | src/charset.c | 103 |
1 files changed, 97 insertions, 6 deletions
diff --git a/src/charset.c b/src/charset.c
index b51d07208..bd319ffe7 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -130,12 +130,13 @@ buf_init_chartab(buf, global)
      */
     vim_memset(buf->b_chartab, 0, (size_t)32);
 #ifdef FEAT_MBYTE
-    for (c = 0; c < 256; ++c)
-    {
-        /* double-byte characters are probably word characters */
-        if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
-            SET_CHARTAB(buf, c);
-    }
+    if (enc_dbcs != 0)
+        for (c = 0; c < 256; ++c)
+        {
+            /* double-byte characters are probably word characters */
+            if (MB_BYTE2LEN(c) == 2)
+                SET_CHARTAB(buf, c);
+        }
 #endif
 
 #ifdef FEAT_LISP
@@ -913,6 +914,96 @@ vim_iswordc_buf(p, buf)
 # endif
     return (GET_CHARTAB(buf, *p) != 0);
 }
+
+static char spell_chartab[256];
+
+/*
+ * Init the chartab used for spelling. Only depends on 'encoding'.
+ * Called once while starting up and when 'encoding' was changed.
+ * Unfortunately, we can't use isalpha() here, since the current locale may
+ * differ from 'encoding'.
+ */
+    void
+init_spell_chartab()
+{
+    int i;
+
+    /* ASCII is always the same, no matter what 'encoding' is used.
+     * EBCDIC is not supported! */
+    for (i = 0; i < '0'; ++i)
+        spell_chartab[i] = FALSE;
+    /* We include numbers. A word shouldn't start with a number, but handling
+     * that is done separately. */
+    for ( ; i <= '9'; ++i)
+        spell_chartab[i] = TRUE;
+    for ( ; i < 'A'; ++i)
+        spell_chartab[i] = FALSE;
+    for ( ; i <= 'Z'; ++i)
+        spell_chartab[i] = TRUE;
+    for ( ; i < 'a'; ++i)
+        spell_chartab[i] = FALSE;
+    for ( ; i <= 'z'; ++i)
+        spell_chartab[i] = TRUE;
+#ifdef FEAT_MBYTE
+    if (enc_dbcs)
+    {
+        /* DBCS: assume double-wide characters are word characters. */
+        for ( ; i <= 255; ++i)
+            if (MB_BYTE2LEN(i) == 2)
+                spell_chartab[i] = TRUE;
+            else
+                spell_chartab[i] = FALSE;
+    }
+    else if (STRCMP(p_enc, "cp850") == 0)
+#endif
+#if defined(MSDOS) || defined(FEAT_MBYTE)
+    {
+        /* cp850, MS-DOS */
+        for ( ; i < 128; ++i)
+            spell_chartab[i] = FALSE;
+        for ( ; i <= 0x9a; ++i)
+            spell_chartab[i] = TRUE;
+        for ( ; i < 0xa0; ++i)
+            spell_chartab[i] = FALSE;
+        for ( ; i <= 0xa5; ++i)
+            spell_chartab[i] = TRUE;
+        for ( ; i <= 255; ++i)
+            spell_chartab[i] = FALSE;
+    }
+#endif
+#ifdef FEAT_MBYTE
+    else
+#endif
+#if defined(FEAT_MBYTE) || !defined(MSDOS)
+    {
+        /* Rough guess: anything we don't recognize assumes word characters
+         * like latin1. */
+        for ( ; i < 0xc0; ++i)
+            spell_chartab[i] = FALSE;
+        for ( ; i <= 255; ++i)
+            spell_chartab[i] = TRUE;
+# ifdef FEAT_MBYTE
+        if (STRCMP(p_enc, "latin1") == 0)
+# endif
+            spell_chartab[0xf7] = FALSE; /* divide-by */
+    }
+#endif
+}
+
+/*
+ * Return TRUE if "p" points to a word character.
+ * This only depends on 'encoding', not on 'iskeyword'.
+ */
+    int
+spell_iswordc(p)
+    char_u *p;
+{
+# ifdef FEAT_MBYTE
+    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
+        return mb_get_class(p) >= 2;
+# endif
+    return spell_chartab[*p];
+}
 #endif
 
 /*
|