diff options
author | Bram Moolenaar <Bram@vim.org> | 2020-06-09 19:34:54 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2020-06-09 19:34:54 +0200 |
commit | 59de417b904bbd204e313f015839317b577bd124 (patch) | |
tree | 4ebd73ce6e4ccd65b8a595f388c4535d81387319 | |
parent | dcf59c37d0e1517439c4c0c4a6a5ca09c90157ad (diff) | |
download | vim-git-59de417b904bbd204e313f015839317b577bd124.tar.gz |
patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case [v8.2.0938]
Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs)
Solution: Use utf_fold() when possible. (ref. neovim #12456)
-rw-r--r-- | src/diff.c | 2 | ||||
-rw-r--r-- | src/macros.h | 1 | ||||
-rw-r--r-- | src/regexp_nfa.c | 14 | ||||
-rw-r--r-- | src/testdir/test_regexp_utf8.vim | 19 | ||||
-rw-r--r-- | src/version.c | 2 |
5 files changed, 30 insertions, 8 deletions
diff --git a/src/diff.c b/src/diff.c index 1b0ad4778..bb95d5ed6 100644 --- a/src/diff.c +++ b/src/diff.c @@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T *din) // xdiff doesn't support ignoring case, fold-case the text. c = PTR2CHAR(s); - c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c); + c = MB_CASEFOLD(c); orig_len = mb_ptr2len(s); if (mb_char2bytes(c, cbuf) != orig_len) // TODO: handle byte length difference diff --git a/src/macros.h b/src/macros.h index 16421d261..7604910a6 100644 --- a/src/macros.h +++ b/src/macros.h @@ -93,6 +93,7 @@ #define MB_ISUPPER(c) vim_isupper(c) #define MB_TOLOWER(c) vim_tolower(c) #define MB_TOUPPER(c) vim_toupper(c) +#define MB_CASEFOLD(c) (enc_utf8 ? utf_fold(c) : MB_TOLOWER(c)) // Use our own isdigit() replacement, because on MS-Windows isdigit() returns // non-zero for superscript 1. Also avoids that isdigit() crashes for numbers diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 465797dd3..a55750b19 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text) { c1 = PTR2CHAR(match_text + len1); c2 = PTR2CHAR(rex.line + col + len2); - if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) + if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2))) { match = FALSE; break; @@ -6271,11 +6271,11 @@ nfa_regmatch( } if (rex.reg_ic) { - int curc_low = MB_TOLOWER(curc); + int curc_low = MB_CASEFOLD(curc); int done = FALSE; for ( ; c1 <= c2; ++c1) - if (MB_TOLOWER(c1) == curc_low) + if (MB_CASEFOLD(c1) == curc_low) { result = result_if_matched; done = TRUE; @@ -6287,8 +6287,8 @@ nfa_regmatch( } else if (state->c < 0 ? 
check_char_class(state->c, curc) : (curc == state->c - || (rex.reg_ic && MB_TOLOWER(curc) - == MB_TOLOWER(state->c)))) + || (rex.reg_ic && MB_CASEFOLD(curc) + == MB_CASEFOLD(state->c)))) { result = result_if_matched; break; @@ -6713,7 +6713,7 @@ nfa_regmatch( result = (c == curc); if (!result && rex.reg_ic) - result = MB_TOLOWER(c) == MB_TOLOWER(curc); + result = MB_CASEFOLD(c) == MB_CASEFOLD(curc); // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. if (result && enc_utf8 && !rex.reg_icombine) @@ -6882,7 +6882,7 @@ nfa_regmatch( // cheaper than adding a state that won't match. c = PTR2CHAR(rex.input + clen); if (c != prog->regstart && (!rex.reg_ic - || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart))) + || MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart))) { #ifdef ENABLE_LOG fprintf(log_fd, " Skipping start state, regstart does not match\n"); diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim index e9d550711..e8e67a62c 100644 --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -355,4 +355,23 @@ func Test_ambiwidth() set regexpengine& ambiwidth& endfunc +func Run_regexp_ignore_case() + call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g')) + + call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g')) + call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g')) +endfunc + +func Test_regexp_ignore_case() + set regexpengine=1 + call Run_regexp_ignore_case() + set regexpengine=2 + call Run_regexp_ignore_case() + set regexpengine& +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index 
922ab4605..9f1cd00aa 100644 --- a/src/version.c +++ b/src/version.c @@ -755,6 +755,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 938, +/**/ 937, /**/ 936, |