summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bram Moolenaar <Bram@vim.org> 2020-06-09 19:34:54 +0200
committer: Bram Moolenaar <Bram@vim.org> 2020-06-09 19:34:54 +0200
commit: 59de417b904bbd204e313f015839317b577bd124 (patch)
tree: 4ebd73ce6e4ccd65b8a595f388c4535d81387319
parent: dcf59c37d0e1517439c4c0c4a6a5ca09c90157ad (diff)
download: vim-git-59de417b904bbd204e313f015839317b577bd124.tar.gz
patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case (tag: v8.2.0938)
Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs)
Solution: Use utf_fold() when possible. (ref. neovim #12456)
-rw-r--r-- src/diff.c 2
-rw-r--r-- src/macros.h 1
-rw-r--r-- src/regexp_nfa.c 14
-rw-r--r-- src/testdir/test_regexp_utf8.vim 19
-rw-r--r-- src/version.c 2
5 files changed, 30 insertions, 8 deletions
diff --git a/src/diff.c b/src/diff.c
index 1b0ad4778..bb95d5ed6 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T *din)
// xdiff doesn't support ignoring case, fold-case the text.
c = PTR2CHAR(s);
- c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c);
+ c = MB_CASEFOLD(c);
orig_len = mb_ptr2len(s);
if (mb_char2bytes(c, cbuf) != orig_len)
// TODO: handle byte length difference
diff --git a/src/macros.h b/src/macros.h
index 16421d261..7604910a6 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -93,6 +93,7 @@
#define MB_ISUPPER(c) vim_isupper(c)
#define MB_TOLOWER(c) vim_tolower(c)
#define MB_TOUPPER(c) vim_toupper(c)
+#define MB_CASEFOLD(c) (enc_utf8 ? utf_fold(c) : MB_TOLOWER(c))
// Use our own isdigit() replacement, because on MS-Windows isdigit() returns
// non-zero for superscript 1. Also avoids that isdigit() crashes for numbers
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 465797dd3..a55750b19 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text)
{
c1 = PTR2CHAR(match_text + len1);
c2 = PTR2CHAR(rex.line + col + len2);
- if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
+ if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2)))
{
match = FALSE;
break;
@@ -6271,11 +6271,11 @@ nfa_regmatch(
}
if (rex.reg_ic)
{
- int curc_low = MB_TOLOWER(curc);
+ int curc_low = MB_CASEFOLD(curc);
int done = FALSE;
for ( ; c1 <= c2; ++c1)
- if (MB_TOLOWER(c1) == curc_low)
+ if (MB_CASEFOLD(c1) == curc_low)
{
result = result_if_matched;
done = TRUE;
@@ -6287,8 +6287,8 @@ nfa_regmatch(
}
else if (state->c < 0 ? check_char_class(state->c, curc)
: (curc == state->c
- || (rex.reg_ic && MB_TOLOWER(curc)
- == MB_TOLOWER(state->c))))
+ || (rex.reg_ic && MB_CASEFOLD(curc)
+ == MB_CASEFOLD(state->c))))
{
result = result_if_matched;
break;
@@ -6713,7 +6713,7 @@ nfa_regmatch(
result = (c == curc);
if (!result && rex.reg_ic)
- result = MB_TOLOWER(c) == MB_TOLOWER(curc);
+ result = MB_CASEFOLD(c) == MB_CASEFOLD(curc);
// If rex.reg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
if (result && enc_utf8 && !rex.reg_icombine)
@@ -6882,7 +6882,7 @@ nfa_regmatch(
// cheaper than adding a state that won't match.
c = PTR2CHAR(rex.input + clen);
if (c != prog->regstart && (!rex.reg_ic
- || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart)))
+ || MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart)))
{
#ifdef ENABLE_LOG
fprintf(log_fd, " Skipping start state, regstart does not match\n");
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index e9d550711..e8e67a62c 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -355,4 +355,23 @@ func Test_ambiwidth()
set regexpengine& ambiwidth&
endfunc
+func Run_regexp_ignore_case()
+ call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
+
+ call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
+ call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
+ call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
+ call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
+ call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
+ call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
+endfunc
+
+func Test_regexp_ignore_case()
+ set regexpengine=1
+ call Run_regexp_ignore_case()
+ set regexpengine=2
+ call Run_regexp_ignore_case()
+ set regexpengine&
+endfunc
+
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index 922ab4605..9f1cd00aa 100644
--- a/src/version.c
+++ b/src/version.c
@@ -755,6 +755,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 938,
+/**/
937,
/**/
936,