diff options
author | Bram Moolenaar <Bram@vim.org> | 2013-05-25 14:42:03 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2013-05-25 14:42:03 +0200 |
commit | 56d58d51bfefe428c9fcbb6dd0e87b08c0ea30b2 (patch) | |
tree | cc89237fcf74e6d151eedae7e53a30e010ba4ff8 /src | |
parent | 152e7890c17df2e34ae993863be8c6445183222b (diff) | |
download | vim-git-56d58d51bfefe428c9fcbb6dd0e87b08c0ea30b2.tar.gz |
updated for version 7.3.1015 (v7.3.1015)
Problem: New regexp engine: Matching composing characters is wrong.
Solution: Fix matching composing characters.
Diffstat (limited to 'src')
-rw-r--r-- | src/regexp_nfa.c | 20 | ||||
-rw-r--r-- | src/testdir/test95.in | 11 | ||||
-rw-r--r-- | src/testdir/test95.ok | 10 | ||||
-rw-r--r-- | src/version.c | 2 |
4 files changed, 38 insertions, 5 deletions
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 0a6dded82..19b8d925e 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -716,6 +716,7 @@ nfa_regatom() * the composing char is matched here. */ if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) { + old_regparse = regparse; c = getchr(); goto nfa_do_multibyte; } @@ -1217,9 +1218,11 @@ collection: nfa_do_multibyte: /* Length of current char with composing chars. */ - if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse))) + if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse)) + || utf_iscomposing(c))) { - /* A base character plus composing characters. + /* A base character plus composing characters, or just one + * or more composing characters. * This requires creating a separate atom as if enclosing * the characters in (), where NFA_COMPOSING is the ( and * NFA_END_COMPOSING is the ). Note that right now we are @@ -1400,7 +1403,6 @@ nfa_regpiece() /* Save pos after the repeated atom and the \{} */ new_regparse = regparse; - new_regparse = regparse; quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY); for (i = 0; i < maxval; i++) { @@ -3218,11 +3220,19 @@ nfa_regmatch(start, submatch, m) result = OK; sta = t->state->out; len = 0; + if (utf_iscomposing(sta->c)) + { + /* Only match composing character(s), ignore base + * character. Used for ".{composing}" and "{composing}" + * (no preceding character). */ + len += mb_char2len(c); + } if (ireg_icombine) { - /* If \Z was present, then ignore composing characters. */ + /* If \Z was present, then ignore composing characters. + * When ignoring the base character this always matches. */ /* TODO: How about negated? 
*/ - if (sta->c != c) + if (len == 0 && sta->c != c) result = FAIL; len = n; while (sta->c != NFA_END_COMPOSING) diff --git a/src/testdir/test95.in b/src/testdir/test95.in index 5e4822051..2484ace1a 100644 --- a/src/testdir/test95.in +++ b/src/testdir/test95.in @@ -38,6 +38,15 @@ STARTTEST :"""" Test composing character matching :call add(tl, ['.ม', 'xม่x yมy', 'yม']) :call add(tl, ['.ม่', 'xม่x yมy', 'xม่']) +:call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"]) +:call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"]) +:call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"]) +:call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"]) +:call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "]) +:call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "]) +:call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"]) +:call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"]) + :"""" Test \Z :call add(tl, ['ú\Z', 'x']) @@ -50,6 +59,8 @@ STARTTEST :call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"]) :call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"]) :call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"]) +:call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"]) +:call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"]) :"""" Combining different tests and features :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd']) diff --git a/src/testdir/test95.ok b/src/testdir/test95.ok index 8297899a5..9211c2ca6 100644 --- a/src/testdir/test95.ok +++ b/src/testdir/test95.ok @@ -11,6 +11,14 @@ OK - \f\+ OK - \%#=1\f\+ OK - .ม OK - .ม่ +OK - ֹ +OK - .ֹ +OK - ֹֻ +OK - .ֹֻ +OK - ֹֻ +OK - .ֹֻ +OK - ֹ +OK - .ֹ OK - ú\Z OK - יהוה\Z OK - יְהוָה\Z @@ -21,4 +29,6 @@ OK - קֹx\Z OK - קֹx\Z OK - קx\Z OK - קx\Z +OK - ֹ\+\Z +OK - \Zֹ\+ OK - [^[=a=]]\+ diff --git a/src/version.c b/src/version.c index 5063b8d01..0d4ca1871 100644 --- a/src/version.c +++ b/src/version.c @@ -729,6 +729,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1015, 
+/**/ 1014, /**/ 1013, |