From af98a49dd0ef1661b4998f118151fddbf6e4df75 Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Sun, 24 Apr 2016 14:40:12 +0200 Subject: patch 7.4.1783 Problem: The old regexp engine doesn't handle character classes correctly. (Manuel Ortega) Solution: Use regmbc() instead of regc(). Add a test. --- src/regexp.c | 20 +++++++------- src/testdir/test_regexp_utf8.vim | 56 ++++++++++++++++++++++++++++++++++++++++ src/version.c | 2 ++ 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/regexp.c b/src/regexp.c index fada9fe95..733d56425 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -2544,12 +2544,12 @@ collection: case CLASS_ALNUM: for (cu = 1; cu <= 255; cu++) if (isalnum(cu)) - regc(cu); + regmbc(cu); break; case CLASS_ALPHA: for (cu = 1; cu <= 255; cu++) if (isalpha(cu)) - regc(cu); + regmbc(cu); break; case CLASS_BLANK: regc(' '); @@ -2558,32 +2558,32 @@ collection: case CLASS_CNTRL: for (cu = 1; cu <= 255; cu++) if (iscntrl(cu)) - regc(cu); + regmbc(cu); break; case CLASS_DIGIT: for (cu = 1; cu <= 255; cu++) if (VIM_ISDIGIT(cu)) - regc(cu); + regmbc(cu); break; case CLASS_GRAPH: for (cu = 1; cu <= 255; cu++) if (isgraph(cu)) - regc(cu); + regmbc(cu); break; case CLASS_LOWER: for (cu = 1; cu <= 255; cu++) if (MB_ISLOWER(cu)) - regc(cu); + regmbc(cu); break; case CLASS_PRINT: for (cu = 1; cu <= 255; cu++) if (vim_isprintc(cu)) - regc(cu); + regmbc(cu); break; case CLASS_PUNCT: for (cu = 1; cu <= 255; cu++) if (ispunct(cu)) - regc(cu); + regmbc(cu); break; case CLASS_SPACE: for (cu = 9; cu <= 13; cu++) @@ -2593,12 +2593,12 @@ collection: case CLASS_UPPER: for (cu = 1; cu <= 255; cu++) if (MB_ISUPPER(cu)) - regc(cu); + regmbc(cu); break; case CLASS_XDIGIT: for (cu = 1; cu <= 255; cu++) if (vim_isxdigit(cu)) - regc(cu); + regmbc(cu); break; case CLASS_TAB: regc('\t'); diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim index f5d7ec946..cef243677 100644 --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -33,3 +33,59 @@ func Test_equivalence_re2() set re=2 call s:equivalence_test() endfunc + +func s:classes_test() + call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+')) + + let alphachars = '' + let lowerchars = '' + let upperchars = '' + let alnumchars = '' + let printchars = '' + let punctchars = '' + let xdigitchars = '' + let i = 1 + while i <= 255 + let c = nr2char(i) + if c =~ '[[:alpha:]]' + let alphachars .= c + endif + if c =~ '[[:lower:]]' + let lowerchars .= c + endif + if c =~ '[[:upper:]]' + let upperchars .= c + endif + if c =~ '[[:alnum:]]' + let alnumchars .= c + endif + if c =~ '[[:print:]]' + let printchars .= c + endif + if c =~ '[[:punct:]]' + let punctchars .= c + endif + if c =~ '[[:xdigit:]]' + let xdigitchars .= c + endif + let i += 1 + endwhile + + call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars) + call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars) + call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars) + call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars) + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars) + call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars) + call assert_equal('0123456789ABCDEFabcdef', xdigitchars) +endfunc + +func Test_classes_re1() + set re=1 + call s:classes_test() +endfunc + +func Test_classes_re2() + set re=2 + call s:classes_test() +endfunc diff --git a/src/version.c b/src/version.c index 09372ef5d..a4f325205 100644 --- a/src/version.c +++ b/src/version.c @@ -753,6 +753,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1783, /**/ 1782, /**/ -- cgit v1.2.1