diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-09-13 15:56:32 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-09-13 15:56:32 +0000 |
commit | 0edcbc5e42409ea801908af0917ae11eb70a1435 (patch) | |
tree | 5736d8ae7682b5c8b267d0606c4f4b16b52991c3 | |
parent | 00daac3fbc798884028754a482999e8fc1ce63f9 (diff) | |
download | pcre2-0edcbc5e42409ea801908af0917ae11eb70a1435.tar.gz |
Fix delimiters in tests 1 and 4 for correct Perl behaviour (Bugzilla #2641).
Also move \K in lookaround tests to test 2 (Perl no longer supports).
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1269 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 11 | ||||
-rwxr-xr-x | perltest.sh | 3 | ||||
-rw-r--r-- | testdata/testinput1 | 43 | ||||
-rw-r--r-- | testdata/testinput2 | 16 | ||||
-rw-r--r-- | testdata/testinput4 | 8 | ||||
-rw-r--r-- | testdata/testoutput1 | 48 | ||||
-rw-r--r-- | testdata/testoutput2 | 20 | ||||
-rw-r--r-- | testdata/testoutput4 | 8 |
8 files changed, 99 insertions, 58 deletions
@@ -52,6 +52,17 @@ buffer overflow). Fixes ClusterFuzz 23779. handled by an optimization in JIT. Furthermore a wrong offset was used to read a value from a buffer which could lead to memory overread. +10. Unnoticed for many years was the fact that delimiters other than / in the +testinput1 and testinput4 files could cause incorrect behaviour when these +files were processed by perltest.sh. There were several tests that used quotes +as delimiters, and it was just luck that they didn't go wrong with perltest.sh. +All the patterns in testinput1 and testinput4 now use / as their delimiter. +This fixes Bugzilla #2641. + +11. Perl has started to give an error for \K within lookarounds (though there +are cases where it doesn't). PCRE2 still allows this, so the tests that include +this case have been moved from test 1 to test 2. + Version 10.35 09-May-2020 --------------------------- diff --git a/perltest.sh b/perltest.sh index 8368781..31406c5 100755 --- a/perltest.sh +++ b/perltest.sh @@ -68,6 +68,9 @@ fi # #newline_default commands, which are needed in the relevant pcre2test files, # are ignored. Any other #-command is ignored, with a warning message. # +# The pattern lines should use only / as the delimiter. The other characters +# that pcre2test supports cause problems with this script. +# # The data lines must not have any pcre2test modifiers. Unless # "subject_literal" is on the pattern, data lines are processed as # Perl double-quoted strings, so if they contain " $ or @ characters, these diff --git a/testdata/testinput1 b/testdata/testinput1 index 8d952e2..8a50cfa 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -1,6 +1,10 @@ # This set of tests is for features that are compatible with all versions of # Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and # 32-bit PCRE libraries, and also using the perltest.sh script. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. #forbid_utf #newline_default lf any anycrlf @@ -1957,7 +1961,7 @@ abc\nzzz qqq\nabc\nzzz -"(?>.*/)foo" +/(?>.*\/)foo/ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo \= Expect no match /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ @@ -3831,12 +3835,13 @@ /(?-x: )/x A\x20B -"(?x)(?-x: \s*#\s*)" +/(?x)(?-x: \s*#\s*)/ A # B \= Expect no match # + A s#s B -"(?x-is)(?:(?-ixs) \s*#\s*) include" +/(?x-is)(?:(?-ixs) \s*#\s*) include/ A #include \= Expect no match A#include @@ -5232,11 +5237,11 @@ name)/mark alphabetabcd endingwxyz -"(?>.*)foo" +/(?>.*)foo/ \= Expect no match abcdfooxyz -"(?>.*?)foo" +/(?>.*?)foo/ abcdfooxyz /(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/ @@ -5423,18 +5428,6 @@ name)/mark \= Expect no match a -/(?=a\Kb)ab/ - ab - -/(?!a\Kb)ac/ - ac - -/^abc(?<=b\Kc)d/ - abcd - -/^abc(?<!b\Kq)d/ - abcd - /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark \= Expect no match AAAC @@ -5727,9 +5720,9 @@ name)/mark /(\2)(\1)/ -"Z*(|d*){216}" +/Z*(|d*){216}/ -"(?1)(?#?'){8}(a)" +/(?1)(?#?'){8}(a)/ baaaaaaaaac /((((((((((((x))))))))))))\12/ @@ -5741,7 +5734,7 @@ name)/mark /(?1)()((((((\1++))\x85)+)|))/ \x85\x85 -"(?|(\k'Pm')|(?'Pm'))" +/(?|(\k'Pm')|(?'Pm'))/ abcd /(?|(aaa)|(b))\g{1}/ @@ -6411,20 +6404,20 @@ ef) x/x,mark Hackdaws love my big sphinx of quartz. Pack my fox with five dozen liquor jugs. -"(?<=X(?(DEFINE)(A)))X(*F)" +/(?<=X(?(DEFINE)(A)))X(*F)/ \= Expect no match AXYZ -"(?<=X(?(DEFINE)(A)))." +/(?<=X(?(DEFINE)(A)))./ AXYZ -"(?<=X(?(DEFINE)(.*))Y)." +/(?<=X(?(DEFINE)(.*))Y)./ AXYZ -"(?<=X(?(DEFINE)(Y))(?1))." +/(?<=X(?(DEFINE)(Y))(?1))./ AXYZ -"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word" +/(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ word # End of testinput1 diff --git a/testdata/testinput2 b/testdata/testinput2 index 47320eb..14ad824 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5866,4 +5866,20 @@ a)"xI /(?(VERSION=0.0/ +# Perl has made \K in lookarounds an error. At the moment PCRE2 still accepts. + +/(?=a\Kb)ab/ + ab + +/(?!a\Kb)ac/ + ac + +/^abc(?<=b\Kc)d/ + abcd + +/^abc(?<!b\Kq)d/ + abcd + +# --------- + # End of testinput2 diff --git a/testdata/testinput4 b/testdata/testinput4 index 0bdac57..4e2a0ab 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -3,6 +3,10 @@ # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. + #newline_default lf anycrlf any #perltest @@ -476,7 +480,7 @@ /^\ሴ/utf ሴ -"(?s)(.{1,5})"utf +/(?s)(.{1,5})/utf abcdefg ab @@ -2220,7 +2224,7 @@ /[A-`]/i,utf abcdefghijklmno -"[\S\V\H]"utf +/[\S\V\H]/utf /[^\p{Any}]*+x/utf x diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 470e412..47d42ee 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -1,6 +1,10 @@ # This set of tests is for features that are compatible with all versions of # Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and # 32-bit PCRE libraries, and also using the perltest.sh script. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. #forbid_utf #newline_default lf any anycrlf @@ -2949,7 +2953,7 @@ No match qqq\nabc\nzzz No match -"(?>.*/)foo" +/(?>.*\/)foo/ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo \= Expect no match @@ -6081,14 +6085,16 @@ No match A\x20B 0: -"(?x)(?-x: \s*#\s*)" +/(?x)(?-x: \s*#\s*)/ A # B 0: # \= Expect no match # No match + A s#s B +No match -"(?x-is)(?:(?-ixs) \s*#\s*) include" +/(?x-is)(?:(?-ixs) \s*#\s*) include/ A #include 0: #include \= Expect no match @@ -8402,12 +8408,12 @@ No match 1: <unset> 2: wxyz -"(?>.*)foo" +/(?>.*)foo/ \= Expect no match abcdfooxyz No match -"(?>.*?)foo" +/(?>.*?)foo/ abcdfooxyz 0: foo @@ -8648,22 +8654,6 @@ No match a No match -/(?=a\Kb)ab/ - ab - 0: b - -/(?!a\Kb)ac/ - ac - 0: ac - -/^abc(?<=b\Kc)d/ - abcd - 0: cd - -/^abc(?<!b\Kq)d/ - abcd - 0: abcd - /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark \= Expect no match AAAC @@ -9128,9 +9118,9 @@ No match /(\2)(\1)/ -"Z*(|d*){216}" +/Z*(|d*){216}/ -"(?1)(?#?'){8}(a)" +/(?1)(?#?'){8}(a)/ baaaaaaaaac 0: aaaaaaaaa 1: a @@ -9166,7 +9156,7 @@ No match 6: 7: -"(?|(\k'Pm')|(?'Pm'))" +/(?|(\k'Pm')|(?'Pm'))/ abcd 0: 1: @@ -10165,24 +10155,24 @@ No match Pack my fox with five dozen liquor jugs. No match -"(?<=X(?(DEFINE)(A)))X(*F)" +/(?<=X(?(DEFINE)(A)))X(*F)/ \= Expect no match AXYZ No match -"(?<=X(?(DEFINE)(A)))." +/(?<=X(?(DEFINE)(A)))./ AXYZ 0: Y -"(?<=X(?(DEFINE)(.*))Y)." +/(?<=X(?(DEFINE)(.*))Y)./ AXYZ 0: Z -"(?<=X(?(DEFINE)(Y))(?1))." +/(?<=X(?(DEFINE)(Y))(?1))./ AXYZ 0: Z -"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word" +/(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ word 0: word diff --git a/testdata/testoutput2 b/testdata/testoutput2 index c06363a..d0592d3 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17624,6 +17624,26 @@ MK: >\x00< /(?(VERSION=0.0/ Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition +# Perl has made \K in lookarounds an error. At the moment PCRE2 still accepts. + +/(?=a\Kb)ab/ + ab + 0: b + +/(?!a\Kb)ac/ + ac + 0: ac + +/^abc(?<=b\Kc)d/ + abcd + 0: cd + +/^abc(?<!b\Kq)d/ + abcd + 0: abcd + +# --------- + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 245a036..f43d940 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -3,6 +3,10 @@ # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. + #newline_default lf anycrlf any #perltest @@ -795,7 +799,7 @@ No match ሴ 0: \x{1234} -"(?s)(.{1,5})"utf +/(?s)(.{1,5})/utf abcdefg 0: abcde 1: abcde @@ -3596,7 +3600,7 @@ No match abcdefghijklmno 0: a -"[\S\V\H]"utf +/[\S\V\H]/utf /[^\p{Any}]*+x/utf x |