summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2020-09-13 15:56:32 +0000
committerph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2020-09-13 15:56:32 +0000
commit0edcbc5e42409ea801908af0917ae11eb70a1435 (patch)
tree5736d8ae7682b5c8b267d0606c4f4b16b52991c3
parent00daac3fbc798884028754a482999e8fc1ce63f9 (diff)
downloadpcre2-0edcbc5e42409ea801908af0917ae11eb70a1435.tar.gz
Fix delimiters in tests 1 and 4 for correct Perl behaviour (Bugzilla #2641).
Also move \K in lookaround tests to test 2 (Perl no longer supports). git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1269 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--ChangeLog11
-rwxr-xr-xperltest.sh3
-rw-r--r--testdata/testinput143
-rw-r--r--testdata/testinput216
-rw-r--r--testdata/testinput48
-rw-r--r--testdata/testoutput148
-rw-r--r--testdata/testoutput220
-rw-r--r--testdata/testoutput48
8 files changed, 99 insertions, 58 deletions
diff --git a/ChangeLog b/ChangeLog
index 680d7f1..4f7bb89 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -52,6 +52,17 @@ buffer overflow). Fixes ClusterFuzz 23779.
handled by an optimization in JIT. Furthermore a wrong offset was used to
read a value from a buffer which could lead to memory overread.
+10. Unnoticed for many years was the fact that delimiters other than / in the
+testinput1 and testinput4 files could cause incorrect behaviour when these
+files were processed by perltest.sh. There were several tests that used quotes
+as delimiters, and it was just luck that they didn't go wrong with perltest.sh.
+All the patterns in testinput1 and testinput4 now use / as their delimiter.
+This fixes Bugzilla #2641.
+
+11. Perl has started to give an error for \K within lookarounds (though there
+are cases where it doesn't). PCRE2 still allows this, so the tests that include
+this case have been moved from test 1 to test 2.
+
Version 10.35 09-May-2020
---------------------------
diff --git a/perltest.sh b/perltest.sh
index 8368781..31406c5 100755
--- a/perltest.sh
+++ b/perltest.sh
@@ -68,6 +68,9 @@ fi
# #newline_default commands, which are needed in the relevant pcre2test files,
# are ignored. Any other #-command is ignored, with a warning message.
#
+# The pattern lines should use only / as the delimiter. The other characters
+# that pcre2test supports cause problems with this script.
+#
# The data lines must not have any pcre2test modifiers. Unless
# "subject_literal" is on the pattern, data lines are processed as
# Perl double-quoted strings, so if they contain " $ or @ characters, these
diff --git a/testdata/testinput1 b/testdata/testinput1
index 8d952e2..8a50cfa 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -1,6 +1,10 @@
# This set of tests is for features that are compatible with all versions of
# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and
# 32-bit PCRE libraries, and also using the perltest.sh script.
+
+# WARNING: Use only / as the pattern delimiter. Although pcre2test supports
+# a number of delimiters, all those other than / give problems with the
+# perltest.sh script.
#forbid_utf
#newline_default lf any anycrlf
@@ -1957,7 +1961,7 @@
abc\nzzz
qqq\nabc\nzzz
-"(?>.*/)foo"
+/(?>.*\/)foo/
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
\= Expect no match
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
@@ -3831,12 +3835,13 @@
/(?-x: )/x
A\x20B
-"(?x)(?-x: \s*#\s*)"
+/(?x)(?-x: \s*#\s*)/
A # B
\= Expect no match
#
+ A s#s B
-"(?x-is)(?:(?-ixs) \s*#\s*) include"
+/(?x-is)(?:(?-ixs) \s*#\s*) include/
A #include
\= Expect no match
A#include
@@ -5232,11 +5237,11 @@ name)/mark
alphabetabcd
endingwxyz
-"(?>.*)foo"
+/(?>.*)foo/
\= Expect no match
abcdfooxyz
-"(?>.*?)foo"
+/(?>.*?)foo/
abcdfooxyz
/(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/
@@ -5423,18 +5428,6 @@ name)/mark
\= Expect no match
a
-/(?=a\Kb)ab/
- ab
-
-/(?!a\Kb)ac/
- ac
-
-/^abc(?<=b\Kc)d/
- abcd
-
-/^abc(?<!b\Kq)d/
- abcd
-
/A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark
\= Expect no match
AAAC
@@ -5727,9 +5720,9 @@ name)/mark
/(\2)(\1)/
-"Z*(|d*){216}"
+/Z*(|d*){216}/
-"(?1)(?#?'){8}(a)"
+/(?1)(?#?'){8}(a)/
baaaaaaaaac
/((((((((((((x))))))))))))\12/
@@ -5741,7 +5734,7 @@ name)/mark
/(?1)()((((((\1++))\x85)+)|))/
\x85\x85
-"(?|(\k'Pm')|(?'Pm'))"
+/(?|(\k'Pm')|(?'Pm'))/
abcd
/(?|(aaa)|(b))\g{1}/
@@ -6411,20 +6404,20 @@ ef) x/x,mark
Hackdaws love my big sphinx of quartz.
Pack my fox with five dozen liquor jugs.
-"(?<=X(?(DEFINE)(A)))X(*F)"
+/(?<=X(?(DEFINE)(A)))X(*F)/
\= Expect no match
AXYZ
-"(?<=X(?(DEFINE)(A)))."
+/(?<=X(?(DEFINE)(A)))./
AXYZ
-"(?<=X(?(DEFINE)(.*))Y)."
+/(?<=X(?(DEFINE)(.*))Y)./
AXYZ
-"(?<=X(?(DEFINE)(Y))(?1))."
+/(?<=X(?(DEFINE)(Y))(?1))./
AXYZ
-"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word"
+/(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/
word
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 47320eb..14ad824 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5866,4 +5866,20 @@ a)"xI
/(?(VERSION=0.0/
+# Perl has made \K in lookarounds an error. At the moment PCRE2 still accepts.
+
+/(?=a\Kb)ab/
+ ab
+
+/(?!a\Kb)ac/
+ ac
+
+/^abc(?<=b\Kc)d/
+ abcd
+
+/^abc(?<!b\Kq)d/
+ abcd
+
+# ---------
+
# End of testinput2
diff --git a/testdata/testinput4 b/testdata/testinput4
index 0bdac57..4e2a0ab 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -3,6 +3,10 @@
# some of the property tests may differ because of different versions of
# Unicode in use by PCRE2 and Perl.
+# WARNING: Use only / as the pattern delimiter. Although pcre2test supports
+# a number of delimiters, all those other than / give problems with the
+# perltest.sh script.
+
#newline_default lf anycrlf any
#perltest
@@ -476,7 +480,7 @@
/^\ሴ/utf
-"(?s)(.{1,5})"utf
+/(?s)(.{1,5})/utf
abcdefg
ab
@@ -2220,7 +2224,7 @@
/[A-`]/i,utf
abcdefghijklmno
-"[\S\V\H]"utf
+/[\S\V\H]/utf
/[^\p{Any}]*+x/utf
x
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 470e412..47d42ee 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -1,6 +1,10 @@
# This set of tests is for features that are compatible with all versions of
# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and
# 32-bit PCRE libraries, and also using the perltest.sh script.
+
+# WARNING: Use only / as the pattern delimiter. Although pcre2test supports
+# a number of delimiters, all those other than / give problems with the
+# perltest.sh script.
#forbid_utf
#newline_default lf any anycrlf
@@ -2949,7 +2953,7 @@ No match
qqq\nabc\nzzz
No match
-"(?>.*/)foo"
+/(?>.*\/)foo/
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
\= Expect no match
@@ -6081,14 +6085,16 @@ No match
A\x20B
0:
-"(?x)(?-x: \s*#\s*)"
+/(?x)(?-x: \s*#\s*)/
A # B
0: #
\= Expect no match
#
No match
+ A s#s B
+No match
-"(?x-is)(?:(?-ixs) \s*#\s*) include"
+/(?x-is)(?:(?-ixs) \s*#\s*) include/
A #include
0: #include
\= Expect no match
@@ -8402,12 +8408,12 @@ No match
1: <unset>
2: wxyz
-"(?>.*)foo"
+/(?>.*)foo/
\= Expect no match
abcdfooxyz
No match
-"(?>.*?)foo"
+/(?>.*?)foo/
abcdfooxyz
0: foo
@@ -8648,22 +8654,6 @@ No match
a
No match
-/(?=a\Kb)ab/
- ab
- 0: b
-
-/(?!a\Kb)ac/
- ac
- 0: ac
-
-/^abc(?<=b\Kc)d/
- abcd
- 0: cd
-
-/^abc(?<!b\Kq)d/
- abcd
- 0: abcd
-
/A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark
\= Expect no match
AAAC
@@ -9128,9 +9118,9 @@ No match
/(\2)(\1)/
-"Z*(|d*){216}"
+/Z*(|d*){216}/
-"(?1)(?#?'){8}(a)"
+/(?1)(?#?'){8}(a)/
baaaaaaaaac
0: aaaaaaaaa
1: a
@@ -9166,7 +9156,7 @@ No match
6:
7:
-"(?|(\k'Pm')|(?'Pm'))"
+/(?|(\k'Pm')|(?'Pm'))/
abcd
0:
1:
@@ -10165,24 +10155,24 @@ No match
Pack my fox with five dozen liquor jugs.
No match
-"(?<=X(?(DEFINE)(A)))X(*F)"
+/(?<=X(?(DEFINE)(A)))X(*F)/
\= Expect no match
AXYZ
No match
-"(?<=X(?(DEFINE)(A)))."
+/(?<=X(?(DEFINE)(A)))./
AXYZ
0: Y
-"(?<=X(?(DEFINE)(.*))Y)."
+/(?<=X(?(DEFINE)(.*))Y)./
AXYZ
0: Z
-"(?<=X(?(DEFINE)(Y))(?1))."
+/(?<=X(?(DEFINE)(Y))(?1))./
AXYZ
0: Z
-"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word"
+/(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/
word
0: word
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index c06363a..d0592d3 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17624,6 +17624,26 @@ MK: >\x00<
/(?(VERSION=0.0/
Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition
+# Perl has made \K in lookarounds an error. At the moment PCRE2 still accepts.
+
+/(?=a\Kb)ab/
+ ab
+ 0: b
+
+/(?!a\Kb)ac/
+ ac
+ 0: ac
+
+/^abc(?<=b\Kc)d/
+ abcd
+ 0: cd
+
+/^abc(?<!b\Kq)d/
+ abcd
+ 0: abcd
+
+# ---------
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 245a036..f43d940 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -3,6 +3,10 @@
# some of the property tests may differ because of different versions of
# Unicode in use by PCRE2 and Perl.
+# WARNING: Use only / as the pattern delimiter. Although pcre2test supports
+# a number of delimiters, all those other than / give problems with the
+# perltest.sh script.
+
#newline_default lf anycrlf any
#perltest
@@ -795,7 +799,7 @@ No match
0: \x{1234}
-"(?s)(.{1,5})"utf
+/(?s)(.{1,5})/utf
abcdefg
0: abcde
1: abcde
@@ -3596,7 +3600,7 @@ No match
abcdefghijklmno
0: a
-"[\S\V\H]"utf
+/[\S\V\H]/utf
/[^\p{Any}]*+x/utf
x