summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2018-02-20 15:45:01 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2018-02-20 15:45:01 +0000
commit b68d6793ce6c3bd7e14df27d28e69de9a8eb3cf5 (patch)
tree e7b46e65babae563e4a0cec6978493140d87f75b
parent 8dc7206187ef9e9486fb5f102b85400ae9795dd1 (diff)
download pcre-b68d6793ce6c3bd7e14df27d28e69de9a8eb3cf5.tar.gz
Fix \C backtracking in UTF-8 issue for repeated character classes, which were
overlooked when it was fixed for other repeats.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1725 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- pcre_exec.c 4
-rw-r--r-- testdata/testinput5 6
-rw-r--r-- testdata/testoutput5 8
4 files changed, 21 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 108d739..7354bb1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -44,6 +44,11 @@ non-zero starting offset, unset capturing groups with lower numbers than a
group that did capture something were not being correctly returned as "unset"
(that is, with offset values of -1).
+10. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string
+containing multi-code-unit characters caused bad behaviour and possibly a
+crash. This issue was fixed for other kinds of repeat in release 8.37 by change
+38, but repeating character classes were overlooked.
+
Version 8.41 05-July-2017
-------------------------
diff --git a/pcre_exec.c b/pcre_exec.c
index 1993cb3..3fd58cb 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -3053,7 +3053,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ if (eptr-- <= pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr);
}
}
@@ -3210,7 +3210,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ if (eptr-- <= pp) break; /* Stop if tried at original pos */
#ifdef SUPPORT_UTF
if (utf) BACKCHAR(eptr);
#endif
diff --git a/testdata/testinput5 b/testdata/testinput5
index 28561a9..c94008c 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -798,4 +798,10 @@
/(?<=\K\x{17f})/8G+
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
+/\C[^\v]+\x80/8
+ [AΏBŀC]
+
+/\C[^\d]+\x80/8
+ [AΏBŀC]
+
/-- End of testinput5 --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index bab989c..090e1e1 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1942,4 +1942,12 @@ Need char = 'z'
0: \x{17f}
0+
+/\C[^\v]+\x80/8
+ [AΏBŀC]
+No match
+
+/\C[^\d]+\x80/8
+ [AΏBŀC]
+No match
+
/-- End of testinput5 --/