summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-11-27 17:13:13 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-11-27 17:13:13 +0000
commitef6b10fcde41a2687f38d4a9ff2886b037948a1b (patch)
treedc025d74bc6bc5cfbeb90e0c056ecc7ecf5589cd
parent3c80e02cd464ea049e117b423fd48fab294c51a9 (diff)
downloadpcre-ef6b10fcde41a2687f38d4a9ff2886b037948a1b.tar.gz
Fix negated POSIX class within negated overall class UCP bug.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1612 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog3
-rw-r--r--pcre_compile.c20
-rw-r--r--testdata/testinput69
-rw-r--r--testdata/testoutput616
4 files changed, 39 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index b6dfa5b..beac8ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,6 +10,9 @@ Version 8.39 xx-xxxxxx-201x
1. If PCRE_AUTO_CALLOUT was set on a pattern that had a (?# comment between
an item and its qualifier (for example, A(?#comment)?B) pcre_compile()
misbehaved. This bug was found by the LLVM fuzzer.
+
+2. Further to 8.38/46, negated classes such as [^[:^ascii:]\d] were also not
+ working correctly in UCP mode.
Version 8.38 23-November-2015
diff --git a/pcre_compile.c b/pcre_compile.c
index 3360a8b..3670f1e 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5063,20 +5063,22 @@ for (;; ptr++)
ptr = tempptr + 1;
continue;
- /* For the other POSIX classes (ascii, xdigit) we are going to fall
- through to the non-UCP case and build a bit map for characters with
- code points less than 256. If we are in a negated POSIX class
- within a non-negated overall class, characters with code points
- greater than 255 must all match. In the special case where we have
- not yet generated any xclass data, and this is the final item in
- the overall class, we need do nothing: later on, the opcode
+ /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
+ to fall through to the non-UCP case and build a bit map for
+ characters with code points less than 256. If we are in a negated
+ POSIX class, characters with code points greater than 255 must
+ either all match or all not match. In the special case where we
+ have not yet generated any xclass data, and this is the final item
+ in the overall class, we need do nothing: later on, the opcode
OP_NCLASS will be used to indicate that characters greater than 255
are acceptable. If we have already seen an xclass item or one may
follow (we have to assume that it might if this is not the end of
- the class), explicitly match all wide codepoints. */
+ the class), explicitly list all wide codepoints, which will then
+ either not match or match, depending on whether the class is or is
+ not negated. */
default:
- if (!negate_class && local_negate &&
+ if (local_negate &&
(xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
{
*class_uchardata++ = XCL_RANGE;
diff --git a/testdata/testinput6 b/testdata/testinput6
index aeb62a0..a178d3d 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -1553,4 +1553,13 @@
\x{200}
\x{37e}
+/[^[:^ascii:]\d]/8W
+ a
+ ~
+ 0
+ \a
+ \x{7f}
+ \x{389}
+ \x{20ac}
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index beb85aa..b64dc0d 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -2557,4 +2557,20 @@ No match
\x{37e}
0: \x{37e}
+/[^[:^ascii:]\d]/8W
+ a
+ 0: a
+ ~
+ 0: ~
+ 0
+No match
+ \a
+ 0: \x{07}
+ \x{7f}
+ 0: \x{7f}
+ \x{389}
+No match
+ \x{20ac}
+No match
+
/-- End of testinput6 --/