summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2015-11-20 17:34:16 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2015-11-20 17:34:16 +0000
commit cc97428733afa4374b180ae8261852f60cb61de2 (patch)
tree 06cc29cc3201351b99850e8fa8e72b04c65b0d83
parent 0358b7127db0d2ad71cfc27be34cb5fdd5e9a76a (diff)
download pcre-cc97428733afa4374b180ae8261852f60cb61de2.tar.gz
Fix wide character bug for some negative POSIX classes.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1608 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 10
-rw-r--r-- pcre_compile.c 23
-rw-r--r-- testdata/testinput6 44
-rw-r--r-- testdata/testoutput11-16 16
-rw-r--r-- testdata/testoutput11-32 16
-rw-r--r-- testdata/testoutput11-8 16
-rw-r--r-- testdata/testoutput6 78
-rw-r--r-- testdata/testoutput7 4
8 files changed, 176 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index e0bb2ee..8073df1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -159,8 +159,8 @@ Version 8.38 27-October-2015
42. In a character class such as [\W\p{Any}] where both a negative-type escape
("not a word character") and a property escape were present, the property
escape was being ignored.
-
-43. Fix crash caused by very long (*MARK) or (*THEN) names.
+
+43. Fix crash caused by very long (*MARK) or (*THEN) names.
44. A sequence such as [[:punct:]b] that is, a POSIX character class followed
by a single ASCII character in a class item, was incorrectly compiled in
@@ -170,6 +170,12 @@ Version 8.38 27-October-2015
45. [:punct:] in UCP mode was matching some characters in the range 128-255
that should not have been matched.
+46. If [:^ascii:] or [:^xdigit:] or [:^cntrl:] is present in a non-negated
+ class, all characters with code points greater than 255 are in the class.
+ When a Unicode property was also in the class (if PCRE_UCP is set, escapes
+ such as \w are turned into Unicode properties), wide characters were not
+ correctly handled, and could fail to match.
+
Version 8.37 28-April-2015
--------------------------
diff --git a/pcre_compile.c b/pcre_compile.c
index c253f79..b16e641 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4940,9 +4940,10 @@ for (;; ptr++)
(which is on the stack). We have to remember that there was XCLASS data,
however. */
+ if (class_uchardata > class_uchardata_base) xclass = TRUE;
+
if (lengthptr != NULL && class_uchardata > class_uchardata_base)
{
- xclass = TRUE;
*lengthptr += (int)(class_uchardata - class_uchardata_base);
class_uchardata = class_uchardata_base;
}
@@ -5045,10 +5046,26 @@ for (;; ptr++)
ptr = tempptr + 1;
continue;
- /* For all other POSIX classes, no special action is taken in UCP
- mode. Fall through to the non_UCP case. */
+ /* For the other POSIX classes (ascii, cntrl, xdigit) we are going to fall
+ through to the non-UCP case and build a bit map for characters with
+ code points less than 256. If we are in a negated POSIX class
+ within a non-negated overall class, characters with code points
+ greater than 255 must all match. In the special case where we have
+ not yet generated any xclass data, and this is the final item in
+ the overall class, we need do nothing: later on, the opcode
+ OP_NCLASS will be used to indicate that characters greater than 255
+ are acceptable. If we have already seen an xclass item or one may
+ follow (we have to assume that it might if this is not the end of
+ the class), explicitly match all wide codepoints. */
default:
+ if (!negate_class && local_negate &&
+ (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
+ {
+ *class_uchardata++ = XCL_RANGE;
+ class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ }
break;
}
}
diff --git a/testdata/testinput6 b/testdata/testinput6
index 8aee0d0..aeb62a0 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -1509,4 +1509,48 @@
\xc2\xb4
\x{b4}
+/[[:^ascii:]]/8W
+ \x{100}
+ \x{200}
+ \x{300}
+ \x{37e}
+ a
+ 9
+ g
+
+/[[:^ascii:]\w]/8W
+ a
+ 9
+ g
+ \x{100}
+ \x{200}
+ \x{300}
+ \x{37e}
+
+/[\w[:^ascii:]]/8W
+ a
+ 9
+ g
+ \x{100}
+ \x{200}
+ \x{300}
+ \x{37e}
+
+/[^[:ascii:]\W]/8W
+ a
+ 9
+ g
+ \x{100}
+ \x{200}
+ \x{300}
+ \x{37e}
+
+/[[:^ascii:]a]/8W
+ a
+ 9
+ g
+ \x{100}
+ \x{200}
+ \x{37e}
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 87acadd..9a0a12d 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -650,18 +650,18 @@ Memory allocation (code space): 14
/[[:^alpha:][:^cntrl:]]+/8WB
------------------------------------------------------------------
- 0 26 Bra
- 2 [ -~\x80-\xff\P{L}]++
- 26 26 Ket
- 28 End
+ 0 30 Bra
+ 2 [ -~\x80-\xff\P{L}\x{100}-\x{10ffff}]++
+ 30 30 Ket
+ 32 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/8WB
------------------------------------------------------------------
- 0 26 Bra
- 2 [ -~\x80-\xff\P{L}]++
- 26 26 Ket
- 28 End
+ 0 30 Bra
+ 2 [ -~\x80-\xff\x{100}-\x{10ffff}\P{L}]++
+ 30 30 Ket
+ 32 End
------------------------------------------------------------------
/[[:alpha:]]+/8WB
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 325fedb..57e5da0 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -650,18 +650,18 @@ Memory allocation (code space): 28
/[[:^alpha:][:^cntrl:]]+/8WB
------------------------------------------------------------------
- 0 18 Bra
- 2 [ -~\x80-\xff\P{L}]++
- 18 18 Ket
- 20 End
+ 0 21 Bra
+ 2 [ -~\x80-\xff\P{L}\x{100}-\x{10ffff}]++
+ 21 21 Ket
+ 23 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/8WB
------------------------------------------------------------------
- 0 18 Bra
- 2 [ -~\x80-\xff\P{L}]++
- 18 18 Ket
- 20 End
+ 0 21 Bra
+ 2 [ -~\x80-\xff\x{100}-\x{10ffff}\P{L}]++
+ 21 21 Ket
+ 23 End
------------------------------------------------------------------
/[[:alpha:]]+/8WB
diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8
index b1a4a90..748548a 100644
--- a/testdata/testoutput11-8
+++ b/testdata/testoutput11-8
@@ -650,18 +650,18 @@ Memory allocation (code space): 10
/[[:^alpha:][:^cntrl:]]+/8WB
------------------------------------------------------------------
- 0 44 Bra
- 3 [ -~\x80-\xff\P{L}]++
- 44 44 Ket
- 47 End
+ 0 51 Bra
+ 3 [ -~\x80-\xff\P{L}\x{100}-\x{10ffff}]++
+ 51 51 Ket
+ 54 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/8WB
------------------------------------------------------------------
- 0 44 Bra
- 3 [ -~\x80-\xff\P{L}]++
- 44 44 Ket
- 47 End
+ 0 51 Bra
+ 3 [ -~\x80-\xff\x{100}-\x{10ffff}\P{L}]++
+ 51 51 Ket
+ 54 End
------------------------------------------------------------------
/[[:alpha:]]+/8WB
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 196f993..beb85aa 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -2479,4 +2479,82 @@ No match
\x{b4}
No match
+/[[:^ascii:]]/8W
+ \x{100}
+ 0: \x{100}
+ \x{200}
+ 0: \x{200}
+ \x{300}
+ 0: \x{300}
+ \x{37e}
+ 0: \x{37e}
+ a
+No match
+ 9
+No match
+ g
+No match
+
+/[[:^ascii:]\w]/8W
+ a
+ 0: a
+ 9
+ 0: 9
+ g
+ 0: g
+ \x{100}
+ 0: \x{100}
+ \x{200}
+ 0: \x{200}
+ \x{300}
+ 0: \x{300}
+ \x{37e}
+ 0: \x{37e}
+
+/[\w[:^ascii:]]/8W
+ a
+ 0: a
+ 9
+ 0: 9
+ g
+ 0: g
+ \x{100}
+ 0: \x{100}
+ \x{200}
+ 0: \x{200}
+ \x{300}
+ 0: \x{300}
+ \x{37e}
+ 0: \x{37e}
+
+/[^[:ascii:]\W]/8W
+ a
+No match
+ 9
+No match
+ g
+No match
+ \x{100}
+ 0: \x{100}
+ \x{200}
+ 0: \x{200}
+ \x{300}
+No match
+ \x{37e}
+No match
+
+/[[:^ascii:]a]/8W
+ a
+ 0: a
+ 9
+No match
+ g
+No match
+ \x{100}
+ 0: \x{100}
+ \x{200}
+ 0: \x{200}
+ \x{37e}
+ 0: \x{37e}
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index a05b381..cc9ebdd 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -949,7 +949,7 @@ No match
/[[:^alpha:][:^cntrl:]]+/8WBZ
------------------------------------------------------------------
Bra
- [ -~\x80-\xff\P{L}]++
+ [ -~\x80-\xff\P{L}\x{100}-\x{10ffff}]++
Ket
End
------------------------------------------------------------------
@@ -961,7 +961,7 @@ No match
/[[:^cntrl:][:^alpha:]]+/8WBZ
------------------------------------------------------------------
Bra
- [ -~\x80-\xff\P{L}]++
+ [ -~\x80-\xff\x{100}-\x{10ffff}\P{L}]++
Ket
End
------------------------------------------------------------------