summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2015-11-30 17:44:45 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2015-11-30 17:44:45 +0000
commit e78ad4264b16988b826bd2939a1781c1165a92d9 (patch)
tree f69439448ae01a81a397cdc66579e3d6f22c8fd8
parent e347b40d5bb12f7ef1e632aa649571a107be7d8a (diff)
download pcre-e78ad4264b16988b826bd2939a1781c1165a92d9.tar.gz
Fix \Q\E before qualifier bug when auto callouts are enabled.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1616 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 4
-rw-r--r-- pcre_compile.c 40
-rw-r--r-- testdata/testinput2 2
-rw-r--r-- testdata/testoutput2 15
4 files changed, 45 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index c24ea84..a33c6b7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -23,6 +23,10 @@ Version 8.39 xx-xxxxxx-201x
5. Allow for up to 32-bit numbers in the ordin() function in pcregrep.
+6. An empty \Q\E sequence between an item and its qualifier caused
+ pcre_compile() to misbehave when auto callouts were enabled. This bug was
+ found by the LLVM fuzzer.
+
Version 8.38 23-November-2015
-----------------------------
diff --git a/pcre_compile.c b/pcre_compile.c
index 5786cd3..beed46b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4671,17 +4671,27 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
}
+
+ /* Check for the start of a \Q...\E sequence. We must do this here rather
+ than later in case it is immediately followed by \E, which turns it into a
+ "do nothing" sequence. */
+
+ if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
+ {
+ inescq = TRUE;
+ ptr++;
+ continue;
+ }
}
- /* In extended mode, skip white space and comments. We need a loop in order
- to check for more white space and more comments after a comment. */
+ /* In extended mode, skip white space and comments. */
if ((options & PCRE_EXTENDED) != 0)
{
- for (;;)
+ const pcre_uchar *wscptr = ptr;
+ while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+ if (c == CHAR_NUMBER_SIGN)
{
- while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
- if (c != CHAR_NUMBER_SIGN) break;
ptr++;
while (*ptr != CHAR_NULL)
{
@@ -4695,7 +4705,15 @@ for (;; ptr++)
if (utf) FORWARDCHAR(ptr);
#endif
}
- c = *ptr; /* Either NULL or the char after a newline */
+ }
+
+ /* If we skipped any characters, restart the loop. Otherwise, we didn't see
+ a comment. */
+
+ if (ptr > wscptr)
+ {
+ ptr--;
+ continue;
}
}
@@ -7900,16 +7918,6 @@ for (;; ptr++)
c = ec;
else
{
- if (escape == ESC_Q) /* Handle start of quoted string */
- {
- if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- ptr += 2; /* avoid empty string */
- else inescq = TRUE;
- continue;
- }
-
- if (escape == ESC_E) continue; /* Perl ignores an orphan \E */
-
/* For metasequences that actually match a character, we disable the
setting of a first character if it hasn't already been set. */
diff --git a/testdata/testinput2 b/testdata/testinput2
index e8ca4fe..3a1134f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4227,4 +4227,6 @@ backtracking verbs. --/
/(A*)\E+/CBZ
+/()\Q\E*]/BCZ
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 09756b8..ac33cc4 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14624,4 +14624,19 @@ No match
End
------------------------------------------------------------------
+/()\Q\E*]/BCZ
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 7
+ Brazero
+ SCBra 1
+ Callout 255 1 0
+ KetRmax
+ Callout 255 7 1
+ ]
+ Callout 255 8 0
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/