summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph M. Becker <cmb@php.net> 2015-08-13 14:20:04 +0200
committer Christoph M. Becker <cmb@php.net> 2015-08-13 14:20:04 +0200
commit b9f23c2152eb635082e43e62a5c395b16f40054e (patch)
tree b41cd3b32408e7a7a2de3d15393996ddd182dd5d
parent f94bcb1026b9b38778804925d3247400422011e1 (diff)
download php-git-b9f23c2152eb635082e43e62a5c395b16f40054e.tar.gz
Fix #70232: Incorrect bump-along behavior with \K and empty string match
To do global matching (/g), for every empty match we have to do a second match with PCRE_NOTEMPTY turned on. That may fail, however, when the \K escape sequence is involved. For this purpose libpcre 8.0 introduced the PCRE_NOTEMPTY_ATSTART flag, which we will use if available, and otherwise fall back to the old (possibly buggy) behavior.
-rw-r--r-- ext/pcre/php_pcre.c 23
-rw-r--r-- ext/pcre/tests/bug70232.phpt 68
2 files changed, 82 insertions, 9 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index f7afc47458..c55828bbcb 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -43,6 +43,11 @@
#define PCRE_CACHE_SIZE 4096
+/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
+#ifndef PCRE_NOTEMPTY_ATSTART
+# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
+#endif
+
enum {
PHP_PCRE_NO_ERROR = 0,
PHP_PCRE_INTERNAL_ERROR,
@@ -794,7 +799,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
pcre_free((void *) stringlist);
}
} else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY after a null match,
+ /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
@@ -811,10 +816,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
/* Advance to the position right after the last full match */
start_offset = offsets[1];
@@ -1256,7 +1261,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
limit--;
} else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE_NOTEMPTY after a null match,
+ /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
@@ -1290,10 +1295,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
}
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
/* Advance to the next piece. */
start_offset = offsets[1];
@@ -1659,7 +1664,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
} else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY after a null match,
+ /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
@@ -1691,10 +1696,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
}
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
/* Advance to the position right after the last full match */
start_offset = offsets[1];
diff --git a/ext/pcre/tests/bug70232.phpt b/ext/pcre/tests/bug70232.phpt
new file mode 100644
index 0000000000..f421e9c43d
--- /dev/null
+++ b/ext/pcre/tests/bug70232.phpt
@@ -0,0 +1,68 @@
+--TEST--
+Bug #70232 (Incorrect bump-along behavior with \K and empty string match)
+--SKIPIF--
+<?php
+if (version_compare(explode(' ', PCRE_VERSION)[0], '8.0', 'lt')) {
+ die("skip this test requires libpcre >= 8.0");
+}
+?>
+--FILE--
+<?php
+$pattern = '~(?: |\G)\d\B\K~';
+$subject = "123 a123 1234567 b123 123";
+preg_match_all($pattern, $subject, $matches);
+var_dump($matches);
+var_dump(preg_replace($pattern, "*", $subject));
+var_dump(preg_split($pattern, $subject));
+?>
+--EXPECT--
+array(1) {
+ [0]=>
+ array(10) {
+ [0]=>
+ string(0) ""
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(0) ""
+ [4]=>
+ string(0) ""
+ [5]=>
+ string(0) ""
+ [6]=>
+ string(0) ""
+ [7]=>
+ string(0) ""
+ [8]=>
+ string(0) ""
+ [9]=>
+ string(0) ""
+ }
+}
+string(35) "1*2*3 a123 1*2*3*4*5*6*7 b123 1*2*3"
+array(11) {
+ [0]=>
+ string(1) "1"
+ [1]=>
+ string(1) "2"
+ [2]=>
+ string(8) "3 a123 1"
+ [3]=>
+ string(1) "2"
+ [4]=>
+ string(1) "3"
+ [5]=>
+ string(1) "4"
+ [6]=>
+ string(1) "5"
+ [7]=>
+ string(1) "6"
+ [8]=>
+ string(8) "7 b123 1"
+ [9]=>
+ string(1) "2"
+ [10]=>
+ string(1) "3"
+}