diff options
Diffstat (limited to 'ext/pcre')
53 files changed, 2971 insertions, 777 deletions
diff --git a/ext/pcre/config.w32 b/ext/pcre/config.w32 index 594b1cb474..02256887a1 100644 --- a/ext/pcre/config.w32 +++ b/ext/pcre/config.w32 @@ -2,7 +2,7 @@ // vim:ft=javascript EXTENSION("pcre", "php_pcre.c", false /* never shared */, - "-Iext/pcre/pcrelib"); + "-Iext/pcre/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c pcre_jit_compile.c", "pcre"); ADD_DEF_FILE("ext\\pcre\\php_pcre.def"); @@ -11,3 +11,9 @@ AC_DEFINE('HAVE_PCRE', 1, 'Have PCRE library'); PHP_PCRE="yes"; PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcrelib/"); ADD_FLAG("CFLAGS_PCRE", " /D HAVE_CONFIG_H"); + +ARG_WITH("pcre-jit", "Enable PCRE JIT support", "yes"); +if (PHP_PCRE_JIT != "no") { + AC_DEFINE('HAVE_PCRE_JIT_SUPPORT', 1, 'PCRE library'); +} + diff --git a/ext/pcre/config0.m4 b/ext/pcre/config0.m4 index bfe2009aa0..35fc585d51 100644 --- a/ext/pcre/config0.m4 +++ b/ext/pcre/config0.m4 @@ -47,7 +47,7 @@ PHP_ARG_WITH(pcre-regex,, AC_DEFINE(HAVE_PCRE, 1, [ ]) PHP_ADD_INCLUDE($PCRE_INCDIR) - PHP_NEW_EXTENSION(pcre, php_pcre.c, no) + PHP_NEW_EXTENSION(pcre, php_pcre.c, no,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h]) else AC_MSG_CHECKING([for PCRE library to use]) @@ -60,9 +60,21 @@ PHP_ARG_WITH(pcre-regex,, pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \ pcrelib/pcre_version.c pcrelib/pcre_xclass.c \ pcrelib/pcre_jit_compile.c" - PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib" + PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1" PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS) PHP_ADD_BUILD_DIR($ext_builddir/pcrelib) PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/]) AC_DEFINE(HAVE_BUNDLED_PCRE, 
1, [ ]) fi + +PHP_ARG_WITH(pcre-jit,,[ --with-pcre-jit Enable PCRE JIT functionality], yes, no) + if test "$PHP_PCRE_REGEX" != "no"; then + AC_MSG_CHECKING([whether to enable PCRE JIT functionality]) + if test "$PHP_PCRE_JIT" != "no"; then + AC_DEFINE(HAVE_PCRE_JIT_SUPPORT, 1, [ ]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + fi + diff --git a/ext/pcre/pcrelib/ChangeLog b/ext/pcre/pcrelib/ChangeLog index f7458764cb..5e5bf188ce 100644 --- a/ext/pcre/pcrelib/ChangeLog +++ b/ext/pcre/pcrelib/ChangeLog @@ -1985,7 +1985,7 @@ Version 8.10 25-Jun-2010 7. Minor change to pcretest.c to avoid a compiler warning. -8. Added four artificial Unicode properties to help with an option to make +8. Added four artifical Unicode properties to help with an option to make \s etc use properties (see next item). The new properties are: Xan (alphanumeric), Xsp (Perl space), Xps (POSIX space), and Xwd (word). @@ -4610,7 +4610,7 @@ Version 4.3 21-May-03 (i) The utf8_table... variables are now declared "const". (ii) The code for \cx, which used the "case flipping" table to upper case - lower case letters, now just subtracts 32. This is ASCII-specific, + lower case letters, now just substracts 32. This is ASCII-specific, but the whole concept of \cx is ASCII-specific, so it seems reasonable. diff --git a/ext/pcre/pcrelib/HACKING b/ext/pcre/pcrelib/HACKING index 8395504212..691b7a14e5 100644 --- a/ext/pcre/pcrelib/HACKING +++ b/ext/pcre/pcrelib/HACKING @@ -360,7 +360,7 @@ reference number if the reference is to a unique capturing group (either by number or by name). When named groups are used, there may be more than one group with the same name. In this case, a reference by name generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index (not the byte offset) -in the group name table of the first entry for the required name, followed by +in the group name table of the first entry for the requred name, followed by the number of groups with the same name. 
diff --git a/ext/pcre/pcrelib/README b/ext/pcre/pcrelib/README index 7a6ddff5c7..c0f3c865ef 100644 --- a/ext/pcre/pcrelib/README +++ b/ext/pcre/pcrelib/README @@ -403,7 +403,7 @@ library. They are also documented in the pcrebuild man page. avoided by linking with libedit (which has a BSD licence) instead. Enabling libreadline causes the -lreadline option to be added to the pcretest - build. In many operating environments with a system-installed readline + build. In many operating environments with a sytem-installed readline library this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is in use), it may be necessary to specify something like LIBS="-lncurses" as well. This is because, to quote @@ -442,7 +442,7 @@ The "configure" script also creates config.status, which is an executable script that can be run to recreate the configuration, and config.log, which contains compiler output from tests that "configure" runs. -Once "configure" has run, you can run "make". This builds the the libraries +Once "configure" has run, you can run "make". This builds the libraries libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you enabled JIT support with --enable-jit, a test program called pcre_jit_test is built as well. diff --git a/ext/pcre/pcrelib/config.h b/ext/pcre/pcrelib/config.h index 0f7a9f73ff..e3ed148190 100644 --- a/ext/pcre/pcrelib/config.h +++ b/ext/pcre/pcrelib/config.h @@ -1,7 +1,9 @@ #include <php_compat.h> -#ifndef PHP_WIN32 +#ifdef PHP_WIN32 +# include <config.w32.h> +#else # include <php_config.h> #endif @@ -397,7 +399,9 @@ them both to 0; an emulation function will be used. */ #undef SUPPORT_GCOV /* Define to any value to enable support for Just-In-Time compiling. */ +#if HAVE_PCRE_JIT_SUPPORT #define SUPPORT_JIT +#endif /* Define to any value to allow pcregrep to be linked with libbz2, so that it is able to handle .bz2 files. 
*/ diff --git a/ext/pcre/pcrelib/dftables.c b/ext/pcre/pcrelib/dftables.c index 1fdc8e0f23..4ec32883cf 100644 --- a/ext/pcre/pcrelib/dftables.c +++ b/ext/pcre/pcrelib/dftables.c @@ -43,9 +43,7 @@ character tables for PCRE. The tables are built according to the current locale. Now that pcre_maketables is a function visible to the outside world, we make use of its code from here in order to be consistent. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include <ctype.h> #include <stdio.h> diff --git a/ext/pcre/pcrelib/doc/pcre.txt b/ext/pcre/pcrelib/doc/pcre.txt index 59f7418650..76a47c79ef 100644 --- a/ext/pcre/pcrelib/doc/pcre.txt +++ b/ext/pcre/pcrelib/doc/pcre.txt @@ -1253,7 +1253,7 @@ PCRETEST OPTION FOR LIBREADLINE SUPPORT pcretest linked in this way, there may be licensing issues. Setting this option causes the -lreadline option to be added to the - pcretest build. In many operating environments with a system-installed + pcretest build. In many operating environments with a sytem-installed libreadline this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is in use), some extra configuration may be necessary. The INSTALL file for libreadline says diff --git a/ext/pcre/pcrelib/pcre_chartables.c b/ext/pcre/pcrelib/pcre_chartables.c index 1e20ec29d0..78ede56a93 100644 --- a/ext/pcre/pcrelib/pcre_chartables.c +++ b/ext/pcre/pcrelib/pcre_chartables.c @@ -20,9 +20,7 @@ and dead code stripping is activated. This leads to link errors. Pulling in the header ensures that the array gets flagged as "someone outside this compilation unit might reference this" and so it will always be supplied to the linker. 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c index 4d3b3139de..11a9d26ff6 100644 --- a/ext/pcre/pcrelib/pcre_compile.c +++ b/ext/pcre/pcrelib/pcre_compile.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. supporting internal functions that are not used by other modules. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #define NLBLOCK cd /* Block containing newline information */ #define PSSTART start_pattern /* Field containing pattern start */ @@ -5772,7 +5770,7 @@ for (;; ptr++) /* If previous was a character type match (\d or similar), abolish it and create a suitable repeat item. The code is shared with single-character repeats by setting op_type to add a suitable offset into repeat_type. Note - the the Unicode property types will be present only when SUPPORT_UCP is + that the Unicode property types will be present only when SUPPORT_UCP is defined, but we don't wrap the little bits of code here because it just makes it horribly messy. */ diff --git a/ext/pcre/pcrelib/pcre_config.c b/ext/pcre/pcrelib/pcre_config.c index 1cbdd9c960..92e3f0ebdb 100644 --- a/ext/pcre/pcrelib/pcre_config.c +++ b/ext/pcre/pcrelib/pcre_config.c @@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. /* This module contains the external function pcre_config(). */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif /* Keep the original link size. */ static int real_link_size = LINK_SIZE; diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c index 81a19e75e4..c783ff6e7e 100644 --- a/ext/pcre/pcrelib/pcre_exec.c +++ b/ext/pcre/pcrelib/pcre_exec.c @@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. pattern matching using an NFA algorithm, trying to mimic Perl as closely as possible. There are also some static supporting functions. 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #define NLBLOCK md /* Block containing newline information */ #define PSSTART start_subject /* Field containing processed string start */ @@ -1040,7 +1038,7 @@ for (;;) the result of a recursive call to match() whatever happened so it was possible to reduce stack usage by turning this into a tail recursion, except in the case of a possibly empty group. However, now that there is - the possibility of (*THEN) occurring in the final alternative, this + the possiblity of (*THEN) occurring in the final alternative, this optimization is no longer always possible. We can optimize if we know there are no (*THEN)s in the pattern; at present diff --git a/ext/pcre/pcrelib/pcre_fullinfo.c b/ext/pcre/pcrelib/pcre_fullinfo.c index a6c2ece6ca..3af0cec60b 100644 --- a/ext/pcre/pcrelib/pcre_fullinfo.c +++ b/ext/pcre/pcrelib/pcre_fullinfo.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. information about a compiled pattern. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_get.c b/ext/pcre/pcrelib/pcre_get.c index 8094b34bbf..e7ea3a56a9 100644 --- a/ext/pcre/pcrelib/pcre_get.c +++ b/ext/pcre/pcrelib/pcre_get.c @@ -43,9 +43,7 @@ from the subject string after a regex match has succeeded. The original idea for these functions came from Scott Wimer. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_globals.c b/ext/pcre/pcrelib/pcre_globals.c index 0f106aa901..4aab651c52 100644 --- a/ext/pcre/pcrelib/pcre_globals.c +++ b/ext/pcre/pcrelib/pcre_globals.c @@ -52,9 +52,7 @@ a local function is used. Also, when compiling for Virtual Pascal, things are done differently, and global variables are not used. 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_internal.h b/ext/pcre/pcrelib/pcre_internal.h index aec1879a6a..f7a5ee7aa6 100644 --- a/ext/pcre/pcrelib/pcre_internal.h +++ b/ext/pcre/pcrelib/pcre_internal.h @@ -229,15 +229,11 @@ stdint.h is available, include it; it may define INT64_MAX. Systems that do not have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set by "configure". */ -#ifdef PHP_WIN32 -#include "win32/php_stdint.h" -#else #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H #include <inttypes.h> #endif -#endif #if defined INT64_MAX || defined int64_t #define INT64_OR_DOUBLE int64_t diff --git a/ext/pcre/pcrelib/pcre_maketables.c b/ext/pcre/pcrelib/pcre_maketables.c index a44a6eaa90..209cae9024 100644 --- a/ext/pcre/pcrelib/pcre_maketables.c +++ b/ext/pcre/pcrelib/pcre_maketables.c @@ -45,9 +45,7 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */ #ifndef DFTABLES -# ifdef HAVE_CONFIG_H # include "config.h" -# endif # include "pcre_internal.h" #endif diff --git a/ext/pcre/pcrelib/pcre_newline.c b/ext/pcre/pcrelib/pcre_newline.c index b8f5a4de19..405b913375 100644 --- a/ext/pcre/pcrelib/pcre_newline.c +++ b/ext/pcre/pcrelib/pcre_newline.c @@ -47,9 +47,7 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from http://unicode.org/unicode/reports/tr18/. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_ord2utf8.c b/ext/pcre/pcrelib/pcre_ord2utf8.c index 95f1beb963..ba3718612f 100644 --- a/ext/pcre/pcrelib/pcre_ord2utf8.c +++ b/ext/pcre/pcrelib/pcre_ord2utf8.c @@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. /* This file contains a private PCRE function that converts an ordinal character value into a UTF8 string. 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #define COMPILE_PCRE8 diff --git a/ext/pcre/pcrelib/pcre_printint.c b/ext/pcre/pcrelib/pcre_printint.c new file mode 100644 index 0000000000..8cbb161395 --- /dev/null +++ b/ext/pcre/pcrelib/pcre_printint.c @@ -0,0 +1,832 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2012 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains a PCRE private debugging function for printing out the +internal form of a compiled regular expression, along with some supporting +local functions. This source file is used in two places: + +(1) It is #included by pcre_compile.c when it is compiled in debugging mode +(PCRE_DEBUG defined in pcre_internal.h). It is not included in production +compiles. In this case PCRE_INCLUDED is defined. + +(2) It is also compiled separately and linked with pcretest.c, which can be +asked to print out a compiled regex for debugging purposes. */ + +#ifndef PCRE_INCLUDED + +#include "config.h" + +/* For pcretest program. */ +#define PRIV(name) name + +/* We have to include pcre_internal.h because we need the internal info for +displaying the results of pcre_study() and we also need to know about the +internal macros, structures, and other internal data values; pcretest has +"inside information" compared to a program that strictly follows the PCRE API. + +Although pcre_internal.h does itself include pcre.h, we explicitly include it +here before pcre_internal.h so that the PCRE_EXP_xxx macros get set +appropriately for an application, not for building PCRE. */ + +#include "pcre.h" +#include "pcre_internal.h" + +/* These are the functions that are contained within. It doesn't seem worth +having a separate .h file just for this.
*/ + +#endif /* PCRE_INCLUDED */ + +#ifdef PCRE_INCLUDED +static /* Keep the following function as private. */ +#endif + +#if defined COMPILE_PCRE8 +void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); +#elif defined COMPILE_PCRE16 +void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); +#elif defined COMPILE_PCRE32 +void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths); +#endif + +/* Macro that decides whether a character should be output as a literal or in +hexadecimal. We don't use isprint() because that can vary from system to system +(even without the use of locales) and we want the output always to be the same, +for testing purposes. */ + +#ifdef EBCDIC +#define PRINTABLE(c) ((c) >= 64 && (c) < 255) +#else +#define PRINTABLE(c) ((c) >= 32 && (c) < 127) +#endif + +/* The table of operator names. */ + +static const char *priv_OP_names[] = { OP_NAME_LIST }; + +/* This table of operator lengths is used by the printing code to step from one +opcode to the next. Its size is also needed for a check that ensures it is the +correct size for the number of opcodes (thus catching update omissions).
*/ + +static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS }; + + + +/************************************************* +* Print single- or multi-byte character * +*************************************************/ + +static unsigned int +print_char(FILE *f, pcre_uchar *ptr, BOOL utf) +{ +pcre_uint32 c = *ptr; + +#ifndef SUPPORT_UTF + +(void)utf; /* Avoid compiler warning */ +if (PRINTABLE(c)) fprintf(f, "%c", (char)c); +else if (c <= 0x80) fprintf(f, "\\x%02x", c); +else fprintf(f, "\\x{%x}", c); +return 0; + +#else + +#if defined COMPILE_PCRE8 + +if (!utf || (c & 0xc0) != 0xc0) + { + if (PRINTABLE(c)) fprintf(f, "%c", (char)c); + else if (c < 0x80) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%02x}", c); + return 0; + } +else + { + int i; + int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ + int s = 6*a; + c = (c & PRIV(utf8_table3)[a]) << s; + for (i = 1; i <= a; i++) + { + /* This is a check for malformed UTF-8; it should only occur if the sanity + check has been turned off. Rather than swallow random bytes, just stop if + we hit a bad one. Print it with \X instead of \x as an indication. */ + + if ((ptr[i] & 0xc0) != 0x80) + { + fprintf(f, "\\X{%x}", c); + return i - 1; + } + + /* The byte is OK */ + + s -= 6; + c |= (ptr[i] & 0x3f) << s; + } + fprintf(f, "\\x{%x}", c); + return a; + } + +#elif defined COMPILE_PCRE16 + +if (!utf || (c & 0xfc00) != 0xd800) + { + if (PRINTABLE(c)) fprintf(f, "%c", (char)c); + else if (c <= 0x80) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%02x}", c); + return 0; + } +else + { + /* This is a check for malformed UTF-16; it should only occur if the sanity + check has been turned off. Rather than swallow a low surrogate, just stop if + we hit a bad one. Print it with \X instead of \x as an indication. 
*/ + + if ((ptr[1] & 0xfc00) != 0xdc00) + { + fprintf(f, "\\X{%x}", c); + return 0; + } + + c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; + fprintf(f, "\\x{%x}", c); + return 1; + } + +#elif defined COMPILE_PCRE32 + +if (!utf || (c & 0xfffff800u) != 0xd800u) + { + if (PRINTABLE(c)) fprintf(f, "%c", (char)c); + else if (c <= 0x80) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%x}", c); + return 0; + } +else + { + /* This is a check for malformed UTF-32; it should only occur if the sanity + check has been turned off. Rather than swallow a surrogate, just stop if + we hit one. Print it with \X instead of \x as an indication. */ + fprintf(f, "\\X{%x}", c); + return 0; + } + +#endif /* COMPILE_PCRE[8|16|32] */ + +#endif /* SUPPORT_UTF */ +} + +/************************************************* +* Print uchar string (regardless of utf) * +*************************************************/ + +static void +print_puchar(FILE *f, PCRE_PUCHAR ptr) +{ +while (*ptr != '\0') + { + register pcre_uint32 c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + +/************************************************* +* Find Unicode property name * +*************************************************/ + +static const char * +get_ucpname(unsigned int ptype, unsigned int pvalue) +{ +#ifdef SUPPORT_UCP +int i; +for (i = PRIV(utt_size) - 1; i >= 0; i--) + { + if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break; + } +return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??"; +#else +/* It gets harder and harder to shut off unwanted compiler warnings. */ +ptype = ptype * pvalue; +return (ptype == pvalue)? "??" : "??"; +#endif +} + + +/************************************************* +* Print Unicode property value * +*************************************************/ + +/* "Normal" properties can be printed from tables. 
The PT_CLIST property is a +pseudo-property that contains a pointer to a list of case-equivalent +characters. This is used only when UCP support is available and UTF mode is +selected. It should never occur otherwise, but just in case it does, have +something ready to print. */ + +static void +print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after) +{ +if (code[1] != PT_CLIST) + { + fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1], + code[2]), after); + } +else + { + const char *not = (*code == OP_PROP)? "" : "not "; +#ifndef SUPPORT_UCP + fprintf(f, "%s%sclist %d%s", before, not, code[2], after); +#else + const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2]; + fprintf (f, "%s%sclist", before, not); + while (*p < NOTACHAR) fprintf(f, " %04x", *p++); + fprintf(f, "%s", after); +#endif + } +} + + + + +/************************************************* +* Print compiled regex * +*************************************************/ + +/* Make this function work for a regex with integers either byte order. +However, we assume that what we are passed is a compiled regex. The +print_lengths flag controls whether offsets and lengths of items are printed. +They can be turned off from pcretest so that automatic tests on bytecode can be +written that do not depend on the value of LINK_SIZE. */ + +#ifdef PCRE_INCLUDED +static /* Keep the following function as private. 
*/ +#endif +#if defined COMPILE_PCRE8 +void +pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) +#elif defined COMPILE_PCRE16 +void +pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) +#elif defined COMPILE_PCRE32 +void +pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths) +#endif +{ +REAL_PCRE *re = (REAL_PCRE *)external_re; +pcre_uchar *codestart, *code; +BOOL utf; + +unsigned int options = re->options; +int offset = re->name_table_offset; +int count = re->name_count; +int size = re->name_entry_size; + +if (re->magic_number != MAGIC_NUMBER) + { + offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); + count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); + size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); + options = ((options << 24) & 0xff000000) | + ((options << 8) & 0x00ff0000) | + ((options >> 8) & 0x0000ff00) | + ((options >> 24) & 0x000000ff); + } + +code = codestart = (pcre_uchar *)re + offset + count * size; +/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ +utf = (options & PCRE_UTF8) != 0; + +for(;;) + { + pcre_uchar *ccode; + const char *flag = " "; + pcre_uint32 c; + unsigned int extra = 0; + + if (print_lengths) + fprintf(f, "%3d ", (int)(code - codestart)); + else + fprintf(f, " "); + + switch(*code) + { +/* ========================================================================== */ + /* These cases are never obeyed. This is a fudge that causes a compile- + time error if the vectors OP_names or OP_lengths, which are indexed + by opcode, are not the correct length. It seems to be the only way to do + such a check at compile time, as the sizeof() operator does not work in + the C preprocessor. 
*/ + + case OP_TABLE_LENGTH: + case OP_TABLE_LENGTH + + ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && + (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)): + break; +/* ========================================================================== */ + + case OP_END: + fprintf(f, " %s\n", priv_OP_names[*code]); + fprintf(f, "------------------------------------------------------------------\n"); + return; + + case OP_CHAR: + fprintf(f, " "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHAR); + fprintf(f, "\n"); + continue; + + case OP_CHARI: + fprintf(f, " /i "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHARI); + fprintf(f, "\n"); + continue; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE)); + break; + + case OP_BRA: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_ALT: + case OP_KET: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ONCE: + case OP_ONCE_NC: + case OP_COND: + case OP_SCOND: + case OP_REVERSE: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", priv_OP_names[*code]); + break; + + case OP_CLOSE: + fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1)); + break; + + case OP_CREF: + fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]); + break; + + case OP_DNCREF: + { + pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) + + IMM2_SIZE; + fprintf(f, " %s Cond ref <", flag); + print_puchar(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case OP_RREF: + c = GET2(code, 1); + if (c == RREF_ANY) + fprintf(f, " Cond recurse any"); + else + fprintf(f, " Cond recurse %d", c); + 
break; + + case OP_DNRREF: + { + pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) + + IMM2_SIZE; + fprintf(f, " %s Cond recurse <", flag); + print_puchar(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case OP_DEF: + fprintf(f, " Cond def"); + break; + + case OP_STARI: + case OP_MINSTARI: + case OP_POSSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + flag = "/i"; + /* Fall through */ + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + fprintf(f, " %s ", flag); + if (*code >= OP_TYPESTAR) + { + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) + { + print_prop(f, code + 1, "", " "); + extra = 2; + } + else fprintf(f, "%s", priv_OP_names[code[1]]); + } + else extra = print_char(f, code+1, utf); + fprintf(f, "%s", priv_OP_names[*code]); + break; + + case OP_EXACTI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_POSUPTOI: + flag = "/i"; + /* Fall through */ + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + fprintf(f, " %s ", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "{"); + if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); + else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); + break; + + case OP_TYPEEXACT: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + { + print_prop(f, code + IMM2_SIZE + 1, " ", " "); + extra = 2; + } + else fprintf(f, " %s", 
priv_OP_names[code[1 + IMM2_SIZE]]); + fprintf(f, "{"); + if (*code != OP_TYPEEXACT) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); + else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); + break; + + case OP_NOTI: + flag = "/i"; + /* Fall through */ + case OP_NOT: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]"); + break; + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPOSSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + flag = "/i"; + /* Fall through */ + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPOSSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]%s", priv_OP_names[*code]); + break; + + case OP_NOTEXACTI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTPOSUPTOI: + flag = "/i"; + /* Fall through */ + + case OP_NOTEXACT: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTPOSUPTO: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "]{"); + if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); + else + if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); + break; + + case OP_RECURSE: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", priv_OP_names[*code]); + break; + + case OP_REFI: + flag = "/i"; + /* Fall through */ + case OP_REF: + fprintf(f, " %s \\%d", flag, GET2(code,1)); + ccode = code + priv_OP_lengths[*code]; + goto CLASS_REF_REPEAT; + + case OP_DNREFI: + flag = "/i"; + /* Fall through */ + case OP_DNREF: + { + pcre_uchar *entry = (pcre_uchar *)re 
+ offset + (GET2(code, 1) * size) + + IMM2_SIZE; + fprintf(f, " %s \\k<", flag); + print_puchar(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + ccode = code + priv_OP_lengths[*code]; + goto CLASS_REF_REPEAT; + + case OP_CALLOUT: + fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2), + GET(code, 2 + LINK_SIZE)); + break; + + case OP_PROP: + case OP_NOTPROP: + print_prop(f, code, " ", ""); + break; + + /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm + in having this code always here, and it makes it less messy without all + those #ifdefs. */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + int i; + unsigned int min, max; + BOOL printmap; + BOOL invertmap = FALSE; + pcre_uint8 *map; + pcre_uint8 inverted_map[32]; + + fprintf(f, " ["); + + if (*code == OP_XCLASS) + { + extra = GET(code, 1); + ccode = code + LINK_SIZE + 1; + printmap = (*ccode & XCL_MAP) != 0; + if ((*ccode & XCL_NOT) != 0) + { + invertmap = (*ccode & XCL_HASPROP) == 0; + fprintf(f, "^"); + } + ccode++; + } + else + { + printmap = TRUE; + ccode = code + 1; + } + + /* Print a bit map */ + + if (printmap) + { + map = (pcre_uint8 *)ccode; + if (invertmap) + { + for (i = 0; i < 32; i++) inverted_map[i] = ~map[i]; + map = inverted_map; + } + + for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1 << (i&7))) != 0) + { + int j; + for (j = i+1; j < 256; j++) + if ((map[j/8] & (1 << (j&7))) == 0) break; + if (i == '-' || i == ']') fprintf(f, "\\"); + if (PRINTABLE(i)) fprintf(f, "%c", i); + else fprintf(f, "\\x%02x", i); + if (--j > i) + { + if (j != i + 1) fprintf(f, "-"); + if (j == '-' || j == ']') fprintf(f, "\\"); + if (PRINTABLE(j)) fprintf(f, "%c", j); + else fprintf(f, "\\x%02x", j); + } + i = j; + } + } + ccode += 32 / sizeof(pcre_uchar); + } + + /* For an XCLASS there is always some additional data */ + + if (*code == OP_XCLASS) + { + pcre_uchar ch; + while ((ch = *ccode++) != XCL_END) + { + BOOL not = FALSE; + const char *notch = 
""; + + switch(ch) + { + case XCL_NOTPROP: + not = TRUE; + notch = "^"; + /* Fall through */ + + case XCL_PROP: + { + unsigned int ptype = *ccode++; + unsigned int pvalue = *ccode++; + + switch(ptype) + { + case PT_PXGRAPH: + fprintf(f, "[:%sgraph:]", notch); + break; + + case PT_PXPRINT: + fprintf(f, "[:%sprint:]", notch); + break; + + case PT_PXPUNCT: + fprintf(f, "[:%spunct:]", notch); + break; + + default: + fprintf(f, "\\%c{%s}", (not? 'P':'p'), + get_ucpname(ptype, pvalue)); + break; + } + } + break; + + default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } + break; + } + } + } + + /* Indicate a non-UTF class which was created by negation */ + + fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); + + /* Handle repeats after a class or a back reference */ + + CLASS_REF_REPEAT: + switch(*ccode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fprintf(f, "%s", priv_OP_names[*ccode]); + extra += priv_OP_lengths[*ccode]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + min = GET2(ccode,1); + max = GET2(ccode,1 + IMM2_SIZE); + if (max == 0) fprintf(f, "{%u,}", min); + else fprintf(f, "{%u,%u}", min, max); + if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); + else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+"); + extra += priv_OP_lengths[*ccode]; + break; + + /* Do nothing if it's not a repeat; this code stops picky compilers + warning about the lack of a default code path. 
*/ + + default: + break; + } + } + break; + + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + fprintf(f, " %s ", priv_OP_names[*code]); + print_puchar(f, code + 2); + extra += code[1]; + break; + + case OP_THEN: + fprintf(f, " %s", priv_OP_names[*code]); + break; + + case OP_CIRCM: + case OP_DOLLM: + flag = "/m"; + /* Fall through */ + + /* Anything else is just an item with no data, but possibly a flag. */ + + default: + fprintf(f, " %s %s", flag, priv_OP_names[*code]); + break; + } + + code += priv_OP_lengths[*code] + extra; + fprintf(f, "\n"); + } +} + +/* End of pcre_printint.src */ diff --git a/ext/pcre/pcrelib/pcre_refcount.c b/ext/pcre/pcrelib/pcre_refcount.c index 79efa90f21..d5e0b61c41 100644 --- a/ext/pcre/pcrelib/pcre_refcount.c +++ b/ext/pcre/pcrelib/pcre_refcount.c @@ -44,9 +44,7 @@ pattern data block. This might be helpful in applications where the block is shared by different users. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_study.c b/ext/pcre/pcrelib/pcre_study.c index 3afbc67a63..38ab820853 100644 --- a/ext/pcre/pcrelib/pcre_study.c +++ b/ext/pcre/pcrelib/pcre_study.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. supporting functions. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" @@ -1056,7 +1054,7 @@ do tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); break; - /* Single-char up to sets the bit and tries the next */ + /* Single-char upto sets the bit and tries the next */ case OP_UPTO: case OP_MINUPTO: diff --git a/ext/pcre/pcrelib/pcre_tables.c b/ext/pcre/pcrelib/pcre_tables.c index 4960af57c4..93ea6ad160 100644 --- a/ext/pcre/pcrelib/pcre_tables.c +++ b/ext/pcre/pcrelib/pcre_tables.c @@ -45,9 +45,7 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name clashes with the library. 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_ucd.c b/ext/pcre/pcrelib/pcre_ucd.c index 69c4fd42c3..d644907675 100644 --- a/ext/pcre/pcrelib/pcre_ucd.c +++ b/ext/pcre/pcrelib/pcre_ucd.c @@ -10,9 +10,7 @@ needed. */ #ifndef PCRE_INCLUDED -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_valid_utf8.c b/ext/pcre/pcrelib/pcre_valid_utf8.c index 3b0f6464a3..3a9fba785c 100644 --- a/ext/pcre/pcrelib/pcre_valid_utf8.c +++ b/ext/pcre/pcrelib/pcre_valid_utf8.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. strings. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_version.c b/ext/pcre/pcrelib/pcre_version.c index ae86ff28bc..00b8dd680c 100644 --- a/ext/pcre/pcrelib/pcre_version.c +++ b/ext/pcre/pcrelib/pcre_version.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. string that identifies the PCRE version that is in use. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcre_xclass.c b/ext/pcre/pcrelib/pcre_xclass.c index ef759a589a..782748f241 100644 --- a/ext/pcre/pcrelib/pcre_xclass.c +++ b/ext/pcre/pcrelib/pcre_xclass.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. class. It is used by both pcre_exec() and pcre_def_exec(). 
*/ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "pcre_internal.h" diff --git a/ext/pcre/pcrelib/pcredemo.c b/ext/pcre/pcrelib/pcredemo.c index 1ca77f1537..946aba45cd 100644 --- a/ext/pcre/pcrelib/pcredemo.c +++ b/ext/pcre/pcrelib/pcredemo.c @@ -144,7 +144,7 @@ if (rc < 0) return 1; } -/* Match succeeded */ +/* Match succeded */ printf("\nMatch succeeded at offset %d\n", ovector[0]); @@ -362,7 +362,7 @@ for (;;) return 1; } - /* Match succeeded */ + /* Match succeded */ printf("\nMatch succeeded again at offset %d\n", ovector[0]); diff --git a/ext/pcre/pcrelib/pcreposix.c b/ext/pcre/pcrelib/pcreposix.c index f024423b63..8e0d8bb609 100644 --- a/ext/pcre/pcrelib/pcreposix.c +++ b/ext/pcre/pcrelib/pcreposix.c @@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. functions. */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif /* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 2a8ff199b8..af1916aa45 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1,6 +1,6 @@ /* +----------------------------------------------------------------------+ - | PHP Version 5 | + | PHP Version 7 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2016 The PHP Group | +----------------------------------------------------------------------+ @@ -23,7 +23,8 @@ #include "php_globals.h" #include "php_pcre.h" #include "ext/standard/info.h" -#include "ext/standard/php_smart_str.h" +#include "ext/standard/basic_functions.h" +#include "zend_smart_str.h" #if HAVE_PCRE || HAVE_BUNDLED_PCRE @@ -54,14 +55,20 @@ enum { PHP_PCRE_BACKTRACK_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR, PHP_PCRE_BAD_UTF8_ERROR, - PHP_PCRE_BAD_UTF8_OFFSET_ERROR + PHP_PCRE_BAD_UTF8_OFFSET_ERROR, + PHP_PCRE_JIT_STACKLIMIT_ERROR }; -ZEND_DECLARE_MODULE_GLOBALS(pcre) +PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre) +#ifdef HAVE_PCRE_JIT_SUPPORT +#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024) +#define 
PCRE_JIT_STACK_MAX_SIZE (64 * 1024) +ZEND_TLS pcre_jit_stack *jit_stack = NULL; +#endif -static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ +static void pcre_handle_exec_error(int pcre_code) /* {{{ */ { int preg_code = 0; @@ -82,6 +89,12 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; break; +#ifdef HAVE_PCRE_JIT_SUPPORT + case PCRE_ERROR_JIT_STACKLIMIT: + preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR; + break; +#endif + default: preg_code = PHP_PCRE_INTERNAL_ERROR; break; @@ -91,16 +104,18 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ } /* }}} */ -static void php_free_pcre_cache(void *data) /* {{{ */ +static void php_free_pcre_cache(zval *data) /* {{{ */ { - pcre_cache_entry *pce = (pcre_cache_entry *) data; + pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); if (!pce) return; - pefree(pce->re, 1); - if (pce->extra) pefree(pce->extra, 1); + pcre_free(pce->re); + if (pce->extra) { + pcre_free_study(pce->extra); + } #if HAVE_SETLOCALE if ((void*)pce->tables) pefree((void*)pce->tables, 1); - pefree(pce->locale, 1); #endif + pefree(pce, 1); } /* }}} */ @@ -116,21 +131,49 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */ static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ { zend_hash_destroy(&pcre_globals->pcre_cache); + +#ifdef HAVE_PCRE_JIT_SUPPORT + /* Stack may only be destroyed when no cached patterns + possibly associated with it do exist. 
*/ + if (jit_stack) { + pcre_jit_stack_free(jit_stack); + jit_stack = NULL; + } +#endif + } /* }}} */ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals) - STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals) + STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals) +#ifdef HAVE_PCRE_JIT_SUPPORT + STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateBool, jit, zend_pcre_globals, pcre_globals) +#endif PHP_INI_END() /* {{{ PHP_MINFO_FUNCTION(pcre) */ static PHP_MINFO_FUNCTION(pcre) { +#ifdef HAVE_PCRE_JIT_SUPPORT + int jit_yes = 0; +#endif + php_info_print_table_start(); php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" ); php_info_print_table_row(2, "PCRE Library Version", pcre_version() ); + +#ifdef HAVE_PCRE_JIT_SUPPORT + if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) { + php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? 
"enabled" : "disabled"); + } else { + php_info_print_table_row(2, "PCRE JIT Support", "unknown" ); + } +#else + php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" ); +#endif + php_info_print_table_end(); DISPLAY_INI_ENTRIES(); @@ -156,6 +199,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); return SUCCESS; @@ -171,10 +215,23 @@ static PHP_MSHUTDOWN_FUNCTION(pcre) } /* }}} */ +#ifdef HAVE_PCRE_JIT_SUPPORT +/* {{{ PHP_RINIT_FUNCTION(pcre) */ +static PHP_RINIT_FUNCTION(pcre) +{ + if (PCRE_G(jit) && jit_stack == NULL) { + jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE); + } + + return SUCCESS; +} +/* }}} */ +#endif + /* {{{ static pcre_clean_cache */ -static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) +static int pcre_clean_cache(zval *data, void *arg) { - pcre_cache_entry *pce = (pcre_cache_entry *) data; + pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); int *num_clean = (int *)arg; if (*num_clean > 0 && !pce->refcount) { @@ -187,45 +244,35 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) /* }}} */ /* {{{ static make_subpats_table */ -static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC) +static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce) { pcre_extra *extra = pce->extra; - int name_cnt = 0, name_size, ni = 0; + int name_cnt = pce->name_count, name_size, ni = 0; int rc; char *name_table; unsigned short name_idx; - 
char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); + char **subpat_names; + int rc1, rc2; - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); + rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); + rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); + rc = rc2 ? rc2 : rc1; if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - efree(subpat_names); + php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc); return NULL; } - if (name_cnt > 0) { - int rc1, rc2; - rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); - rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); - rc = rc2 ? rc2 : rc1; - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); + while (ni++ < name_cnt) { + name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; + subpat_names[name_idx] = name_table + 2; + if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { + php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed"); efree(subpat_names); return NULL; } - - while (ni++ < name_cnt) { - name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; - subpat_names[name_idx] = name_table + 2; - if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); - efree(subpat_names); - return NULL; - } - name_table += name_size; - } + name_table += name_size; } - return subpat_names; } /* }}} */ @@ -251,7 +298,7 @@ static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char /* {{{ pcre_get_compiled_regex_cache */ -PHPAPI pcre_cache_entry* 
pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) { pcre *re = NULL; pcre_extra *extra; @@ -266,50 +313,49 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le char *pattern; int do_study = 0; int poptions = 0; - int count = 0; unsigned const char *tables = NULL; -#if HAVE_SETLOCALE - char *locale; -#endif pcre_cache_entry *pce; pcre_cache_entry new_entry; - char *tmp = NULL; + int rc; + zend_string *key; #if HAVE_SETLOCALE -# if defined(PHP_WIN32) && defined(ZTS) - _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); -# endif - locale = setlocale(LC_CTYPE, NULL); + if (BG(locale_string) && + (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) { + key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0); + memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1); + memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1); + } else #endif + { + key = regex; + } /* Try to lookup the cached regex entry, and if successful, just pass back the compiled pattern, otherwise go on and compile it. */ - if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { - /* - * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it - * is, we flush it and compile the pattern from scratch. 
- */ - if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) { - zend_hash_clean(&PCRE_G(pcre_cache)); - } else { + pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key); + if (pce) { #if HAVE_SETLOCALE - if (!strcmp(pce->locale, locale)) { -#endif - return pce; -#if HAVE_SETLOCALE - } -#endif + if (key != regex) { + zend_string_release(key); } +#endif + return pce; } - p = regex; + p = ZSTR_VAL(regex); /* Parse through the leading whitespace, and display a warning if we get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; if (*p == 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, - p < regex + regex_len ? "Null byte in regex" : "Empty regular expression"); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + php_error_docref(NULL, E_WARNING, + p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression"); return NULL; } @@ -317,7 +363,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le or a backslash. 
*/ delimiter = *p++; if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash"); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash"); return NULL; } @@ -356,12 +407,17 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le } if (*pp == 0) { - if (pp < regex + regex_len) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex"); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) { + php_error_docref(NULL,E_WARNING, "Null byte in regex"); } else if (start_delimiter == end_delimiter) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter); + php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter); } else { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter); + php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter); } return NULL; } @@ -374,7 +430,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le /* Parse through the options, setting appropriate flags. Display a warning if we encounter an unknown modifier. 
*/ - while (pp < regex + regex_len) { + while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) { switch (*pp++) { /* Perl compatible options */ case 'i': coptions |= PCRE_CASELESS; break; @@ -407,18 +463,24 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le default: if (pp[-1]) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]); + php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]); } else { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex"); + php_error_docref(NULL,E_WARNING, "Null byte in regex"); } efree(pattern); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif return NULL; } } #if HAVE_SETLOCALE - if (strcmp(locale, "C")) + if (key != regex) { tables = pcre_maketables(); + } #endif /* Compile pattern and display a warning if compilation failed. */ @@ -429,7 +491,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le tables); if (re == NULL) { - php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset); efree(pattern); if (tables) { pefree((void*)tables, 1); @@ -437,15 +504,30 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le return NULL; } +#ifdef HAVE_PCRE_JIT_SUPPORT + if (PCRE_G(jit)) { + /* Enable PCRE JIT compiler */ + do_study = 1; + soptions |= PCRE_STUDY_JIT_COMPILE; + } +#endif + /* If study option was specified, study the pattern and store the result in extra for passing to pcre_exec. 
*/ if (do_study) { extra = pcre_study(re, soptions, &error); if (extra) { extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; + extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); +#ifdef HAVE_PCRE_JIT_SUPPORT + if (PCRE_G(jit) && jit_stack) { + pcre_assign_jit_stack(extra, NULL, jit_stack); + } +#endif } if (error != NULL) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern"); + php_error_docref(NULL, E_WARNING, "Error while studying pattern"); } } else { extra = NULL; @@ -458,9 +540,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le * these are supposedly the oldest ones (but not necessarily the least used * ones). */ - if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { + if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { int num_clean = PCRE_CACHE_SIZE / 8; - zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC); + zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean); } /* Store the compiled pattern and extra info in the cache. 
*/ @@ -469,11 +551,33 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le new_entry.preg_options = poptions; new_entry.compile_options = coptions; #if HAVE_SETLOCALE - new_entry.locale = pestrdup(locale, 1); + new_entry.locale = NULL; new_entry.tables = tables; #endif new_entry.refcount = 0; + rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count); + if (rc < 0) { +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + return NULL; + } + + rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count); + if (rc < 0) { +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + return NULL; + } + /* * Interned strings are not duplicated when stored in HashTable, * but all the interned strings created during HTTP request are removed @@ -482,15 +586,16 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le * as hash keys especually for this table. 
* See bug #63180 */ - if (IS_INTERNED(regex)) { - regex = tmp = estrndup(regex, regex_len); - } - - zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, - sizeof(pcre_cache_entry), (void**)&pce); - - if (tmp) { - efree(tmp); + if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) { + pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), + ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry)); +#if HAVE_SETLOCALE + if (key != regex) { + zend_string_release(key); + } +#endif + } else { + pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry)); } return pce; @@ -499,9 +604,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le /* {{{ pcre_get_compiled_regex */ -PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) +PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options) { - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex); if (extra) { *extra = pce ? pce->extra : NULL; @@ -516,9 +621,9 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_ /* {{{ pcre_get_compiled_regex_ex */ -PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) +PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options) { - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex); if (extra) { *extra = pce ? 
pce->extra : NULL; @@ -537,60 +642,67 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *pr /* {{{ add_offset_pair */ static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) { - zval *match_pair; + zval match_pair, tmp; - ALLOC_ZVAL(match_pair); - array_init(match_pair); - INIT_PZVAL(match_pair); + array_init_size(&match_pair, 2); /* Add (match, offset) to the return value */ - add_next_index_stringl(match_pair, str, len, 1); - add_next_index_long(match_pair, offset); + ZVAL_STRINGL(&tmp, str, len); + zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); + ZVAL_LONG(&tmp, offset); + zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); if (name) { - zval_add_ref(&match_pair); - zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); + Z_ADDREF(match_pair); + zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair); } - zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); } /* }}} */ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ { /* parameters */ - char *regex; /* Regular expression */ - char *subject; /* String to match against */ - int regex_len; - int subject_len; + zend_string *regex; /* Regular expression */ + zend_string *subject; /* String to match against */ pcre_cache_entry *pce; /* Compiled regular expression */ zval *subpats = NULL; /* Array for subpatterns */ - long flags = 0; /* Match control flags */ - long start_offset = 0; /* Where the new search starts */ + zend_long flags = 0; /* Match control flags */ + zend_long start_offset = 0; /* Where the new search starts */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len, - &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { - RETURN_FALSE; + ZEND_PARSE_PARAMETERS_START(2, 5) + 
Z_PARAM_STR(regex) + Z_PARAM_STR(subject) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL_EX(subpats, 0, 1) + Z_PARAM_LONG(flags) + Z_PARAM_LONG(start_offset) + ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE); + + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + RETURN_FALSE; } /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { RETURN_FALSE; } pce->refcount++; - php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, - global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC); + php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats, + global, ZEND_NUM_ARGS() >= 4, flags, start_offset); pce->refcount--; } /* }}} */ /* {{{ php_pcre_match_impl() */ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, - zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC) + zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset) { - zval *result_set, /* Holds a set of subpatterns after + zval result_set, /* Holds a set of subpatterns after a global match */ - **match_sets = NULL; /* An array of sets of matches for each + *match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -603,15 +715,18 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Holds list of subpatterns */ char **subpat_names; /* Array for named subpatterns */ - int i, rc; + int i; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ unsigned char *mark 
= NULL; /* Target for MARK name */ - zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */ + zval marks; /* Array of marks for PREG_PATTERN_ORDER */ + ALLOCA_FLAG(use_heap); + + ZVAL_UNDEF(&marks); /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { - zval_dtor(subpats); + zval_ptr_dtor(subpats); array_init(subpats); } @@ -629,7 +744,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || (!global && subpats_order != 0)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified"); + php_error_docref(NULL, E_WARNING, "Invalid flags specified"); return; } } else { @@ -648,41 +763,40 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; } - extra->match_limit = PCRE_G(backtrack_limit); - extra->match_limit_recursion = PCRE_G(recursion_limit); + extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); #ifdef PCRE_EXTRA_MARK extra->mark = &mark; extra->flags |= PCRE_EXTRA_MARK; #endif /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - RETURN_FALSE; - } - num_subpats++; + num_subpats = pce->capture_count + 1; size_offsets = num_subpats * 3; /* - * Build a mapping from subpattern numbers to their names. We will always - * allocate the table, even though there may be no named subpatterns. This - * avoids somewhat more complicated logic in the inner loops. + * Build a mapping from subpattern numbers to their names. 
We will + * allocate the table only if there are any named subpatterns. */ - subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); - if (!subpat_names) { - RETURN_FALSE; + subpat_names = NULL; + if (pce->name_count > 0) { + subpat_names = make_subpats_table(num_subpats, pce); + if (!subpat_names) { + RETURN_FALSE; + } } - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + if (size_offsets <= 32) { + offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap); + } else { + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + } memset(offsets, 0, size_offsets*sizeof(int)); /* Allocate match sets array and initialize the values. */ if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); + match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0); for (i=0; i<num_subpats; i++) { - ALLOC_ZVAL(match_sets[i]); - array_init(match_sets[i]); - INIT_PZVAL(match_sets[i]); + array_init(&match_sets[i]); } } @@ -691,7 +805,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec do { /* Execute the regular expression. */ - count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, + count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset, exoptions|g_notempty, offsets, size_offsets); /* the string was already proved to be valid UTF-8 */ @@ -699,7 +813,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* Check for too many substrings condition. */ if (count == 0) { - php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); + php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } @@ -711,32 +825,39 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. 
*/ if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { - efree(subpat_names); - efree(offsets); + if (subpat_names) { + efree(subpat_names); + } + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } if (match_sets) efree(match_sets); - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed"); + php_error_docref(NULL, E_WARNING, "Get subpatterns list failed"); RETURN_FALSE; } if (global) { /* global pattern matching */ if (subpats && subpats_order == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ - for (i = 0; i < count; i++) { - if (offset_capture) { - add_offset_pair(match_sets[i], (char *)stringlist[i], + if (offset_capture) { + for (i = 0; i < count; i++) { + add_offset_pair(&match_sets[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); - } else { - add_next_index_stringl(match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + } + } else { + for (i = 0; i < count; i++) { + add_next_index_stringl(&match_sets[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); } } /* Add MARK, if available */ if (mark) { - if (!marks) { - MAKE_STD_ZVAL(marks); - array_init(marks); + if (Z_TYPE(marks) == IS_UNDEF) { + array_init(&marks); } - add_index_string(marks, matched - 1, (char *) mark, 1); + add_index_string(&marks, matched - 1, (char *) mark); } /* * If the number of captured subpatterns on this run is @@ -745,55 +866,86 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec */ if (count < num_subpats) { for (; i < num_subpats; i++) { - add_next_index_string(match_sets[i], "", 1); + add_next_index_string(&match_sets[i], ""); } } } else { /* Allocate the result set array */ - ALLOC_ZVAL(result_set); - array_init(result_set); - INIT_PZVAL(result_set); + array_init_size(&result_set, count + (mark ? 
1 : 0)); /* Add all the subpatterns to it */ - for (i = 0; i < count; i++) { + if (subpat_names) { if (offset_capture) { - add_offset_pair(result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); + for (i = 0; i < count; i++) { + add_offset_pair(&result_set, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); + } } else { - if (subpat_names[i]) { - add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + add_next_index_stringl(&result_set, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + } + } else { + if (offset_capture) { + for (i = 0; i < count; i++) { + add_offset_pair(&result_set, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + } + } else { + for (i = 0; i < count; i++) { + add_next_index_stringl(&result_set, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); } } /* Add MARK, if available */ if (mark) { - add_assoc_string(result_set, "MARK", (char *) mark, 1); + add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark); } /* And add it to the output array */ - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set); } } else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. 
*/ - for (i = 0; i < count; i++) { + if (subpat_names) { + if (offset_capture) { + for (i = 0; i < count; i++) { + add_offset_pair(subpats, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1], + offsets[i<<1], subpat_names[i]); + } + } else { + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + add_next_index_stringl(subpats, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + } + } else { if (offset_capture) { - add_offset_pair(subpats, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i]); + for (i = 0; i < count; i++) { + add_offset_pair(subpats, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1], + offsets[i<<1], NULL); + } } else { - if (subpat_names[i]) { - add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + for (i = 0; i < count; i++) { + add_next_index_stringl(subpats, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(subpats, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); } } /* Add MARK, if available */ if (mark) { - add_assoc_string(subpats, "MARK", (char *) mark, 1); + add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark); } } @@ -807,12 +959,12 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (g_notempty != 0 && start_offset < subject_len) { int unit_len = calculate_unit_length(pce, subject + start_offset); - offsets[0] = start_offset; - offsets[1] = start_offset + unit_len; + offsets[0] = (int)start_offset; + offsets[1] = (int)(start_offset + unit_len); } else break; } else { - pcre_handle_exec_error(count TSRMLS_CC); + pcre_handle_exec_error(count); break; } @@ -828,23 +980,35 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* Add the match sets to the output array 
and clean up */ if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - for (i = 0; i < num_subpats; i++) { - if (subpat_names[i]) { - zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], - strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL); - Z_ADDREF_P(match_sets[i]); + if (subpat_names) { + for (i = 0; i < num_subpats; i++) { + if (subpat_names[i]) { + zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i], + strlen(subpat_names[i]), &match_sets[i]); + Z_ADDREF(match_sets[i]); + } + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); + } + } else { + for (i = 0; i < num_subpats; i++) { + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); } efree(match_sets); - if (marks) { - add_assoc_zval(subpats, "MARK", marks); + if (Z_TYPE(marks) != IS_UNDEF) { + add_assoc_zval(subpats, "MARK", &marks); } } - efree(offsets); - efree(subpat_names); + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } + if (subpat_names) { + efree(subpat_names); + } /* Did we encounter an error? 
*/ if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { @@ -912,151 +1076,65 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC) +static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark) { - zval *retval_ptr; /* Function return value */ - zval **args[1]; /* Argument to pass to function */ - zval *subpats; /* Captured subpatterns */ - int result_len; /* Return value length */ + zend_string *result_str; + zval retval; /* Function return value */ + zval args[1]; /* Argument to pass to function */ int i; - MAKE_STD_ZVAL(subpats); - array_init(subpats); - for (i = 0; i < count; i++) { - if (subpat_names[i]) { - add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1); + array_init_size(&args[0], count + (mark ? 
1 : 0)); + if (subpat_names) { + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]); + } + add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); + } + } else { + for (i = 0; i < count; i++) { + add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); } if (mark) { - add_assoc_string(subpats, "MARK", (char *) mark, 1); + add_assoc_string(&args[0], "MARK", (char *) mark); } - args[0] = &subpats; - if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { - convert_to_string_ex(&retval_ptr); - *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); - result_len = Z_STRLEN_P(retval_ptr); - zval_ptr_dtor(&retval_ptr); + if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) { + result_str = zval_get_string(&retval); + zval_ptr_dtor(&retval); } else { if (!EG(exception)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function"); + php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function"); } - result_len = offsets[1] - offsets[0]; - *result = estrndup(&subject[offsets[0]], result_len); - } - - zval_ptr_dtor(&subpats); - - return result_len; -} -/* }}} */ -/* {{{ preg_do_eval - */ -static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, - int *offsets, int count, char **result TSRMLS_DC) -{ - zval retval; /* Return value from evaluation */ - char *eval_str_end, /* End of eval string */ - *match, /* Current match for a backref */ - *esc_match, /* Quote-escaped match */ - *walk, /* Used to walk the code string */ - *segment, /* Start of segment to append while 
walking */ - walk_last; /* Last walked character */ - int match_len; /* Length of the match */ - int esc_match_len; /* Length of the quote-escaped match */ - int result_len; /* Length of the result of the evaluation */ - int backref; /* Current backref */ - char *compiled_string_description; - smart_str code = {0}; - - eval_str_end = eval_str + eval_str_len; - walk = segment = eval_str; - walk_last = 0; - - while (walk < eval_str_end) { - /* If found a backreference.. */ - if ('\\' == *walk || '$' == *walk) { - smart_str_appendl(&code, segment, walk - segment); - if (walk_last == '\\') { - code.c[code.len-1] = *walk++; - segment = walk; - walk_last = 0; - continue; - } - segment = walk; - if (preg_get_backref(&walk, &backref)) { - if (backref < count) { - /* Find the corresponding string match and substitute it - in instead of the backref */ - match = subject + offsets[backref<<1]; - match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; - if (match_len) { - esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC); - } else { - esc_match = match; - esc_match_len = 0; - } - } else { - esc_match = ""; - esc_match_len = 0; - } - smart_str_appendl(&code, esc_match, esc_match_len); - - segment = walk; - - /* Clean up and reassign */ - if (esc_match_len) - efree(esc_match); - continue; - } - } - walk++; - walk_last = walk[-1]; - } - smart_str_appendl(&code, segment, walk - segment); - smart_str_0(&code); - - compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC); - /* Run the code */ - if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) { - efree(compiled_string_description); - php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c); - /* zend_error() does not return in this case */ + result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0); } - efree(compiled_string_description); - convert_to_string(&retval); 
- - /* Save the return value and its length */ - *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval)); - result_len = Z_STRLEN(retval); - /* Clean up */ - zval_dtor(&retval); - smart_str_free(&code); + zval_ptr_dtor(&args[0]); - return result_len; + return result_str; } /* }}} */ /* {{{ php_pcre_replace */ -PHPAPI char *php_pcre_replace(char *regex, int regex_len, +PHPAPI zend_string *php_pcre_replace(zend_string *regex, + zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, - int *result_len, int limit, int *replace_count TSRMLS_DC) + int limit, int *replace_count) { pcre_cache_entry *pce; /* Compiled regular expression */ - char *result; /* Function result */ + zend_string *result; /* Function result */ /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { return NULL; } pce->refcount++; - result = php_pcre_replace_impl(pce, subject, subject_len, replace_val, - is_callable_replace, result_len, limit, replace_count TSRMLS_CC); + result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val, + is_callable_replace, limit, replace_count); pce->refcount--; return result; @@ -1064,8 +1142,7 @@ PHPAPI char *php_pcre_replace(char *regex, int regex_len, /* }}} */ /* {{{ php_pcre_replace_impl() */ -PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, - int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) +PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count) { pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -1077,85 +1154,82 @@ PHPAPI char 
*php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub int size_offsets; /* Size of the offsets array */ size_t new_len; /* Length of needed storage */ size_t alloc_len; /* Actual allocated length */ - int eval_result_len=0; /* Length of the eval'ed or - function-returned string */ int match_len; /* Length of the current match */ int backref; /* Backreference number */ - int eval; /* If the replacement string should be eval'ed */ int start_offset; /* Where the new search starts */ int g_notempty=0; /* If the match should not be empty */ - int replace_len=0; /* Length of replacement string */ - char *result, /* Result of replacement */ - *replace=NULL, /* Replacement string */ - *new_buf, /* Temporary buffer for re-allocation */ + char *replace=NULL, /* Replacement string */ *walkbuf, /* Location of current replacement in the result */ *walk, /* Used to walk the replacement string */ *match, /* The current match */ *piece, /* The current piece of subject */ *replace_end=NULL, /* End of replacement string */ - *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ - int rc; + size_t result_len; /* Length of result */ unsigned char *mark = NULL; /* Target for MARK name */ + zend_string *result; /* Result of replacement */ + zend_string *eval_result=NULL; /* Result of custom function */ + + ALLOCA_FLAG(use_heap); if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; } - extra->match_limit = PCRE_G(backtrack_limit); - extra->match_limit_recursion = PCRE_G(recursion_limit); -#ifdef PCRE_EXTRA_MARK - extra->mark = &mark; - extra->flags |= PCRE_EXTRA_MARK; -#endif - eval = pce->preg_options & PREG_REPLACE_EVAL; - if (is_callable_replace) { - if (eval) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback"); - return NULL; - } - } else { - replace = Z_STRVAL_P(replace_val); - replace_len = 
Z_STRLEN_P(replace_val); - replace_end = replace + replace_len; + extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); + + if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) { + php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead"); + return NULL; } - if (eval) { - php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead"); + if (!is_callable_replace) { + replace = Z_STRVAL_P(replace_val); + replace_end = replace + Z_STRLEN_P(replace_val); } /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - return NULL; - } - num_subpats++; + num_subpats = pce->capture_count + 1; size_offsets = num_subpats * 3; + if (size_offsets <= 32) { + offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap); + } else { + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + } /* - * Build a mapping from subpattern numbers to their names. We will always - * allocate the table, even though there may be no named subpatterns. This - * avoids somewhat more complicated logic in the inner loops. + * Build a mapping from subpattern numbers to their names. We will + * allocate the table only if there are any named subpatterns. 
*/ - subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); - if (!subpat_names) { - return NULL; + subpat_names = NULL; + if (UNEXPECTED(pce->name_count > 0)) { + subpat_names = make_subpats_table(num_subpats, pce); + if (!subpat_names) { + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } + return NULL; + } } - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); - - result = safe_emalloc(subject_len, 2*sizeof(char), 1); - alloc_len = 2 * (size_t)subject_len + 1; + alloc_len = 0; + result = NULL; /* Initialize */ match = NULL; - *result_len = 0; start_offset = 0; + result_len = 0; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; while (1) { +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif /* Execute the regular expression. */ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); @@ -1164,34 +1238,30 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub exoptions |= PCRE_NO_UTF8_CHECK; /* Check for too many substrings condition. 
*/ - if (count == 0) { - php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); - count = size_offsets/3; + if (UNEXPECTED(count == 0)) { + php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); + count = size_offsets / 3; } piece = subject + start_offset; - if (count > 0 && (offsets[1] - offsets[0] >= 0) && (limit == -1 || limit > 0)) { - if (replace_count) { + /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */ + if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) { + if (UNEXPECTED(replace_count)) { ++*replace_count; } + /* Set the match location in subject */ match = subject + offsets[0]; - new_len = *result_len + offsets[0] - start_offset; /* part before the match */ + new_len = result_len + offsets[0] - start_offset; /* part before the match */ - /* If evaluating, do it and add the return string's length */ - if (eval) { - eval_result_len = preg_do_eval(replace, replace_len, subject, - offsets, count, &eval_result TSRMLS_CC); - new_len += eval_result_len; - } else if (is_callable_replace) { - /* Use custom function to get replacement string and its length. 
*/ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC); - new_len += eval_result_len; - } else { /* do regular substitution */ + /* if (!is_callable_replace) */ + if (EXPECTED(replace)) { + /* do regular substitution */ walk = replace; walk_last = 0; + while (walk < replace_end) { if ('\\' == *walk || '$' == *walk) { if (walk_last == '\\') { @@ -1209,29 +1279,23 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub walk++; walk_last = walk[-1]; } - } - if (new_len + 1 > alloc_len) { - new_buf = safe_emalloc(2, new_len + 1, alloc_len); - alloc_len = 1 + alloc_len + 2 * (size_t)new_len; - memcpy(new_buf, result, *result_len); - efree(result); - result = new_buf; - } - /* copy the part of the string before the match */ - memcpy(&result[*result_len], piece, match-piece); - *result_len += match-piece; - - /* copy replacement and backrefs */ - walkbuf = result + *result_len; - - /* If evaluating or using custom function, copy result to the buffer - * and clean up. 
*/ - if (eval || is_callable_replace) { - memcpy(walkbuf, eval_result, eval_result_len); - *result_len += eval_result_len; - STR_FREE(eval_result); - } else { /* do regular backreference copying */ + if (new_len >= alloc_len) { + alloc_len = zend_safe_address_guarded(2, new_len, alloc_len); + if (result == NULL) { + result = zend_string_alloc(alloc_len, 0); + } else { + result = zend_string_extend(result, alloc_len, 0); + } + } + + /* copy the part of the string before the match */ + memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece); + result_len += (match-piece); + + /* copy replacement and backrefs */ + walkbuf = ZSTR_VAL(result) + result_len; + walk = replace; walk_last = 0; while (walk < replace_end) { @@ -1255,13 +1319,37 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } *walkbuf = '\0'; /* increment the result length by how much we've added to the string */ - *result_len += walkbuf - (result + *result_len); + result_len += (walkbuf - (ZSTR_VAL(result) + result_len)); + } else { + /* Use custom function to get replacement string and its length. */ + eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark); + ZEND_ASSERT(eval_result); + new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len); + if (new_len >= alloc_len) { + alloc_len = zend_safe_address_guarded(2, new_len, alloc_len); + if (result == NULL) { + result = zend_string_alloc(alloc_len, 0); + } else { + result = zend_string_extend(result, alloc_len, 0); + } + } + /* copy the part of the string before the match */ + memcpy(ZSTR_VAL(result) + result_len, piece, match-piece); + result_len += (int)(match-piece); + + /* copy replacement and backrefs */ + walkbuf = ZSTR_VAL(result) + result_len; + + /* If using custom function, copy result to the buffer and clean up. 
*/ + memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result)); + result_len += (int)ZSTR_LEN(eval_result); + zend_string_release(eval_result); } - if (limit != -1) + if (EXPECTED(limit)) { limit--; - - } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { + } + } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) { /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values @@ -1271,27 +1359,35 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub offsets[0] = start_offset; offsets[1] = start_offset + unit_len; - memcpy(&result[*result_len], piece, unit_len); - *result_len += unit_len; + memcpy(ZSTR_VAL(result) + result_len, piece, unit_len); + result_len += unit_len; } else { - new_len = *result_len + subject_len - start_offset; - if (new_len + 1 > alloc_len) { - new_buf = safe_emalloc(new_len, sizeof(char), 1); - alloc_len = (size_t)new_len + 1; /* now we know exactly how long it is */ - memcpy(new_buf, result, *result_len); - efree(result); - result = new_buf; + if (!result && subject_str) { + result = zend_string_copy(subject_str); + break; + } + new_len = result_len + subject_len - start_offset; + if (new_len >= alloc_len) { + alloc_len = new_len; /* now we know exactly how long it is */ + if (NULL != result) { + result = zend_string_realloc(result, alloc_len, 0); + } else { + result = zend_string_alloc(alloc_len, 0); + } } /* stick that last bit of string on our output */ - memcpy(&result[*result_len], piece, subject_len - start_offset); - *result_len += subject_len - start_offset; - result[*result_len] = '\0'; + memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset); + result_len += subject_len - start_offset; + ZSTR_VAL(result)[result_len] = '\0'; + ZSTR_LEN(result) = result_len; break; } } else { - pcre_handle_exec_error(count TSRMLS_CC); - efree(result); - result = NULL; + 
pcre_handle_exec_error(count); + if (result) { + zend_string_free(result); + result = NULL; + } break; } @@ -1305,13 +1401,13 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub start_offset = offsets[1]; } - efree(offsets); - efree(subpat_names); - - if(result && (size_t)(*result_len) > INT_MAX) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Result is too big, max is %d", INT_MAX); - efree(result); - result = NULL; + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } + if (UNEXPECTED(subpat_names)) { + efree(subpat_names); } return result; @@ -1320,87 +1416,91 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub /* {{{ php_replace_in_subject */ -static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC) +static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count) { - zval **regex_entry, - **replace_entry = NULL, - *replace_value, - empty_replace; - char *subject_value, - *result; - int subject_len; - - /* Make sure we're dealing with strings. */ - convert_to_string_ex(subject); - /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */ - ZVAL_STRINGL(&empty_replace, "", 0, 0); + zval *regex_entry, + *replace_value, + empty_replace; + zend_string *result; + uint32_t replace_idx; + zend_string *subject_str = zval_get_string(subject); + + /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). 
Check if this zval could be dtor()'ed somehow */ + ZVAL_EMPTY_STRING(&empty_replace); + + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + return NULL; + } /* If regex is an array */ if (Z_TYPE_P(regex) == IS_ARRAY) { - /* Duplicate subject string for repeated replacement */ - subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject)); - subject_len = Z_STRLEN_PP(subject); - *result_len = subject_len; - - zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex)); - replace_value = replace; - if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) - zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace)); + replace_idx = 0; /* For each entry in the regex array, get the entry */ - while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) { + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) { + zval replace_str; /* Make sure we're dealing with strings. */ - convert_to_string_ex(regex_entry); + zend_string *regex_str = zval_get_string(regex_entry); + ZVAL_UNDEF(&replace_str); /* If replace is an array and not a callable construct */ if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { /* Get current entry */ - if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) { + while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) { + if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) { + ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val); + break; + } + replace_idx++; + } + if (!Z_ISUNDEF(replace_str)) { if (!is_callable_replace) { - convert_to_string_ex(replace_entry); + convert_to_string(&replace_str); } - replace_value = *replace_entry; - zend_hash_move_forward(Z_ARRVAL_P(replace)); + replace_value = &replace_str; + replace_idx++; } else { /* We've run out of replacement strings, so use an empty one */ replace_value = &empty_replace; } } - /* Do the actual replacement and put the result back into
subject_value + /* Do the actual replacement and put the result back into subject_str for further replacements. */ - if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry), - Z_STRLEN_PP(regex_entry), - subject_value, - subject_len, + if ((result = php_pcre_replace(regex_str, + subject_str, + ZSTR_VAL(subject_str), + (int)ZSTR_LEN(subject_str), replace_value, is_callable_replace, - result_len, limit, - replace_count TSRMLS_CC)) != NULL) { - efree(subject_value); - subject_value = result; - subject_len = *result_len; + replace_count)) != NULL) { + zend_string_release(subject_str); + subject_str = result; } else { - efree(subject_value); + zend_string_release(subject_str); + zend_string_release(regex_str); + zval_dtor(&replace_str); return NULL; } - zend_hash_move_forward(Z_ARRVAL_P(regex)); - } + zend_string_release(regex_str); + zval_dtor(&replace_str); + } ZEND_HASH_FOREACH_END(); - return subject_value; + return subject_str; } else { - result = php_pcre_replace(Z_STRVAL_P(regex), - Z_STRLEN_P(regex), - Z_STRVAL_PP(subject), - Z_STRLEN_PP(subject), + result = php_pcre_replace(Z_STR_P(regex), + subject_str, + ZSTR_VAL(subject_str), + (int)ZSTR_LEN(subject_str), replace, is_callable_replace, - result_len, limit, - replace_count TSRMLS_CC); + replace_count); + zend_string_release(subject_str); return result; } } @@ -1408,102 +1508,62 @@ static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, /* {{{ preg_replace_impl */ -static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter) +static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter) { - zval **regex, - **replace, - **subject, - **subject_entry, - **zcount = NULL; - char *result; - int result_len; - int limit_val = -1; - long limit = -1; - char *string_key; - uint string_key_len; - ulong num_key; - char *callback_name; - int replace_count=0, 
old_replace_count; + zval *subject_entry; + zend_string *result; + zend_string *string_key; + zend_ulong num_key; + int replace_count = 0, old_replace_count; - /* Get function parameters and do error-checking. */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", &regex, &replace, &subject, &limit, &zcount) == FAILURE) { - return; - } - - if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); - RETURN_FALSE; - } - - SEPARATE_ZVAL(replace); - if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) { + if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) { convert_to_string_ex(replace); } - if (is_callable_replace) { - if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name); - efree(callback_name); - MAKE_COPY_ZVAL(subject, return_value); - return; - } - efree(callback_name); - } - SEPARATE_ZVAL(regex); - SEPARATE_ZVAL(subject); - - if (ZEND_NUM_ARGS() > 3) { - limit_val = limit; - } - - if (Z_TYPE_PP(regex) != IS_ARRAY) + if (Z_TYPE_P(regex) != IS_ARRAY) { convert_to_string_ex(regex); + } /* if subject is an array */ - if (Z_TYPE_PP(subject) == IS_ARRAY) { - array_init(return_value); - zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject)); + if (Z_TYPE_P(subject) == IS_ARRAY) { + array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject))); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array.
*/ - while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) { - SEPARATE_ZVAL(subject_entry); + ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) { old_replace_count = replace_count; - if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { + if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) { if (!is_filter || replace_count > old_replace_count) { /* Add to return array */ - switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL)) - { - case HASH_KEY_IS_STRING: - add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0); - break; + zval zv; - case HASH_KEY_IS_LONG: - add_index_stringl(return_value, num_key, result, result_len, 0); - break; + ZVAL_STR(&zv, result); + if (string_key) { + zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + } else { + zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); } } else { - efree(result); + zend_string_release(result); } } - - zend_hash_move_forward(Z_ARRVAL_PP(subject)); - } - } else { /* if subject is not an array */ + } ZEND_HASH_FOREACH_END(); + } else { + /* if subject is not an array */ old_replace_count = replace_count; - if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { + if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) { if (!is_filter || replace_count > old_replace_count) { - RETVAL_STRINGL(result, result_len, 0); + RETVAL_STR(result); } else { - efree(result); + zend_string_release(result); + RETVAL_NULL(); } + } else { + RETVAL_NULL(); } } - if (ZEND_NUM_ARGS() > 4) { - zval_dtor(*zcount); - ZVAL_LONG(*zcount, 
replace_count); - } + return replace_count; } /* }}} */ @@ -1511,7 +1571,30 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl Perform Perl-style regular expression replacement. */ static PHP_FUNCTION(preg_replace) { - preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); + zval *regex, *replace, *subject, *zcount = NULL; + zend_long limit = -1; + int replace_count; + + /* Get function parameters and do error-checking. */ + ZEND_PARSE_PARAMETERS_START(3, 5) + Z_PARAM_ZVAL(regex) + Z_PARAM_ZVAL(replace) + Z_PARAM_ZVAL(subject) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(limit) + Z_PARAM_ZVAL_EX(zcount, 0, 1) + ZEND_PARSE_PARAMETERS_END(); + + if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { + php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); + RETURN_FALSE; + } + + replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0); + if (zcount) { + zval_ptr_dtor(zcount); + ZVAL_LONG(zcount, replace_count); + } } /* }}} */ @@ -1519,7 +1602,96 @@ static PHP_FUNCTION(preg_replace) Perform Perl-style regular expression replacement using replacement callback. */ static PHP_FUNCTION(preg_replace_callback) { - preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0); + zval *regex, *replace, *subject, *zcount = NULL; + zend_long limit = -1; + zend_string *callback_name; + int replace_count; + + /* Get function parameters and do error-checking. 
*/ + ZEND_PARSE_PARAMETERS_START(3, 5) + Z_PARAM_ZVAL(regex) + Z_PARAM_ZVAL(replace) + Z_PARAM_ZVAL(subject) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(limit) + Z_PARAM_ZVAL_EX(zcount, 0, 1) + ZEND_PARSE_PARAMETERS_END(); + + if (!zend_is_callable(replace, 0, &callback_name)) { + php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name)); + zend_string_release(callback_name); + ZVAL_COPY(return_value, subject); + return; + } + zend_string_release(callback_name); + + replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0); + if (zcount) { + zval_ptr_dtor(zcount); + ZVAL_LONG(zcount, replace_count); + } +} +/* }}} */ + +/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]]) + Perform Perl-style regular expression replacement using replacement callback. */ +static PHP_FUNCTION(preg_replace_callback_array) +{ + zval regex, zv, *replace, *subject, *pattern, *zcount = NULL; + zend_long limit = -1; + zend_string *str_idx; + zend_string *callback_name; + int replace_count = 0; + + /* Get function parameters and do error-checking. 
*/ + ZEND_PARSE_PARAMETERS_START(2, 4) + Z_PARAM_ARRAY(pattern) + Z_PARAM_ZVAL(subject) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(limit) + Z_PARAM_ZVAL_EX(zcount, 0, 1) + ZEND_PARSE_PARAMETERS_END(); + + ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) { + if (str_idx) { + ZVAL_STR_COPY(&regex, str_idx); + } else { + php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash"); + zval_ptr_dtor(return_value); + RETURN_NULL(); + } + + if (!zend_is_callable(replace, 0, &callback_name)) { + php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name)); + zend_string_release(callback_name); + zval_ptr_dtor(&regex); + zval_ptr_dtor(return_value); + ZVAL_COPY(return_value, subject); + return; + } + zend_string_release(callback_name); + + if (Z_ISNULL_P(return_value)) { + replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0); + } else { + replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0); + zval_ptr_dtor(return_value); + } + + zval_ptr_dtor(&regex); + + ZVAL_COPY_VALUE(return_value, &zv); + + if (UNEXPECTED(EG(exception))) { + zval_ptr_dtor(return_value); + RETURN_NULL(); + } + } ZEND_HASH_FOREACH_END(); + + if (zcount) { + zval_ptr_dtor(zcount); + ZVAL_LONG(zcount, replace_count); + } } /* }}} */ @@ -1527,7 +1699,30 @@ static PHP_FUNCTION(preg_replace_callback) Perform Perl-style regular expression replacement and only return matches. */ static PHP_FUNCTION(preg_filter) { - preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); + zval *regex, *replace, *subject, *zcount = NULL; + zend_long limit = -1; + int replace_count; + + /* Get function parameters and do error-checking. 
*/ + ZEND_PARSE_PARAMETERS_START(3, 5) + Z_PARAM_ZVAL(regex) + Z_PARAM_ZVAL(replace) + Z_PARAM_ZVAL(subject) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(limit) + Z_PARAM_ZVAL_EX(zcount, 0, 1) + ZEND_PARSE_PARAMETERS_END(); + + if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { + php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); + RETURN_FALSE; + } + + replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1); + if (zcount) { + zval_ptr_dtor(zcount); + ZVAL_LONG(zcount, replace_count); + } } /* }}} */ @@ -1535,27 +1730,33 @@ static PHP_FUNCTION(preg_filter) Split string into an array using a perl-style regular expression as a delimiter */ static PHP_FUNCTION(preg_split) { - char *regex; /* Regular expression */ - char *subject; /* String to match against */ - int regex_len; - int subject_len; - long limit_val = -1;/* Integer value of limit */ - long flags = 0; /* Match control flags */ + zend_string *regex; /* Regular expression */ + zend_string *subject; /* String to match against */ + zend_long limit_val = -1;/* Integer value of limit */ + zend_long flags = 0; /* Match control flags */ pcre_cache_entry *pce; /* Compiled regular expression */ /* Get function parameters and do error checking */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex, &regex_len, - &subject, &subject_len, &limit_val, &flags) == FAILURE) { - RETURN_FALSE; + ZEND_PARSE_PARAMETERS_START(2, 4) + Z_PARAM_STR(regex) + Z_PARAM_STR(subject) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(limit_val) + Z_PARAM_LONG(flags) + ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE); + + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + RETURN_FALSE; } /* Compile regex or get it from cache. 
*/ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { RETURN_FALSE; } pce->refcount++; - php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC); + php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags); pce->refcount--; } /* }}} */ @@ -1563,11 +1764,9 @@ static PHP_FUNCTION(preg_split) /* {{{ php_pcre_split */ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, - long limit_val, long flags TSRMLS_DC) + zend_long limit_val, zend_long flags) { - pcre_extra *extra = NULL; /* Holds results of studying */ - pcre *re_bump = NULL; /* Regex instance for empty matches */ - pcre_extra *extra_bump = NULL; /* Almost dummy */ + pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ @@ -1577,10 +1776,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec int next_offset; /* End of the last delimiter match + 1 */ int g_notempty = 0; /* If the match should not be empty */ char *last_match; /* Location of last match */ - int rc; int no_empty; /* If NO_EMPTY flag is set */ int delim_capture; /* If delimiters should be captured */ int offset_capture; /* If offsets should be captured */ + zval tmp; + ALLOCA_FLAG(use_heap); no_empty = flags & PREG_SPLIT_NO_EMPTY; delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; @@ -1594,8 +1794,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; } - extra->match_limit = PCRE_G(backtrack_limit); - extra->match_limit_recursion = PCRE_G(recursion_limit); + extra->match_limit = (unsigned 
long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); #ifdef PCRE_EXTRA_MARK extra->flags &= ~PCRE_EXTRA_MARK; #endif @@ -1604,13 +1804,12 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec array_init(return_value); /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - RETURN_FALSE; + size_offsets = (pce->capture_count + 1) * 3; + if (size_offsets <= 32) { + offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap); + } else { + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); } - size_offsets = (size_offsets + 1) * 3; - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* Start at the beginning of the string */ start_offset = 0; @@ -1629,7 +1828,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec /* Check for too many substrings condition. 
*/ if (count == 0) { - php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); + php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } @@ -1639,11 +1838,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL); + add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL); } else { /* Add the piece to the return value */ - add_next_index_stringl(return_value, last_match, - &subject[offsets[0]]-last_match, 1); + ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match); + zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } /* One less left to do */ @@ -1663,9 +1862,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); } else { - add_next_index_stringl(return_value, - &subject[offsets[i<<1]], - match_len, 1); + ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len); + zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } } } @@ -1676,29 +1874,13 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. 
*/ if (g_notempty != 0 && start_offset < subject_len) { - if (pce->compile_options & PCRE_UTF8) { - if (re_bump == NULL) { - int dummy; - - if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) { - RETURN_FALSE; - } - } - count = pcre_exec(re_bump, extra_bump, subject, - subject_len, start_offset, - exoptions, offsets, size_offsets); - if (count < 1) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); - RETURN_FALSE; - } - } else { - offsets[0] = start_offset; - offsets[1] = start_offset + 1; - } - } else + offsets[0] = start_offset; + offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset); + } else { break; + } } else { - pcre_handle_exec_error(count TSRMLS_CC); + pcre_handle_exec_error(count); break; } @@ -1713,7 +1895,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } - start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */ + start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */ if (!no_empty || start_offset < subject_len) { @@ -1722,13 +1904,18 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); } else { /* Add the last piece to the return value */ - add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1); + ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match); + zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } } /* Clean up */ - efree(offsets); + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } } /* }}} */ @@ -1736,23 +1923,24 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec Quote regular expression characters plus an optional character */ 
static PHP_FUNCTION(preg_quote) { - int in_str_len; + size_t in_str_len; char *in_str; /* Input string argument */ char *in_str_end; /* End of the input string */ - int delim_len = 0; + size_t delim_len = 0; char *delim = NULL; /* Additional delimiter argument */ - char *out_str, /* Output string with quoted characters */ - *p, /* Iterator for input string */ + zend_string *out_str; /* Output string with quoted characters */ + char *p, /* Iterator for input string */ *q, /* Iterator for output string */ delim_char=0, /* Delimiter character to be quoted */ c; /* Current character */ zend_bool quote_delim = 0; /* Whether to quote additional delim char */ /* Get the arguments and check for errors */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len, - &delim, &delim_len) == FAILURE) { - return; - } + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(in_str, in_str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(delim, delim_len) + ZEND_PARSE_PARAMETERS_END(); in_str_end = in_str + in_str_len; @@ -1768,10 +1956,10 @@ static PHP_FUNCTION(preg_quote) /* Allocate enough memory so that even if each character is quoted, we won't run out of room */ - out_str = safe_emalloc_string(4, in_str_len, 1); + out_str = zend_string_safe_alloc(4, in_str_len, 0, 0); /* Go through the string and quote necessary characters */ - for(p = in_str, q = out_str; p != in_str_end; p++) { + for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) { c = *p; switch(c) { case '.': @@ -1815,7 +2003,8 @@ static PHP_FUNCTION(preg_quote) *q = '\0'; /* Reallocate string and return it */ - RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); + out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0); + RETURN_NEW_STR(out_str); } /* }}} */ @@ -1823,43 +2012,43 @@ static PHP_FUNCTION(preg_quote) Searches array and returns entries which match regex */ static PHP_FUNCTION(preg_grep) { - char *regex; /* Regular expression */ - int regex_len; + zend_string 
*regex; /* Regular expression */ zval *input; /* Input array */ - long flags = 0; /* Match control flags */ + zend_long flags = 0; /* Match control flags */ pcre_cache_entry *pce; /* Compiled regular expression */ /* Get arguments and do error checking */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len, - &input, &flags) == FAILURE) { - return; - } + ZEND_PARSE_PARAMETERS_START(2, 3) + Z_PARAM_STR(regex) + Z_PARAM_ARRAY(input) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(flags) + ZEND_PARSE_PARAMETERS_END(); /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { RETURN_FALSE; } pce->refcount++; - php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC); + php_pcre_grep_impl(pce, input, return_value, flags); pce->refcount--; } /* }}} */ -PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */ +PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */ { - zval **entry; /* An entry in the input array */ + zval *entry; /* An entry in the input array */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ - char *string_key; - uint string_key_len; - ulong num_key; + zend_string *string_key; + zend_ulong num_key; zend_bool invert; /* Whether to return non-matching entries */ - int rc; + ALLOCA_FLAG(use_heap); invert = flags & PREG_GREP_INVERT ? 
1 : 0; @@ -1867,20 +2056,19 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; } - extra->match_limit = PCRE_G(backtrack_limit); - extra->match_limit_recursion = PCRE_G(recursion_limit); + extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); #ifdef PCRE_EXTRA_MARK extra->flags &= ~PCRE_EXTRA_MARK; #endif /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - RETURN_FALSE; + size_offsets = (pce->capture_count + 1) * 3; + if (size_offsets <= 32) { + offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap); + } else { + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); } - size_offsets = (size_offsets + 1) * 3; - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* Initialize return array */ array_init(return_value); @@ -1888,58 +2076,47 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return PCRE_G(error_code) = PHP_PCRE_NO_ERROR; /* Go through the input array */ - zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); - while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { - zval subject = **entry; - - if (Z_TYPE_PP(entry) != IS_STRING) { - zval_copy_ctor(&subject); - convert_to_string(&subject); - } + ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) { + zend_string *subject_str = zval_get_string(entry); /* Perform the match */ - count = pcre_exec(pce->re, extra, Z_STRVAL(subject), - Z_STRLEN(subject), 0, + count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str), + (int)ZSTR_LEN(subject_str), 0, 0, offsets, size_offsets); /* Check for too many 
substrings condition. */ if (count == 0) { - php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); + php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } else if (count < 0 && count != PCRE_ERROR_NOMATCH) { - pcre_handle_exec_error(count TSRMLS_CC); + pcre_handle_exec_error(count); + zend_string_release(subject_str); break; } /* If the entry fits our requirements */ if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { - - Z_ADDREF_PP(entry); + if (Z_REFCOUNTED_P(entry)) { + Z_ADDREF_P(entry); + } /* Add to return array */ - switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL)) - { - case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key, - string_key_len, entry, sizeof(zval *), NULL); - break; - - case HASH_KEY_IS_LONG: - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, - sizeof(zval *), NULL); - break; + if (string_key) { + zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); + } else { + zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); } } - if (Z_TYPE_PP(entry) != IS_STRING) { - zval_dtor(&subject); - } + zend_string_release(subject_str); + } ZEND_HASH_FOREACH_END(); - zend_hash_move_forward(Z_ARRVAL_P(input)); - } - zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); /* Clean up */ - efree(offsets); + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } } /* }}} */ @@ -1947,9 +2124,8 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return Returns the error code of the last regexp execution. 
*/ static PHP_FUNCTION(preg_last_error) { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) { - return; - } + ZEND_PARSE_PARAMETERS_START(0, 0) + ZEND_PARSE_PARAMETERS_END(); RETURN_LONG(PCRE_G(error_code)); } @@ -1990,6 +2166,13 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) ZEND_ARG_INFO(1, count) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, subject) + ZEND_ARG_INFO(0, limit) + ZEND_ARG_INFO(1, count) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) ZEND_ARG_INFO(0, pattern) ZEND_ARG_INFO(0, subject) @@ -2013,15 +2196,16 @@ ZEND_END_ARG_INFO() /* }}} */ static const zend_function_entry pcre_functions[] = { - PHP_FE(preg_match, arginfo_preg_match) - PHP_FE(preg_match_all, arginfo_preg_match_all) - PHP_FE(preg_replace, arginfo_preg_replace) - PHP_FE(preg_replace_callback, arginfo_preg_replace_callback) - PHP_FE(preg_filter, arginfo_preg_replace) - PHP_FE(preg_split, arginfo_preg_split) - PHP_FE(preg_quote, arginfo_preg_quote) - PHP_FE(preg_grep, arginfo_preg_grep) - PHP_FE(preg_last_error, arginfo_preg_last_error) + PHP_FE(preg_match, arginfo_preg_match) + PHP_FE(preg_match_all, arginfo_preg_match_all) + PHP_FE(preg_replace, arginfo_preg_replace) + PHP_FE(preg_replace_callback, arginfo_preg_replace_callback) + PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array) + PHP_FE(preg_filter, arginfo_preg_replace) + PHP_FE(preg_split, arginfo_preg_split) + PHP_FE(preg_quote, arginfo_preg_quote) + PHP_FE(preg_grep, arginfo_preg_grep) + PHP_FE(preg_last_error, arginfo_preg_last_error) PHP_FE_END }; @@ -2031,10 +2215,14 @@ zend_module_entry pcre_module_entry = { pcre_functions, PHP_MINIT(pcre), PHP_MSHUTDOWN(pcre), +#ifdef HAVE_PCRE_JIT_SUPPORT + PHP_RINIT(pcre), +#else NULL, +#endif NULL, PHP_MINFO(pcre), - NO_VERSION_YET, + PHP_PCRE_VERSION, PHP_MODULE_GLOBALS(pcre), PHP_GINIT(pcre), 
PHP_GSHUTDOWN(pcre), diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h index 59b759e4a2..5ce3cedcda 100644 --- a/ext/pcre/php_pcre.h +++ b/ext/pcre/php_pcre.h @@ -1,6 +1,6 @@ /* +----------------------------------------------------------------------+ - | PHP Version 5 | + | PHP Version 7 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2016 The PHP Group | +----------------------------------------------------------------------+ @@ -15,7 +15,7 @@ | Author: Andrei Zmievski <andrei@php.net> | +----------------------------------------------------------------------+ */ - + /* $Id$ */ #ifndef PHP_PCRE_H @@ -33,51 +33,56 @@ #include <locale.h> #endif -PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC); -PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options TSRMLS_DC); -PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *coptions TSRMLS_DC); +PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count); +PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *options); +PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *coptions); extern zend_module_entry pcre_module_entry; #define pcre_module_ptr &pcre_module_entry +#include "php_version.h" +#define PHP_PCRE_VERSION PHP_VERSION + typedef struct { pcre *re; pcre_extra *extra; int preg_options; + int capture_count; + int name_count; #if HAVE_SETLOCALE - char *locale; + zend_string *locale; unsigned const char *tables; #endif int compile_options; int refcount; } pcre_cache_entry; -PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int 
regex_len TSRMLS_DC); +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex); PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, - zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC); + zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset); -PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, - int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC); +PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *return_value, + int is_callable_replace, int limit, int *replace_count); PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, - long limit_val, long flags TSRMLS_DC); + zend_long limit_val, zend_long flags); PHPAPI void php_pcre_grep_impl( pcre_cache_entry *pce, zval *input, zval *return_value, - long flags TSRMLS_DC); + zend_long flags); ZEND_BEGIN_MODULE_GLOBALS(pcre) HashTable pcre_cache; - long backtrack_limit; - long recursion_limit; + zend_long backtrack_limit; + zend_long recursion_limit; +#ifdef HAVE_PCRE_JIT_SUPPORT + zend_bool jit; +#endif int error_code; ZEND_END_MODULE_GLOBALS(pcre) -#ifdef ZTS -# define PCRE_G(v) TSRMG(pcre_globals_id, zend_pcre_globals *, v) -#else -# define PCRE_G(v) (pcre_globals.v) -#endif +PHPAPI ZEND_EXTERN_MODULE_GLOBALS(pcre) +#define PCRE_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(pcre, v) #else diff --git a/ext/pcre/tests/002.phpt b/ext/pcre/tests/002.phpt index fd245633d3..42b63c7d1b 100644 --- a/ext/pcre/tests/002.phpt +++ b/ext/pcre/tests/002.phpt @@ -34,9 +34,5 @@ string(12) "a${1b${1c${1" Warning: preg_replace(): Compilation failed: missing terminating ] for character class at offset 8 in %s002.php on line 11 NULL -Deprecated: preg_replace(): The /e modifier is deprecated, use 
preg_replace_callback instead in %s on line 12 - -Parse error: %s in %s002.php(12) : regexp code on line 1 - -Fatal error: preg_replace(): Failed evaluating code: -for ($ in %s002.php on line 12 +Warning: preg_replace(): The /e modifier is no longer supported, use preg_replace_callback instead in %s on line 12 +NULL diff --git a/ext/pcre/tests/004.phpt b/ext/pcre/tests/004.phpt index 1fae406b16..11361d1b32 100644 --- a/ext/pcre/tests/004.phpt +++ b/ext/pcre/tests/004.phpt @@ -12,8 +12,6 @@ var_dump($m); var_dump(preg_match_all('/zend_parse_parameters(?:_ex\s*\([^,]+,[^,]+|\s*\([^,]+),\s*"([^"]*)"\s*,\s*([^{;]*)/S', 'zend_parse_parameters( 0, "addd|s/", a, b, &c);', $m, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)); var_dump($m); -var_dump(preg_replace(array('@//.*@S', '@/\*.*\*/@SsUe'), array('', 'preg_replace("/[^\r\n]+/S", "", \'$0\')'), "hello\n//x \n/*\ns\n*/")); - var_dump(preg_split('/PHP_(?:NAMED_)?(?:FUNCTION|METHOD)\s*\((\w+(?:,\s*\w+)?)\)/S', "PHP_FUNCTION(s, preg_match)\n{\nlalala", -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE)); ?> --EXPECTF-- @@ -117,13 +115,6 @@ array(1) { } } } - -Deprecated: preg_replace(): The /e modifier is deprecated, use preg_replace_callback instead in %s on line %d -string(9) "hello - - - -" array(3) { [0]=> array(2) { diff --git a/ext/pcre/tests/006.phpt b/ext/pcre/tests/006.phpt index 2d39b6ea38..a16b4fbe71 100644 --- a/ext/pcre/tests/006.phpt +++ b/ext/pcre/tests/006.phpt @@ -1,6 +1,7 @@ --TEST-- preg_replace() with array of failing regular expressions --INI-- +pcre.jit=0 pcre.backtrack_limit=100000 --FILE-- <?php diff --git a/ext/pcre/tests/backtrack_limit.phpt b/ext/pcre/tests/backtrack_limit.phpt index 419e6c2009..3f0d8e6446 100644 --- a/ext/pcre/tests/backtrack_limit.phpt +++ b/ext/pcre/tests/backtrack_limit.phpt @@ -8,6 +8,7 @@ if (@preg_match_all('/\p{N}/', '0123456789', $dummy) === false) { ?> --INI-- pcre.backtrack_limit=2 +pcre.jit=0 --FILE-- <?php diff --git a/ext/pcre/tests/bug21758.phpt 
b/ext/pcre/tests/bug21758.phpt index 78a1d6a747..db599196f2 100644 --- a/ext/pcre/tests/bug21758.phpt +++ b/ext/pcre/tests/bug21758.phpt @@ -3,7 +3,7 @@ Bug #21758 (preg_replace_callback() not working with class methods) --FILE-- <?php class Foo { - function foo() { + function __construct() { $s = 'preg_replace() is broken'; diff --git a/ext/pcre/tests/bug47662.phpt b/ext/pcre/tests/bug47662.phpt index d6056746cb..abf65e6ca9 100644 --- a/ext/pcre/tests/bug47662.phpt +++ b/ext/pcre/tests/bug47662.phpt @@ -4,9 +4,10 @@ Bug #47662 (support more than 127 named subpatterns) <?php $regex = '@'; -for($bar=0; $bar<1027; $bar++) { +for($bar=0; $bar<129; $bar++) { $regex .= '((?P<x'.$bar.'>))'; } + $regex .= 'fo+bar@'; var_dump(preg_match($regex, 'foobar')); diff --git a/ext/pcre/tests/bug69864.phpt b/ext/pcre/tests/bug69864.phpt index d59aaab36e..cf7ba5b72e 100644 --- a/ext/pcre/tests/bug69864.phpt +++ b/ext/pcre/tests/bug69864.phpt @@ -6,6 +6,8 @@ if (getenv("SKIP_SLOW_TESTS")) die("skip slow test"); ?>
--FILE--
<?php
+/* CAUTION: this test will most likely fail with valgrind until --smc-check=all is used. */
+
const PREG_CACHE_SIZE = 4096; // this has to be >= the resp. constant in php_pcre.c
var_dump(preg_replace_callback('/a/', function($m) {
diff --git a/ext/pcre/tests/bug71537.phpt b/ext/pcre/tests/bug71537.phpt new file mode 100644 index 0000000000..cdc2928a28 --- /dev/null +++ b/ext/pcre/tests/bug71537.phpt @@ -0,0 +1,9 @@ +--TEST-- +Fixed bug #71537 (PCRE segfault from Opcache) +--FILE-- +<?php + +var_dump(preg_replace(array("/Monkey/"), array(2016), "Happy Year of Monkey")); +?> +--EXPECT-- +string(18) "Happy Year of 2016" diff --git a/ext/pcre/tests/bug72463.phpt b/ext/pcre/tests/bug72463.phpt new file mode 100644 index 0000000000..b40a721998 --- /dev/null +++ b/ext/pcre/tests/bug72463.phpt @@ -0,0 +1,20 @@ +--TEST-- +Bug #72463 mail fails with invalid argument +--SKIPIF-- +<?php +if(substr(PHP_OS, 0, 3) == "WIN") { + die('skip not for windows'); +} +?> +--INI-- +sendmail_path="echo >/dev/null" +--FILE-- +<?php + +mail("some.address.it.wont.ever.reach@lookup.and.try.to.find.this.host.name","subject","a", ""); +mail("some.address.it.wont.ever.reach@lookup.and.try.to.find.this.host.name","subject","a", NULL); + +?> +===DONE=== +--EXPECTREGEX-- +.*===DONE=== diff --git a/ext/pcre/tests/bug72463_2.phpt b/ext/pcre/tests/bug72463_2.phpt new file mode 100644 index 0000000000..1baeb0f2a1 --- /dev/null +++ b/ext/pcre/tests/bug72463_2.phpt @@ -0,0 +1,20 @@ +--TEST-- +Bug #72463 mail fails with invalid argument +--SKIPIF-- +<?php +if(substr(PHP_OS, 0, 3) != "WIN") { + die('skip windows only'); +} +?> +--INI-- +SMTP=non.existent.smtp.server +--FILE-- +<?php + +mail("some.address.it.wont.ever.reach@lookup.and.try.to.find.this.host.name","subject","a", ""); +mail("some.address.it.wont.ever.reach@lookup.and.try.to.find.this.host.name","subject","a", NULL); + +?> +===DONE=== +--EXPECTREGEX-- +.*===DONE=== diff --git a/ext/pcre/tests/bug73392.phpt b/ext/pcre/tests/bug73392.phpt new file mode 100644 index 0000000000..2b432543b8 --- /dev/null +++ b/ext/pcre/tests/bug73392.phpt @@ -0,0 +1,28 @@ +--TEST-- +Bug #73392 (A use-after-free in zend allocator management) +--FILE-- +<?php +class Rep { + public function 
__invoke() { + return "d"; + } +} +class Foo { + public static function rep($rep) { + return "ok"; + } +} +function b() { + return "b"; +} +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', "/b/" => function () { return "c"; }, "/c/" => new Rep, "reporting" => array("Foo", "rep"), "a1" => array("Foo", "rep"), + ), 'a')); +?> + +--EXPECTF-- +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric or backslash in %sbug73392.php on line %d + +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric or backslash in %sbug73392.php on line %d +NULL diff --git a/ext/pcre/tests/bug73483.phpt b/ext/pcre/tests/bug73483.phpt new file mode 100644 index 0000000000..fd10702527 --- /dev/null +++ b/ext/pcre/tests/bug73483.phpt @@ -0,0 +1,15 @@ +--TEST-- +Bug #73483 (Segmentation fault on pcre_replace_callback) +--FILE-- +<?php +$regex = "#dummy#"; +setlocale(LC_ALL, "C"); +var_dump(preg_replace_callback($regex, function (array $matches) use($regex) { + setlocale(LC_ALL, "en_US"); + $ret = preg_replace($regex, "okey", $matches[0]); + setlocale(LC_ALL, "C"); + return $ret; +}, "dummy")); +?> +--EXPECT-- +string(4) "okey" diff --git a/ext/pcre/tests/bug73612.phpt b/ext/pcre/tests/bug73612.phpt new file mode 100644 index 0000000000..707e10bce6 --- /dev/null +++ b/ext/pcre/tests/bug73612.phpt @@ -0,0 +1,27 @@ +--TEST--
+Bug #73612 (preg_*() may leak memory)
+--FILE--
+<?php
+$obj = new stdClass;
+$obj->obj = $obj;
+preg_match('/./', 'x', $obj);
+
+$obj = new stdClass;
+$obj->obj = $obj;
+preg_replace('/./', '', 'x', -1, $obj);
+
+$obj = new stdClass;
+$obj->obj = $obj;
+preg_replace_callback('/./', 'count', 'x', -1, $obj);
+
+$obj = new stdClass;
+$obj->obj = $obj;
+preg_replace_callback_array(['/./' => 'count'], 'x', -1, $obj);
+
+$obj = new stdClass;
+$obj->obj = $obj;
+preg_filter('/./', '', 'x', -1, $obj);
+?>
+===DONE===
+--EXPECT--
+===DONE===
diff --git a/ext/pcre/tests/check_jit_enabled.phpt b/ext/pcre/tests/check_jit_enabled.phpt new file mode 100644 index 0000000000..de6e263e70 --- /dev/null +++ b/ext/pcre/tests/check_jit_enabled.phpt @@ -0,0 +1,19 @@ +--TEST-- +Check for JIT enablement status +--SKIPIF-- +<?php +if (ini_get("pcre.jit") === FALSE) { + die("skip no jit built"); +} +--FILE-- +<?php + +ob_start(); +phpinfo(); +$info = ob_get_contents(); +ob_end_clean(); + +var_dump(preg_match(",PCRE JIT Support .* enabled,", $info)); +?> +--EXPECT-- +int(1) diff --git a/ext/pcre/tests/preg_match_error3.phpt b/ext/pcre/tests/preg_match_error3.phpt new file mode 100644 index 0000000000..2e91e24466 --- /dev/null +++ b/ext/pcre/tests/preg_match_error3.phpt @@ -0,0 +1,15 @@ +--TEST-- +Test preg_match() function : error conditions - jit stacklimit exhausted +--SKIPIF-- +<?php +if (ini_get("pcre.jit") === FALSE) { + die("skip no jit built"); +} +--FILE-- +<?php +var_dump(preg_match('/^(foo)+$/', str_repeat('foo', 1024*8192))); +var_dump(preg_last_error() === PREG_JIT_STACKLIMIT_ERROR); +?> +--EXPECT-- +bool(false) +bool(true) diff --git a/ext/pcre/tests/preg_match_error4.phpt b/ext/pcre/tests/preg_match_error4.phpt new file mode 100644 index 0000000000..06aa82b469 --- /dev/null +++ b/ext/pcre/tests/preg_match_error4.phpt @@ -0,0 +1,935 @@ +--TEST-- +Pattern exhausting PCRE JIT stack +--FILE-- +<?php + +$re = 
'{^(\\s*\\{\\s*(?:"(?:[^\\0-\\x09\\x0a-\\x1f\\\\"]+|\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4})*"\\s*:\\s*(?:[0-9.]+|null|true|false|"(?:[^\\0-\\x09\\x0a-\\x1f\\\\"]+|\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4})*"|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[[^\\]]*\\])*\\])*\\])*\\]|(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{[^{}]*\\})*\\})*\\})*\\})*)*\\]|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{[^{}]*\\})*\\})*\\})*\\})*\\})\\s*,\\s*)*?)("require"\\s*:\\s*)((?:[0-9.]+|null|true|false|"(?:[^\\0-\\x09\\x0a-\\x1f\\\\"]+|\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4})*"|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[(?:[^\\]]*|\\[[^\\]]*\\])*\\])*\\])*\\]|(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{[^{}]*\\})*\\})*\\})*\\})*)*\\]|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{(?:[^{}]*|\\{[^{}]*\\})*\\})*\\})*\\})*\\}))(.*)}s'; + +$str = '{ + "config": { + "cache-files-ttl": 0, + "discard-changes": true + }, + "minimum-stability": "stable", + "prefer-stable": false, + "provide": { + "heroku-sys\\/cedar": "14.2016.03.12" + }, + "repositories": [ + { + "packagist": false + }, + { + "type": "path", + "url": "\\/tmp\\/buildpacktUY7k\\/support\\/installer\\/", + "options": { + "symlink": false + } + }, + { + "type": "composer", + "url": "https:\\/\\/lang-php.s3.amazonaws.com\\/dist-cedar-14-stable\\/" + }, + { + "type": "package", + "package": [ + { + "type": "metapackage", + "name": "algolia\\/algoliasearch-client-php", + "version": "1.8.1", + "require": { + "heroku-sys\\/ext-mbstring": "*", + "heroku-sys\\/php": ">=5.4" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "algolia\\/algoliasearch-laravel", + "version": "1.0.10", + "require": { + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "asm89\\/stack-cors", + "version": "0.2.1", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + 
"conflict": [] + }, + { + "type": "metapackage", + "name": "aws\\/aws-sdk-php", + "version": "3.15.7", + "require": { + "heroku-sys\\/php": ">=5.5" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "aws\\/aws-sdk-php-laravel", + "version": "3.1.0", + "require": { + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "barryvdh\\/laravel-cors", + "version": "v0.7.3", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "bookingsync\\/oauth2-bookingsync-php", + "version": "0.1.3", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "classpreloader\\/classpreloader", + "version": "3.0.0", + "require": { + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "danielstjules\\/stringy", + "version": "1.10.0", + "require": { + "heroku-sys\\/ext-mbstring": "*", + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "dnoegel\\/php-xdg-base-dir", + "version": "0.1", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/annotations", + "version": "v1.2.7", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/cache", + "version": "v1.6.0", + "require": { + "heroku-sys\\/php": "~5.5|~7.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/collections", + "version": "v1.3.0", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + 
"provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/common", + "version": "v2.6.1", + "require": { + "heroku-sys\\/php": "~5.5|~7.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/dbal", + "version": "v2.5.4", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/inflector", + "version": "v1.1.0", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "doctrine\\/lexer", + "version": "v1.0.1", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "fabpot\\/goutte", + "version": "v3.1.2", + "require": { + "heroku-sys\\/php": ">=5.5.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "graham-campbell\\/manager", + "version": "v2.3.1", + "require": { + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "guzzle\\/guzzle", + "version": "v3.9.3", + "require": { + "heroku-sys\\/ext-curl": "*", + "heroku-sys\\/php": ">=5.3.3" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "guzzlehttp\\/guzzle", + "version": "6.1.1", + "require": { + "heroku-sys\\/php": ">=5.5.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "guzzlehttp\\/promises", + "version": "1.1.0", + "require": { + "heroku-sys\\/php": ">=5.5.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "guzzlehttp\\/psr7", + "version": "1.2.3", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + 
"type": "metapackage", + "name": "intercom\\/intercom-php", + "version": "v1.4.0", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "intervention\\/image", + "version": "2.3.6", + "require": { + "heroku-sys\\/ext-fileinfo": "*", + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "jakub-onderka\\/php-console-color", + "version": "0.1", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "jakub-onderka\\/php-console-highlighter", + "version": "v0.3.2", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "jeremeamia\\/SuperClosure", + "version": "2.2.0", + "require": { + "heroku-sys\\/php": ">=5.4" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "jlapp\\/swaggervel", + "version": "dev-master", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "laravel\\/framework", + "version": "v5.1.31", + "require": { + "heroku-sys\\/ext-mbstring": "*", + "heroku-sys\\/ext-openssl": "*", + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "laravelcollective\\/html", + "version": "v5.1.9", + "require": { + "heroku-sys\\/php": ">=5.5.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "league\\/flysystem", + "version": "1.0.18", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "league\\/flysystem-aws-s3-v3", + "version": "1.0.9", + "require": { + 
"heroku-sys\\/php": ">=5.5.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "league\\/fractal", + "version": "0.13.0", + "require": { + "heroku-sys\\/php": ">=5.4" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "league\\/glide", + "version": "1.0.0", + "require": { + "heroku-sys\\/php": "^5.4 | ^7.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "league\\/oauth2-client", + "version": "0.12.1", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "mindscape\\/raygun4php", + "version": "dev-master", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "monolog\\/monolog", + "version": "1.18.0", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "mtdowling\\/cron-expression", + "version": "v1.1.0", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "mtdowling\\/jmespath.php", + "version": "2.3.0", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "namshi\\/jose", + "version": "5.0.2", + "require": { + "heroku-sys\\/php": ">=5.3.3" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "nesbot\\/carbon", + "version": "1.21.0", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "nikic\\/php-parser", + "version": "v2.0.1", + "require": { + "heroku-sys\\/ext-tokenizer": "*", + "heroku-sys\\/php": ">=5.4" + 
}, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "orangehill\\/iseed", + "version": "dev-master", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "paragonie\\/random_compat", + "version": "v1.2.1", + "require": { + "heroku-sys\\/php": ">=5.2.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "phpseclib\\/phpseclib", + "version": "0.3.10", + "require": { + "heroku-sys\\/php": ">=5.0.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "predis\\/predis", + "version": "v1.0.3", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "psr\\/http-message", + "version": "1.0", + "require": { + "heroku-sys\\/php": ">=5.3.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "psy\\/psysh", + "version": "v0.7.1", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "swiftmailer\\/swiftmailer", + "version": "v5.4.1", + "require": { + "heroku-sys\\/php": ">=5.3.3" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/browser-kit", + "version": "v2.8.3", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/console", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/css-selector", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { 
+ "type": "metapackage", + "name": "symfony\\/debug", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/dom-crawler", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/event-dispatcher", + "version": "v2.8.3", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/finder", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/http-foundation", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/http-kernel", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/polyfill-php56", + "version": "v1.1.0", + "require": { + "heroku-sys\\/php": ">=5.3.3" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/polyfill-util", + "version": "v1.1.0", + "require": { + "heroku-sys\\/php": ">=5.3.3" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/process", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/routing", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": 
"symfony\\/translation", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "symfony\\/var-dumper", + "version": "v2.7.10", + "require": { + "heroku-sys\\/php": ">=5.3.9" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "twilio\\/sdk", + "version": "4.10.0", + "require": { + "heroku-sys\\/php": ">=5.2.1" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "tymon\\/jwt-auth", + "version": "0.5.9", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "vinkla\\/algolia", + "version": "2.2.1", + "require": { + "heroku-sys\\/php": "^5.5.9 || ^7.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "vlucas\\/phpdotenv", + "version": "v1.1.1", + "require": { + "heroku-sys\\/php": ">=5.3.2" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "zircote\\/swagger-php", + "version": "2.0.6", + "require": { + "heroku-sys\\/php": ">=5.4.0" + }, + "replace": [], + "provide": [], + "conflict": [] + }, + { + "type": "metapackage", + "name": "composer.json\\/composer.lock", + "version": "dev-a923f6cdbbc9439cabb74aa9003f6d51", + "require": { + "heroku-sys\\/php": ">=5.5.9", + "heroku-sys\\/ext-gd": "*", + "heroku-sys\\/ext-exif": "*", + "heroku-sys\\/ext-fileinfo": "*" + }, + "replace": [], + "provide": [], + "conflict": [] + } + ] + } + ], + "require": { + "composer.json\\/composer.lock": "dev-a923f6cdbbc9439cabb74aa9003f6d51", + "algolia\\/algoliasearch-client-php": "1.8.1", + "algolia\\/algoliasearch-laravel": "1.0.10", + "asm89\\/stack-cors": "0.2.1", + "aws\\/aws-sdk-php": "3.15.7", + "aws\\/aws-sdk-php-laravel": "3.1.0", + "barryvdh\\/laravel-cors": "v0.7.3", + 
"bookingsync\\/oauth2-bookingsync-php": "0.1.3", + "classpreloader\\/classpreloader": "3.0.0", + "danielstjules\\/stringy": "1.10.0", + "dnoegel\\/php-xdg-base-dir": "0.1", + "doctrine\\/annotations": "v1.2.7", + "doctrine\\/cache": "v1.6.0", + "doctrine\\/collections": "v1.3.0", + "doctrine\\/common": "v2.6.1", + "doctrine\\/dbal": "v2.5.4", + "doctrine\\/inflector": "v1.1.0", + "doctrine\\/lexer": "v1.0.1", + "fabpot\\/goutte": "v3.1.2", + "graham-campbell\\/manager": "v2.3.1", + "guzzle\\/guzzle": "v3.9.3", + "guzzlehttp\\/guzzle": "6.1.1", + "guzzlehttp\\/promises": "1.1.0", + "guzzlehttp\\/psr7": "1.2.3", + "intercom\\/intercom-php": "v1.4.0", + "intervention\\/image": "2.3.6", + "jakub-onderka\\/php-console-color": "0.1", + "jakub-onderka\\/php-console-highlighter": "v0.3.2", + "jeremeamia\\/SuperClosure": "2.2.0", + "jlapp\\/swaggervel": "dev-master", + "laravel\\/framework": "v5.1.31", + "laravelcollective\\/html": "v5.1.9", + "league\\/flysystem": "1.0.18", + "league\\/flysystem-aws-s3-v3": "1.0.9", + "league\\/fractal": "0.13.0", + "league\\/glide": "1.0.0", + "league\\/oauth2-client": "0.12.1", + "mindscape\\/raygun4php": "dev-master", + "monolog\\/monolog": "1.18.0", + "mtdowling\\/cron-expression": "v1.1.0", + "mtdowling\\/jmespath.php": "2.3.0", + "namshi\\/jose": "5.0.2", + "nesbot\\/carbon": "1.21.0", + "nikic\\/php-parser": "v2.0.1", + "orangehill\\/iseed": "dev-master", + "paragonie\\/random_compat": "v1.2.1", + "phpseclib\\/phpseclib": "0.3.10", + "predis\\/predis": "v1.0.3", + "psr\\/http-message": "1.0", + "psy\\/psysh": "v0.7.1", + "swiftmailer\\/swiftmailer": "v5.4.1", + "symfony\\/browser-kit": "v2.8.3", + "symfony\\/console": "v2.7.10", + "symfony\\/css-selector": "v2.7.10", + "symfony\\/debug": "v2.7.10", + "symfony\\/dom-crawler": "v2.7.10", + "symfony\\/event-dispatcher": "v2.8.3", + "symfony\\/finder": "v2.7.10", + "symfony\\/http-foundation": "v2.7.10", + "symfony\\/http-kernel": "v2.7.10", + "symfony\\/polyfill-php56": "v1.1.0", + 
"symfony\\/polyfill-util": "v1.1.0", + "symfony\\/process": "v2.7.10", + "symfony\\/routing": "v2.7.10", + "symfony\\/translation": "v2.7.10", + "symfony\\/var-dumper": "v2.7.10", + "twilio\\/sdk": "4.10.0", + "tymon\\/jwt-auth": "0.5.9", + "vinkla\\/algolia": "2.2.1", + "vlucas\\/phpdotenv": "v1.1.1", + "zircote\\/swagger-php": "2.0.6", + "heroku-sys\\/apache": "^2.4.10", + "heroku-sys\\/nginx": "~1.8.0" + } +}'; + +$count = preg_match($re, $str, $matches); + +if($count === false) { + switch (preg_last_error()) { + case PREG_NO_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_NO_ERROR', PREG_NO_ERROR); + case PREG_INTERNAL_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_INTERNAL_ERROR', PREG_INTERNAL_ERROR); + case PREG_BACKTRACK_LIMIT_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_BACKTRACK_LIMIT_ERROR', PREG_BACKTRACK_LIMIT_ERROR); + case PREG_RECURSION_LIMIT_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_RECURSION_LIMIT_ERROR', PREG_RECURSION_LIMIT_ERROR); + case PREG_BAD_UTF8_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_BAD_UTF8_ERROR', PREG_BAD_UTF8_ERROR); + case PREG_BAD_UTF8_OFFSET_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_BAD_UTF8_OFFSET_ERROR', PREG_BAD_UTF8_OFFSET_ERROR); + case PREG_JIT_STACKLIMIT_ERROR: + throw new \RuntimeException('Failed to execute regex: PREG_JIT_STACKLIMIT_ERROR', PREG_JIT_STACKLIMIT_ERROR); + default: + throw new \RuntimeException('Failed to execute regex: Unknown error'); + } +} else { + var_dump("Done, $count matches."); +} +?> +==DONE== +--EXPECT-- +string(16) "Done, 1 matches." 
+==DONE== diff --git a/ext/pcre/tests/preg_replace.phpt b/ext/pcre/tests/preg_replace.phpt index f7b5f74157..092857c57e 100644 --- a/ext/pcre/tests/preg_replace.phpt +++ b/ext/pcre/tests/preg_replace.phpt @@ -8,18 +8,8 @@ var_dump(preg_replace('{{\D+}}', 'ddd', 'abcd')); var_dump(preg_replace('/(ab)(c)(d)(e)(f)(g)(h)(i)(j)(k)/', 'a${1}2$103', 'zabcdefghijkl')); -var_dump(preg_replace_callback('//e', '', '')); - -var_dump(preg_replace_callback('//e', 'strtolower', '')); - ?> --EXPECTF-- string(1) "x" string(4) "abcd" string(8) "zaab2k3l" - -Warning: preg_replace_callback(): Requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8 -string(0) "" - -Warning: preg_replace_callback(): Modifier /e cannot be used with replacement callback in %spreg_replace.php on line 10 -NULL diff --git a/ext/pcre/tests/preg_replace_callback3.phpt b/ext/pcre/tests/preg_replace_callback3.phpt index 30799e21f9..6484c074fe 100644 --- a/ext/pcre/tests/preg_replace_callback3.phpt +++ b/ext/pcre/tests/preg_replace_callback3.phpt @@ -36,9 +36,9 @@ int(3) Warning: preg_replace_callback(): Requires argument 2, '2', to be a valid callback in %s on line %d int(3) -Warning: preg_replace_callback() expects parameter 4 to be long, string given in %s on line %d +Warning: preg_replace_callback() expects parameter 4 to be integer, string given in %s on line %d NULL -Warning: preg_replace_callback() expects parameter 4 to be long, array given in %s on line %d +Warning: preg_replace_callback() expects parameter 4 to be integer, array given in %s on line %d NULL Done diff --git a/ext/pcre/tests/preg_replace_callback_array.phpt b/ext/pcre/tests/preg_replace_callback_array.phpt new file mode 100644 index 0000000000..9e9e819134 --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array.phpt @@ -0,0 +1,49 @@ +--TEST-- +preg_replace_callback_array() basic functions +--FILE-- +<?php + +class Rep { + public function __invoke() { + return "d"; + } +} + +class Foo { + public static function 
rep($rep) { + return "ok"; + } +} + +function b() { + return "b"; +} + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "/b/" => function () { return "c"; }, + "/c/" => new Rep, + '/d/' => array("Foo", "rep")), 'a')); + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "/c/" => new Rep, + "/b/" => function () { return "ok"; }, + '/d/' => array("Foo", "rep")), 'a')); + +var_dump(preg_replace_callback_array( + array( + '/d/' => array("Foo", "rep"), + "/c/" => new Rep, + "/a/" => 'b', + "/b/" => create_function('$a', 'return "ok";')), 'a', -1, $count)); + +var_dump($count); +?> +--EXPECTF-- +string(2) "ok" +string(2) "ok" +string(2) "ok" +int(2) diff --git a/ext/pcre/tests/preg_replace_callback_array2.phpt b/ext/pcre/tests/preg_replace_callback_array2.phpt new file mode 100644 index 0000000000..8e70bb06e8 --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array2.phpt @@ -0,0 +1,67 @@ +--TEST-- +preg_replace_callback_array() errors +--FILE-- +<?php + +var_dump(preg_replace_callback_array()); +var_dump(preg_replace_callback_array(1)); +var_dump(preg_replace_callback_array(1,2)); +var_dump(preg_replace_callback_array(1,2,3)); +$a = 5; +var_dump(preg_replace_callback_array(1,2,3,$a)); +$a = ""; +var_dump(preg_replace_callback_array(array("" => ""),"","",$a)); +$a = array(); +$b = ""; +var_dump(preg_replace_callback($a, $a, $a, $a, $b)); +var_dump($b); +$b = ""; +var_dump(preg_replace_callback_array(array("xx" => "s"), $a, -1, $b)); +var_dump($b); +function f() { + static $count = 1; + throw new Exception($count); +} + +var_dump(preg_replace_callback_array(array('/\w' => 'f'), 'z')); + +try { + var_dump(preg_replace_callback_array(array('/\w/' => 'f', '/.*/' => 'f'), 'z')); +} catch (Exception $e) { + var_dump($e->getMessage()); +} + +echo "Done\n"; +?> +--EXPECTF-- +Warning: preg_replace_callback_array() expects at least 2 parameters, 0 given in %s on line %d +NULL + +Warning: preg_replace_callback_array() expects at least 2 
parameters, 1 given in %s on line %d +NULL + +Warning: preg_replace_callback_array() expects parameter 1 to be array, integer given in %s on line %d +NULL + +Warning: preg_replace_callback_array() expects parameter 1 to be array, integer given in %s on line %d +NULL + +Warning: preg_replace_callback_array() expects parameter 1 to be array, integer given in %s on line %d +NULL + +Warning: preg_replace_callback_array() expects parameter 3 to be integer, string given in %s on line %d +NULL + +Warning: preg_replace_callback() expects parameter 4 to be integer, array given in %s on line %d +NULL +string(0) "" + +Warning: preg_replace_callback_array(): 's' is not a valid callback in %spreg_replace_callback_array2.php on line %d +array(0) { +} +string(0) "" + +Warning: preg_replace_callback_array(): No ending delimiter '/' found in %spreg_replace_callback_array2.php on line %d +NULL +string(1) "1" +Done diff --git a/ext/pcre/tests/recursion_limit.phpt b/ext/pcre/tests/recursion_limit.phpt index 7dee7ba4e7..294931388d 100644 --- a/ext/pcre/tests/recursion_limit.phpt +++ b/ext/pcre/tests/recursion_limit.phpt @@ -7,6 +7,7 @@ if (@preg_match_all('/\p{N}/', '0123456789', $dummy) === false) { } ?> --INI-- +pcre.jit=0 pcre.recursion_limit=2 --FILE-- <?php diff --git a/ext/pcre/tests/split2.phpt b/ext/pcre/tests/split2.phpt index 391acb951e..ccbb7242fd 100644 --- a/ext/pcre/tests/split2.phpt +++ b/ext/pcre/tests/split2.phpt @@ -1,5 +1,7 @@ --TEST-- preg_split() 2nd test +--INI-- +pcre.jit=0 --FILE-- <?php |
