summary refs log tree commit diff
path: root/ext/pcre/pcrelib/pcretest.c
diff options
context:
space:
mode:
author Andrei Zmievski <andrei@php.net> 2005-08-08 23:59:04 +0000
committer Andrei Zmievski <andrei@php.net> 2005-08-08 23:59:04 +0000
commit e623e5352d8c42619a03d76fd999031ad927576d (patch)
tree 7b0576ce8f2b7c3d0b84e457027201ffce56735c /ext/pcre/pcrelib/pcretest.c
parent 1a7524d7c6115953034048fb3bfa3e204b9c3968 (diff)
download php-git-e623e5352d8c42619a03d76fd999031ad927576d.tar.gz
Upgrade to PCRE version 6.2.
# Don't worry, upgrade for other branches is coming
Diffstat (limited to 'ext/pcre/pcrelib/pcretest.c')
-rw-r--r-- ext/pcre/pcrelib/pcretest.c 255
1 file changed, 155 insertions, 100 deletions
diff --git a/ext/pcre/pcrelib/pcretest.c b/ext/pcre/pcrelib/pcretest.c
index e531cc134e..9b63470863 100644
--- a/ext/pcre/pcrelib/pcretest.c
+++ b/ext/pcre/pcrelib/pcretest.c
@@ -4,7 +4,7 @@
/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather untidy in places.
+been extended and consequently is now rather, er, *very* untidy in places.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -44,11 +44,15 @@ POSSIBILITY OF SUCH DAMAGE.
#include <locale.h>
#include <errno.h>
-/* We need the internal info for displaying the results of pcre_study(). Also
-for getting the opcodes for showing compiled code. */
-
#define PCRE_SPY /* For Win32 build, import data, not export */
-#include "internal.h"
+
+/* We need the internal info for displaying the results of pcre_study() and
+other internal data; pcretest also uses some of the fixed tables, and generally
+has "inside information" compared to a program that strictly follows the PCRE
+API. */
+
+#include "pcre_internal.h"
+
/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
@@ -58,6 +62,12 @@ Makefile. */
#include "pcreposix.h"
#endif
+/* It is also possible, for the benefit of the version imported into Exim, to
+build pcretest without support for UTF8 (define NOUTF8), without the interface
+to the DFA matcher (NODFA), and without the doublecheck of the old "info"
+function (define NOINFOCHECK). */
+
+
#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
@@ -87,34 +97,6 @@ static size_t gotten_store;
static uschar *pbuffer = NULL;
-static const int utf8_table1[] = {
- 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
-
-static const int utf8_table2[] = {
- 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
-
-static const int utf8_table3[] = {
- 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
-
-
-
-/*************************************************
-* Print compiled regex *
-*************************************************/
-
-/* The code for doing this is held in a separate file that is also included in
-pcre.c when it is compiled with the debug switch. It defines a function called
-print_internals(), which uses a table of opcode lengths defined by the macro
-OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
-Unicode property names to numbers; this is kept in a separate file. */
-
-static uschar OP_lengths[] = { OP_LENGTHS };
-
-#include "ucp.h"
-#include "ucptypetable.c"
-#include "printint.c"
-
-
/*************************************************
* Read number from string *
@@ -143,42 +125,6 @@ return(result);
-/*************************************************
-* Convert character value to UTF-8 *
-*************************************************/
-
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
-
-Arguments:
- cvalue the character value
- buffer pointer to buffer for result - at least 6 bytes long
-
-Returns: number of characters placed in the buffer
- -1 if input character is negative
- 0 if input character is positive but too big (only when
- int is longer than 32 bits)
-*/
-
-static int
-ord2utf8(int cvalue, unsigned char *buffer)
-{
-register int i, j;
-for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
- if (cvalue <= utf8_table1[i]) break;
-if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
-if (cvalue < 0) return -1;
-
-buffer += i;
-for (j = i; j > 0; j--)
- {
- *buffer-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
-*buffer = utf8_table2[i] | cvalue;
-return i + 1;
-}
-
/*************************************************
* Convert UTF-8 string to value *
@@ -195,6 +141,8 @@ Returns: > 0 => the number of bytes consumed
-6 to 0 => malformed UTF-8 character at offset = (-return)
*/
+#if !defined NOUTF8
+
static int
utf82ord(unsigned char *buffer, int *vptr)
{
@@ -214,7 +162,7 @@ if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
/* i now has a value in the range 1-5 */
s = 6*i;
-d = (c & utf8_table3[i]) << s;
+d = (c & _pcre_utf8_table3[i]) << s;
for (j = 0; j < i; j++)
{
@@ -226,8 +174,8 @@ for (j = 0; j < i; j++)
/* Check that encoding was the correct unique one */
-for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
- if (d <= utf8_table1[j]) break;
+for (j = 0; j < _pcre_utf8_table1_size; j++)
+ if (d <= _pcre_utf8_table1[j]) break;
if (j != i) return -(i+1);
/* Valid value */
@@ -236,6 +184,8 @@ if (j != i) return -(i+1);
return i+1;
}
+#endif
+
/*************************************************
@@ -253,6 +203,7 @@ int yield = 0;
while (length-- > 0)
{
+#if !defined NOUTF8
if (use_utf8)
{
int rc = utf82ord(p, &c);
@@ -275,6 +226,7 @@ while (length-- > 0)
continue;
}
}
+#endif
/* Not UTF-8, or malformed UTF-8 */
@@ -403,7 +355,7 @@ static void *new_malloc(size_t size)
void *block = malloc(size);
gotten_store = size;
if (show_malloc)
- fprintf(outfile, "malloc %3d %p\n", size, block);
+ fprintf(outfile, "malloc %3d %p\n", (int)size, block);
return block;
}
@@ -421,7 +373,7 @@ static void *stack_malloc(size_t size)
{
void *block = malloc(size);
if (show_malloc)
- fprintf(outfile, "stack_malloc %3d %p\n", size, block);
+ fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
return block;
}
@@ -484,12 +436,14 @@ int showinfo = 0;
int showstore = 0;
int size_offsets = 45;
int size_offsets_max;
-int *offsets;
+int *offsets = NULL;
#if !defined NOPOSIX
int posix = 0;
#endif
int debug = 0;
int done = 0;
+int all_use_dfa = 0;
+int yield = 0;
unsigned char *buffer;
unsigned char *dbuffer;
@@ -522,6 +476,9 @@ while (argc > 1 && argv[op][0] == '-')
else if (strcmp(argv[op], "-t") == 0) timeit = 1;
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
+#if !defined NODFA
+ else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
+#endif
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
*endptr == 0))
@@ -558,8 +515,11 @@ while (argc > 1 && argv[op][0] == '-')
printf("** Unknown or malformed option %s\n", argv[op]);
printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
printf(" -C show PCRE compile-time options and exit\n");
- printf(" -d debug: show compiled code; implies -i\n"
- " -i show information about compiled pattern\n"
+ printf(" -d debug: show compiled code; implies -i\n");
+#if !defined NODFA
+ printf(" -dfa force DFA matching for all subjects\n");
+#endif
+ printf(" -i show information about compiled pattern\n"
" -m output memory used information\n"
" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
@@ -567,7 +527,8 @@ while (argc > 1 && argv[op][0] == '-')
#endif
printf(" -s output store (memory) used information\n"
" -t time compilation and execution\n");
- return 1;
+ yield = 1;
+ goto EXIT;
}
op++;
argc--;
@@ -581,7 +542,8 @@ if (offsets == NULL)
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
/* Sort out the input and output files */
@@ -592,7 +554,8 @@ if (argc > 1)
if (infile == NULL)
{
printf("** Failed to open %s\n", argv[op]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
@@ -602,7 +565,8 @@ if (argc > 2)
if (outfile == NULL)
{
printf("** Failed to open %s\n", argv[op+1]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
@@ -802,6 +766,7 @@ while (!done)
{
switch (*pp++)
{
+ case 'f': options |= PCRE_FIRSTLINE; break;
case 'g': do_g = 1; break;
case 'i': options |= PCRE_CASELESS; break;
case 'm': options |= PCRE_MULTILINE; break;
@@ -831,7 +796,8 @@ while (!done)
case 'L':
ppp = pp;
- while (*ppp != '\n' && *ppp != ' ') ppp++;
+ /* The '\r' test here is so that it works on Windows */
+ while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
*ppp = 0;
if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
{
@@ -849,7 +815,10 @@ while (!done)
*pp = 0;
break;
- case '\n': case ' ': break;
+ case '\r': /* So that it works in Windows */
+ case '\n':
+ case ' ':
+ break;
default:
fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
@@ -869,6 +838,7 @@ while (!done)
if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
+ if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
rc = regcomp(&preg, (char *)p, cflags);
/* Compilation failed; go back for another re, skipping to blank line
@@ -1008,7 +978,9 @@ while (!done)
if (do_showinfo)
{
unsigned long int get_options, all_options;
+#if !defined NOINFOCHECK
int old_first_char, old_options, old_count;
+#endif
int count, backrefmax, first_char, need_char;
int nameentrysize, namecount;
const uschar *nametable;
@@ -1016,7 +988,7 @@ while (!done)
if (do_debug)
{
fprintf(outfile, "------------------------------------------------------------------\n");
- print_internals(re, outfile);
+ _pcre_printint(re, outfile);
}
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
@@ -1029,6 +1001,7 @@ while (!done)
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
+#if !defined NOINFOCHECK
old_count = pcre_info(re, &old_options, &old_first_char);
if (count < 0) fprintf(outfile,
"Error %d from pcre_info()\n", count);
@@ -1046,10 +1019,11 @@ while (!done)
"Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
get_options, old_options);
}
+#endif
if (size != regex_gotten_store) fprintf(outfile,
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
- size, regex_gotten_store);
+ (int)size, (int)regex_gotten_store);
fprintf(outfile, "Capturing subpattern count = %d\n", count);
if (backrefmax > 0)
@@ -1080,11 +1054,12 @@ while (!done)
fprintf(outfile, "Partial matching not supported\n");
if (get_options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
+ ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((get_options & PCRE_EXTRA) != 0)? " extra" : "",
@@ -1222,6 +1197,10 @@ while (!done)
}
fclose(f);
}
+
+ new_free(re);
+ if (extra != NULL) new_free(extra);
+ if (tables != NULL) new_free((void *)tables);
continue; /* With next regex */
}
} /* End of non-POSIX compile */
@@ -1244,6 +1223,7 @@ while (!done)
int gmatched = 0;
int start_offset = 0;
int g_notempty = 0;
+ int use_dfa = 0;
options = 0;
@@ -1299,6 +1279,7 @@ while (!done)
/* Handle \x{..} specially - new Perl thing for utf8 */
+#if !defined NOUTF8
if (*p == '{')
{
unsigned char *pt = p;
@@ -1309,7 +1290,7 @@ while (!done)
{
unsigned char buff8[8];
int ii, utn;
- utn = ord2utf8(c, buff8);
+ utn = _pcre_ord2utf8(c, buff8);
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
c = buff8[ii]; /* Last byte */
p = pt + 1;
@@ -1317,6 +1298,7 @@ while (!done)
}
/* Not correct form; fall through */
}
+#endif
/* Ordinary \x */
@@ -1397,6 +1379,21 @@ while (!done)
}
continue;
+#if !defined NODFA
+ case 'D':
+#if !defined NOPOSIX
+ if (posix || do_posix)
+ printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
+ else
+#endif
+ use_dfa = 1;
+ continue;
+
+ case 'F':
+ options |= PCRE_DFA_SHORTEST;
+ continue;
+#endif
+
case 'G':
if (isdigit(*p))
{
@@ -1439,7 +1436,8 @@ while (!done)
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
use_size_offsets = n;
@@ -1450,6 +1448,12 @@ while (!done)
options |= PCRE_PARTIAL;
continue;
+#if !defined NODFA
+ case 'R':
+ options |= PCRE_DFA_RESTART;
+ continue;
+#endif
+
case 'S':
show_malloc = 1;
continue;
@@ -1467,6 +1471,12 @@ while (!done)
*q = 0;
len = q - dbuffer;
+ if ((all_use_dfa || use_dfa) && find_match_limit)
+ {
+ printf("**Match limit not relevant for DFA matching: ignored\n");
+ find_match_limit = 0;
+ }
+
/* Handle matching via the POSIX interface, which does not
support timing or playing with the match limit or callout data. */
@@ -1524,9 +1534,23 @@ while (!done)
register int i;
clock_t time_taken;
clock_t start_time = clock();
+
+#if !defined NODFA
+ if (all_use_dfa || use_dfa)
+ {
+ int workspace[1000];
+ for (i = 0; i < LOOPREPEAT; i++)
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ }
+ else
+#endif
+
for (i = 0; i < LOOPREPEAT; i++)
count = pcre_exec(re, extra, (char *)bptr, len,
start_offset, options | g_notempty, use_offsets, use_size_offsets);
+
time_taken = clock() - start_time;
fprintf(outfile, "Execute time %.3f milliseconds\n",
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
@@ -1597,16 +1621,30 @@ while (!done)
/* The normal case is just to do the match once, with the default
value of match_limit. */
- else
+#if !defined NODFA
+ else if (all_use_dfa || use_dfa)
{
- count = pcre_exec(re, extra, (char *)bptr, len,
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ int workspace[1000];
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many subsidiary matches\n");
+ count = use_size_offsets/2;
+ }
}
+#endif
- if (count == 0)
+ else
{
- fprintf(outfile, "Matched, but too many substrings\n");
- count = use_size_offsets/3;
+ count = pcre_exec(re, extra, (char *)bptr, len,
+ start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many substrings\n");
+ count = use_size_offsets/3;
+ }
}
/* Matched */
@@ -1692,7 +1730,13 @@ while (!done)
else if (count == PCRE_ERROR_PARTIAL)
{
- fprintf(outfile, "Partial match\n");
+ fprintf(outfile, "Partial match");
+#if !defined NODFA
+ if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
+ fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
+ bptr + use_offsets[0]);
+#endif
+ fprintf(outfile, "\n");
break; /* Out of the /g loop */
}
@@ -1770,17 +1814,28 @@ while (!done)
if (posix || do_posix) regfree(&preg);
#endif
- if (re != NULL) free(re);
- if (extra != NULL) free(extra);
+ if (re != NULL) new_free(re);
+ if (extra != NULL) new_free(extra);
if (tables != NULL)
{
- free((void *)tables);
+ new_free((void *)tables);
setlocale(LC_CTYPE, "C");
}
}
if (infile == stdin) fprintf(outfile, "\n");
-return 0;
+
+EXIT:
+
+if (infile != NULL && infile != stdin) fclose(infile);
+if (outfile != NULL && outfile != stdout) fclose(outfile);
+
+free(buffer);
+free(dbuffer);
+free(pbuffer);
+free(offsets);
+
+return yield;
}
-/* End */
+/* End of pcretest.c */