summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog21
-rwxr-xr-xRunTest9
-rw-r--r--doc/html/pcrestack.html54
-rw-r--r--doc/pcre.txt41
-rw-r--r--doc/pcrestack.364
-rw-r--r--pcre_exec.c14
-rw-r--r--pcre_study.c2
-rw-r--r--pcretest.c6
8 files changed, 136 insertions, 75 deletions
diff --git a/ChangeLog b/ChangeLog
index 6135e56..aec68e9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -13,24 +13,27 @@ Version 8.0 02 Jul-08
2. Updated the Unicode datatables to Unicode 5.1.0. This adds yet more
scripts.
-
+
3. Change 12 for 7.7 introduced a bug in pcre_study() when a pattern contained
a group with a zero quantifier. The result of the study could be incorrect,
- or the function might crash, depending on the pattern.
-
-4. Caseless matching was not working for non-ASCII characters in back
+ or the function might crash, depending on the pattern.
+
+4. Caseless matching was not working for non-ASCII characters in back
references. For example, /(\x{de})\1/8i was not matching \x{de}\x{fe}.
- It now works when Unicode Property Support is available.
-
+ It now works when Unicode Property Support is available.
+
5. In pcretest, an escape such as \x{de} in the data was always generating
a UTF-8 string, even in non-UTF-8 mode. Now it generates a single byte in
non-UTF-8 mode. If the value is greater than 255, it gives a warning about
- truncation.
-
-6. Minor bugfix in pcrecpp.cc (change "" == ... to NULL == ...).
+ truncation.
+
+6. Minor bugfix in pcrecpp.cc (change "" == ... to NULL == ...).
7. Added two (int) casts to pcregrep when printing the difference of two
pointers, in case they are 64-bit values.
+
+8. Added comments about Mac OS X stack usage to the pcrestack man page, and
+ made RunTest print a note about PCRE's stack usage if test 2 fails.
Version 7.7 07-May-08
diff --git a/RunTest b/RunTest
index 5ae334a..24f4c15 100755
--- a/RunTest
+++ b/RunTest
@@ -144,7 +144,14 @@ if [ $do2 = yes ] ; then
if [ $? = 0 ] ; then
$cf $testdata/testoutput2 testtry
if [ $? != 0 ] ; then exit 1; fi
- else exit 1
+ else
+ echo " "
+ echo "** Test 2 requires a lot of stack. If it has crashed with a"
+ echo "** segmentation fault, it may be that you do not have enough"
+ echo "** stack available by default. Please see the 'pcrestack' man"
+ echo "** page for a discussion of PCRE's stack usage."
+ echo " "
+ exit 1
fi
echo "OK"
fi
diff --git a/doc/html/pcrestack.html b/doc/html/pcrestack.html
index 2cc7d26..6048828 100644
--- a/doc/html/pcrestack.html
+++ b/doc/html/pcrestack.html
@@ -77,6 +77,9 @@ This example shows that one way of avoiding stack problems when matching long
subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
</P>
+<br><b>
+Compiling PCRE to use heap instead of stack
+</b><br>
<P>
In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points. This
@@ -91,6 +94,30 @@ cause PCRE to use your own functions. Since the block sizes are always the
same, and are always freed in reverse order, it may be possible to implement
customized memory handlers that are more efficient than the standard functions.
</P>
+<br><b>
+Limiting PCRE's stack usage
+</b><br>
+<P>
+PCRE has an internal counter that can be used to limit the depth of recursion,
+and thus cause <b>pcre_exec()</b> to give an error code before it runs out of
+stack. By default, the limit is very large, and unlikely ever to operate. It
+can be changed when PCRE is built, and it can also be set when
+<b>pcre_exec()</b> is called. For details of these interfaces, see the
+<a href="pcrebuild.html"><b>pcrebuild</b></a>
+and
+<a href="pcreapi.html"><b>pcreapi</b></a>
+documentation.
+</P>
+<P>
+As a very rough rule of thumb, you should reckon on about 500 bytes per
+recursion. Thus, if you want to limit your stack usage to 8Mb, you
+should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
+support around 128000 recursions. The <b>pcretest</b> test program has a command
+line option (<b>-S</b>) that can be used to increase the size of its stack.
+</P>
+<br><b>
+Changing stack size in Unix-like systems
+</b><br>
<P>
In Unix-like environments, there is not often a problem with the stack unless
very long strings are involved, though the default limit on stack size varies
@@ -112,23 +139,14 @@ This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
do this before calling <b>pcre_exec()</b>.
</P>
+<br><b>
+Changing stack size in Mac OS X
+</b><br>
<P>
-PCRE has an internal counter that can be used to limit the depth of recursion,
-and thus cause <b>pcre_exec()</b> to give an error code before it runs out of
-stack. By default, the limit is very large, and unlikely ever to operate. It
-can be changed when PCRE is built, and it can also be set when
-<b>pcre_exec()</b> is called. For details of these interfaces, see the
-<a href="pcrebuild.html"><b>pcrebuild</b></a>
-and
-<a href="pcreapi.html"><b>pcreapi</b></a>
-documentation.
-</P>
-<P>
-As a very rough rule of thumb, you should reckon on about 500 bytes per
-recursion. Thus, if you want to limit your stack usage to 8Mb, you
-should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
-support around 128000 recursions. The <b>pcretest</b> test program has a command
-line option (<b>-S</b>) that can be used to increase the size of its stack.
+Using <b>setrlimit()</b>, as described above, should also work on Mac OS X. It
+is also possible to set a stack size when linking a program. There is a
+discussion about stack sizes in Mac OS X at this web site:
+<a href="http://developer.apple.com/qa/qa2005/qa1419.html">http://developer.apple.com/qa/qa2005/qa1419.html</a>.
</P>
<br><b>
AUTHOR
@@ -145,9 +163,9 @@ Cambridge CB2 3QH, England.
REVISION
</b><br>
<P>
-Last updated: 05 June 2007
+Last updated: 09 July 2008
<br>
-Copyright &copy; 1997-2007 University of Cambridge.
+Copyright &copy; 1997-2008 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 1a328ce..f9a76b1 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -6536,6 +6536,8 @@ PCRE DISCUSSION OF STACK USAGE
ing long subject strings is to write repeated parenthesized subpatterns
to match more than one character whenever possible.
+ Compiling PCRE to use heap instead of stack
+
In environments where stack memory is constrained, you might want to
compile PCRE to use heap memory instead of stack for remembering back-
up points. This makes it run a lot more slowly, however. Details of how
@@ -6548,6 +6550,24 @@ PCRE DISCUSSION OF STACK USAGE
freed in reverse order, it may be possible to implement customized mem-
ory handlers that are more efficient than the standard functions.
+ Limiting PCRE's stack usage
+
+ PCRE has an internal counter that can be used to limit the depth of
+ recursion, and thus cause pcre_exec() to give an error code before it
+ runs out of stack. By default, the limit is very large, and unlikely
+ ever to operate. It can be changed when PCRE is built, and it can also
+ be set when pcre_exec() is called. For details of these interfaces, see
+ the pcrebuild and pcreapi documentation.
+
+ As a very rough rule of thumb, you should reckon on about 500 bytes per
+ recursion. Thus, if you want to limit your stack usage to 8Mb, you
+ should set the limit at 16000 recursions. A 64Mb stack, on the other
+ hand, can support around 128000 recursions. The pcretest test program
+ has a command line option (-S) that can be used to increase the size of
+ its stack.
+
+ Changing stack size in Unix-like systems
+
In Unix-like environments, there is not often a problem with the stack
unless very long strings are involved, though the default limit on
stack size varies from system to system. Values from 8Mb to 64Mb are
@@ -6568,19 +6588,12 @@ PCRE DISCUSSION OF STACK USAGE
attempts to increase the soft limit to 100Mb using setrlimit(). You
must do this before calling pcre_exec().
- PCRE has an internal counter that can be used to limit the depth of
- recursion, and thus cause pcre_exec() to give an error code before it
- runs out of stack. By default, the limit is very large, and unlikely
- ever to operate. It can be changed when PCRE is built, and it can also
- be set when pcre_exec() is called. For details of these interfaces, see
- the pcrebuild and pcreapi documentation.
+ Changing stack size in Mac OS X
- As a very rough rule of thumb, you should reckon on about 500 bytes per
- recursion. Thus, if you want to limit your stack usage to 8Mb, you
- should set the limit at 16000 recursions. A 64Mb stack, on the other
- hand, can support around 128000 recursions. The pcretest test program
- has a command line option (-S) that can be used to increase the size of
- its stack.
+ Using setrlimit(), as described above, should also work on Mac OS X. It
+ is also possible to set a stack size when linking a program. There is a
+ discussion about stack sizes in Mac OS X at this web site:
+ http://developer.apple.com/qa/qa2005/qa1419.html.
AUTHOR
@@ -6592,8 +6605,8 @@ AUTHOR
REVISION
- Last updated: 05 June 2007
- Copyright (c) 1997-2007 University of Cambridge.
+ Last updated: 09 July 2008
+ Copyright (c) 1997-2008 University of Cambridge.
------------------------------------------------------------------------------
diff --git a/doc/pcrestack.3 b/doc/pcrestack.3
index 7e9bfc9..845425d 100644
--- a/doc/pcrestack.3
+++ b/doc/pcrestack.3
@@ -64,7 +64,10 @@ stack usage.
This example shows that one way of avoiding stack problems when matching long
subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
-.P
+.
+.SS "Compiling PCRE to use heap instead of stack"
+.rs
+.sp
In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points. This
makes it run a lot more slowly, however. Details of how to do this are given in
@@ -79,7 +82,33 @@ point to \fBmalloc()\fP and \fBfree()\fP, but you can replace the pointers to
cause PCRE to use your own functions. Since the block sizes are always the
same, and are always freed in reverse order, it may be possible to implement
customized memory handlers that are more efficient than the standard functions.
+.
+.SS "Limiting PCRE's stack usage"
+.rs
+.sp
+PCRE has an internal counter that can be used to limit the depth of recursion,
+and thus cause \fBpcre_exec()\fP to give an error code before it runs out of
+stack. By default, the limit is very large, and unlikely ever to operate. It
+can be changed when PCRE is built, and it can also be set when
+\fBpcre_exec()\fP is called. For details of these interfaces, see the
+.\" HREF
+\fBpcrebuild\fP
+.\"
+and
+.\" HREF
+\fBpcreapi\fP
+.\"
+documentation.
.P
+As a very rough rule of thumb, you should reckon on about 500 bytes per
+recursion. Thus, if you want to limit your stack usage to 8Mb, you
+should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
+support around 128000 recursions. The \fBpcretest\fP test program has a command
+line option (\fB-S\fP) that can be used to increase the size of its stack.
+.
+.SS "Changing stack size in Unix-like systems"
+.rs
+.sp
In Unix-like environments, there is not often a problem with the stack unless
very long strings are involved, though the default limit on stack size varies
from system to system. Values from 8Mb to 64Mb are common. You can find your
@@ -99,26 +128,17 @@ limit on stack size by code such as this:
This reads the current limits (soft and hard) using \fBgetrlimit()\fP, then
attempts to increase the soft limit to 100Mb using \fBsetrlimit()\fP. You must
do this before calling \fBpcre_exec()\fP.
-.P
-PCRE has an internal counter that can be used to limit the depth of recursion,
-and thus cause \fBpcre_exec()\fP to give an error code before it runs out of
-stack. By default, the limit is very large, and unlikely ever to operate. It
-can be changed when PCRE is built, and it can also be set when
-\fBpcre_exec()\fP is called. For details of these interfaces, see the
-.\" HREF
-\fBpcrebuild\fP
-.\"
-and
-.\" HREF
-\fBpcreapi\fP
+.
+.SS "Changing stack size in Mac OS X"
+.rs
+.sp
+Using \fBsetrlimit()\fP, as described above, should also work on Mac OS X. It
+is also possible to set a stack size when linking a program. There is a
+discussion about stack sizes in Mac OS X at this web site:
+.\" HTML <a href="http://developer.apple.com/qa/qa2005/qa1419.html">
+.\" </a>
+http://developer.apple.com/qa/qa2005/qa1419.html.
.\"
-documentation.
-.P
-As a very rough rule of thumb, you should reckon on about 500 bytes per
-recursion. Thus, if you want to limit your stack usage to 8Mb, you
-should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
-support around 128000 recursions. The \fBpcretest\fP test program has a command
-line option (\fB-S\fP) that can be used to increase the size of its stack.
.
.
.SH AUTHOR
@@ -135,6 +155,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 05 June 2007
-Copyright (c) 1997-2007 University of Cambridge.
+Last updated: 09 July 2008
+Copyright (c) 1997-2008 University of Cambridge.
.fi
diff --git a/pcre_exec.c b/pcre_exec.c
index 008b950..0e69715 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -168,29 +168,29 @@ if ((ims & PCRE_CASELESS) != 0)
#ifdef SUPPORT_UCP
if (md->utf8)
{
- USPTR endptr = eptr + length;
+ USPTR endptr = eptr + length;
while (eptr < endptr)
{
- int c, d;
+ int c, d;
GETCHARINC(c, eptr);
GETCHARINC(d, p);
if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
- }
- }
+ }
+ }
else
#endif
#endif
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */
-
+
while (length-- > 0)
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
}
-
+
/* In the caseful case, we can just compare the bytes, whether or not we
are in UTF-8 mode. */
-
+
else
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
diff --git a/pcre_study.c b/pcre_study.c
index 0136e1e..f3a7dd4 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -220,7 +220,7 @@ do
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
- tcode++;
+ tcode++;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
diff --git a/pcretest.c b/pcretest.c
index 31d0357..6dadb39 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1807,18 +1807,18 @@ while (!done)
unsigned char buff8[8];
int ii, utn;
if (use_utf8)
- {
+ {
utn = ord2utf8(c, buff8);
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
c = buff8[ii]; /* Last byte */
}
else
{
- if (c > 255)
+ if (c > 255)
fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
"UTF-8 mode is not enabled.\n"
"** Truncation will probably give the wrong result.\n", c);
- }
+ }
p = pt + 1;
break;
}