summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Father Chrysostomos <sprout@cpan.org> 2013-09-20 00:33:49 -0700
committer: Father Chrysostomos <sprout@cpan.org> 2013-09-20 01:19:07 -0700
commit: 4aaee9b8df62149cfed9099d3b9422c6d397c274 (patch)
tree: f698b358a7ca2fa5002f1b18c2499f90b5a90d5e
parent: 364b4e90ddaca97dd7233f3266dbaa36aabbe6f5 (diff)
download: perl-4aaee9b8df62149cfed9099d3b9422c6d397c274.tar.gz
Fix parser buffer corruption with multiline *{...}
Since commit a49b10d0a, it has been possible for scan_ident in toke.c to reallocate the parser’s buffer (SvPVX(PL_linestr)) when scanning for multiline whitespace.

For the sake of those cases where it finds an arbitrary expression, not just an identifier, it records a pointer to the first opening brace, which it returns to the parser after finding out that there is indeed an expression.

That pointer was not being updated when the buffer was being reallocated, so it could be left pointing into the old, freed buffer. The solution is to record an offset, rather than a pointer, of the opening brace relative to the beginning of the current line of input.

This one-liner:

    $ ./miniperl -e '*{' -e ' XS::APItest::gv_fetchmeth_type()' -e '}'

was giving me:

    Unrecognized character \x80; marked by <-- HERE after 2<-- HERE near column 24 at -e line 2.

(There were nine nulls before the 2, but git stripped them out.)
-rw-r--r--  t/op/lex.t  19
-rw-r--r--  toke.c      10
2 files changed, 22 insertions, 7 deletions
diff --git a/t/op/lex.t b/t/op/lex.t
index 43b4107b99..b33f0efc99 100644
--- a/t/op/lex.t
+++ b/t/op/lex.t
@@ -2,9 +2,9 @@
use strict;
use warnings;
-require './test.pl';
+BEGIN { chdir 't'; require './test.pl'; }
-plan(tests => 7);
+plan(tests => 8);
{
no warnings 'deprecated';
@@ -73,3 +73,18 @@ fresh_perl_is(
{ stderr => 1 },
'no crash when charnames cannot load and %^H holds string reference'
);
+
+# not fresh_perl_is, as it seems to hide the error
+is runperl(
+ nolib => 1, # -Ilib may also hide the error
+ progs => [
+ '*{',
+ ' XS::APItest::gv_fetchmeth_type()',
+ '}'
+ ],
+ stderr => 1,
+ ),
+ "Undefined subroutine &XS::APItest::gv_fetchmeth_type called at -e line "
+ ."2.\n",
+ 'no buffer corruption with multiline *{...expr...}'
+;
diff --git a/toke.c b/toke.c
index 9d75cc0ffe..14d9b983a3 100644
--- a/toke.c
+++ b/toke.c
@@ -9371,7 +9371,7 @@ STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
{
dVAR;
- char *bracket = NULL;
+ SSize_t bracket = -1;
char funny = *s++;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
@@ -9415,7 +9415,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
}
/* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
if (*s == '{') {
- bracket = s;
+ bracket = s - SvPVX(PL_linestr);
s++;
orig_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
@@ -9466,9 +9466,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
/* Warn about ambiguous code after unary operators if {...} notation isn't
used. There's no difference in ambiguity; it's merely a heuristic
about when not to warn. */
- else if (ck_uni && !bracket)
+ else if (ck_uni && bracket == -1)
check_uni();
- if (bracket) {
+ if (bracket != -1) {
/* If we were processing {...} notation then... */
if (isIDFIRST_lazy_if(d,is_utf8)) {
/* if it starts as a valid identifier, assume that it is one.
@@ -9550,7 +9550,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
else {
/* Didn't find the closing } at the point we expected, so restore
state such that the next thing to process is the opening { and */
- s = bracket; /* let the parser handle it */
+ s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
CopLINE_set(PL_curcop, orig_copline);
*dest = '\0';
}