summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZefram <zefram@fysh.org>2010-08-21 18:54:04 +0100
committerRafael Garcia-Suarez <rgs@consttype.org>2010-09-06 23:25:34 +0200
commit28ac2b49dea6847c95a32afde577935fec51650f (patch)
tree2af2b2eb45e3584e390fddcc564587e2c2286f4a
parent544cdeac5a054fa1c1b543769d0076fa6c3faf68 (diff)
downloadperl-28ac2b49dea6847c95a32afde577935fec51650f.tar.gz
function interface to parse Perl statement
yyparse() becomes reentrant. The yacc stack and related resources are allocated in yyparse(), rather than in lex_start(), and they are localised to yyparse(), preserving their values from any outer parser. yyparse() now takes a parameter which determines which production it will parse at the top level. New API function parse_fullstmt() uses this facility to parse just a single statement. The top-level single-statement production that is used for this then messes with the parser's head so that the parsing stops without seeing EOF, and any lookahead token seen after the statement is pushed back to the lexer.
-rw-r--r--MANIFEST1
-rw-r--r--embed.fnc7
-rw-r--r--ext/XS-APItest-KeywordRPN/KeywordRPN.xs26
-rw-r--r--ext/XS-APItest-KeywordRPN/t/swaptwostmts.t158
-rw-r--r--perl.c2
-rw-r--r--perly.c32
-rw-r--r--perly.y33
-rw-r--r--pod/perldiag.pod5
-rw-r--r--pp_ctl.c6
-rw-r--r--sv.c3
-rw-r--r--toke.c72
11 files changed, 312 insertions, 33 deletions
diff --git a/MANIFEST b/MANIFEST
index 4e56f448c5..4925ab3403 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3318,6 +3318,7 @@ ext/XS-APItest-KeywordRPN/README XS::APItest::KeywordRPN extension
ext/XS-APItest-KeywordRPN/t/keyword_plugin.t test keyword plugin mechanism
ext/XS-APItest-KeywordRPN/t/multiline.t test plugin parsing across lines
ext/XS-APItest-KeywordRPN/t/stuff_svcur_bug.t test for a bug in lex_stuff_pvn
+ext/XS-APItest-KeywordRPN/t/swaptwostmts.t test recursive descent statement parsing
ext/XS-APItest/Makefile.PL XS::APItest extension
ext/XS-APItest/MANIFEST XS::APItest extension
ext/XS-APItest/notcore.c Test API functions when PERL_CORE is not defined
diff --git a/embed.fnc b/embed.fnc
index 63269f0f7e..ecb6e71f8c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -622,6 +622,8 @@ AMpd |bool |lex_next_chunk |U32 flags
AMpd |I32 |lex_peek_unichar|U32 flags
AMpd |I32 |lex_read_unichar|U32 flags
AMpd |void |lex_read_space |U32 flags
+: Public parser API
+AMpd |OP* |parse_fullstmt |U32 flags
: Used in various files
Ap |void |op_null |NN OP* o
: FIXME. Used by Data::Alias
@@ -1326,8 +1328,9 @@ p |void |write_to_stderr|NN SV* msv
p |int |yyerror |NN const char *const s
: Used in perly.y, and by Data::Alias
EXp |int |yylex
+p |void |yyunlex
: Used in perl.c, pp_ctl.c
-p |int |yyparse
+p |int |yyparse |int gramtype
: Only used in scope.c
p |void |parser_free |NN const yy_parser *parser
#if defined(PERL_IN_TOKE_C)
@@ -2341,7 +2344,7 @@ s |void |start_force |int where
s |void |curmad |char slot|NULLOK SV *sv
# endif
Mp |int |madlex
-Mp |int |madparse
+Mp |int |madparse |int gramtype
#endif
#if !defined(HAS_SIGNBIT)
AMdnoP |int |Perl_signbit |NV f
diff --git a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs
index a5dfcd9adc..6c622564ff 100644
--- a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs
+++ b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs
@@ -9,6 +9,7 @@
(SvFLAGS(sv) & (SVf_IOK|SVf_NOK|SVf_POK|SVp_IOK|SVp_NOK|SVp_POK)))
static SV *hintkey_rpn_sv, *hintkey_calcrpn_sv, *hintkey_stufftest_sv;
+static SV *hintkey_swaptwostmts_sv;
static int (*next_keyword_plugin)(pTHX_ char *, STRLEN, OP **);
/* low-level parser helpers */
@@ -171,6 +172,18 @@ static OP *THX_parse_keyword_stufftest(pTHX)
}
#define parse_keyword_stufftest() THX_parse_keyword_stufftest(aTHX)
+static OP *THX_parse_keyword_swaptwostmts(pTHX)
+{
+ OP *a, *b;
+ a = parse_fullstmt(0);
+ b = parse_fullstmt(0);
+ if(a && b)
+ PL_hints |= HINT_BLOCK_SCOPE;
+ /* should use append_list(), but that's not part of the public API */
+ return !a ? b : !b ? a : newLISTOP(OP_LINESEQ, 0, b, a);
+}
+#define parse_keyword_swaptwostmts() THX_parse_keyword_swaptwostmts(aTHX)
+
/* plugin glue */
static int THX_keyword_active(pTHX_ SV *hintkey_sv)
@@ -225,6 +238,11 @@ static int my_keyword_plugin(pTHX_
keyword_active(hintkey_stufftest_sv)) {
*op_ptr = parse_keyword_stufftest();
return KEYWORD_PLUGIN_STMT;
+ } else if(keyword_len == 12 &&
+ strnEQ(keyword_ptr, "swaptwostmts", 12) &&
+ keyword_active(hintkey_swaptwostmts_sv)) {
+ *op_ptr = parse_keyword_swaptwostmts();
+ return KEYWORD_PLUGIN_STMT;
} else {
return next_keyword_plugin(aTHX_
keyword_ptr, keyword_len, op_ptr);
@@ -238,6 +256,8 @@ BOOT:
hintkey_calcrpn_sv = newSVpvs_share("XS::APItest::KeywordRPN/calcrpn");
hintkey_stufftest_sv =
newSVpvs_share("XS::APItest::KeywordRPN/stufftest");
+ hintkey_swaptwostmts_sv =
+ newSVpvs_share("XS::APItest::KeywordRPN/swaptwostmts");
next_keyword_plugin = PL_keyword_plugin;
PL_keyword_plugin = my_keyword_plugin;
@@ -255,6 +275,9 @@ PPCODE:
} else if(sv_is_string(item) &&
strEQ(SvPVX(item), "stufftest")) {
keyword_enable(hintkey_stufftest_sv);
+ } else if(sv_is_string(item) &&
+ strEQ(SvPVX(item), "swaptwostmts")) {
+ keyword_enable(hintkey_swaptwostmts_sv);
} else {
croak("\"%s\" is not exported by the %s module",
SvPV_nolen(item), SvPV_nolen(ST(0)));
@@ -275,6 +298,9 @@ PPCODE:
} else if(sv_is_string(item) &&
strEQ(SvPVX(item), "stufftest")) {
keyword_disable(hintkey_stufftest_sv);
+ } else if(sv_is_string(item) &&
+ strEQ(SvPVX(item), "swaptwostmts")) {
+ keyword_disable(hintkey_swaptwostmts_sv);
} else {
croak("\"%s\" is not exported by the %s module",
SvPV_nolen(item), SvPV_nolen(ST(0)));
diff --git a/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t b/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t
new file mode 100644
index 0000000000..44e9e7aaae
--- /dev/null
+++ b/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t
@@ -0,0 +1,158 @@
+use warnings;
+use strict;
+
+use Test::More tests => 22;
+
+BEGIN { $^H |= 0x20000; }
+
+my $t;
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN ();
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ $t .= "c";
+ $t .= "d";
+};
+isnt $@, "";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ $t .= "c";
+ $t .= "d";
+};
+is $@, "";
+is $t, "acbd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ if(1) { $t .= "b"; }
+ $t .= "c";
+ $t .= "d";
+};
+is $@, "";
+is $t, "acbd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ if(1) { $t .= "c"; }
+ $t .= "d";
+};
+is $@, "";
+is $t, "acbd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ foreach(1..3) {
+ $t .= "c";
+ swaptwostmts
+ $t .= "d";
+ $t .= "e";
+ $t .= "f";
+ }
+ $t .= "g";
+};
+is $@, "";
+is $t, "acedfcedfcedfbg";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ $t .= "c";
+};
+is $@, "";
+is $t, "acb";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b";
+ $t .= "c"
+};
+is $@, "";
+is $t, "acb";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $t .= "a";
+ swaptwostmts
+ $t .= "b"
+};
+isnt $@, "";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ $_ = $t;
+ $_ .= "a";
+ swaptwostmts
+ if(1) { $_ .= "b"; }
+ tr/a-z/A-Z/;
+ $_ .= "d";
+ $t = $_;
+};
+is $@, "";
+is $t, "Abd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ sub add_to_t { $t .= $_[0]; }
+ add_to_t "a";
+ swaptwostmts
+ if(1) { add_to_t "b"; }
+ add_to_t "c";
+ add_to_t "d";
+};
+is $@, "";
+is $t, "acbd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ { $t .= "a"; }
+ swaptwostmts
+ if(1) { { $t .= "b"; } }
+ { $t .= "c"; }
+ { $t .= "d"; }
+};
+is $@, "";
+is $t, "acbd";
+
+$t = "";
+eval q{
+ use XS::APItest::KeywordRPN qw(swaptwostmts);
+ no warnings "void";
+ "@{[ $t .= 'a' ]}";
+ swaptwostmts
+ if(1) { "@{[ $t .= 'b' ]}"; }
+ "@{[ $t .= 'c' ]}";
+ "@{[ $t .= 'd' ]}";
+};
+is $@, "";
+is $t, "acbd";
+
+1;
diff --git a/perl.c b/perl.c
index e0b9fa62ff..a04cfd6302 100644
--- a/perl.c
+++ b/perl.c
@@ -2168,7 +2168,7 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit)
/* now parse the script */
SETERRNO(0,SS_NORMAL);
- if (yyparse() || PL_parser->error_count) {
+ if (yyparse(GRAMPROG) || PL_parser->error_count) {
if (PL_minus_c)
Perl_croak(aTHX_ "%s had compilation errors.\n", PL_origfilename);
else {
diff --git a/perly.c b/perly.c
index 3624ca3c04..3edf57da86 100644
--- a/perly.c
+++ b/perly.c
@@ -34,6 +34,9 @@ typedef unsigned short int yytype_uint16;
typedef short int yytype_int16;
typedef signed char yysigned_char;
+/* YYINITDEPTH -- initial size of the parser's stacks. */
+#define YYINITDEPTH 200
+
#ifdef DEBUGGING
# define YYDEBUG 1
#else
@@ -195,7 +198,7 @@ S_clear_yystack(pTHX_ const yy_parser *parser)
yy_stack_frame *ps = parser->ps;
int i = 0;
- if (!parser->stack || ps == parser->stack)
+ if (!parser->stack)
return;
YYDPRINTF ((Perl_debug_log, "clearing the parse stack\n"));
@@ -311,6 +314,8 @@ S_clear_yystack(pTHX_ const yy_parser *parser)
SvREFCNT_dec(ps->compcv);
ps--;
}
+
+ Safefree(parser->stack);
}
@@ -320,9 +325,9 @@ S_clear_yystack(pTHX_ const yy_parser *parser)
int
#ifdef PERL_IN_MADLY_C
-Perl_madparse (pTHX)
+Perl_madparse (pTHX_ int gramtype)
#else
-Perl_yyparse (pTHX)
+Perl_yyparse (pTHX_ int gramtype)
#endif
{
dVAR;
@@ -346,16 +351,31 @@ Perl_yyparse (pTHX)
#ifndef PERL_IN_MADLY_C
# ifdef PERL_MAD
if (PL_madskills)
- return madparse();
+ return madparse(gramtype);
# endif
#endif
YYDPRINTF ((Perl_debug_log, "Starting parse\n"));
parser = PL_parser;
- ps = parser->ps;
- ENTER; /* force parser stack cleanup before we return */
+ ENTER; /* force parser state cleanup/restoration before we return */
+ SAVEPPTR(parser->yylval.pval);
+ SAVEINT(parser->yychar);
+ SAVEINT(parser->yyerrstatus);
+ SAVEINT(parser->stack_size);
+ SAVEINT(parser->yylen);
+ SAVEVPTR(parser->stack);
+ SAVEVPTR(parser->ps);
+
+ /* initialise state for this parse */
+ parser->yychar = gramtype;
+ parser->yyerrstatus = 0;
+ parser->stack_size = YYINITDEPTH;
+ parser->yylen = 0;
+ Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
+ ps = parser->ps = parser->stack;
+ ps->state = 0;
SAVEDESTRUCTOR_X(S_clear_yystack, parser);
/*------------------------------------------------------------.
diff --git a/perly.y b/perly.y
index ebcf5e7878..26f593a664 100644
--- a/perly.y
+++ b/perly.y
@@ -49,7 +49,7 @@
/* FIXME for MAD - is the new mintro on while and until important? */
-%start prog
+%start grammar
%union {
I32 ival; /* __DEFAULT__ (marker for regen_perly.pl;
@@ -69,6 +69,8 @@
#endif
}
+%token <ival> GRAMPROG GRAMFULLSTMT
+
%token <i_tkval> '{' '}' '[' ']' '-' '+' '$' '@' '%' '*' '&' ';'
%token <opval> WORD METHOD FUNCMETH THING PMFUNC PRIVATEREF
@@ -85,13 +87,12 @@
%token <i_tkval> LOCAL MY MYSUB REQUIRE
%token <i_tkval> COLONATTR
-%type <ival> prog progstart remember mremember
+%type <ival> grammar prog progstart remember mremember
%type <ival> startsub startanonsub startformsub
/* FIXME for MAD - are these two ival? */
%type <ival> mydefsv mintro
-%type <opval> decl format subrout mysubrout package use peg
-
+%type <opval> fullstmt decl format subrout mysubrout package use peg
%type <opval> block package_block mblock lineseq line loop cond else
%type <opval> expr term subscripted scalar ary hsh arylen star amper sideff
%type <opval> argexpr nexpr texpr iexpr mexpr mnexpr miexpr
@@ -137,6 +138,18 @@
%% /* RULES */
+/* Top-level choice of what kind of thing yyparse was called to parse */
+grammar : GRAMPROG prog
+ { $$ = $2; }
+ | GRAMFULLSTMT fullstmt
+ {
+ PL_eval_root = $2;
+ $$ = 0;
+ yyunlex();
+ parser->yychar = YYEOF;
+ }
+ ;
+
/* The whole program */
prog : progstart
/*CONTINUED*/ lineseq
@@ -200,7 +213,17 @@ lineseq : /* NULL */
}
;
-/* A "line" in the program */
+/* A statement, or "line", in the program */
+fullstmt: decl
+ { $$ = $1; }
+ | line
+ {
+ PL_pad_reset_pending = TRUE;
+ $$ = $1;
+ }
+ ;
+
+/* A non-declaration statement */
line : label cond
{ $$ = newSTATEOP(0, PVAL($1), $2);
TOKEN_GETMAD($1,((LISTOP*)$$)->op_first,'L'); }
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index d7c0970f41..fc146a0dd3 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -3478,6 +3478,11 @@ to even) byte length.
(P) The lexer got into a bad state while processing a case modifier.
+=item Parsing code internal error (%s)
+
+(F) Parsing code supplied by an extension violated the parser's API in
+a detectable way.
+
=item Pattern subroutine nesting without pos change exceeded limit in regex; marked by <-- HERE in m/%s/
(F) You used a pattern that uses too many nested subpattern calls without
diff --git a/pp_ctl.c b/pp_ctl.c
index 8c0c52018a..308ccca90b 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -3039,7 +3039,7 @@ Perl_find_runcv(pTHX_ U32 *db_seqp)
* 3: yyparse() died
*/
STATIC int
-S_try_yyparse(pTHX)
+S_try_yyparse(pTHX_ int gramtype)
{
int ret;
dJMPENV;
@@ -3048,7 +3048,7 @@ S_try_yyparse(pTHX)
JMPENV_PUSH(ret);
switch (ret) {
case 0:
- ret = yyparse() ? 1 : 0;
+ ret = yyparse(gramtype) ? 1 : 0;
break;
case 3:
break;
@@ -3137,7 +3137,7 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq)
/* note that yyparse() may raise an exception, e.g. C<BEGIN{die}>,
* so honour CATCH_GET and trap it here if necessary */
- yystatus = (!in_require && CATCH_GET) ? S_try_yyparse(aTHX) : yyparse();
+ yystatus = (!in_require && CATCH_GET) ? S_try_yyparse(aTHX_ GRAMPROG) : yyparse(GRAMPROG);
if (yystatus || PL_parser->error_count || !PL_eval_root) {
SV **newsp; /* Used by POPBLOCK. */
diff --git a/sv.c b/sv.c
index cd40d7737d..136c65bba6 100644
--- a/sv.c
+++ b/sv.c
@@ -10752,9 +10752,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
Newxz(parser, 1, yy_parser);
ptr_table_store(PL_ptr_table, proto, parser);
- parser->yyerrstatus = 0;
- parser->yychar = YYEMPTY; /* Cause a token to be read. */
-
/* XXX these not yet duped */
parser->old_parser = NULL;
parser->stack = NULL;
diff --git a/toke.c b/toke.c
index 42f0103281..6d4d01493c 100644
--- a/toke.c
+++ b/toke.c
@@ -45,9 +45,6 @@ Individual members of C<PL_parser> have their own documentation.
#define pl_yylval (PL_parser->yylval)
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#define YYINITDEPTH 200
-
/* XXX temporary backwards compatibility */
#define PL_lex_brackets (PL_parser->lex_brackets)
#define PL_lex_brackstack (PL_parser->lex_brackstack)
@@ -675,13 +672,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
parser->old_parser = oparser = PL_parser;
PL_parser = parser;
- Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
- parser->ps = parser->stack;
- parser->stack_size = YYINITDEPTH;
-
- parser->stack->state = 0;
- parser->yyerrstatus = 0;
- parser->yychar = YYEMPTY; /* Cause a token to be read. */
+ parser->stack = NULL;
+ parser->ps = NULL;
+ parser->stack_size = 0;
/* on scope exit, free this parser and restore any outer one */
SAVEPARSER(parser);
@@ -750,7 +743,6 @@ Perl_parser_free(pTHX_ const yy_parser *parser)
PerlIO_close(parser->rsfp);
SvREFCNT_dec(parser->rsfp_filters);
- Safefree(parser->stack);
Safefree(parser->lex_brackstack);
Safefree(parser->lex_casestack);
PL_parser = parser->old_parser;
@@ -1929,6 +1921,17 @@ S_force_next(pTHX_ I32 type)
#endif
}
+void
+Perl_yyunlex(pTHX)
+{
+ if (PL_parser->yychar != YYEMPTY) {
+ start_force(-1);
+ NEXTVAL_NEXTTOKE = PL_parser->yylval;
+ force_next(PL_parser->yychar);
+ PL_parser->yychar = YYEMPTY;
+ }
+}
+
STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
@@ -3953,7 +3956,7 @@ Perl_madlex(pTHX)
PL_thismad = 0;
/* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
- if (PL_pending_ident)
+ if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
return S_pending_ident(aTHX);
/* previous token ate up our whitespace? */
@@ -4212,7 +4215,7 @@ Perl_yylex(pTHX)
SvREFCNT_dec(tmp);
} );
/* check if there's an identifier for us to look at */
- if (PL_pending_ident)
+ if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
return REPORT(S_pending_ident(aTHX));
/* no identifier pending identification */
@@ -13940,6 +13943,49 @@ Perl_keyword_plugin_standard(pTHX_
}
/*
+=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+
+Parse a single complete Perl statement. This may be a normal imperative
+statement, including optional label, or a declaration that has
+compile-time effect. It is up to the caller to ensure that the dynamic
+parser state (L</PL_parser> et al) is correctly set to reflect the source
+of the code to be parsed and the lexical context for the statement.
+
+The op tree representing the statement is returned. This may be a
+null pointer if the statement is null, for example if it was actually
+a subroutine definition (which has compile-time side effects). If not
+null, it will be the result of a L</newSTATEOP> call, normally including
+a C<nextstate> or equivalent op.
+
+If an error occurs in parsing or compilation, in most cases a valid op
+tree (most likely null) is returned anyway. The error is reflected in
+the parser state, normally resulting in a single exception at the top
+level of parsing which covers all the compilation errors that occurred.
+Some compilation errors, however, will throw an exception immediately.
+
+The I<flags> parameter is reserved for future use, and must always
+be zero.
+
+=cut
+*/
+
+OP *
+Perl_parse_fullstmt(pTHX_ U32 flags)
+{
+ OP *fullstmtop;
+ if (flags)
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
+ ENTER;
+ SAVEVPTR(PL_eval_root);
+ PL_eval_root = NULL;
+ if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
+ qerror(Perl_mess(aTHX_ "Parse error"));
+ fullstmtop = PL_eval_root;
+ LEAVE;
+ return fullstmtop;
+}
+
+/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4