diff options
author | Zefram <zefram@fysh.org> | 2010-08-21 18:54:04 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgs@consttype.org> | 2010-09-06 23:25:34 +0200 |
commit | 28ac2b49dea6847c95a32afde577935fec51650f (patch) | |
tree | 2af2b2eb45e3584e390fddcc564587e2c2286f4a | |
parent | 544cdeac5a054fa1c1b543769d0076fa6c3faf68 (diff) | |
download | perl-28ac2b49dea6847c95a32afde577935fec51650f.tar.gz |
function interface to parse Perl statement
yyparse() becomes reentrant. The yacc stack and related resources
are allocated in yyparse(), rather than in lex_start(), and they are
localised to yyparse(), preserving their values from any outer parser.
yyparse() now takes a parameter which determines which production it
will parse at the top level. New API function parse_fullstmt() uses this
facility to parse just a single statement. The top-level single-statement
production that is used for this then messes with the parser's head so
that the parsing stops without seeing EOF, and any lookahead token seen
after the statement is pushed back to the lexer.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | embed.fnc | 7 | ||||
-rw-r--r-- | ext/XS-APItest-KeywordRPN/KeywordRPN.xs | 26 | ||||
-rw-r--r-- | ext/XS-APItest-KeywordRPN/t/swaptwostmts.t | 158 | ||||
-rw-r--r-- | perl.c | 2 | ||||
-rw-r--r-- | perly.c | 32 | ||||
-rw-r--r-- | perly.y | 33 | ||||
-rw-r--r-- | pod/perldiag.pod | 5 | ||||
-rw-r--r-- | pp_ctl.c | 6 | ||||
-rw-r--r-- | sv.c | 3 | ||||
-rw-r--r-- | toke.c | 72 |
11 files changed, 312 insertions, 33 deletions
@@ -3318,6 +3318,7 @@ ext/XS-APItest-KeywordRPN/README XS::APItest::KeywordRPN extension ext/XS-APItest-KeywordRPN/t/keyword_plugin.t test keyword plugin mechanism ext/XS-APItest-KeywordRPN/t/multiline.t test plugin parsing across lines ext/XS-APItest-KeywordRPN/t/stuff_svcur_bug.t test for a bug in lex_stuff_pvn +ext/XS-APItest-KeywordRPN/t/swaptwostmts.t test recursive descent statement parsing ext/XS-APItest/Makefile.PL XS::APItest extension ext/XS-APItest/MANIFEST XS::APItest extension ext/XS-APItest/notcore.c Test API functions when PERL_CORE is not defined @@ -622,6 +622,8 @@ AMpd |bool |lex_next_chunk |U32 flags AMpd |I32 |lex_peek_unichar|U32 flags AMpd |I32 |lex_read_unichar|U32 flags AMpd |void |lex_read_space |U32 flags +: Public parser API +AMpd |OP* |parse_fullstmt |U32 flags : Used in various files Ap |void |op_null |NN OP* o : FIXME. Used by Data::Alias @@ -1326,8 +1328,9 @@ p |void |write_to_stderr|NN SV* msv p |int |yyerror |NN const char *const s : Used in perly.y, and by Data::Alias EXp |int |yylex +p |void |yyunlex : Used in perl.c, pp_ctl.c -p |int |yyparse +p |int |yyparse |int gramtype : Only used in scope.c p |void |parser_free |NN const yy_parser *parser #if defined(PERL_IN_TOKE_C) @@ -2341,7 +2344,7 @@ s |void |start_force |int where s |void |curmad |char slot|NULLOK SV *sv # endif Mp |int |madlex -Mp |int |madparse +Mp |int |madparse |int gramtype #endif #if !defined(HAS_SIGNBIT) AMdnoP |int |Perl_signbit |NV f diff --git a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs index a5dfcd9adc..6c622564ff 100644 --- a/ext/XS-APItest-KeywordRPN/KeywordRPN.xs +++ b/ext/XS-APItest-KeywordRPN/KeywordRPN.xs @@ -9,6 +9,7 @@ (SvFLAGS(sv) & (SVf_IOK|SVf_NOK|SVf_POK|SVp_IOK|SVp_NOK|SVp_POK))) static SV *hintkey_rpn_sv, *hintkey_calcrpn_sv, *hintkey_stufftest_sv; +static SV *hintkey_swaptwostmts_sv; static int (*next_keyword_plugin)(pTHX_ char *, STRLEN, OP **); /* low-level parser helpers */ @@ -171,6 +172,18 @@ static OP *THX_parse_keyword_stufftest(pTHX) } #define parse_keyword_stufftest() THX_parse_keyword_stufftest(aTHX) +static OP *THX_parse_keyword_swaptwostmts(pTHX) +{ + OP *a, *b; + a = parse_fullstmt(0); + b = parse_fullstmt(0); + if(a && b) + PL_hints |= HINT_BLOCK_SCOPE; + /* should use append_list(), but that's not part of the public API */ + return !a ? b : !b ? a : newLISTOP(OP_LINESEQ, 0, b, a); +} +#define parse_keyword_swaptwostmts() THX_parse_keyword_swaptwostmts(aTHX) + /* plugin glue */ static int THX_keyword_active(pTHX_ SV *hintkey_sv) @@ -225,6 +238,11 @@ static int my_keyword_plugin(pTHX_ keyword_active(hintkey_stufftest_sv)) { *op_ptr = parse_keyword_stufftest(); return KEYWORD_PLUGIN_STMT; + } else if(keyword_len == 12 && + strnEQ(keyword_ptr, "swaptwostmts", 12) && + keyword_active(hintkey_swaptwostmts_sv)) { + *op_ptr = parse_keyword_swaptwostmts(); + return KEYWORD_PLUGIN_STMT; } else { return next_keyword_plugin(aTHX_ keyword_ptr, keyword_len, op_ptr); @@ -238,6 +256,8 @@ BOOT: hintkey_calcrpn_sv = newSVpvs_share("XS::APItest::KeywordRPN/calcrpn"); hintkey_stufftest_sv = newSVpvs_share("XS::APItest::KeywordRPN/stufftest"); + hintkey_swaptwostmts_sv = + newSVpvs_share("XS::APItest::KeywordRPN/swaptwostmts"); next_keyword_plugin = PL_keyword_plugin; PL_keyword_plugin = my_keyword_plugin; @@ -255,6 +275,9 @@ PPCODE: } else if(sv_is_string(item) && strEQ(SvPVX(item), "stufftest")) { keyword_enable(hintkey_stufftest_sv); + } else if(sv_is_string(item) && + strEQ(SvPVX(item), "swaptwostmts")) { + keyword_enable(hintkey_swaptwostmts_sv); } else { croak("\"%s\" is not exported by the %s module", SvPV_nolen(item), SvPV_nolen(ST(0))); @@ -275,6 +298,9 @@ PPCODE: } else if(sv_is_string(item) && strEQ(SvPVX(item), "stufftest")) { keyword_disable(hintkey_stufftest_sv); + } else if(sv_is_string(item) && + strEQ(SvPVX(item), "swaptwostmts")) { + keyword_disable(hintkey_swaptwostmts_sv); } else { croak("\"%s\" is not exported by the %s module", SvPV_nolen(item), SvPV_nolen(ST(0))); diff --git a/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t b/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t new file mode 100644 index 0000000000..44e9e7aaae --- /dev/null +++ b/ext/XS-APItest-KeywordRPN/t/swaptwostmts.t @@ -0,0 +1,158 @@ +use warnings; +use strict; + +use Test::More tests => 22; + +BEGIN { $^H |= 0x20000; } + +my $t; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN (); + $t .= "a"; + swaptwostmts + $t .= "b"; + $t .= "c"; + $t .= "d"; +}; +isnt $@, ""; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b"; + $t .= "c"; + $t .= "d"; +}; +is $@, ""; +is $t, "acbd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + if(1) { $t .= "b"; } + $t .= "c"; + $t .= "d"; +}; +is $@, ""; +is $t, "acbd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b"; + if(1) { $t .= "c"; } + $t .= "d"; +}; +is $@, ""; +is $t, "acbd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b"; + foreach(1..3) { + $t .= "c"; + swaptwostmts + $t .= "d"; + $t .= "e"; + $t .= "f"; + } + $t .= "g"; +}; +is $@, ""; +is $t, "acedfcedfcedfbg"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b"; + $t .= "c"; +}; +is $@, ""; +is $t, "acb"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b"; + $t .= "c" +}; +is $@, ""; +is $t, "acb"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $t .= "a"; + swaptwostmts + $t .= "b" +}; +isnt $@, ""; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + $_ = $t; + $_ .= "a"; + swaptwostmts + if(1) { $_ .= "b"; } + tr/a-z/A-Z/; + $_ .= "d"; + $t = $_; +}; +is $@, ""; +is $t, "Abd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + sub add_to_t { $t .= $_[0]; } + add_to_t "a"; + swaptwostmts + if(1) { add_to_t "b"; } + add_to_t "c"; + add_to_t "d"; +}; +is $@, ""; +is $t, "acbd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + { $t .= "a"; } + swaptwostmts + if(1) { { $t .= "b"; } } + { $t .= "c"; } + { $t .= "d"; } +}; +is $@, ""; +is $t, "acbd"; + +$t = ""; +eval q{ + use XS::APItest::KeywordRPN qw(swaptwostmts); + no warnings "void"; + "@{[ $t .= 'a' ]}"; + swaptwostmts + if(1) { "@{[ $t .= 'b' ]}"; } + "@{[ $t .= 'c' ]}"; + "@{[ $t .= 'd' ]}"; +}; +is $@, ""; +is $t, "acbd"; + +1; @@ -2168,7 +2168,7 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit) /* now parse the script */ SETERRNO(0,SS_NORMAL); - if (yyparse() || PL_parser->error_count) { + if (yyparse(GRAMPROG) || PL_parser->error_count) { if (PL_minus_c) Perl_croak(aTHX_ "%s had compilation errors.\n", PL_origfilename); else { @@ -34,6 +34,9 @@ typedef unsigned short int yytype_uint16; typedef short int yytype_int16; typedef signed char yysigned_char; +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#define YYINITDEPTH 200 + #ifdef DEBUGGING # define YYDEBUG 1 #else @@ -195,7 +198,7 @@ S_clear_yystack(pTHX_ const yy_parser *parser) yy_stack_frame *ps = parser->ps; int i = 0; - if (!parser->stack || ps == parser->stack) + if (!parser->stack) return; YYDPRINTF ((Perl_debug_log, "clearing the parse stack\n")); @@ -311,6 +314,8 @@ S_clear_yystack(pTHX_ const yy_parser *parser) SvREFCNT_dec(ps->compcv); ps--; } + + Safefree(parser->stack); } @@ -320,9 +325,9 @@ S_clear_yystack(pTHX_ const yy_parser *parser) int #ifdef PERL_IN_MADLY_C -Perl_madparse (pTHX) +Perl_madparse (pTHX_ int gramtype) #else -Perl_yyparse (pTHX) +Perl_yyparse (pTHX_ int gramtype) #endif { dVAR; @@ -346,16 +351,31 @@ Perl_yyparse (pTHX) #ifndef PERL_IN_MADLY_C # ifdef PERL_MAD if (PL_madskills) - return madparse(); + return madparse(gramtype); # endif #endif YYDPRINTF ((Perl_debug_log, "Starting parse\n")); parser = PL_parser; - ps = parser->ps; - ENTER; /* force parser stack cleanup before we return */ + ENTER; /* force parser state cleanup/restoration before we return */ + SAVEPPTR(parser->yylval.pval); + SAVEINT(parser->yychar); + SAVEINT(parser->yyerrstatus); + SAVEINT(parser->stack_size); + SAVEINT(parser->yylen); + SAVEVPTR(parser->stack); + SAVEVPTR(parser->ps); + + /* initialise state for this parse */ + parser->yychar = gramtype; + parser->yyerrstatus = 0; + parser->stack_size = YYINITDEPTH; + parser->yylen = 0; + Newx(parser->stack, YYINITDEPTH, yy_stack_frame); + ps = parser->ps = parser->stack; + ps->state = 0; SAVEDESTRUCTOR_X(S_clear_yystack, parser); /*------------------------------------------------------------. @@ -49,7 +49,7 @@ /* FIXME for MAD - is the new mintro on while and until important? */ -%start prog +%start grammar %union { I32 ival; /* __DEFAULT__ (marker for regen_perly.pl; @@ -69,6 +69,8 @@ #endif } +%token <ival> GRAMPROG GRAMFULLSTMT + %token <i_tkval> '{' '}' '[' ']' '-' '+' '$' '@' '%' '*' '&' ';' %token <opval> WORD METHOD FUNCMETH THING PMFUNC PRIVATEREF @@ -85,13 +87,12 @@ %token <i_tkval> LOCAL MY MYSUB REQUIRE %token <i_tkval> COLONATTR -%type <ival> prog progstart remember mremember +%type <ival> grammar prog progstart remember mremember %type <ival> startsub startanonsub startformsub /* FIXME for MAD - are these two ival? */ %type <ival> mydefsv mintro -%type <opval> decl format subrout mysubrout package use peg - +%type <opval> fullstmt decl format subrout mysubrout package use peg %type <opval> block package_block mblock lineseq line loop cond else %type <opval> expr term subscripted scalar ary hsh arylen star amper sideff %type <opval> argexpr nexpr texpr iexpr mexpr mnexpr miexpr @@ -137,6 +138,18 @@ %% /* RULES */ +/* Top-level choice of what kind of thing yyparse was called to parse */ +grammar : GRAMPROG prog + { $$ = $2; } + | GRAMFULLSTMT fullstmt + { + PL_eval_root = $2; + $$ = 0; + yyunlex(); + parser->yychar = YYEOF; + } + ; + /* The whole program */ prog : progstart /*CONTINUED*/ lineseq @@ -200,7 +213,17 @@ lineseq : /* NULL */ } ; -/* A "line" in the program */ +/* A statement, or "line", in the program */ +fullstmt: decl + { $$ = $1; } + | line + { + PL_pad_reset_pending = TRUE; + $$ = $1; + } + ; + +/* A non-declaration statement */ line : label cond { $$ = newSTATEOP(0, PVAL($1), $2); TOKEN_GETMAD($1,((LISTOP*)$$)->op_first,'L'); } diff --git a/pod/perldiag.pod b/pod/perldiag.pod index d7c0970f41..fc146a0dd3 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3478,6 +3478,11 @@ to even) byte length. (P) The lexer got into a bad state while processing a case modifier. +=item Parsing code internal error (%s) + +(F) Parsing code supplied by an extension violated the parser's API in +a detectable way. + =item Pattern subroutine nesting without pos change exceeded limit in regex; marked by <-- HERE in m/%s/ (F) You used a pattern that uses too many nested subpattern calls without @@ -3039,7 +3039,7 @@ Perl_find_runcv(pTHX_ U32 *db_seqp) * 3: yyparse() died */ STATIC int -S_try_yyparse(pTHX) +S_try_yyparse(pTHX_ int gramtype) { int ret; dJMPENV; @@ -3048,7 +3048,7 @@ S_try_yyparse(pTHX) JMPENV_PUSH(ret); switch (ret) { case 0: - ret = yyparse() ? 1 : 0; + ret = yyparse(gramtype) ? 1 : 0; break; case 3: break; @@ -3137,7 +3137,7 @@ S_doeval(pTHX_ int gimme, OP** startop, CV* outside, U32 seq) /* note that yyparse() may raise an exception, e.g. C<BEGIN{die}>, * so honour CATCH_GET and trap it here if necessary */ - yystatus = (!in_require && CATCH_GET) ? S_try_yyparse(aTHX) : yyparse(); + yystatus = (!in_require && CATCH_GET) ? S_try_yyparse(aTHX_ GRAMPROG) : yyparse(GRAMPROG); if (yystatus || PL_parser->error_count || !PL_eval_root) { SV **newsp; /* Used by POPBLOCK. */ @@ -10752,9 +10752,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) Newxz(parser, 1, yy_parser); ptr_table_store(PL_ptr_table, proto, parser); - parser->yyerrstatus = 0; - parser->yychar = YYEMPTY; /* Cause a token to be read. */ - /* XXX these not yet duped */ parser->old_parser = NULL; parser->stack = NULL; @@ -45,9 +45,6 @@ Individual members of C<PL_parser> have their own documentation. #define pl_yylval (PL_parser->yylval) -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#define YYINITDEPTH 200 - /* XXX temporary backwards compatibility */ #define PL_lex_brackets (PL_parser->lex_brackets) #define PL_lex_brackstack (PL_parser->lex_brackstack) @@ -675,13 +672,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) parser->old_parser = oparser = PL_parser; PL_parser = parser; - Newx(parser->stack, YYINITDEPTH, yy_stack_frame); - parser->ps = parser->stack; - parser->stack_size = YYINITDEPTH; - - parser->stack->state = 0; - parser->yyerrstatus = 0; - parser->yychar = YYEMPTY; /* Cause a token to be read. */ + parser->stack = NULL; + parser->ps = NULL; + parser->stack_size = 0; /* on scope exit, free this parser and restore any outer one */ SAVEPARSER(parser); @@ -750,7 +743,6 @@ Perl_parser_free(pTHX_ const yy_parser *parser) PerlIO_close(parser->rsfp); SvREFCNT_dec(parser->rsfp_filters); - Safefree(parser->stack); Safefree(parser->lex_brackstack); Safefree(parser->lex_casestack); PL_parser = parser->old_parser; @@ -1929,6 +1921,17 @@ S_force_next(pTHX_ I32 type) #endif } +void +Perl_yyunlex(pTHX) +{ + if (PL_parser->yychar != YYEMPTY) { + start_force(-1); + NEXTVAL_NEXTTOKE = PL_parser->yylval; + force_next(PL_parser->yychar); + PL_parser->yychar = YYEMPTY; + } +} + STATIC SV * S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { @@ -3953,7 +3956,7 @@ Perl_madlex(pTHX) PL_thismad = 0; /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */ - if (PL_pending_ident) + if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident) return S_pending_ident(aTHX); /* previous token ate up our whitespace? */ @@ -4212,7 +4215,7 @@ Perl_yylex(pTHX) SvREFCNT_dec(tmp); } ); /* check if there's an identifier for us to look at */ - if (PL_pending_ident) + if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident) return REPORT(S_pending_ident(aTHX)); /* no identifier pending identification */ @@ -13940,6 +13943,49 @@ Perl_keyword_plugin_standard(pTHX_ } /* +=for apidoc Amx|OP *|parse_fullstmt|U32 flags + +Parse a single complete Perl statement. This may be a normal imperative +statement, including optional label, or a declaration that has +compile-time effect. It is up to the caller to ensure that the dynamic +parser state (L</PL_parser> et al) is correctly set to reflect the source +of the code to be parsed and the lexical context for the statement. + +The op tree representing the statement is returned. This may be a +null pointer if the statement is null, for example if it was actually +a subroutine definition (which has compile-time side effects). If not +null, it will be the result of a L</newSTATEOP> call, normally including +a C<nextstate> or equivalent op. + +If an error occurs in parsing or compilation, in most cases a valid op +tree (most likely null) is returned anyway. The error is reflected in +the parser state, normally resulting in a single exception at the top +level of parsing which covers all the compilation errors that occurred. +Some compilation errors, however, will throw an exception immediately. + +The I<flags> parameter is reserved for future use, and must always +be zero. + +=cut +*/ + +OP * +Perl_parse_fullstmt(pTHX_ U32 flags) +{ + OP *fullstmtop; + if (flags) + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); + ENTER; + SAVEVPTR(PL_eval_root); + PL_eval_root = NULL; + if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count) + qerror(Perl_mess(aTHX_ "Parse error")); + fullstmtop = PL_eval_root; + LEAVE; + return fullstmtop; +} + +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 |