diff options
Diffstat (limited to 'Zend/zend_language_scanner.l')
-rw-r--r-- | Zend/zend_language_scanner.l | 412 |
1 files changed, 294 insertions, 118 deletions
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index db493211e2..ca5804f4bf 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -192,6 +192,7 @@ void startup_scanner(void) CG(doc_comment) = NULL; CG(extra_fn_flags) = 0; zend_stack_init(&SCNG(state_stack), sizeof(int)); + zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location)); zend_ptr_stack_init(&SCNG(heredoc_label_stack)); SCNG(heredoc_scan_ahead) = 0; } @@ -205,6 +206,7 @@ void shutdown_scanner(void) CG(parse_error) = 0; RESET_DOC_COMMENT(); zend_stack_destroy(&SCNG(state_stack)); + zend_stack_destroy(&SCNG(nest_location_stack)); zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); SCNG(heredoc_scan_ahead) = 0; @@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state) lex_state->state_stack = SCNG(state_stack); zend_stack_init(&SCNG(state_stack), sizeof(int)); + lex_state->nest_location_stack = SCNG(nest_location_stack); + zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location)); + lex_state->heredoc_label_stack = SCNG(heredoc_label_stack); zend_ptr_stack_init(&SCNG(heredoc_label_stack)); @@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state) zend_stack_destroy(&SCNG(state_stack)); SCNG(state_stack) = lex_state->state_stack; + zend_stack_destroy(&SCNG(nest_location_stack)); + SCNG(nest_location_stack) = lex_state->nest_location_stack; + zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack; @@ -298,13 +306,21 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle) } } -ZEND_API void zend_lex_tstring(zval *zv) +ZEND_API int zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref) { + char *ident = (char *) SCNG(yy_start) + ident_ref.offset; + size_t length = ident_ref.len; + if (length == sizeof("<?=")-1 && memcmp(ident, "<?=", sizeof("<?=")-1) == 0) { + zend_throw_exception(zend_ce_parse_error, "Cannot use \"<?=\" as an identifier", 0); + return FAILURE; + } + if (SCNG(on_event)) { - SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context)); + SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context)); } - ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng)); + ZVAL_STRINGL(zv, ident, length); + return SUCCESS; } #define BOM_UTF32_BE "\x00\x00\xfe\xff" @@ -653,6 +669,42 @@ ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type) return op_array; } +ZEND_API zend_ast *zend_compile_string_to_ast( + zend_string *code, zend_arena **ast_arena, const char *filename) { + zval code_zv; + zend_bool original_in_compilation; + zend_lex_state original_lex_state; + zend_ast *ast; + + ZVAL_STR_COPY(&code_zv, code); + + original_in_compilation = CG(in_compilation); + CG(in_compilation) = 1; + + zend_save_lexical_state(&original_lex_state); + if (zend_prepare_string_for_scanning(&code_zv, filename) == SUCCESS) { + CG(ast) = NULL; + CG(ast_arena) = zend_arena_create(1024 * 32); + LANG_SCNG(yy_state) = yycINITIAL; + + if (zendparse() != 0) { + zend_ast_destroy(CG(ast)); + zend_arena_destroy(CG(ast_arena)); + CG(ast) = NULL; + } + } + + /* restore_lexical_state changes CG(ast) and CG(ast_arena) */ + ast = CG(ast); + *ast_arena = CG(ast_arena); + + zend_restore_lexical_state(&original_lex_state); + CG(in_compilation) = original_in_compilation; + + zval_dtor(&code_zv); + + return ast; +} zend_op_array *compile_filename(int type, zval *filename) { @@ -687,7 +739,7 @@ zend_op_array *compile_filename(int type, zval *filename) return retval; } -ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename) +ZEND_API int zend_prepare_string_for_scanning(zval *str, const char *filename) { char *buf; size_t size, old_len; @@ -755,7 +807,7 @@ ZEND_API size_t zend_get_scanned_file_offset(void) return offset; } -zend_op_array *compile_string(zval *source_string, char *filename) +zend_op_array *compile_string(zval *source_string, const char *filename) { zend_lex_state original_lex_state; zend_op_array *op_array = NULL; @@ -786,7 +838,7 @@ zend_op_array *compile_string(zval *source_string, char *filename) BEGIN_EXTERN_C() -int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini) +int highlight_file(const char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini) { zend_lex_state original_lex_state; zend_file_handle file_handle; @@ -808,7 +860,7 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight return SUCCESS; } -int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name) +int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, const char *str_name) { zend_lex_state original_lex_state; zval tmp; @@ -1215,6 +1267,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label) zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label); } +/* Check that { }, [ ], ( ) are nested correctly */ +static void report_bad_nesting(char opening, int opening_lineno, char closing) +{ + char buf[256]; + size_t used = 0; + + used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening); + + if (opening_lineno != CG(zend_lineno)) { + used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno); + } + + if (closing) { /* 'closing' will be 0 if at end of file */ + used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing); + } + + zend_throw_exception(zend_ce_parse_error, buf, 0); +} + +static void enter_nesting(char opening) +{ + zend_nest_location nest_loc = {opening, CG(zend_lineno)}; + zend_stack_push(&SCNG(nest_location_stack), &nest_loc); +} + +static int exit_nesting(char closing) +{ + if (zend_stack_is_empty(&SCNG(nest_location_stack))) { + zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing); + return -1; + } + + zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack)); + char opening = nest_loc->text; + + if ((opening == '{' && closing != '}') || + (opening == '[' && closing != ']') || + (opening == '(' && closing != ')')) { + report_bad_nesting(opening, nest_loc->lineno, closing); + return -1; + } + + zend_stack_del_top(&SCNG(nest_location_stack)); + return 0; +} + +static int check_nesting_at_end() +{ + if (!zend_stack_is_empty(&SCNG(nest_location_stack))) { + zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack)); + report_bad_nesting(nest_loc->text, nest_loc->lineno, 0); + return -1; + } + + return 0; +} + #define PARSER_MODE() \ EXPECTED(elem != NULL) @@ -1234,6 +1343,11 @@ static void copy_heredoc_label_stack(void *void_heredoc_label) goto emit_token_with_str; \ } while (0) +#define RETURN_TOKEN_WITH_IDENT(_token) do { \ + token = _token; \ + goto emit_token_with_ident; \ + } while (0) + #define RETURN_OR_SKIP_TOKEN(_token) do { \ token = _token; \ if (PARSER_MODE()) { \ @@ -1242,6 +1356,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label) goto emit_token; \ } while (0) +#define RETURN_EXIT_NESTING_TOKEN(_token) do { \ + if (exit_nesting(_token) && PARSER_MODE()) { \ + RETURN_TOKEN(T_ERROR); \ + } else { \ + RETURN_TOKEN(_token); \ + } \ + } while(0) + +#define RETURN_END_TOKEN do { \ + if (check_nesting_at_end() && PARSER_MODE()) { \ + RETURN_TOKEN(T_ERROR); \ + } else { \ + RETURN_TOKEN(END); \ + } \ + } while (0) + int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem) { int token; @@ -1262,7 +1392,7 @@ BNUM "0b"[01]+(_[01]+)* LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]* WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* -TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] +TOKENS [;:,.|^&+-/*=%!~$<>?@] ANY_CHAR [^] NEWLINE ("\r"|"\n"|"\r\n") @@ -1270,169 +1400,177 @@ NEWLINE ("\r"|"\n"|"\r\n") <!*> := yyleng = YYCURSOR - SCNG(yy_text); <ST_IN_SCRIPTING>"exit" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } <ST_IN_SCRIPTING>"die" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } <ST_IN_SCRIPTING>"fn" { - RETURN_TOKEN(T_FN); + RETURN_TOKEN_WITH_IDENT(T_FN); } <ST_IN_SCRIPTING>"function" { - RETURN_TOKEN(T_FUNCTION); + RETURN_TOKEN_WITH_IDENT(T_FUNCTION); } <ST_IN_SCRIPTING>"const" { - RETURN_TOKEN(T_CONST); + RETURN_TOKEN_WITH_IDENT(T_CONST); } <ST_IN_SCRIPTING>"return" { - RETURN_TOKEN(T_RETURN); + RETURN_TOKEN_WITH_IDENT(T_RETURN); +} + +<ST_IN_SCRIPTING>"@@" { + RETURN_TOKEN(T_ATTRIBUTE); } <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] { yyless(yyleng - 1); HANDLE_NEWLINES(yytext, yyleng); - RETURN_TOKEN(T_YIELD_FROM); + RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM); } <ST_IN_SCRIPTING>"yield" { - RETURN_TOKEN(T_YIELD); + RETURN_TOKEN_WITH_IDENT(T_YIELD); } <ST_IN_SCRIPTING>"try" { - RETURN_TOKEN(T_TRY); + RETURN_TOKEN_WITH_IDENT(T_TRY); } <ST_IN_SCRIPTING>"catch" { - RETURN_TOKEN(T_CATCH); + RETURN_TOKEN_WITH_IDENT(T_CATCH); } <ST_IN_SCRIPTING>"finally" { - RETURN_TOKEN(T_FINALLY); + RETURN_TOKEN_WITH_IDENT(T_FINALLY); } <ST_IN_SCRIPTING>"throw" { - RETURN_TOKEN(T_THROW); + RETURN_TOKEN_WITH_IDENT(T_THROW); } <ST_IN_SCRIPTING>"if" { - RETURN_TOKEN(T_IF); + RETURN_TOKEN_WITH_IDENT(T_IF); } <ST_IN_SCRIPTING>"elseif" { - RETURN_TOKEN(T_ELSEIF); + RETURN_TOKEN_WITH_IDENT(T_ELSEIF); } <ST_IN_SCRIPTING>"endif" { - RETURN_TOKEN(T_ENDIF); + RETURN_TOKEN_WITH_IDENT(T_ENDIF); } <ST_IN_SCRIPTING>"else" { - RETURN_TOKEN(T_ELSE); + RETURN_TOKEN_WITH_IDENT(T_ELSE); } <ST_IN_SCRIPTING>"while" { - RETURN_TOKEN(T_WHILE); + RETURN_TOKEN_WITH_IDENT(T_WHILE); } <ST_IN_SCRIPTING>"endwhile" { - RETURN_TOKEN(T_ENDWHILE); + RETURN_TOKEN_WITH_IDENT(T_ENDWHILE); } <ST_IN_SCRIPTING>"do" { - RETURN_TOKEN(T_DO); + RETURN_TOKEN_WITH_IDENT(T_DO); } <ST_IN_SCRIPTING>"for" { - RETURN_TOKEN(T_FOR); + RETURN_TOKEN_WITH_IDENT(T_FOR); } <ST_IN_SCRIPTING>"endfor" { - RETURN_TOKEN(T_ENDFOR); + RETURN_TOKEN_WITH_IDENT(T_ENDFOR); } <ST_IN_SCRIPTING>"foreach" { - RETURN_TOKEN(T_FOREACH); + RETURN_TOKEN_WITH_IDENT(T_FOREACH); } <ST_IN_SCRIPTING>"endforeach" { - RETURN_TOKEN(T_ENDFOREACH); + RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH); } <ST_IN_SCRIPTING>"declare" { - RETURN_TOKEN(T_DECLARE); + RETURN_TOKEN_WITH_IDENT(T_DECLARE); } <ST_IN_SCRIPTING>"enddeclare" { - RETURN_TOKEN(T_ENDDECLARE); + RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE); } <ST_IN_SCRIPTING>"instanceof" { - RETURN_TOKEN(T_INSTANCEOF); + RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF); } <ST_IN_SCRIPTING>"as" { - RETURN_TOKEN(T_AS); + RETURN_TOKEN_WITH_IDENT(T_AS); } <ST_IN_SCRIPTING>"switch" { - RETURN_TOKEN(T_SWITCH); + RETURN_TOKEN_WITH_IDENT(T_SWITCH); +} + +<ST_IN_SCRIPTING>"match" { + RETURN_TOKEN_WITH_IDENT(T_MATCH); } <ST_IN_SCRIPTING>"endswitch" { - RETURN_TOKEN(T_ENDSWITCH); + RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH); } <ST_IN_SCRIPTING>"case" { - RETURN_TOKEN(T_CASE); + RETURN_TOKEN_WITH_IDENT(T_CASE); } <ST_IN_SCRIPTING>"default" { - RETURN_TOKEN(T_DEFAULT); + RETURN_TOKEN_WITH_IDENT(T_DEFAULT); } <ST_IN_SCRIPTING>"break" { - RETURN_TOKEN(T_BREAK); + RETURN_TOKEN_WITH_IDENT(T_BREAK); } <ST_IN_SCRIPTING>"continue" { - RETURN_TOKEN(T_CONTINUE); + RETURN_TOKEN_WITH_IDENT(T_CONTINUE); } <ST_IN_SCRIPTING>"goto" { - RETURN_TOKEN(T_GOTO); + RETURN_TOKEN_WITH_IDENT(T_GOTO); } <ST_IN_SCRIPTING>"echo" { - RETURN_TOKEN(T_ECHO); + RETURN_TOKEN_WITH_IDENT(T_ECHO); } <ST_IN_SCRIPTING>"print" { - RETURN_TOKEN(T_PRINT); + RETURN_TOKEN_WITH_IDENT(T_PRINT); } <ST_IN_SCRIPTING>"class" { - RETURN_TOKEN(T_CLASS); + RETURN_TOKEN_WITH_IDENT(T_CLASS); } <ST_IN_SCRIPTING>"interface" { - RETURN_TOKEN(T_INTERFACE); + RETURN_TOKEN_WITH_IDENT(T_INTERFACE); } <ST_IN_SCRIPTING>"trait" { - RETURN_TOKEN(T_TRAIT); + RETURN_TOKEN_WITH_IDENT(T_TRAIT); } <ST_IN_SCRIPTING>"extends" { - RETURN_TOKEN(T_EXTENDS); + RETURN_TOKEN_WITH_IDENT(T_EXTENDS); } <ST_IN_SCRIPTING>"implements" { - RETURN_TOKEN(T_IMPLEMENTS); + RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS); } <ST_IN_SCRIPTING>"->" { @@ -1440,6 +1578,11 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_OBJECT_OPERATOR); } +<ST_IN_SCRIPTING>"?->" { + yy_push_state(ST_LOOKING_FOR_PROPERTY); + return T_NULLSAFE_OBJECT_OPERATOR; +} + <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ { goto return_whitespace; } @@ -1448,6 +1591,10 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_OBJECT_OPERATOR); } +<ST_LOOKING_FOR_PROPERTY>"?->" { + RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR); +} + <ST_LOOKING_FOR_PROPERTY>{LABEL} { yy_pop_state(); RETURN_TOKEN_WITH_STR(T_STRING, 0); @@ -1463,10 +1610,6 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM); } -<ST_IN_SCRIPTING>"\\" { - RETURN_TOKEN(T_NS_SEPARATOR); -} - <ST_IN_SCRIPTING>"..." { RETURN_TOKEN(T_ELLIPSIS); } @@ -1476,15 +1619,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"new" { - RETURN_TOKEN(T_NEW); + RETURN_TOKEN_WITH_IDENT(T_NEW); } <ST_IN_SCRIPTING>"clone" { - RETURN_TOKEN(T_CLONE); + RETURN_TOKEN_WITH_IDENT(T_CLONE); } <ST_IN_SCRIPTING>"var" { - RETURN_TOKEN(T_VAR); + RETURN_TOKEN_WITH_IDENT(T_VAR); } <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" { @@ -1497,7 +1640,8 @@ NEWLINE ("\r"|"\n"|"\r\n") <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" { if (PARSER_MODE()) { - zend_error(E_DEPRECATED, "The (real) cast is deprecated, use (float) instead"); + zend_throw_exception(zend_ce_parse_error, "The (real) cast has been removed, use (float) instead", 0); + RETURN_TOKEN(T_ERROR); } RETURN_TOKEN(T_DOUBLE_CAST); } @@ -1523,79 +1667,79 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"eval" { - RETURN_TOKEN(T_EVAL); + RETURN_TOKEN_WITH_IDENT(T_EVAL); } <ST_IN_SCRIPTING>"include" { - RETURN_TOKEN(T_INCLUDE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE); } <ST_IN_SCRIPTING>"include_once" { - RETURN_TOKEN(T_INCLUDE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE); } <ST_IN_SCRIPTING>"require" { - RETURN_TOKEN(T_REQUIRE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE); } <ST_IN_SCRIPTING>"require_once" { - RETURN_TOKEN(T_REQUIRE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE); } <ST_IN_SCRIPTING>"namespace" { - RETURN_TOKEN(T_NAMESPACE); + RETURN_TOKEN_WITH_IDENT(T_NAMESPACE); } <ST_IN_SCRIPTING>"use" { - RETURN_TOKEN(T_USE); + RETURN_TOKEN_WITH_IDENT(T_USE); } <ST_IN_SCRIPTING>"insteadof" { - RETURN_TOKEN(T_INSTEADOF); + RETURN_TOKEN_WITH_IDENT(T_INSTEADOF); } <ST_IN_SCRIPTING>"global" { - RETURN_TOKEN(T_GLOBAL); + RETURN_TOKEN_WITH_IDENT(T_GLOBAL); } <ST_IN_SCRIPTING>"isset" { - RETURN_TOKEN(T_ISSET); + RETURN_TOKEN_WITH_IDENT(T_ISSET); } <ST_IN_SCRIPTING>"empty" { - RETURN_TOKEN(T_EMPTY); + RETURN_TOKEN_WITH_IDENT(T_EMPTY); } <ST_IN_SCRIPTING>"__halt_compiler" { - RETURN_TOKEN(T_HALT_COMPILER); + RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER); } <ST_IN_SCRIPTING>"static" { - RETURN_TOKEN(T_STATIC); + RETURN_TOKEN_WITH_IDENT(T_STATIC); } <ST_IN_SCRIPTING>"abstract" { - RETURN_TOKEN(T_ABSTRACT); + RETURN_TOKEN_WITH_IDENT(T_ABSTRACT); } <ST_IN_SCRIPTING>"final" { - RETURN_TOKEN(T_FINAL); + RETURN_TOKEN_WITH_IDENT(T_FINAL); } <ST_IN_SCRIPTING>"private" { - RETURN_TOKEN(T_PRIVATE); + RETURN_TOKEN_WITH_IDENT(T_PRIVATE); } <ST_IN_SCRIPTING>"protected" { - RETURN_TOKEN(T_PROTECTED); + RETURN_TOKEN_WITH_IDENT(T_PROTECTED); } <ST_IN_SCRIPTING>"public" { - RETURN_TOKEN(T_PUBLIC); + RETURN_TOKEN_WITH_IDENT(T_PUBLIC); } <ST_IN_SCRIPTING>"unset" { - RETURN_TOKEN(T_UNSET); + RETURN_TOKEN_WITH_IDENT(T_UNSET); } <ST_IN_SCRIPTING>"=>" { @@ -1603,15 +1747,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"list" { - RETURN_TOKEN(T_LIST); + RETURN_TOKEN_WITH_IDENT(T_LIST); } <ST_IN_SCRIPTING>"array" { - RETURN_TOKEN(T_ARRAY); + RETURN_TOKEN_WITH_IDENT(T_ARRAY); } <ST_IN_SCRIPTING>"callable" { - RETURN_TOKEN(T_CALLABLE); + RETURN_TOKEN_WITH_IDENT(T_CALLABLE); } <ST_IN_SCRIPTING>"++" { @@ -1715,15 +1859,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"OR" { - RETURN_TOKEN(T_LOGICAL_OR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR); } <ST_IN_SCRIPTING>"AND" { - RETURN_TOKEN(T_LOGICAL_AND); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND); } <ST_IN_SCRIPTING>"XOR" { - RETURN_TOKEN(T_LOGICAL_XOR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR); } <ST_IN_SCRIPTING>"<<" { @@ -1734,6 +1878,16 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_SR); } +<ST_IN_SCRIPTING>"]"|")" { + /* Check that ] and ) match up properly with a preceding [ or ( */ + RETURN_EXIT_NESTING_TOKEN(yytext[0]); +} + +<ST_IN_SCRIPTING>"["|"(" { + enter_nesting(yytext[0]); + RETURN_TOKEN(yytext[0]); +} + <ST_IN_SCRIPTING>{TOKENS} { RETURN_TOKEN(yytext[0]); } @@ -1741,22 +1895,23 @@ NEWLINE ("\r"|"\n"|"\r\n") <ST_IN_SCRIPTING>"{" { yy_push_state(ST_IN_SCRIPTING); + enter_nesting('{'); RETURN_TOKEN('{'); } <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" { yy_push_state(ST_LOOKING_FOR_VARNAME); + enter_nesting('{'); RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } - <ST_IN_SCRIPTING>"}" { RESET_DOC_COMMENT(); if (!zend_stack_is_empty(&SCNG(state_stack))) { yy_pop_state(); } - RETURN_TOKEN('}'); + RETURN_EXIT_NESTING_TOKEN('}'); } @@ -1968,35 +2123,35 @@ string: } <ST_IN_SCRIPTING>"__CLASS__" { - RETURN_TOKEN(T_CLASS_C); + RETURN_TOKEN_WITH_IDENT(T_CLASS_C); } <ST_IN_SCRIPTING>"__TRAIT__" { - RETURN_TOKEN(T_TRAIT_C); + RETURN_TOKEN_WITH_IDENT(T_TRAIT_C); } <ST_IN_SCRIPTING>"__FUNCTION__" { - RETURN_TOKEN(T_FUNC_C); + RETURN_TOKEN_WITH_IDENT(T_FUNC_C); } <ST_IN_SCRIPTING>"__METHOD__" { - RETURN_TOKEN(T_METHOD_C); + RETURN_TOKEN_WITH_IDENT(T_METHOD_C); } <ST_IN_SCRIPTING>"__LINE__" { - RETURN_TOKEN(T_LINE); + RETURN_TOKEN_WITH_IDENT(T_LINE); } <ST_IN_SCRIPTING>"__FILE__" { - RETURN_TOKEN(T_FILE); + RETURN_TOKEN_WITH_IDENT(T_FILE); } <ST_IN_SCRIPTING>"__DIR__" { - RETURN_TOKEN(T_DIR); + RETURN_TOKEN_WITH_IDENT(T_DIR); } <ST_IN_SCRIPTING>"__NAMESPACE__" { - RETURN_TOKEN(T_NS_C); + RETURN_TOKEN_WITH_IDENT(T_NS_C); } <SHEBANG>"#!" .* {NEWLINE} { @@ -2014,7 +2169,8 @@ string: <INITIAL>"<?=" { BEGIN(ST_IN_SCRIPTING); if (PARSER_MODE()) { - RETURN_TOKEN(T_ECHO); + /* We'll reject this as an identifier in zend_lex_tstring. */ + RETURN_TOKEN_WITH_IDENT(T_ECHO); } RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO); } @@ -2052,7 +2208,7 @@ string: <INITIAL>{ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } inline_char_handler: @@ -2129,7 +2285,7 @@ inline_char_handler: RETURN_TOKEN(']'); } -<ST_VAR_OFFSET>{TOKENS}|[{}"`] { +<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] { /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */ RETURN_TOKEN(yytext[0]); } @@ -2142,6 +2298,22 @@ inline_char_handler: RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE); } +<ST_IN_SCRIPTING>"namespace"("\\"{LABEL})+ { + RETURN_TOKEN_WITH_STR(T_NAME_RELATIVE, sizeof("namespace\\") - 1); +} + +<ST_IN_SCRIPTING>{LABEL}("\\"{LABEL})+ { + RETURN_TOKEN_WITH_STR(T_NAME_QUALIFIED, 0); +} + +<ST_IN_SCRIPTING>"\\"{LABEL}("\\"{LABEL})* { + RETURN_TOKEN_WITH_STR(T_NAME_FULLY_QUALIFIED, 1); +} + +<ST_IN_SCRIPTING>"\\" { + RETURN_TOKEN(T_NS_SEPARATOR); +} + <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} { RETURN_TOKEN_WITH_STR(T_STRING, 0); } @@ -2151,12 +2323,8 @@ inline_char_handler: while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { case '\r': - if (*YYCURSOR == '\n') { - YYCURSOR++; - } - /* fall through */ case '\n': - CG(zend_lineno)++; + YYCURSOR--; break; case '?': if (*YYCURSOR == '>') { @@ -2193,8 +2361,11 @@ inline_char_handler: if (YYCURSOR < YYLIMIT) { YYCURSOR++; - } else if (!SCNG(heredoc_scan_ahead)) { - zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno)); + } else { + zend_throw_exception_ex(zend_ce_parse_error, 0, "Unterminated comment starting line %d", CG(zend_lineno)); + if (PARSER_MODE()) { + RETURN_TOKEN(T_ERROR); + } } yyleng = YYCURSOR - SCNG(yy_text); @@ -2393,6 +2564,7 @@ skip_escape_conversion: } heredoc_label->label = estrndup(s, heredoc_label->length); + heredoc_label->indentation_uses_spaces = 0; heredoc_label->indentation = 0; saved_cursor = YYCURSOR; @@ -2530,6 +2702,7 @@ skip_escape_conversion: <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" { yy_push_state(ST_IN_SCRIPTING); yyless(1); + enter_nesting('{'); RETURN_TOKEN(T_CURLY_OPEN); } @@ -2554,7 +2727,7 @@ skip_escape_conversion: } if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2601,7 +2774,7 @@ double_quotes_scan_done: <ST_BACKQUOTE>{ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2650,7 +2823,7 @@ double_quotes_scan_done: int newline = 0, indentation = 0, spacing = 0; if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } YYCURSOR--; @@ -2774,7 +2947,7 @@ heredoc_scan_done: int newline = 0, indentation = 0, spacing = -1; if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } YYCURSOR--; @@ -2862,17 +3035,10 @@ nowdoc_scan_done: <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } - if (!SCNG(heredoc_scan_ahead)) { - zend_error(E_COMPILE_WARNING, "Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); - } - if (PARSER_MODE()) { - goto restart; - } else { - RETURN_TOKEN(T_BAD_CHARACTER); - } + RETURN_TOKEN(T_BAD_CHARACTER); } */ @@ -2888,14 +3054,24 @@ emit_token_with_val: emit_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); + } + return token; + +emit_token_with_ident: + if (PARSER_MODE()) { + elem->ident.offset = SCNG(yy_text) - SCNG(yy_start); + elem->ident.len = SCNG(yy_leng); + } + if (SCNG(on_event)) { + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } return token; return_whitespace: HANDLE_NEWLINES(yytext, yyleng); if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context)); } if (PARSER_MODE()) { start_line = CG(zend_lineno); @@ -2906,7 +3082,7 @@ return_whitespace: skip_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } start_line = CG(zend_lineno); goto restart; |