summaryrefslogtreecommitdiff
path: root/Zend/zend_language_scanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'Zend/zend_language_scanner.l')
-rw-r--r--Zend/zend_language_scanner.l412
1 files changed, 294 insertions, 118 deletions
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index db493211e2..ca5804f4bf 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -192,6 +192,7 @@ void startup_scanner(void)
CG(doc_comment) = NULL;
CG(extra_fn_flags) = 0;
zend_stack_init(&SCNG(state_stack), sizeof(int));
+ zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
CG(parse_error) = 0;
RESET_DOC_COMMENT();
zend_stack_destroy(&SCNG(state_stack));
+ zend_stack_destroy(&SCNG(nest_location_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack), sizeof(int));
+ lex_state->nest_location_stack = SCNG(nest_location_stack);
+ zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
+
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack;
+ zend_stack_destroy(&SCNG(nest_location_stack));
+ SCNG(nest_location_stack) = lex_state->nest_location_stack;
+
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
@@ -298,13 +306,21 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
}
}
-ZEND_API void zend_lex_tstring(zval *zv)
+ZEND_API int zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref) /*
+ * Build a string zval from an identifier that the scanner recorded earlier.
+ *
+ * zv:        set to a string holding the identifier text.
+ * ident_ref: offset (relative to SCNG(yy_start)) and length of the identifier.
+ *
+ * Returns SUCCESS, or FAILURE after throwing a parse error when the referenced
+ * text is exactly the "<?=" tag, which is not accepted as an identifier.
+ */
{
+ char *ident = (char *) SCNG(yy_start) + ident_ref.offset;
+ size_t length = ident_ref.len;
+ if (length == sizeof("<?=")-1 && memcmp(ident, "<?=", sizeof("<?=")-1) == 0) { /* same length and same bytes as "<?=" */
+ zend_throw_exception(zend_ce_parse_error, "Cannot use \"<?=\" as an identifier", 0);
+ return FAILURE;
+ }
+
if (SCNG(on_event)) {
- SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
+ SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context)); /* report the text to the observer as T_STRING feedback */
}
- ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+ ZVAL_STRINGL(zv, ident, length);
+ return SUCCESS;
}
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
@@ -653,6 +669,42 @@ ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
return op_array;
}
+ZEND_API zend_ast *zend_compile_string_to_ast(
+ zend_string *code, zend_arena **ast_arena, const char *filename) { /*
+ * Scan and parse a string of source code, handing back the resulting AST and
+ * the arena that was created for it.
+ *
+ * code:      source text; wrapped in a temporary zval (ZVAL_STR_COPY) that is
+ *            destroyed again before returning.
+ * ast_arena: out parameter, set to the value of CG(ast_arena) once parsing is done.
+ * filename:  passed through to zend_prepare_string_for_scanning().
+ *
+ * Returns the value of CG(ast) after parsing; that is NULL when zendparse()
+ * returned non-zero.
+ *
+ * NOTE(review): after a failed parse the arena has been destroyed, yet
+ * *ast_arena is still assigned CG(ast_arena). Likewise, if
+ * zend_prepare_string_for_scanning() does not return SUCCESS, CG(ast) and
+ * CG(ast_arena) are not reset by this function. Presumably callers only use
+ * the arena when the returned AST is non-NULL -- confirm against callers.
+ */
+ zval code_zv;
+ zend_bool original_in_compilation;
+ zend_lex_state original_lex_state;
+ zend_ast *ast;
+
+ ZVAL_STR_COPY(&code_zv, code);
+
+ original_in_compilation = CG(in_compilation); /* remembered so it can be put back below */
+ CG(in_compilation) = 1;
+
+ zend_save_lexical_state(&original_lex_state); /* paired with zend_restore_lexical_state() below */
+ if (zend_prepare_string_for_scanning(&code_zv, filename) == SUCCESS) {
+ CG(ast) = NULL;
+ CG(ast_arena) = zend_arena_create(1024 * 32);
+ LANG_SCNG(yy_state) = yycINITIAL; /* begin scanning in the INITIAL condition */
+
+ if (zendparse() != 0) { /* parse failed: discard whatever was built */
+ zend_ast_destroy(CG(ast));
+ zend_arena_destroy(CG(ast_arena));
+ CG(ast) = NULL;
+ }
+ }
+
+ /* restore_lexical_state changes CG(ast) and CG(ast_arena) */
+ ast = CG(ast);
+ *ast_arena = CG(ast_arena);
+
+ zend_restore_lexical_state(&original_lex_state);
+ CG(in_compilation) = original_in_compilation;
+
+ zval_dtor(&code_zv);
+
+ return ast;
+}
zend_op_array *compile_filename(int type, zval *filename)
{
@@ -687,7 +739,7 @@ zend_op_array *compile_filename(int type, zval *filename)
return retval;
}
-ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
+ZEND_API int zend_prepare_string_for_scanning(zval *str, const char *filename)
{
char *buf;
size_t size, old_len;
@@ -755,7 +807,7 @@ ZEND_API size_t zend_get_scanned_file_offset(void)
return offset;
}
-zend_op_array *compile_string(zval *source_string, char *filename)
+zend_op_array *compile_string(zval *source_string, const char *filename)
{
zend_lex_state original_lex_state;
zend_op_array *op_array = NULL;
@@ -786,7 +838,7 @@ zend_op_array *compile_string(zval *source_string, char *filename)
BEGIN_EXTERN_C()
-int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
+int highlight_file(const char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
{
zend_lex_state original_lex_state;
zend_file_handle file_handle;
@@ -808,7 +860,7 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight
return SUCCESS;
}
-int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
+int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, const char *str_name)
{
zend_lex_state original_lex_state;
zval tmp;
@@ -1215,6 +1267,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
}
+/* Check that { }, [ ], ( ) are nested correctly.
+ *
+ * report_bad_nesting() throws the parse error for an opening bracket that was
+ * not closed by its matching counterpart.
+ *
+ * opening:        the opening bracket character being reported.
+ * opening_lineno: line on which that bracket was seen; mentioned in the
+ *                 message only when it differs from CG(zend_lineno).
+ * closing:        the mismatching closing character, or 0 at end of file, in
+ *                 which case the "does not match" part is omitted.
+ *
+ * The message is assembled piecewise in a fixed 256-byte buffer.
+ */
+static void report_bad_nesting(char opening, int opening_lineno, char closing)
+{
+ char buf[256];
+ size_t used = 0;
+
+ used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
+
+ if (opening_lineno != CG(zend_lineno)) {
+ used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
+ }
+
+ if (closing) { /* 'closing' will be 0 if at end of file */
+ used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
+ }
+
+ zend_throw_exception(zend_ce_parse_error, buf, 0);
+}
+
+static void enter_nesting(char opening) /*
+ * Record that an opening bracket has been scanned: the character is stored
+ * together with the current line number (CG(zend_lineno)) and pushed onto
+ * SCNG(nest_location_stack), where exit_nesting() later looks it up.
+ */
+{
+ zend_nest_location nest_loc = {opening, CG(zend_lineno)};
+ zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
+}
+
+static int exit_nesting(char closing) /*
+ * Match a closing bracket against the most recently recorded opening bracket.
+ *
+ * closing: the closing character that was just scanned.
+ *
+ * Returns 0 and removes the top stack entry when the pair matches. Returns -1
+ * after throwing a parse error when either no opening bracket is recorded
+ * ("Unmatched ...") or the recorded one is of a different kind; in the latter
+ * case the entry is left on the stack.
+ */
+{
+ if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
+ zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
+ return -1;
+ }
+
+ zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
+ char opening = nest_loc->text;
+
+ if ((opening == '{' && closing != '}') || /* each opening kind accepts only its own closer */
+ (opening == '[' && closing != ']') ||
+ (opening == '(' && closing != ')')) {
+ report_bad_nesting(opening, nest_loc->lineno, closing);
+ return -1;
+ }
+
+ zend_stack_del_top(&SCNG(nest_location_stack));
+ return 0;
+}
+
+static int check_nesting_at_end() /*
+ * End-of-input check: if an opening bracket is still recorded on
+ * SCNG(nest_location_stack), report the top one as unclosed (closing == 0,
+ * i.e. no mismatching character) and return -1; otherwise return 0.
+ */
+{
+ if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
+ zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
+ report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
+ return -1;
+ }
+
+ return 0;
+}
+
#define PARSER_MODE() \
EXPECTED(elem != NULL)
@@ -1234,6 +1343,11 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
goto emit_token_with_str; \
} while (0)
+#define RETURN_TOKEN_WITH_IDENT(_token) do { /* emit _token via the emit_token_with_ident label */ \
+ token = _token; \
+ goto emit_token_with_ident; \
+ } while (0)
+
#define RETURN_OR_SKIP_TOKEN(_token) do { \
token = _token; \
if (PARSER_MODE()) { \
@@ -1242,6 +1356,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
goto emit_token; \
} while (0)
+#define RETURN_EXIT_NESTING_TOKEN(_token) do { /* closing bracket: T_ERROR only if nesting is bad AND in parser mode */ \
+ if (exit_nesting(_token) && PARSER_MODE()) { \
+ RETURN_TOKEN(T_ERROR); \
+ } else { \
+ RETURN_TOKEN(_token); \
+ } \
+ } while(0)
+
+#define RETURN_END_TOKEN do { /* end of input: T_ERROR if brackets are still open AND in parser mode, else END */ \
+ if (check_nesting_at_end() && PARSER_MODE()) { \
+ RETURN_TOKEN(T_ERROR); \
+ } else { \
+ RETURN_TOKEN(END); \
+ } \
+ } while (0)
+
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
{
int token;
@@ -1262,7 +1392,7 @@ BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
-TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
+TOKENS [;:,.|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")
@@ -1270,169 +1400,177 @@ NEWLINE ("\r"|"\n"|"\r\n")
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<ST_IN_SCRIPTING>"exit" {
- RETURN_TOKEN(T_EXIT);
+ RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
- RETURN_TOKEN(T_EXIT);
+ RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"fn" {
- RETURN_TOKEN(T_FN);
+ RETURN_TOKEN_WITH_IDENT(T_FN);
}
<ST_IN_SCRIPTING>"function" {
- RETURN_TOKEN(T_FUNCTION);
+ RETURN_TOKEN_WITH_IDENT(T_FUNCTION);
}
<ST_IN_SCRIPTING>"const" {
- RETURN_TOKEN(T_CONST);
+ RETURN_TOKEN_WITH_IDENT(T_CONST);
}
<ST_IN_SCRIPTING>"return" {
- RETURN_TOKEN(T_RETURN);
+ RETURN_TOKEN_WITH_IDENT(T_RETURN);
+}
+
+<ST_IN_SCRIPTING>"@@" {
+ RETURN_TOKEN(T_ATTRIBUTE);
}
<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
yyless(yyleng - 1);
HANDLE_NEWLINES(yytext, yyleng);
- RETURN_TOKEN(T_YIELD_FROM);
+ RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM);
}
<ST_IN_SCRIPTING>"yield" {
- RETURN_TOKEN(T_YIELD);
+ RETURN_TOKEN_WITH_IDENT(T_YIELD);
}
<ST_IN_SCRIPTING>"try" {
- RETURN_TOKEN(T_TRY);
+ RETURN_TOKEN_WITH_IDENT(T_TRY);
}
<ST_IN_SCRIPTING>"catch" {
- RETURN_TOKEN(T_CATCH);
+ RETURN_TOKEN_WITH_IDENT(T_CATCH);
}
<ST_IN_SCRIPTING>"finally" {
- RETURN_TOKEN(T_FINALLY);
+ RETURN_TOKEN_WITH_IDENT(T_FINALLY);
}
<ST_IN_SCRIPTING>"throw" {
- RETURN_TOKEN(T_THROW);
+ RETURN_TOKEN_WITH_IDENT(T_THROW);
}
<ST_IN_SCRIPTING>"if" {
- RETURN_TOKEN(T_IF);
+ RETURN_TOKEN_WITH_IDENT(T_IF);
}
<ST_IN_SCRIPTING>"elseif" {
- RETURN_TOKEN(T_ELSEIF);
+ RETURN_TOKEN_WITH_IDENT(T_ELSEIF);
}
<ST_IN_SCRIPTING>"endif" {
- RETURN_TOKEN(T_ENDIF);
+ RETURN_TOKEN_WITH_IDENT(T_ENDIF);
}
<ST_IN_SCRIPTING>"else" {
- RETURN_TOKEN(T_ELSE);
+ RETURN_TOKEN_WITH_IDENT(T_ELSE);
}
<ST_IN_SCRIPTING>"while" {
- RETURN_TOKEN(T_WHILE);
+ RETURN_TOKEN_WITH_IDENT(T_WHILE);
}
<ST_IN_SCRIPTING>"endwhile" {
- RETURN_TOKEN(T_ENDWHILE);
+ RETURN_TOKEN_WITH_IDENT(T_ENDWHILE);
}
<ST_IN_SCRIPTING>"do" {
- RETURN_TOKEN(T_DO);
+ RETURN_TOKEN_WITH_IDENT(T_DO);
}
<ST_IN_SCRIPTING>"for" {
- RETURN_TOKEN(T_FOR);
+ RETURN_TOKEN_WITH_IDENT(T_FOR);
}
<ST_IN_SCRIPTING>"endfor" {
- RETURN_TOKEN(T_ENDFOR);
+ RETURN_TOKEN_WITH_IDENT(T_ENDFOR);
}
<ST_IN_SCRIPTING>"foreach" {
- RETURN_TOKEN(T_FOREACH);
+ RETURN_TOKEN_WITH_IDENT(T_FOREACH);
}
<ST_IN_SCRIPTING>"endforeach" {
- RETURN_TOKEN(T_ENDFOREACH);
+ RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH);
}
<ST_IN_SCRIPTING>"declare" {
- RETURN_TOKEN(T_DECLARE);
+ RETURN_TOKEN_WITH_IDENT(T_DECLARE);
}
<ST_IN_SCRIPTING>"enddeclare" {
- RETURN_TOKEN(T_ENDDECLARE);
+ RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE);
}
<ST_IN_SCRIPTING>"instanceof" {
- RETURN_TOKEN(T_INSTANCEOF);
+ RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF);
}
<ST_IN_SCRIPTING>"as" {
- RETURN_TOKEN(T_AS);
+ RETURN_TOKEN_WITH_IDENT(T_AS);
}
<ST_IN_SCRIPTING>"switch" {
- RETURN_TOKEN(T_SWITCH);
+ RETURN_TOKEN_WITH_IDENT(T_SWITCH);
+}
+
+<ST_IN_SCRIPTING>"match" {
+ RETURN_TOKEN_WITH_IDENT(T_MATCH);
}
<ST_IN_SCRIPTING>"endswitch" {
- RETURN_TOKEN(T_ENDSWITCH);
+ RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH);
}
<ST_IN_SCRIPTING>"case" {
- RETURN_TOKEN(T_CASE);
+ RETURN_TOKEN_WITH_IDENT(T_CASE);
}
<ST_IN_SCRIPTING>"default" {
- RETURN_TOKEN(T_DEFAULT);
+ RETURN_TOKEN_WITH_IDENT(T_DEFAULT);
}
<ST_IN_SCRIPTING>"break" {
- RETURN_TOKEN(T_BREAK);
+ RETURN_TOKEN_WITH_IDENT(T_BREAK);
}
<ST_IN_SCRIPTING>"continue" {
- RETURN_TOKEN(T_CONTINUE);
+ RETURN_TOKEN_WITH_IDENT(T_CONTINUE);
}
<ST_IN_SCRIPTING>"goto" {
- RETURN_TOKEN(T_GOTO);
+ RETURN_TOKEN_WITH_IDENT(T_GOTO);
}
<ST_IN_SCRIPTING>"echo" {
- RETURN_TOKEN(T_ECHO);
+ RETURN_TOKEN_WITH_IDENT(T_ECHO);
}
<ST_IN_SCRIPTING>"print" {
- RETURN_TOKEN(T_PRINT);
+ RETURN_TOKEN_WITH_IDENT(T_PRINT);
}
<ST_IN_SCRIPTING>"class" {
- RETURN_TOKEN(T_CLASS);
+ RETURN_TOKEN_WITH_IDENT(T_CLASS);
}
<ST_IN_SCRIPTING>"interface" {
- RETURN_TOKEN(T_INTERFACE);
+ RETURN_TOKEN_WITH_IDENT(T_INTERFACE);
}
<ST_IN_SCRIPTING>"trait" {
- RETURN_TOKEN(T_TRAIT);
+ RETURN_TOKEN_WITH_IDENT(T_TRAIT);
}
<ST_IN_SCRIPTING>"extends" {
- RETURN_TOKEN(T_EXTENDS);
+ RETURN_TOKEN_WITH_IDENT(T_EXTENDS);
}
<ST_IN_SCRIPTING>"implements" {
- RETURN_TOKEN(T_IMPLEMENTS);
+ RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS);
}
<ST_IN_SCRIPTING>"->" {
@@ -1440,6 +1578,11 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_OBJECT_OPERATOR);
}
+<ST_IN_SCRIPTING>"?->" {
+ /* Nullsafe object operator: the next label is scanned in the property-name state. */
+ yy_push_state(ST_LOOKING_FOR_PROPERTY);
+ /* Use RETURN_TOKEN, like the other operator rules, instead of a bare
+  * `return`: a bare return leaves lex_scan without passing through the
+  * emit_token path, where the on_event(ON_TOKEN, ...) callback is invoked. */
+ RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
+}
+
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
goto return_whitespace;
}
@@ -1448,6 +1591,10 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_OBJECT_OPERATOR);
}
+<ST_LOOKING_FOR_PROPERTY>"?->" {
+ RETURN_TOKEN(T_NULLSAFE_OBJECT_OPERATOR);
+}
+
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state();
RETURN_TOKEN_WITH_STR(T_STRING, 0);
@@ -1463,10 +1610,6 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}
-<ST_IN_SCRIPTING>"\\" {
- RETURN_TOKEN(T_NS_SEPARATOR);
-}
-
<ST_IN_SCRIPTING>"..." {
RETURN_TOKEN(T_ELLIPSIS);
}
@@ -1476,15 +1619,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"new" {
- RETURN_TOKEN(T_NEW);
+ RETURN_TOKEN_WITH_IDENT(T_NEW);
}
<ST_IN_SCRIPTING>"clone" {
- RETURN_TOKEN(T_CLONE);
+ RETURN_TOKEN_WITH_IDENT(T_CLONE);
}
<ST_IN_SCRIPTING>"var" {
- RETURN_TOKEN(T_VAR);
+ RETURN_TOKEN_WITH_IDENT(T_VAR);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
@@ -1497,7 +1640,8 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" {
if (PARSER_MODE()) {
- zend_error(E_DEPRECATED, "The (real) cast is deprecated, use (float) instead");
+ zend_throw_exception(zend_ce_parse_error, "The (real) cast has been removed, use (float) instead", 0);
+ RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN(T_DOUBLE_CAST);
}
@@ -1523,79 +1667,79 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"eval" {
- RETURN_TOKEN(T_EVAL);
+ RETURN_TOKEN_WITH_IDENT(T_EVAL);
}
<ST_IN_SCRIPTING>"include" {
- RETURN_TOKEN(T_INCLUDE);
+ RETURN_TOKEN_WITH_IDENT(T_INCLUDE);
}
<ST_IN_SCRIPTING>"include_once" {
- RETURN_TOKEN(T_INCLUDE_ONCE);
+ RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE);
}
<ST_IN_SCRIPTING>"require" {
- RETURN_TOKEN(T_REQUIRE);
+ RETURN_TOKEN_WITH_IDENT(T_REQUIRE);
}
<ST_IN_SCRIPTING>"require_once" {
- RETURN_TOKEN(T_REQUIRE_ONCE);
+ RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE);
}
<ST_IN_SCRIPTING>"namespace" {
- RETURN_TOKEN(T_NAMESPACE);
+ RETURN_TOKEN_WITH_IDENT(T_NAMESPACE);
}
<ST_IN_SCRIPTING>"use" {
- RETURN_TOKEN(T_USE);
+ RETURN_TOKEN_WITH_IDENT(T_USE);
}
<ST_IN_SCRIPTING>"insteadof" {
- RETURN_TOKEN(T_INSTEADOF);
+ RETURN_TOKEN_WITH_IDENT(T_INSTEADOF);
}
<ST_IN_SCRIPTING>"global" {
- RETURN_TOKEN(T_GLOBAL);
+ RETURN_TOKEN_WITH_IDENT(T_GLOBAL);
}
<ST_IN_SCRIPTING>"isset" {
- RETURN_TOKEN(T_ISSET);
+ RETURN_TOKEN_WITH_IDENT(T_ISSET);
}
<ST_IN_SCRIPTING>"empty" {
- RETURN_TOKEN(T_EMPTY);
+ RETURN_TOKEN_WITH_IDENT(T_EMPTY);
}
<ST_IN_SCRIPTING>"__halt_compiler" {
- RETURN_TOKEN(T_HALT_COMPILER);
+ RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER);
}
<ST_IN_SCRIPTING>"static" {
- RETURN_TOKEN(T_STATIC);
+ RETURN_TOKEN_WITH_IDENT(T_STATIC);
}
<ST_IN_SCRIPTING>"abstract" {
- RETURN_TOKEN(T_ABSTRACT);
+ RETURN_TOKEN_WITH_IDENT(T_ABSTRACT);
}
<ST_IN_SCRIPTING>"final" {
- RETURN_TOKEN(T_FINAL);
+ RETURN_TOKEN_WITH_IDENT(T_FINAL);
}
<ST_IN_SCRIPTING>"private" {
- RETURN_TOKEN(T_PRIVATE);
+ RETURN_TOKEN_WITH_IDENT(T_PRIVATE);
}
<ST_IN_SCRIPTING>"protected" {
- RETURN_TOKEN(T_PROTECTED);
+ RETURN_TOKEN_WITH_IDENT(T_PROTECTED);
}
<ST_IN_SCRIPTING>"public" {
- RETURN_TOKEN(T_PUBLIC);
+ RETURN_TOKEN_WITH_IDENT(T_PUBLIC);
}
<ST_IN_SCRIPTING>"unset" {
- RETURN_TOKEN(T_UNSET);
+ RETURN_TOKEN_WITH_IDENT(T_UNSET);
}
<ST_IN_SCRIPTING>"=>" {
@@ -1603,15 +1747,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"list" {
- RETURN_TOKEN(T_LIST);
+ RETURN_TOKEN_WITH_IDENT(T_LIST);
}
<ST_IN_SCRIPTING>"array" {
- RETURN_TOKEN(T_ARRAY);
+ RETURN_TOKEN_WITH_IDENT(T_ARRAY);
}
<ST_IN_SCRIPTING>"callable" {
- RETURN_TOKEN(T_CALLABLE);
+ RETURN_TOKEN_WITH_IDENT(T_CALLABLE);
}
<ST_IN_SCRIPTING>"++" {
@@ -1715,15 +1859,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"OR" {
- RETURN_TOKEN(T_LOGICAL_OR);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR);
}
<ST_IN_SCRIPTING>"AND" {
- RETURN_TOKEN(T_LOGICAL_AND);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND);
}
<ST_IN_SCRIPTING>"XOR" {
- RETURN_TOKEN(T_LOGICAL_XOR);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR);
}
<ST_IN_SCRIPTING>"<<" {
@@ -1734,6 +1878,16 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_SR);
}
+<ST_IN_SCRIPTING>"]"|")" {
+ /* Check that ] and ) match up properly with a preceding [ or ( */
+ RETURN_EXIT_NESTING_TOKEN(yytext[0]);
+}
+
+<ST_IN_SCRIPTING>"["|"(" {
+ enter_nesting(yytext[0]);
+ RETURN_TOKEN(yytext[0]);
+}
+
<ST_IN_SCRIPTING>{TOKENS} {
RETURN_TOKEN(yytext[0]);
}
@@ -1741,22 +1895,23 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
+ enter_nesting('{');
RETURN_TOKEN('{');
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
+ enter_nesting('{');
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
-
<ST_IN_SCRIPTING>"}" {
RESET_DOC_COMMENT();
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
- RETURN_TOKEN('}');
+ RETURN_EXIT_NESTING_TOKEN('}');
}
@@ -1968,35 +2123,35 @@ string:
}
<ST_IN_SCRIPTING>"__CLASS__" {
- RETURN_TOKEN(T_CLASS_C);
+ RETURN_TOKEN_WITH_IDENT(T_CLASS_C);
}
<ST_IN_SCRIPTING>"__TRAIT__" {
- RETURN_TOKEN(T_TRAIT_C);
+ RETURN_TOKEN_WITH_IDENT(T_TRAIT_C);
}
<ST_IN_SCRIPTING>"__FUNCTION__" {
- RETURN_TOKEN(T_FUNC_C);
+ RETURN_TOKEN_WITH_IDENT(T_FUNC_C);
}
<ST_IN_SCRIPTING>"__METHOD__" {
- RETURN_TOKEN(T_METHOD_C);
+ RETURN_TOKEN_WITH_IDENT(T_METHOD_C);
}
<ST_IN_SCRIPTING>"__LINE__" {
- RETURN_TOKEN(T_LINE);
+ RETURN_TOKEN_WITH_IDENT(T_LINE);
}
<ST_IN_SCRIPTING>"__FILE__" {
- RETURN_TOKEN(T_FILE);
+ RETURN_TOKEN_WITH_IDENT(T_FILE);
}
<ST_IN_SCRIPTING>"__DIR__" {
- RETURN_TOKEN(T_DIR);
+ RETURN_TOKEN_WITH_IDENT(T_DIR);
}
<ST_IN_SCRIPTING>"__NAMESPACE__" {
- RETURN_TOKEN(T_NS_C);
+ RETURN_TOKEN_WITH_IDENT(T_NS_C);
}
<SHEBANG>"#!" .* {NEWLINE} {
@@ -2014,7 +2169,8 @@ string:
<INITIAL>"<?=" {
BEGIN(ST_IN_SCRIPTING);
if (PARSER_MODE()) {
- RETURN_TOKEN(T_ECHO);
+ /* We'll reject this as an identifier in zend_lex_tstring. */
+ RETURN_TOKEN_WITH_IDENT(T_ECHO);
}
RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}
@@ -2052,7 +2208,7 @@ string:
<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
inline_char_handler:
@@ -2129,7 +2285,7 @@ inline_char_handler:
RETURN_TOKEN(']');
}
-<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
+<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
RETURN_TOKEN(yytext[0]);
}
@@ -2142,6 +2298,22 @@ inline_char_handler:
RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE);
}
+<ST_IN_SCRIPTING>"namespace"("\\"{LABEL})+ {
+ RETURN_TOKEN_WITH_STR(T_NAME_RELATIVE, sizeof("namespace\\") - 1);
+}
+
+<ST_IN_SCRIPTING>{LABEL}("\\"{LABEL})+ {
+ RETURN_TOKEN_WITH_STR(T_NAME_QUALIFIED, 0);
+}
+
+<ST_IN_SCRIPTING>"\\"{LABEL}("\\"{LABEL})* {
+ RETURN_TOKEN_WITH_STR(T_NAME_FULLY_QUALIFIED, 1);
+}
+
+<ST_IN_SCRIPTING>"\\" {
+ RETURN_TOKEN(T_NS_SEPARATOR);
+}
+
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
RETURN_TOKEN_WITH_STR(T_STRING, 0);
}
@@ -2151,12 +2323,8 @@ inline_char_handler:
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
- if (*YYCURSOR == '\n') {
- YYCURSOR++;
- }
- /* fall through */
case '\n':
- CG(zend_lineno)++;
+ YYCURSOR--;
break;
case '?':
if (*YYCURSOR == '>') {
@@ -2193,8 +2361,11 @@ inline_char_handler:
if (YYCURSOR < YYLIMIT) {
YYCURSOR++;
- } else if (!SCNG(heredoc_scan_ahead)) {
- zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
+ } else {
+ zend_throw_exception_ex(zend_ce_parse_error, 0, "Unterminated comment starting line %d", CG(zend_lineno));
+ if (PARSER_MODE()) {
+ RETURN_TOKEN(T_ERROR);
+ }
}
yyleng = YYCURSOR - SCNG(yy_text);
@@ -2393,6 +2564,7 @@ skip_escape_conversion:
}
heredoc_label->label = estrndup(s, heredoc_label->length);
+ heredoc_label->indentation_uses_spaces = 0;
heredoc_label->indentation = 0;
saved_cursor = YYCURSOR;
@@ -2530,6 +2702,7 @@ skip_escape_conversion:
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
yy_push_state(ST_IN_SCRIPTING);
yyless(1);
+ enter_nesting('{');
RETURN_TOKEN(T_CURLY_OPEN);
}
@@ -2554,7 +2727,7 @@ skip_escape_conversion:
}
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2601,7 +2774,7 @@ double_quotes_scan_done:
<ST_BACKQUOTE>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2650,7 +2823,7 @@ double_quotes_scan_done:
int newline = 0, indentation = 0, spacing = 0;
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
YYCURSOR--;
@@ -2774,7 +2947,7 @@ heredoc_scan_done:
int newline = 0, indentation = 0, spacing = -1;
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
YYCURSOR--;
@@ -2862,17 +3035,10 @@ nowdoc_scan_done:
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
- if (!SCNG(heredoc_scan_ahead)) {
- zend_error(E_COMPILE_WARNING, "Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
- }
- if (PARSER_MODE()) {
- goto restart;
- } else {
- RETURN_TOKEN(T_BAD_CHARACTER);
- }
+ RETURN_TOKEN(T_BAD_CHARACTER);
}
*/
@@ -2888,14 +3054,24 @@ emit_token_with_val:
emit_token:
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
+ }
+ return token;
+
+emit_token_with_ident:
+ if (PARSER_MODE()) {
+ elem->ident.offset = SCNG(yy_text) - SCNG(yy_start);
+ elem->ident.len = SCNG(yy_leng);
+ }
+ if (SCNG(on_event)) {
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
return token;
return_whitespace:
HANDLE_NEWLINES(yytext, yyleng);
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context));
}
if (PARSER_MODE()) {
start_line = CG(zend_lineno);
@@ -2906,7 +3082,7 @@ return_whitespace:
skip_token:
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
start_line = CG(zend_lineno);
goto restart;