diff options
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | Zend/tests/bug77966.phpt | 25 | ||||
-rw-r--r-- | Zend/zend_compile.c | 2 | ||||
-rw-r--r-- | Zend/zend_compile.h | 6 | ||||
-rw-r--r-- | Zend/zend_globals.h | 4 | ||||
-rw-r--r-- | Zend/zend_language_parser.y | 160 | ||||
-rw-r--r-- | Zend/zend_language_scanner.h | 6 | ||||
-rw-r--r-- | Zend/zend_language_scanner.l | 183 | ||||
-rw-r--r-- | ext/tokenizer/tests/bug77966.phpt | 49 | ||||
-rw-r--r-- | ext/tokenizer/tokenizer.c | 53 |
10 files changed, 310 insertions, 179 deletions
@@ -29,6 +29,7 @@ PHP NEWS extensions). (cmb) . Implemented FR #72089 (require() throws fatal error instead of exception). (Nikita) + . Fixed bug #77966 (Cannot alias a method named "namespace"). (Nikita) - BZ2: . Fixed bug #71263 (fread() does not report bzip2.decompress errors). (cmb) diff --git a/Zend/tests/bug77966.phpt b/Zend/tests/bug77966.phpt new file mode 100644 index 0000000000..1e859be8e1 --- /dev/null +++ b/Zend/tests/bug77966.phpt @@ -0,0 +1,25 @@ +--TEST-- +Bug #77966: Cannot alias a method named "namespace" +--FILE-- +<?php + +trait A { + function namespace() { + echo "Called\n"; + } +} + +class C { + use A { + namespace as bar; + } +} + +$c = new C; +$c->bar(); +$c->namespace(); + +?> +--EXPECT-- +Called +Called diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 6d3c12b0f0..2f2a2d41b5 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -653,7 +653,7 @@ static int zend_add_const_name_literal(zend_string *name, zend_bool unqualified) void zend_stop_lexing(void) { if (LANG_SCNG(on_event)) { - LANG_SCNG(on_event)(ON_STOP, END, 0, LANG_SCNG(on_event_context)); + LANG_SCNG(on_event)(ON_STOP, END, 0, NULL, 0, LANG_SCNG(on_event_context)); } LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit); diff --git a/Zend/zend_compile.h b/Zend/zend_compile.h index 9c6070d888..e11e6902b9 100644 --- a/Zend/zend_compile.h +++ b/Zend/zend_compile.h @@ -117,11 +117,17 @@ typedef struct _zend_file_context { HashTable seen_symbols; } zend_file_context; +typedef struct { + uint32_t offset; + uint32_t len; +} zend_lexer_ident_ref; + typedef union _zend_parser_stack_elem { zend_ast *ast; zend_string *str; zend_ulong num; unsigned char *ptr; + zend_lexer_ident_ref ident; } zend_parser_stack_elem; void zend_compile_top_stmt(zend_ast *ast); diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 1d64d001f6..60d32c0335 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -311,7 +311,9 @@ struct _zend_php_scanner_globals { int scanned_string_len; /* hooks */ - void (*on_event)(zend_php_scanner_event event, int token, int line, void *context); + void (*on_event)( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context); void *on_event_context; }; diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index 4fdc05909b..80a57514b9 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -96,18 +96,84 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token <ast> T_STRING_VARNAME "variable name (T_STRING_VARNAME)" %token <ast> T_NUM_STRING "number (T_NUM_STRING)" +%token <ident> T_INCLUDE "include (T_INCLUDE)" +%token <ident> T_INCLUDE_ONCE "include_once (T_INCLUDE_ONCE)" +%token <ident> T_EVAL "eval (T_EVAL)" +%token <ident> T_REQUIRE "require (T_REQUIRE)" +%token <ident> T_REQUIRE_ONCE "require_once (T_REQUIRE_ONCE)" +%token <ident> T_LOGICAL_OR "or (T_LOGICAL_OR)" +%token <ident> T_LOGICAL_XOR "xor (T_LOGICAL_XOR)" +%token <ident> T_LOGICAL_AND "and (T_LOGICAL_AND)" +%token <ident> T_PRINT "print (T_PRINT)" +%token <ident> T_YIELD "yield (T_YIELD)" +%token <ident> T_YIELD_FROM "yield from (T_YIELD_FROM)" +%token <ident> T_INSTANCEOF "instanceof (T_INSTANCEOF)" +%token <ident> T_NEW "new (T_NEW)" +%token <ident> T_CLONE "clone (T_CLONE)" +%token <ident> T_EXIT "exit (T_EXIT)" +%token <ident> T_IF "if (T_IF)" +%token <ident> T_ELSEIF "elseif (T_ELSEIF)" +%token <ident> T_ELSE "else (T_ELSE)" +%token <ident> T_ENDIF "endif (T_ENDIF)" +%token <ident> T_ECHO "echo (T_ECHO)" +%token <ident> T_DO "do (T_DO)" +%token <ident> T_WHILE "while (T_WHILE)" +%token <ident> T_ENDWHILE "endwhile (T_ENDWHILE)" +%token <ident> T_FOR "for (T_FOR)" +%token <ident> T_ENDFOR "endfor (T_ENDFOR)" +%token <ident> T_FOREACH "foreach (T_FOREACH)" +%token <ident> T_ENDFOREACH "endforeach (T_ENDFOREACH)" +%token <ident> T_DECLARE "declare (T_DECLARE)" +%token <ident> T_ENDDECLARE "enddeclare (T_ENDDECLARE)" +%token <ident> T_AS "as (T_AS)" +%token <ident> T_SWITCH "switch (T_SWITCH)" +%token <ident> T_ENDSWITCH "endswitch (T_ENDSWITCH)" +%token <ident> T_CASE "case (T_CASE)" +%token <ident> T_DEFAULT "default (T_DEFAULT)" +%token <ident> T_BREAK "break (T_BREAK)" +%token <ident> T_CONTINUE "continue (T_CONTINUE)" +%token <ident> T_GOTO "goto (T_GOTO)" +%token <ident> T_FUNCTION "function (T_FUNCTION)" +%token <ident> T_FN "fn (T_FN)" +%token <ident> T_CONST "const (T_CONST)" +%token <ident> T_RETURN "return (T_RETURN)" +%token <ident> T_TRY "try (T_TRY)" +%token <ident> T_CATCH "catch (T_CATCH)" +%token <ident> T_FINALLY "finally (T_FINALLY)" +%token <ident> T_THROW "throw (T_THROW)" +%token <ident> T_USE "use (T_USE)" +%token <ident> T_INSTEADOF "insteadof (T_INSTEADOF)" +%token <ident> T_GLOBAL "global (T_GLOBAL)" +%token <ident> T_STATIC "static (T_STATIC)" +%token <ident> T_ABSTRACT "abstract (T_ABSTRACT)" +%token <ident> T_FINAL "final (T_FINAL)" +%token <ident> T_PRIVATE "private (T_PRIVATE)" +%token <ident> T_PROTECTED "protected (T_PROTECTED)" +%token <ident> T_PUBLIC "public (T_PUBLIC)" +%token <ident> T_VAR "var (T_VAR)" +%token <ident> T_UNSET "unset (T_UNSET)" +%token <ident> T_ISSET "isset (T_ISSET)" +%token <ident> T_EMPTY "empty (T_EMPTY)" +%token <ident> T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)" +%token <ident> T_CLASS "class (T_CLASS)" +%token <ident> T_TRAIT "trait (T_TRAIT)" +%token <ident> T_INTERFACE "interface (T_INTERFACE)" +%token <ident> T_EXTENDS "extends (T_EXTENDS)" +%token <ident> T_IMPLEMENTS "implements (T_IMPLEMENTS)" +%token <ident> T_NAMESPACE "namespace (T_NAMESPACE)" +%token <ident> T_LIST "list (T_LIST)" +%token <ident> T_ARRAY "array (T_ARRAY)" +%token <ident> T_CALLABLE "callable (T_CALLABLE)" +%token <ident> T_LINE "__LINE__ (T_LINE)" +%token <ident> T_FILE "__FILE__ (T_FILE)" +%token <ident> T_DIR "__DIR__ (T_DIR)" +%token <ident> T_CLASS_C "__CLASS__ (T_CLASS_C)" +%token <ident> T_TRAIT_C "__TRAIT__ (T_TRAIT_C)" +%token <ident> T_METHOD_C "__METHOD__ (T_METHOD_C)" +%token <ident> T_FUNC_C "__FUNCTION__ (T_FUNC_C)" +%token <ident> T_NS_C "__NAMESPACE__ (T_NS_C)" + %token END 0 "end of file" -%token T_INCLUDE "include (T_INCLUDE)" -%token T_INCLUDE_ONCE "include_once (T_INCLUDE_ONCE)" -%token T_EVAL "eval (T_EVAL)" -%token T_REQUIRE "require (T_REQUIRE)" -%token T_REQUIRE_ONCE "require_once (T_REQUIRE_ONCE)" -%token T_LOGICAL_OR "or (T_LOGICAL_OR)" -%token T_LOGICAL_XOR "xor (T_LOGICAL_XOR)" -%token T_LOGICAL_AND "and (T_LOGICAL_AND)" -%token T_PRINT "print (T_PRINT)" -%token T_YIELD "yield (T_YIELD)" -%token T_YIELD_FROM "yield from (T_YIELD_FROM)" %token T_PLUS_EQUAL "+= (T_PLUS_EQUAL)" %token T_MINUS_EQUAL "-= (T_MINUS_EQUAL)" %token T_MUL_EQUAL "*= (T_MUL_EQUAL)" @@ -131,7 +197,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_SPACESHIP "<=> (T_SPACESHIP)" %token T_SL "<< (T_SL)" %token T_SR ">> (T_SR)" -%token T_INSTANCEOF "instanceof (T_INSTANCEOF)" %token T_INC "++ (T_INC)" %token T_DEC "-- (T_DEC)" %token T_INT_CAST "(int) (T_INT_CAST)" @@ -141,70 +206,8 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_OBJECT_CAST "(object) (T_OBJECT_CAST)" %token T_BOOL_CAST "(bool) (T_BOOL_CAST)" %token T_UNSET_CAST "(unset) (T_UNSET_CAST)" -%token T_NEW "new (T_NEW)" -%token T_CLONE "clone (T_CLONE)" -%token T_EXIT "exit (T_EXIT)" -%token T_IF "if (T_IF)" -%token T_ELSEIF "elseif (T_ELSEIF)" -%token T_ELSE "else (T_ELSE)" -%token T_ENDIF "endif (T_ENDIF)" -%token T_ECHO "echo (T_ECHO)" -%token T_DO "do (T_DO)" -%token T_WHILE "while (T_WHILE)" -%token T_ENDWHILE "endwhile (T_ENDWHILE)" -%token T_FOR "for (T_FOR)" -%token T_ENDFOR "endfor (T_ENDFOR)" -%token T_FOREACH "foreach (T_FOREACH)" -%token T_ENDFOREACH "endforeach (T_ENDFOREACH)" -%token T_DECLARE "declare (T_DECLARE)" -%token T_ENDDECLARE "enddeclare (T_ENDDECLARE)" -%token T_AS "as (T_AS)" -%token T_SWITCH "switch (T_SWITCH)" -%token T_ENDSWITCH "endswitch (T_ENDSWITCH)" -%token T_CASE "case (T_CASE)" -%token T_DEFAULT "default (T_DEFAULT)" -%token T_BREAK "break (T_BREAK)" -%token T_CONTINUE "continue (T_CONTINUE)" -%token T_GOTO "goto (T_GOTO)" -%token T_FUNCTION "function (T_FUNCTION)" -%token T_FN "fn (T_FN)" -%token T_CONST "const (T_CONST)" -%token T_RETURN "return (T_RETURN)" -%token T_TRY "try (T_TRY)" -%token T_CATCH "catch (T_CATCH)" -%token T_FINALLY "finally (T_FINALLY)" -%token T_THROW "throw (T_THROW)" -%token T_USE "use (T_USE)" -%token T_INSTEADOF "insteadof (T_INSTEADOF)" -%token T_GLOBAL "global (T_GLOBAL)" -%token T_STATIC "static (T_STATIC)" -%token T_ABSTRACT "abstract (T_ABSTRACT)" -%token T_FINAL "final (T_FINAL)" -%token T_PRIVATE "private (T_PRIVATE)" -%token T_PROTECTED "protected (T_PROTECTED)" -%token T_PUBLIC "public (T_PUBLIC)" -%token T_VAR "var (T_VAR)" -%token T_UNSET "unset (T_UNSET)" -%token T_ISSET "isset (T_ISSET)" -%token T_EMPTY "empty (T_EMPTY)" -%token T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)" -%token T_CLASS "class (T_CLASS)" -%token T_TRAIT "trait (T_TRAIT)" -%token T_INTERFACE "interface (T_INTERFACE)" -%token T_EXTENDS "extends (T_EXTENDS)" -%token T_IMPLEMENTS "implements (T_IMPLEMENTS)" %token T_OBJECT_OPERATOR "-> (T_OBJECT_OPERATOR)" %token T_DOUBLE_ARROW "=> (T_DOUBLE_ARROW)" -%token T_LIST "list (T_LIST)" -%token T_ARRAY "array (T_ARRAY)" -%token T_CALLABLE "callable (T_CALLABLE)" -%token T_LINE "__LINE__ (T_LINE)" -%token T_FILE "__FILE__ (T_FILE)" -%token T_DIR "__DIR__ (T_DIR)" -%token T_CLASS_C "__CLASS__ (T_CLASS_C)" -%token T_TRAIT_C "__TRAIT__ (T_TRAIT_C)" -%token T_METHOD_C "__METHOD__ (T_METHOD_C)" -%token T_FUNC_C "__FUNCTION__ (T_FUNC_C)" %token T_COMMENT "comment (T_COMMENT)" %token T_DOC_COMMENT "doc comment (T_DOC_COMMENT)" %token T_OPEN_TAG "open tag (T_OPEN_TAG)" @@ -216,8 +219,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_DOLLAR_OPEN_CURLY_BRACES "${ (T_DOLLAR_OPEN_CURLY_BRACES)" %token T_CURLY_OPEN "{$ (T_CURLY_OPEN)" %token T_PAAMAYIM_NEKUDOTAYIM ":: (T_PAAMAYIM_NEKUDOTAYIM)" -%token T_NAMESPACE "namespace (T_NAMESPACE)" -%token T_NS_C "__NAMESPACE__ (T_NS_C)" %token T_NS_SEPARATOR "\\ (T_NS_SEPARATOR)" %token T_ELLIPSIS "... (T_ELLIPSIS)" %token T_COALESCE "?? (T_COALESCE)" @@ -268,6 +269,8 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type <ptr> backup_lex_pos %type <str> backup_doc_comment +%type <ident> reserved_non_modifiers semi_reserved + %% /* Rules */ start: @@ -293,7 +296,7 @@ identifier: T_STRING { $$ = $1; } | semi_reserved { zval zv; - zend_lex_tstring(&zv); + zend_lex_tstring(&zv, $1); $$ = zend_ast_create_zval(&zv); } ; @@ -847,7 +850,8 @@ trait_alias: trait_method_reference T_AS T_STRING { $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, $3); } | trait_method_reference T_AS reserved_non_modifiers - { zval zv; zend_lex_tstring(&zv); $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, zend_ast_create_zval(&zv)); } + { zval zv; zend_lex_tstring(&zv, $3); + $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, zend_ast_create_zval(&zv)); } | trait_method_reference T_AS member_modifier identifier { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, $4); } | trait_method_reference T_AS member_modifier diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index 35eccaf7e6..35d4d0269e 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -50,7 +50,9 @@ typedef struct _zend_lex_state { const zend_encoding *script_encoding; /* hooks */ - void (*on_event)(zend_php_scanner_event event, int token, int line, void *context); + void (*on_event)( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context); void *on_event_context; zend_ast *ast; @@ -76,7 +78,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state); ZEND_API int zend_prepare_string_for_scanning(zval *str, const char *filename); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding); ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding); -ZEND_API void zend_lex_tstring(zval *zv); +ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref); END_EXTERN_C() diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 8a3e88edfc..4aa024a69a 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -306,13 +306,15 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle) } } -ZEND_API void zend_lex_tstring(zval *zv) +ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref) { + char *ident = (char *) SCNG(yy_start) + ident_ref.offset; + size_t length = ident_ref.len; if (SCNG(on_event)) { - SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context)); + SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context)); } - ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng)); + ZVAL_STRINGL(zv, ident, length); } #define BOM_UTF32_BE "\x00\x00\xfe\xff" @@ -1334,6 +1336,11 @@ static int check_nesting_at_end() goto emit_token_with_str; \ } while (0) +#define RETURN_TOKEN_WITH_IDENT(_token) do { \ + token = _token; \ + goto emit_token_with_ident; \ + } while (0) + #define RETURN_OR_SKIP_TOKEN(_token) do { \ token = _token; \ if (PARSER_MODE()) { \ @@ -1386,169 +1393,169 @@ NEWLINE ("\r"|"\n"|"\r\n") <!*> := yyleng = YYCURSOR - SCNG(yy_text); <ST_IN_SCRIPTING>"exit" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } <ST_IN_SCRIPTING>"die" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } <ST_IN_SCRIPTING>"fn" { - RETURN_TOKEN(T_FN); + RETURN_TOKEN_WITH_IDENT(T_FN); } <ST_IN_SCRIPTING>"function" { - RETURN_TOKEN(T_FUNCTION); + RETURN_TOKEN_WITH_IDENT(T_FUNCTION); } <ST_IN_SCRIPTING>"const" { - RETURN_TOKEN(T_CONST); + RETURN_TOKEN_WITH_IDENT(T_CONST); } <ST_IN_SCRIPTING>"return" { - RETURN_TOKEN(T_RETURN); + RETURN_TOKEN_WITH_IDENT(T_RETURN); } <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] { yyless(yyleng - 1); HANDLE_NEWLINES(yytext, yyleng); - RETURN_TOKEN(T_YIELD_FROM); + RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM); } <ST_IN_SCRIPTING>"yield" { - RETURN_TOKEN(T_YIELD); + RETURN_TOKEN_WITH_IDENT(T_YIELD); } <ST_IN_SCRIPTING>"try" { - RETURN_TOKEN(T_TRY); + RETURN_TOKEN_WITH_IDENT(T_TRY); } <ST_IN_SCRIPTING>"catch" { - RETURN_TOKEN(T_CATCH); + RETURN_TOKEN_WITH_IDENT(T_CATCH); } <ST_IN_SCRIPTING>"finally" { - RETURN_TOKEN(T_FINALLY); + RETURN_TOKEN_WITH_IDENT(T_FINALLY); } <ST_IN_SCRIPTING>"throw" { - RETURN_TOKEN(T_THROW); + RETURN_TOKEN_WITH_IDENT(T_THROW); } <ST_IN_SCRIPTING>"if" { - RETURN_TOKEN(T_IF); + RETURN_TOKEN_WITH_IDENT(T_IF); } <ST_IN_SCRIPTING>"elseif" { - RETURN_TOKEN(T_ELSEIF); + RETURN_TOKEN_WITH_IDENT(T_ELSEIF); } <ST_IN_SCRIPTING>"endif" { - RETURN_TOKEN(T_ENDIF); + RETURN_TOKEN_WITH_IDENT(T_ENDIF); } <ST_IN_SCRIPTING>"else" { - RETURN_TOKEN(T_ELSE); + RETURN_TOKEN_WITH_IDENT(T_ELSE); } <ST_IN_SCRIPTING>"while" { - RETURN_TOKEN(T_WHILE); + RETURN_TOKEN_WITH_IDENT(T_WHILE); } <ST_IN_SCRIPTING>"endwhile" { - RETURN_TOKEN(T_ENDWHILE); + RETURN_TOKEN_WITH_IDENT(T_ENDWHILE); } <ST_IN_SCRIPTING>"do" { - RETURN_TOKEN(T_DO); + RETURN_TOKEN_WITH_IDENT(T_DO); } <ST_IN_SCRIPTING>"for" { - RETURN_TOKEN(T_FOR); + RETURN_TOKEN_WITH_IDENT(T_FOR); } <ST_IN_SCRIPTING>"endfor" { - RETURN_TOKEN(T_ENDFOR); + RETURN_TOKEN_WITH_IDENT(T_ENDFOR); } <ST_IN_SCRIPTING>"foreach" { - RETURN_TOKEN(T_FOREACH); + RETURN_TOKEN_WITH_IDENT(T_FOREACH); } <ST_IN_SCRIPTING>"endforeach" { - RETURN_TOKEN(T_ENDFOREACH); + RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH); } <ST_IN_SCRIPTING>"declare" { - RETURN_TOKEN(T_DECLARE); + RETURN_TOKEN_WITH_IDENT(T_DECLARE); } <ST_IN_SCRIPTING>"enddeclare" { - RETURN_TOKEN(T_ENDDECLARE); + RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE); } <ST_IN_SCRIPTING>"instanceof" { - RETURN_TOKEN(T_INSTANCEOF); + RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF); } <ST_IN_SCRIPTING>"as" { - RETURN_TOKEN(T_AS); + RETURN_TOKEN_WITH_IDENT(T_AS); } <ST_IN_SCRIPTING>"switch" { - RETURN_TOKEN(T_SWITCH); + RETURN_TOKEN_WITH_IDENT(T_SWITCH); } <ST_IN_SCRIPTING>"endswitch" { - RETURN_TOKEN(T_ENDSWITCH); + RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH); } <ST_IN_SCRIPTING>"case" { - RETURN_TOKEN(T_CASE); + RETURN_TOKEN_WITH_IDENT(T_CASE); } <ST_IN_SCRIPTING>"default" { - RETURN_TOKEN(T_DEFAULT); + RETURN_TOKEN_WITH_IDENT(T_DEFAULT); } <ST_IN_SCRIPTING>"break" { - RETURN_TOKEN(T_BREAK); + RETURN_TOKEN_WITH_IDENT(T_BREAK); } <ST_IN_SCRIPTING>"continue" { - RETURN_TOKEN(T_CONTINUE); + RETURN_TOKEN_WITH_IDENT(T_CONTINUE); } <ST_IN_SCRIPTING>"goto" { - RETURN_TOKEN(T_GOTO); + RETURN_TOKEN_WITH_IDENT(T_GOTO); } <ST_IN_SCRIPTING>"echo" { - RETURN_TOKEN(T_ECHO); + RETURN_TOKEN_WITH_IDENT(T_ECHO); } <ST_IN_SCRIPTING>"print" { - RETURN_TOKEN(T_PRINT); + RETURN_TOKEN_WITH_IDENT(T_PRINT); } <ST_IN_SCRIPTING>"class" { - RETURN_TOKEN(T_CLASS); + RETURN_TOKEN_WITH_IDENT(T_CLASS); } <ST_IN_SCRIPTING>"interface" { - RETURN_TOKEN(T_INTERFACE); + RETURN_TOKEN_WITH_IDENT(T_INTERFACE); } <ST_IN_SCRIPTING>"trait" { - RETURN_TOKEN(T_TRAIT); + RETURN_TOKEN_WITH_IDENT(T_TRAIT); } <ST_IN_SCRIPTING>"extends" { - RETURN_TOKEN(T_EXTENDS); + RETURN_TOKEN_WITH_IDENT(T_EXTENDS); } <ST_IN_SCRIPTING>"implements" { - RETURN_TOKEN(T_IMPLEMENTS); + RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS); } <ST_IN_SCRIPTING>"->" { @@ -1592,15 +1599,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"new" { - RETURN_TOKEN(T_NEW); + RETURN_TOKEN_WITH_IDENT(T_NEW); } <ST_IN_SCRIPTING>"clone" { - RETURN_TOKEN(T_CLONE); + RETURN_TOKEN_WITH_IDENT(T_CLONE); } <ST_IN_SCRIPTING>"var" { - RETURN_TOKEN(T_VAR); + RETURN_TOKEN_WITH_IDENT(T_VAR); } <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" { @@ -1640,79 +1647,79 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"eval" { - RETURN_TOKEN(T_EVAL); + RETURN_TOKEN_WITH_IDENT(T_EVAL); } <ST_IN_SCRIPTING>"include" { - RETURN_TOKEN(T_INCLUDE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE); } <ST_IN_SCRIPTING>"include_once" { - RETURN_TOKEN(T_INCLUDE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE); } <ST_IN_SCRIPTING>"require" { - RETURN_TOKEN(T_REQUIRE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE); } <ST_IN_SCRIPTING>"require_once" { - RETURN_TOKEN(T_REQUIRE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE); } <ST_IN_SCRIPTING>"namespace" { - RETURN_TOKEN(T_NAMESPACE); + RETURN_TOKEN_WITH_IDENT(T_NAMESPACE); } <ST_IN_SCRIPTING>"use" { - RETURN_TOKEN(T_USE); + RETURN_TOKEN_WITH_IDENT(T_USE); } <ST_IN_SCRIPTING>"insteadof" { - RETURN_TOKEN(T_INSTEADOF); + RETURN_TOKEN_WITH_IDENT(T_INSTEADOF); } <ST_IN_SCRIPTING>"global" { - RETURN_TOKEN(T_GLOBAL); + RETURN_TOKEN_WITH_IDENT(T_GLOBAL); } <ST_IN_SCRIPTING>"isset" { - RETURN_TOKEN(T_ISSET); + RETURN_TOKEN_WITH_IDENT(T_ISSET); } <ST_IN_SCRIPTING>"empty" { - RETURN_TOKEN(T_EMPTY); + RETURN_TOKEN_WITH_IDENT(T_EMPTY); } <ST_IN_SCRIPTING>"__halt_compiler" { - RETURN_TOKEN(T_HALT_COMPILER); + RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER); } <ST_IN_SCRIPTING>"static" { - RETURN_TOKEN(T_STATIC); + RETURN_TOKEN_WITH_IDENT(T_STATIC); } <ST_IN_SCRIPTING>"abstract" { - RETURN_TOKEN(T_ABSTRACT); + RETURN_TOKEN_WITH_IDENT(T_ABSTRACT); } <ST_IN_SCRIPTING>"final" { - RETURN_TOKEN(T_FINAL); + RETURN_TOKEN_WITH_IDENT(T_FINAL); } <ST_IN_SCRIPTING>"private" { - RETURN_TOKEN(T_PRIVATE); + RETURN_TOKEN_WITH_IDENT(T_PRIVATE); } <ST_IN_SCRIPTING>"protected" { - RETURN_TOKEN(T_PROTECTED); + RETURN_TOKEN_WITH_IDENT(T_PROTECTED); } <ST_IN_SCRIPTING>"public" { - RETURN_TOKEN(T_PUBLIC); + RETURN_TOKEN_WITH_IDENT(T_PUBLIC); } <ST_IN_SCRIPTING>"unset" { - RETURN_TOKEN(T_UNSET); + RETURN_TOKEN_WITH_IDENT(T_UNSET); } <ST_IN_SCRIPTING>"=>" { @@ -1720,15 +1727,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"list" { - RETURN_TOKEN(T_LIST); + RETURN_TOKEN_WITH_IDENT(T_LIST); } <ST_IN_SCRIPTING>"array" { - RETURN_TOKEN(T_ARRAY); + RETURN_TOKEN_WITH_IDENT(T_ARRAY); } <ST_IN_SCRIPTING>"callable" { - RETURN_TOKEN(T_CALLABLE); + RETURN_TOKEN_WITH_IDENT(T_CALLABLE); } <ST_IN_SCRIPTING>"++" { @@ -1832,15 +1839,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } <ST_IN_SCRIPTING>"OR" { - RETURN_TOKEN(T_LOGICAL_OR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR); } <ST_IN_SCRIPTING>"AND" { - RETURN_TOKEN(T_LOGICAL_AND); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND); } <ST_IN_SCRIPTING>"XOR" { - RETURN_TOKEN(T_LOGICAL_XOR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR); } <ST_IN_SCRIPTING>"<<" { @@ -2096,35 +2103,35 @@ string: } <ST_IN_SCRIPTING>"__CLASS__" { - RETURN_TOKEN(T_CLASS_C); + RETURN_TOKEN_WITH_IDENT(T_CLASS_C); } <ST_IN_SCRIPTING>"__TRAIT__" { - RETURN_TOKEN(T_TRAIT_C); + RETURN_TOKEN_WITH_IDENT(T_TRAIT_C); } <ST_IN_SCRIPTING>"__FUNCTION__" { - RETURN_TOKEN(T_FUNC_C); + RETURN_TOKEN_WITH_IDENT(T_FUNC_C); } <ST_IN_SCRIPTING>"__METHOD__" { - RETURN_TOKEN(T_METHOD_C); + RETURN_TOKEN_WITH_IDENT(T_METHOD_C); } <ST_IN_SCRIPTING>"__LINE__" { - RETURN_TOKEN(T_LINE); + RETURN_TOKEN_WITH_IDENT(T_LINE); } <ST_IN_SCRIPTING>"__FILE__" { - RETURN_TOKEN(T_FILE); + RETURN_TOKEN_WITH_IDENT(T_FILE); } <ST_IN_SCRIPTING>"__DIR__" { - RETURN_TOKEN(T_DIR); + RETURN_TOKEN_WITH_IDENT(T_DIR); } <ST_IN_SCRIPTING>"__NAMESPACE__" { - RETURN_TOKEN(T_NS_C); + RETURN_TOKEN_WITH_IDENT(T_NS_C); } <SHEBANG>"#!" .* {NEWLINE} { @@ -3013,14 +3020,24 @@ emit_token_with_val: emit_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); + } + return token; + +emit_token_with_ident: + if (PARSER_MODE()) { + elem->ident.offset = SCNG(yy_text) - SCNG(yy_start); + elem->ident.len = SCNG(yy_leng); + } + if (SCNG(on_event)) { + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } return token; return_whitespace: HANDLE_NEWLINES(yytext, yyleng); if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context)); } if (PARSER_MODE()) { start_line = CG(zend_lineno); @@ -3031,7 +3048,7 @@ return_whitespace: skip_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } start_line = CG(zend_lineno); goto restart; diff --git a/ext/tokenizer/tests/bug77966.phpt b/ext/tokenizer/tests/bug77966.phpt new file mode 100644 index 0000000000..142cc7c9ab --- /dev/null +++ b/ext/tokenizer/tests/bug77966.phpt @@ -0,0 +1,49 @@ +--TEST-- +Handling of "namespace as" in TOKEN_PARSE mode +--FILE-- +<?php + +$code = <<<'CODE' +<?php +class C { + use A { + namespace as bar; + } +} +CODE; + +$tokens = PhpToken::getAll($code, TOKEN_PARSE); +foreach ($tokens as $token) { + echo "{$token->getTokenName()}: \"$token->text\"\n"; +} + +?> +--EXPECT-- +T_OPEN_TAG: "<?php +" +T_CLASS: "class" +T_WHITESPACE: " " +T_STRING: "C" +T_WHITESPACE: " " +{: "{" +T_WHITESPACE: " + " +T_USE: "use" +T_WHITESPACE: " " +T_STRING: "A" +T_WHITESPACE: " " +{: "{" +T_WHITESPACE: " + " +T_STRING: "namespace" +T_WHITESPACE: " " +T_AS: "as" +T_WHITESPACE: " " +T_STRING: "bar" +;: ";" +T_WHITESPACE: " + " +}: "}" +T_WHITESPACE: " +" +}: "}" diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index db57323223..901e609d2d 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -420,11 +420,33 @@ struct event_context { zend_class_entry *token_class; }; -void on_event(zend_php_scanner_event event, int token, int line, void *context) +static zval *extract_token_id_to_replace(zval *token_zv, const char *text, size_t length) { + zval *id_zv, *text_zv; + ZEND_ASSERT(token_zv); + if (Z_TYPE_P(token_zv) == IS_ARRAY) { + id_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 0); + text_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 1); + } else if (Z_TYPE_P(token_zv) == IS_OBJECT) { + id_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 0); + text_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 1); + } else { + return NULL; + } + + /* There are multiple candidate tokens to which this feedback may apply, + * check text to make sure this is the right one. */ + ZEND_ASSERT(Z_TYPE_P(text_zv) == IS_STRING); + if (Z_STRLEN_P(text_zv) == length && !memcmp(Z_STRVAL_P(text_zv), text, length)) { + return id_zv; + } + return NULL; +} + +void on_event( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context) { struct event_context *ctx = context; - HashTable *tokens_ht; - zval *token_zv; switch (event) { case ON_TOKEN: @@ -435,19 +457,22 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context) } else if (token == T_ECHO && LANG_SCNG(yy_leng) == sizeof("<?=") - 1) { token = T_OPEN_TAG_WITH_ECHO; } - add_token(ctx->tokens, token, - LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->token_class, NULL); + add_token( + ctx->tokens, token, (unsigned char *) text, length, line, ctx->token_class, NULL); break; - case ON_FEEDBACK: - tokens_ht = Z_ARRVAL_P(ctx->tokens); - token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); - ZEND_ASSERT(token_zv); - if (Z_TYPE_P(token_zv) == IS_ARRAY) { - ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token); - } else { - zend_update_property_long(php_token_ce, token_zv, "type", sizeof("type")-1, token); - } + case ON_FEEDBACK: { + HashTable *tokens_ht = Z_ARRVAL_P(ctx->tokens); + zval *token_zv, *id_zv = NULL; + ZEND_HASH_REVERSE_FOREACH_VAL(tokens_ht, token_zv) { + id_zv = extract_token_id_to_replace(token_zv, text, length); + if (id_zv) { + break; + } + } ZEND_HASH_FOREACH_END(); + ZEND_ASSERT(id_zv); + ZVAL_LONG(id_zv, token); break; + } case ON_STOP: if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor), |