summaryrefslogtreecommitdiff
path: root/Zend
diff options
context:
space:
mode:
authorAlex Dowad <alexinbeijing@gmail.com>2020-04-08 13:19:39 +0200
committerNikita Popov <nikita.ppv@gmail.com>2020-04-14 11:22:23 +0200
commit80598f12507c5cbde04163289e4d2575f05d2a0c (patch)
treef5d3c26400dcb577e6c508469d1de086f5fda695 /Zend
parentd4471c6aaecab2f1cc467ea8e92040c73685bd21 (diff)
downloadphp-git-80598f12507c5cbde04163289e4d2575f05d2a0c.tar.gz
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements. Fixes bug #79368. Closes GH-5364.
Diffstat (limited to 'Zend')
-rw-r--r--Zend/tests/bug60099.phpt2
-rw-r--r--Zend/tests/eval_parse_error_with_doc_comment.phpt2
-rw-r--r--Zend/tests/require_parse_exception.phpt4
-rw-r--r--Zend/zend_globals.h1
-rw-r--r--Zend/zend_language_scanner.h7
-rw-r--r--Zend/zend_language_scanner.l113
6 files changed, 115 insertions, 14 deletions
diff --git a/Zend/tests/bug60099.phpt b/Zend/tests/bug60099.phpt
index a8745fa522..fbb5e85fb7 100644
--- a/Zend/tests/bug60099.phpt
+++ b/Zend/tests/bug60099.phpt
@@ -7,4 +7,4 @@ namespace foo {
?>
--EXPECTF--
-Parse error: syntax error, unexpected end of file in %s on line %d
+Parse error: Unclosed '{' on line 2 in %s on line %d
diff --git a/Zend/tests/eval_parse_error_with_doc_comment.phpt b/Zend/tests/eval_parse_error_with_doc_comment.phpt
index 62561aaa79..09bc39b83d 100644
--- a/Zend/tests/eval_parse_error_with_doc_comment.phpt
+++ b/Zend/tests/eval_parse_error_with_doc_comment.phpt
@@ -12,4 +12,4 @@ EOC
?>
--EXPECTF--
-Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
+Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d
diff --git a/Zend/tests/require_parse_exception.phpt b/Zend/tests/require_parse_exception.phpt
index 859589231e..d035e29624 100644
--- a/Zend/tests/require_parse_exception.phpt
+++ b/Zend/tests/require_parse_exception.phpt
@@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
?>
--EXPECT--
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
-syntax error, unexpected end of file on line 2
-syntax error, unexpected end of file on line 3
+Unclosed '{' on line 2
+Unclosed '{' on line 3
syntax error, unexpected end of file, expecting '(' on line 2
Invalid numeric literal on line 2
Invalid UTF-8 codepoint escape sequence on line 2
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h
index 2dc769a5ef..b7139fdfd6 100644
--- a/Zend/zend_globals.h
+++ b/Zend/zend_globals.h
@@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
int yy_state;
zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
+ zend_stack nest_location_stack; /* for syntax error reporting */
zend_bool heredoc_scan_ahead;
int heredoc_indentation;
zend_bool heredoc_indentation_uses_spaces;
diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h
index 4d51a064fc..35eccaf7e6 100644
--- a/Zend/zend_language_scanner.h
+++ b/Zend/zend_language_scanner.h
@@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
int yy_state;
zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
+ zend_stack nest_location_stack; /* for syntax error reporting */
zend_file_handle *in;
uint32_t lineno;
@@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
zend_bool indentation_uses_spaces;
} zend_heredoc_label;
+/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
+typedef struct _zend_nest_location {
+ char text;
+ int lineno;
+} zend_nest_location;
+
BEGIN_EXTERN_C()
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 984d73474b..8a3e88edfc 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -192,6 +192,7 @@ void startup_scanner(void)
CG(doc_comment) = NULL;
CG(extra_fn_flags) = 0;
zend_stack_init(&SCNG(state_stack), sizeof(int));
+ zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
CG(parse_error) = 0;
RESET_DOC_COMMENT();
zend_stack_destroy(&SCNG(state_stack));
+ zend_stack_destroy(&SCNG(nest_location_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack), sizeof(int));
+ lex_state->nest_location_stack = SCNG(nest_location_stack);
+ zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
+
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack;
+ zend_stack_destroy(&SCNG(nest_location_stack));
+ SCNG(nest_location_stack) = lex_state->nest_location_stack;
+
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
@@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
}
+/* Check that { }, [ ], ( ) are nested correctly */
+static void report_bad_nesting(char opening, int opening_lineno, char closing)
+{
+ char buf[256];
+ size_t used = 0;
+
+ used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
+
+ if (opening_lineno != CG(zend_lineno)) {
+ used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
+ }
+
+ if (closing) { /* 'closing' will be 0 if at end of file */
+ used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
+ }
+
+ zend_throw_exception(zend_ce_parse_error, buf, 0);
+}
+
+static void enter_nesting(char opening)
+{
+ zend_nest_location nest_loc = {opening, CG(zend_lineno)};
+ zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
+}
+
+static int exit_nesting(char closing)
+{
+ if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
+ zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
+ return -1;
+ }
+
+ zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
+ char opening = nest_loc->text;
+
+ if ((opening == '{' && closing != '}') ||
+ (opening == '[' && closing != ']') ||
+ (opening == '(' && closing != ')')) {
+ report_bad_nesting(opening, nest_loc->lineno, closing);
+ return -1;
+ }
+
+ zend_stack_del_top(&SCNG(nest_location_stack));
+ return 0;
+}
+
+static int check_nesting_at_end()
+{
+ if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
+ zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
+ report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
+ return -1;
+ }
+
+ return 0;
+}
+
#define PARSER_MODE() \
EXPECTED(elem != NULL)
@@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
goto emit_token; \
} while (0)
+#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
+ if (exit_nesting(_token) && PARSER_MODE()) { \
+ RETURN_TOKEN(T_ERROR); \
+ } else { \
+ RETURN_TOKEN(_token); \
+ } \
+ } while(0)
+
+#define RETURN_END_TOKEN do { \
+ if (check_nesting_at_end() && PARSER_MODE()) { \
+ RETURN_TOKEN(T_ERROR); \
+ } else { \
+ RETURN_TOKEN(END); \
+ } \
+ } while (0)
+
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
{
int token;
@@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
-TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
+TOKENS [;:,.|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")
@@ -1770,6 +1851,16 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_SR);
}
+<ST_IN_SCRIPTING>"]"|")" {
+ /* Check that ] and ) match up properly with a preceding [ or ( */
+ RETURN_EXIT_NESTING_TOKEN(yytext[0]);
+}
+
+<ST_IN_SCRIPTING>"["|"(" {
+ enter_nesting(yytext[0]);
+ RETURN_TOKEN(yytext[0]);
+}
+
<ST_IN_SCRIPTING>{TOKENS} {
RETURN_TOKEN(yytext[0]);
}
@@ -1777,22 +1868,23 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
+ enter_nesting('{');
RETURN_TOKEN('{');
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
+ enter_nesting('{');
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
-
<ST_IN_SCRIPTING>"}" {
RESET_DOC_COMMENT();
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
- RETURN_TOKEN('}');
+ RETURN_EXIT_NESTING_TOKEN('}');
}
@@ -2088,7 +2180,7 @@ string:
<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
inline_char_handler:
@@ -2165,7 +2257,7 @@ inline_char_handler:
RETURN_TOKEN(']');
}
-<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
+<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
RETURN_TOKEN(yytext[0]);
}
@@ -2569,6 +2661,7 @@ skip_escape_conversion:
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
yy_push_state(ST_IN_SCRIPTING);
yyless(1);
+ enter_nesting('{');
RETURN_TOKEN(T_CURLY_OPEN);
}
@@ -2593,7 +2686,7 @@ skip_escape_conversion:
}
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2640,7 +2733,7 @@ double_quotes_scan_done:
<ST_BACKQUOTE>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2689,7 +2782,7 @@ double_quotes_scan_done:
int newline = 0, indentation = 0, spacing = 0;
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
YYCURSOR--;
@@ -2813,7 +2906,7 @@ heredoc_scan_done:
int newline = 0, indentation = 0, spacing = -1;
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
YYCURSOR--;
@@ -2901,7 +2994,7 @@ nowdoc_scan_done:
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- RETURN_TOKEN(END);
+ RETURN_END_TOKEN;
}
RETURN_TOKEN(T_BAD_CHARACTER);