diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2022-02-08 12:25:15 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 12:25:15 +0000 |
commit | 5b58db75291cfbb9b6785c9845824b3e2da01c1c (patch) | |
tree | e44eba71a28740851b8f88a4bddb29cef1ef7f7b /Parser | |
parent | cbdcae5ab90710e8d82c213f3798af1154670ff9 (diff) | |
download | cpython-git-5b58db75291cfbb9b6785c9845824b3e2da01c1c.tar.gz |
[3.10] bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010). (GH-31213)
(cherry picked from commit 69e10976b2e7682c6d57f4272932ebc19f8e8859)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/pegen.c | 17 |
-rw-r--r-- | Parser/pegen.h | 1 |
-rw-r--r-- | Parser/tokenizer.c | 26 |
3 files changed, 34 insertions, 10 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c index 26143f57c0..d7a4983937 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1202,6 +1202,9 @@ compute_parser_flags(PyCompilerFlags *flags) if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) { parser_flags |= PyPARSE_ASYNC_HACKS; } + if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { + parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; + } return parser_flags; } @@ -1327,15 +1330,29 @@ exit: return ret; } + +static inline int +_is_end_of_source(Parser *p) { + int err = p->tok->done; + return err == E_EOF || err == E_EOFS || err == E_EOLS; +} + void * _PyPegen_run_parser(Parser *p) { void *res = _PyPegen_parse(p); assert(p->level == 0); if (res == NULL) { + if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) { + PyErr_Clear(); + return RAISE_SYNTAX_ERROR("incomplete input"); + } if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) { return NULL; } + // Make a second parser pass. In this pass we activate heavier and slower checks + // to produce better error messages and more complete diagnostics. Extra "invalid_*" + // rules will be active during parsing. 
Token *last_token = p->tokens[p->fill - 1]; reset_parser_state(p); _PyPegen_parse(p); diff --git a/Parser/pegen.h b/Parser/pegen.h index 29d48052e4..bf58f1e1f2 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -22,6 +22,7 @@ #define PyPARSE_BARRY_AS_BDFL 0x0020 #define PyPARSE_TYPE_COMMENTS 0x0040 #define PyPARSE_ASYNC_HACKS 0x0080 +#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100 typedef struct _memo { int type; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 3738a9021f..eb15ef7a5d 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -39,7 +39,7 @@ static struct tok_state *tok_new(void); static int tok_nextc(struct tok_state *tok); static void tok_backup(struct tok_state *tok, int c); - +static int syntaxerror(struct tok_state *tok, const char *format, ...); /* Spaces in this constant are treated as "zero or more spaces or tabs" when tokenizing. */ @@ -1030,8 +1030,9 @@ tok_nextc(struct tok_state *tok) if (tok->cur != tok->inp) { return Py_CHARMASK(*tok->cur++); /* Fast path */ } - if (tok->done != E_OK) - return EOF; + if (tok->done != E_OK) { + return EOF; + } if (tok->fp == NULL) { rc = tok_underflow_string(tok); } @@ -1963,16 +1964,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) tok->line_start = tok->multi_line_start; int start = tok->lineno; tok->lineno = tok->first_lineno; - if (quote_size == 3) { - return syntaxerror(tok, - "unterminated triple-quoted string literal" - " (detected at line %d)", start); + syntaxerror(tok, "unterminated triple-quoted string literal" + " (detected at line %d)", start); + if (c != '\n') { + tok->done = E_EOFS; + } + return ERRORTOKEN; } else { - return syntaxerror(tok, - "unterminated string literal (detected at" - " line %d)", start); + syntaxerror(tok, "unterminated string literal (detected at" + " line %d)", start); + if (c != '\n') { + tok->done = E_EOLS; + } + return ERRORTOKEN; } } if (c == quote) { |