summary | refs | log | tree | commit | diff
path: root/Parser
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2022-02-08 12:25:15 +0000
committer: GitHub <noreply@github.com> 2022-02-08 12:25:15 +0000
commit: 5b58db75291cfbb9b6785c9845824b3e2da01c1c (patch)
tree: e44eba71a28740851b8f88a4bddb29cef1ef7f7b /Parser
parent: cbdcae5ab90710e8d82c213f3798af1154670ff9 (diff)
download: cpython-git-5b58db75291cfbb9b6785c9845824b3e2da01c1c.tar.gz
[3.10] bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010). (GH-31213)
(cherry picked from commit 69e10976b2e7682c6d57f4272932ebc19f8e8859)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/pegen.c 17
-rw-r--r-- Parser/pegen.h 1
-rw-r--r-- Parser/tokenizer.c 26
3 files changed, 34 insertions, 10 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 26143f57c0..d7a4983937 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1202,6 +1202,9 @@ compute_parser_flags(PyCompilerFlags *flags)
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
parser_flags |= PyPARSE_ASYNC_HACKS;
}
+ if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
+ parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
+ }
return parser_flags;
}
@@ -1327,15 +1330,29 @@ exit:
return ret;
}
+
+static inline int
+_is_end_of_source(Parser *p) {
+ int err = p->tok->done;
+ return err == E_EOF || err == E_EOFS || err == E_EOLS;
+}
+
void *
_PyPegen_run_parser(Parser *p)
{
void *res = _PyPegen_parse(p);
assert(p->level == 0);
if (res == NULL) {
+ if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
+ PyErr_Clear();
+ return RAISE_SYNTAX_ERROR("incomplete input");
+ }
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
return NULL;
}
+ // Make a second parser pass. In this pass we activate heavier and slower checks
+ // to produce better error messages and more complete diagnostics. Extra "invalid_*"
+ // rules will be active during parsing.
Token *last_token = p->tokens[p->fill - 1];
reset_parser_state(p);
_PyPegen_parse(p);
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 29d48052e4..bf58f1e1f2 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -22,6 +22,7 @@
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
#define PyPARSE_ASYNC_HACKS 0x0080
+#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100
typedef struct _memo {
int type;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3738a9021f..eb15ef7a5d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -39,7 +39,7 @@
static struct tok_state *tok_new(void);
static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
-
+static int syntaxerror(struct tok_state *tok, const char *format, ...);
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
tokenizing. */
@@ -1030,8 +1030,9 @@ tok_nextc(struct tok_state *tok)
if (tok->cur != tok->inp) {
return Py_CHARMASK(*tok->cur++); /* Fast path */
}
- if (tok->done != E_OK)
- return EOF;
+ if (tok->done != E_OK) {
+ return EOF;
+ }
if (tok->fp == NULL) {
rc = tok_underflow_string(tok);
}
@@ -1963,16 +1964,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
tok->line_start = tok->multi_line_start;
int start = tok->lineno;
tok->lineno = tok->first_lineno;
-
if (quote_size == 3) {
- return syntaxerror(tok,
- "unterminated triple-quoted string literal"
- " (detected at line %d)", start);
+ syntaxerror(tok, "unterminated triple-quoted string literal"
+ " (detected at line %d)", start);
+ if (c != '\n') {
+ tok->done = E_EOFS;
+ }
+ return ERRORTOKEN;
}
else {
- return syntaxerror(tok,
- "unterminated string literal (detected at"
- " line %d)", start);
+ syntaxerror(tok, "unterminated string literal (detected at"
+ " line %d)", start);
+ if (c != '\n') {
+ tok->done = E_EOLS;
+ }
+ return ERRORTOKEN;
}
}
if (c == quote) {