summary | refs | log | tree | commit | diff
path: root/Parser
diff options
context:
space:
mode:
author: Anthony Sottile <asottile@umich.edu> 2019-01-12 20:05:13 -0800
committer: INADA Naoki <methane@users.noreply.github.com> 2019-01-13 13:05:13 +0900
commit: 995d9b92979768125ced4da3a56f755bcdf80f6e (patch)
tree: 2184ab1771b87883a92391f41229a12ce4cbd9d3 /Parser
parent: 1cffd0eed313011c0c2bb071c8affeb4a7ed05c7 (diff)
download: cpython-git-995d9b92979768125ced4da3a56f755bcdf80f6e.tar.gz
bpo-16806: Fix `lineno` and `col_offset` for multi-line string tokens (GH-10021)
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/parsetok.c 15
-rw-r--r-- Parser/tokenizer.c 7
-rw-r--r-- Parser/tokenizer.h 5
3 files changed, 24 insertions, 3 deletions
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index fc878d89d5..d37e28a0a3 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
size_t len;
char *str;
col_offset = -1;
+ int lineno;
+ const char *line_start;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
}
}
#endif
- if (a != NULL && a >= tok->line_start) {
- col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
+
+ /* Nodes of type STRING, especially multi line strings
+ must be handled differently in order to get both
+ the starting line number and the column offset right.
+ (cf. issue 16806) */
+ lineno = type == STRING ? tok->first_lineno : tok->lineno;
+ line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+ if (a != NULL && a >= line_start) {
+ col_offset = Py_SAFE_DOWNCAST(a - line_start,
intptr_t, int);
}
else {
@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
- tok->lineno, col_offset,
+ lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0e6c1a85e0..3e3cf2cd7f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
+ /* Nodes of type STRING, especially multi line strings
+ must be handled differently in order to get both
+ the starting line number and the column offset right.
+ (cf. issue 16806) */
+ tok->first_lineno = tok->lineno;
+ tok->multi_line_start = tok->line_start;
+
/* Find the quote size and start of string */
c = tok_nextc(tok);
if (c == quote) {
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index cd18d25dc1..096ce687ec 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -38,6 +38,8 @@ struct tok_state {
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
const char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
+ int first_lineno; /* First line of a single line or multi line string
+ expression (cf. issue 16806) */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
#ifndef PGEN
@@ -58,6 +60,9 @@ struct tok_state {
char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
+ const char* multi_line_start; /* pointer to start of first line of
+ a single line or multi line string
+ expression (cf. issue 16806) */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;