bpo-16806: Fix `lineno` and `col_offset` for multi-line string tokens (GH-10021)

author: Anthony Sottile <asottile@umich.edu> 2019-01-12 20:05:13 -0800
committer: INADA Naoki <methane@users.noreply.github.com> 2019-01-13 13:05:13 +0900
commit: 995d9b92979768125ced4da3a56f755bcdf80f6e (patch)
tree: 2184ab1771b87883a92391f41229a12ce4cbd9d3 /Parser
parent: 1cffd0eed313011c0c2bb071c8affeb4a7ed05c7 (diff)
download: cpython-git-995d9b92979768125ced4da3a56f755bcdf80f6e.tar.gz
3 files changed, 24 insertions, 3 deletions
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index fc878d89d5..d37e28a0a3 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         size_t len;
         char *str;
         col_offset = -1;
+        int lineno;
+        const char *line_start;
 
         type = PyTokenizer_Get(tok, &a, &b);
         if (type == ERRORTOKEN) {
@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             }
         }
 #endif
-        if (a != NULL && a >= tok->line_start) {
-            col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
+
+        /* Nodes of type STRING, especially multi-line strings,
+           must be handled differently in order to get both
+           the starting line number and the column offset right
+           (cf. issue 16806). */
+        lineno = type == STRING ? tok->first_lineno : tok->lineno;
+        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+        if (a != NULL && a >= line_start) {
+            col_offset = Py_SAFE_DOWNCAST(a - line_start,
                                           intptr_t, int);
         }
         else {
@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
-                               tok->lineno, col_offset,
+                               lineno, col_offset,
                                &(err_ret->expected))) != E_OK) {
             if (err_ret->error != E_DONE) {
                 PyObject_FREE(str);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0e6c1a85e0..3e3cf2cd7f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         int quote_size = 1;             /* 1 or 3 */
         int end_quote_size = 0;
 
+        /* Nodes of type STRING, especially multi-line strings,
+           must be handled differently in order to get both
+           the starting line number and the column offset right
+           (cf. issue 16806). */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
         /* Find the quote size and start of string */
         c = tok_nextc(tok);
         if (c == quote) {
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index cd18d25dc1..096ce687ec 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -38,6 +38,8 @@ struct tok_state {
     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
     const char *prompt, *nextprompt;          /* For interactive prompting */
     int lineno;         /* Current line number */
+    int first_lineno;   /* First line of a single line or multi line string
+                           expression (cf. issue 16806) */
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
 #ifndef PGEN
@@ -58,6 +60,9 @@ struct tok_state {
     char *encoding;         /* Source encoding. */
     int cont_line;          /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
+    const char* multi_line_start; /* pointer to start of first line of
+                                     a single line or multi line string
+                                     expression (cf. issue 16806) */
 #ifndef PGEN
     PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;
author	Anthony Sottile <asottile@umich.edu>	2019-01-12 20:05:13 -0800
committer	INADA Naoki <methane@users.noreply.github.com>	2019-01-13 13:05:13 +0900
commit	995d9b92979768125ced4da3a56f755bcdf80f6e (patch)
tree	2184ab1771b87883a92391f41229a12ce4cbd9d3 /Parser
parent	1cffd0eed313011c0c2bb071c8affeb4a7ed05c7 (diff)
download	cpython-git-995d9b92979768125ced4da3a56f755bcdf80f6e.tar.gz