summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2020-02-12 02:35:10 -0800
committer: GitHub <noreply@github.com> 2020-02-12 02:35:10 -0800
commit: efd878cdb46d9c7038d93fb36eb1ff7dc5baf9ec (patch)
tree: 25eaf1cdfc0890bc3bc3c8bf71748e6750836ff7
parent: 0b8f738eb3ee0110461e7da28c0b6b452f91999d (diff)
download: cpython-git-efd878cdb46d9c7038d93fb36eb1ff7dc5baf9ec.tar.gz
bpo-39219: Fix SyntaxError attributes in the tokenizer. (GH-17828)
* Always set the text attribute.
* Correct the offset attribute for non-ascii sources.

(cherry picked from commit 0cc6b5e559b8303b18fdd56c2befd900fe7b5e35)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
-rw-r--r-- Lib/test/test_exceptions.py | 14
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2020-01-05-13-36-08.bpo-39219.uHtKd4.rst | 2
-rw-r--r-- Parser/tokenizer.c | 36
3 files changed, 47 insertions, 5 deletions
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 10c1e07646..3a32253157 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -179,17 +179,25 @@ class ExceptionTests(unittest.TestCase):
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
def testSyntaxErrorOffset(self):
- def check(src, lineno, offset):
+ def check(src, lineno, offset, encoding='utf-8'):
with self.assertRaises(SyntaxError) as cm:
compile(src, '<fragment>', 'exec')
self.assertEqual(cm.exception.lineno, lineno)
self.assertEqual(cm.exception.offset, offset)
+ if cm.exception.text is not None:
+ if not isinstance(src, str):
+ src = src.decode(encoding, 'replace')
+ line = src.split('\n')[lineno-1]
+ self.assertEqual(cm.exception.text.rstrip('\n'), line)
check('def fact(x):\n\treturn x!\n', 2, 10)
check('1 +\n', 1, 4)
check('def spam():\n print(1)\n print(2)', 3, 10)
check('Python = "Python" +', 1, 20)
check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+ check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
+ 2, 19, encoding='cp1251')
+ check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
check('x = "a', 1, 7)
check('lambda x: x = 2', 1, 1)
@@ -205,6 +213,10 @@ class ExceptionTests(unittest.TestCase):
check('0010 + 2', 1, 4)
check('x = 32e-+4', 1, 8)
check('x = 0o9', 1, 6)
+ check('\u03b1 = 0xI', 1, 6)
+ check(b'\xce\xb1 = 0xI', 1, 6)
+ check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
+ encoding='iso8859-7')
# Errors thrown by symtable.c
check('x = [(yield i) for i in range(3)]', 1, 5)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-01-05-13-36-08.bpo-39219.uHtKd4.rst b/Misc/NEWS.d/next/Core and Builtins/2020-01-05-13-36-08.bpo-39219.uHtKd4.rst
new file mode 100644
index 0000000000..dac8360df7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-01-05-13-36-08.bpo-39219.uHtKd4.rst
@@ -0,0 +1,2 @@
+Syntax errors raised in the tokenizer now always set correct "text" and
+"offset" attributes.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f73c32684c..aecbcebb91 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1,6 +1,7 @@
/* Tokenizer implementation */
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include <ctype.h>
@@ -1034,17 +1035,44 @@ tok_backup(struct tok_state *tok, int c)
static int
syntaxerror(struct tok_state *tok, const char *format, ...)
{
+ PyObject *errmsg, *errtext, *args;
va_list vargs;
#ifdef HAVE_STDARG_PROTOTYPES
va_start(vargs, format);
#else
va_start(vargs);
#endif
- PyErr_FormatV(PyExc_SyntaxError, format, vargs);
+ errmsg = PyUnicode_FromFormatV(format, vargs);
va_end(vargs);
- PyErr_SyntaxLocationObject(tok->filename,
- tok->lineno,
- (int)(tok->cur - tok->line_start));
+ if (!errmsg) {
+ goto error;
+ }
+
+ errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
+ "replace");
+ if (!errtext) {
+ goto error;
+ }
+ int offset = (int)PyUnicode_GET_LENGTH(errtext);
+ Py_ssize_t line_len = strcspn(tok->line_start, "\n");
+ if (line_len != tok->cur - tok->line_start) {
+ Py_DECREF(errtext);
+ errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
+ "replace");
+ }
+ if (!errtext) {
+ goto error;
+ }
+
+ args = Py_BuildValue("(O(OiiN))", errmsg,
+ tok->filename, tok->lineno, offset, errtext);
+ if (args) {
+ PyErr_SetObject(PyExc_SyntaxError, args);
+ Py_DECREF(args);
+ }
+
+error:
+ Py_XDECREF(errmsg);
tok->done = E_ERROR;
return ERRORTOKEN;
}