diff options
| author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-07-31 02:17:09 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-07-31 02:17:09 +0100 |
| commit | b6bde9fc42aecad5be0457198d17cfe7b481ad79 (patch) | |
| tree | ec5e777de20072b7bf98d8101adf07e942cde4e8 | |
| parent | e63e6311aa258a5f3f49a7aed9fdde445fd384d6 (diff) | |
| download | cpython-git-b6bde9fc42aecad5be0457198d17cfe7b481ad79.tar.gz | |
bpo-44667: Treat correctly lines ending with comments and no newlines in the Python tokenizer (GH-27499)
| -rw-r--r-- | Lib/test/test_tokenize.py | 10 | ||||
| -rw-r--r-- | Lib/tokenize.py | 2 | ||||
| -rw-r--r-- | Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst | 4 |
3 files changed, 15 insertions, 1 deletions
```diff
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 681f2c72f9..4bce1ca9c7 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1458,6 +1458,16 @@ class TestTokenize(TestCase):
         # See http://bugs.python.org/issue16152
         self.assertExactTypeEqual('@ ', token.AT)
 
+    def test_comment_at_the_end_of_the_source_without_newline(self):
+        # See http://bugs.python.org/issue44667
+        source = 'b = 1\n\n#test'
+        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
+
+        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
+        for i in range(6):
+            self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
+        self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
 
 class UntokenizeTest(TestCase):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 42c1f10373..7d7736fe98 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -604,7 +604,7 @@ def _tokenize(readline, encoding):
                 pos += 1
 
     # Add an implicit NEWLINE if the input doesn't end in one
-    if last_line and last_line[-1] not in '\r\n':
+    if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
         yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
diff --git a/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst
new file mode 100644
index 0000000000..5b7e20e0af
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst
@@ -0,0 +1,4 @@
+The :func:`tokenize.tokenize` doesn't incorrectly generate a ``NEWLINE``
+token if the source doesn't end with a new line character but the last line
+is a comment, as the function is already generating a ``NL`` token. Patch by
+Pablo Galindo
```
