summary | refs | log | tree | commit | diff
path: root/Lib/tokenize.py
diff options
context:
space:
mode:
author Ammar Askar <ammar_askar@hotmail.com> 2018-07-06 03:19:08 -0400
committer Tal Einat <taleinat+github@gmail.com> 2018-07-06 10:19:08 +0300
commit c4ef4896eac86a6759901c8546e26de4695a1389 (patch)
tree 47ad8191fbe9f8fe4bb4272509410bc229ec7c6e /Lib/tokenize.py
parent 3c8aae9ffe13d0f2ad4ff81cdf56bc6393af362a (diff)
download cpython-git-c4ef4896eac86a6759901c8546e26de4695a1389.tar.gz
bpo-33899: Make tokenize module mirror "end-of-file is end-of-line" behavior (GH-7891)
Most of the change involves fixing up the test suite, which previously assumed that no NEWLINE token would be emitted if the input didn't end in a newline. Contributed by Ammar Askar.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- Lib/tokenize.py 10
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c78d9f7e9e..fce010bc5e 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -492,8 +492,15 @@ def _tokenize(readline, encoding):
# BOM will already have been stripped.
encoding = "utf-8"
yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
+ last_line = b''
+ line = b''
while True: # loop over lines in stream
try:
+ # We capture the value of the line variable here because
+ # readline uses the empty string '' to signal end of input,
+ # hence `line` itself will always be overwritten at the end
+ # of this loop.
+ last_line = line
line = readline()
except StopIteration:
line = b''
@@ -648,6 +655,9 @@ def _tokenize(readline, encoding):
(lnum, pos), (lnum, pos+1), line)
pos += 1
+ # Add an implicit NEWLINE if the input doesn't end in one
+ if last_line and last_line[-1] not in '\r\n':
+ yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')