author     Ammar Askar <ammar_askar@hotmail.com>   2018-07-06 06:23:13 -0400
committer  Tal Einat <taleinat+github@gmail.com>   2018-07-06 13:23:13 +0300
commit     7829bba45d0e2446f3a0ca240bfe46959f01071e (patch)
tree       b4450692df47c1de32b7a7de5e941a5fda6e809a /Lib
parent     9720f60f2aba457121bfe42d09aa3ed91f28b86f (diff)
download   cpython-git-7829bba45d0e2446f3a0ca240bfe46959f01071e.tar.gz
[2.7] bpo-33899: Make tokenize module mirror end-of-file is end-of-line behavior (GH-7891) (#8133)
Most of the change involves fixing up the test suite, which previously assumed that there wouldn't be a new line if the input didn't end in one. Contributed by Ammar Askar.

(cherry picked from commit c4ef4896eac86a6759901c8546e26de4695a1389)
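For illustration, a minimal sketch (Python 2.7, against the patched module) of the new behavior; it mirrors the test_implicit_newline test added below:

    # Input lacking a trailing newline now yields an implicit NEWLINE
    # token just before ENDMARKER, matching newline-terminated input.
    from StringIO import StringIO
    from tokenize import generate_tokens, NEWLINE, ENDMARKER

    tokens = list(generate_tokens(StringIO("x").readline))
    assert tokens[-2][0] == NEWLINE    # absent before this patch
    assert tokens[-1][0] == ENDMARKER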
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/test/test_tokenize.py  46
-rw-r--r--  Lib/tokenize.py            10
2 files changed, 44 insertions, 12 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index fd9486bdd7..a4625971d3 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,32 +1,54 @@
from test import test_support
-from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
+from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase
+# Converts a source string into a list of textual representations
+# of the tokens, such as:
+# ` NAME 'if' (1, 0) (1, 2)`
+# to make writing tests easier.
+def stringify_tokens_from_source(token_generator, source_string):
+ result = []
+ num_lines = len(source_string.splitlines())
+ missing_trailing_nl = source_string[-1] not in '\r\n'
+
+ for type, token, start, end, line in token_generator:
+ if type == ENDMARKER:
+ break
+ # Ignore the new line on the last line if the input lacks one
+ if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+ continue
+ type = tok_name[type]
+ result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
+ locals())
+
+ return result
+
class TokenizeTest(TestCase):
# Tests for the tokenize module.
# The tests can be really simple. Given a small fragment of source
- # code, print out a table with tokens. The ENDMARKER is omitted for
- # brevity.
+ # code, print out a table with tokens. The ENDMARKER and
+ # final NEWLINE are omitted for brevity.
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
- # The ENDMARKER is omitted.
- result = []
f = StringIO(s)
- for type, token, start, end, line in generate_tokens(f.readline):
- if type == ENDMARKER:
- break
- type = tok_name[type]
- result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
- locals())
+ result = stringify_tokens_from_source(generate_tokens(f.readline), s)
+
self.assertEqual(result,
expected.rstrip().splitlines())
+ def test_implicit_newline(self):
+ # Make sure that the tokenizer puts in an implicit NEWLINE
+ # when the input lacks a trailing new line.
+ f = StringIO("x")
+ tokens = list(generate_tokens(f.readline))
+ self.assertEqual(tokens[-2][0], NEWLINE)
+ self.assertEqual(tokens[-1][0], ENDMARKER)
def test_basic(self):
self.check_tokenize("1 + 1", """\
@@ -616,7 +638,7 @@ class TestRoundtrip(TestCase):
self.check_roundtrip("if x == 1:\n"
" print x\n")
self.check_roundtrip("# This is a comment\n"
- "# This also")
+ "# This also\n")
# Some people use different formatting conventions, which makes
# untokenize a little trickier. Note that this test involves trailing
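The roundtrip fix above follows from the new implicit NEWLINE: tokenizing and then untokenizing now always produces newline-terminated output, so only newline-terminated sources round-trip exactly. A sketch (Python 2.7) of the property check_roundtrip asserts for this input:

    # Sources that end in a newline still untokenize back to themselves.
    from StringIO import StringIO
    from tokenize import generate_tokens, untokenize

    src = "# This is a comment\n# This also\n"
    assert untokenize(generate_tokens(StringIO(src).readline)) == src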
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index d426cd2df5..6c857f8547 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -306,8 +306,15 @@ def generate_tokens(readline):
contline = None
indents = [0]
+ last_line = b''
+ line = b''
while 1: # loop over lines in stream
try:
+ # We capture the value of the line variable here because
+ # readline uses the empty string '' to signal end of input,
+ # hence `line` itself will always be overwritten at the end
+ # of this loop.
+ last_line = line
line = readline()
except StopIteration:
line = ''
@@ -437,6 +444,9 @@ def generate_tokens(readline):
(lnum, pos), (lnum, pos+1), line)
pos += 1
+ # Add an implicit NEWLINE if the input doesn't end in one
+ if last_line and last_line[-1] not in '\r\n':
+ yield (NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
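The implicit NEWLINE starts at the column just past the last physical line's text, on line lnum - 1 (lnum has already advanced past end of input), and carries an empty line attribute. A sketch (Python 2.7, patched module) of the resulting stream:

    # Full token stream for input without a trailing newline.
    from StringIO import StringIO
    from tokenize import generate_tokens, tok_name

    for type_, text, start, end, line in generate_tokens(StringIO("x = 1").readline):
        print tok_name[type_], repr(text), start, end, repr(line)
    # NAME 'x' (1, 0) (1, 1) 'x = 1'
    # OP '=' (1, 2) (1, 3) 'x = 1'
    # NUMBER '1' (1, 4) (1, 5) 'x = 1'
    # NEWLINE '' (1, 5) (1, 6) ''   <- the implicit NEWLINE
    # ENDMARKER '' (2, 0) (2, 0) ''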