-rw-r--r--   Lib/test/test_tokenize.py | 21
-rw-r--r--   Lib/tokenize.py           | 17
-rw-r--r--   Misc/NEWS                 |  3
3 files changed, 40 insertions, 1 deletion
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 984220729d..6506b671a1 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
 code, print out a table with tokens. The ENDMARKER is omitted for
 brevity.
 
+    >>> import glob
+
     >>> dump_tokens("1 + 1")
     ENCODING   'utf-8'       (0, 0) (0, 0)
     NUMBER     '1'           (1, 0) (1, 1)
@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      open as tokenize_open, Untokenizer)
 from io import BytesIO
 from unittest import TestCase, mock
-import os, sys, glob
+import os
 import token
 
 def dump_tokens(s):
@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase):
         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
 
 
+class TestRoundtrip(TestCase):
+    def roundtrip(self, code):
+        if isinstance(code, str):
+            code = code.encode('utf-8')
+        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+
+    def test_indentation_semantics_retained(self):
+        """
+        Ensure that although whitespace might be mutated in a roundtrip,
+        the semantic meaning of the indentation remains consistent.
+        """
+        code = "if False:\n\tx=3\n\tx=3\n"
+        codelines = self.roundtrip(code).split('\n')
+        self.assertEqual(codelines[1], codelines[2])
+
+
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
 def test_main():
@@ -1237,6 +1255,7 @@ def test_main():
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
     support.run_unittest(UntokenizeTest)
+    support.run_unittest(TestRoundtrip)
 
 if __name__ == "__main__":
     test_main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index cf18bf9f2d..4d93a83e29 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -244,6 +244,8 @@ class Untokenizer:
 
     def untokenize(self, iterable):
         it = iter(iterable)
+        indents = []
+        startline = False
         for t in it:
             if len(t) == 2:
                 self.compat(t, it)
@@ -254,6 +256,21 @@
                 continue
             if tok_type == ENDMARKER:
                 break
+            if tok_type == INDENT:
+                indents.append(token)
+                continue
+            elif tok_type == DEDENT:
+                indents.pop()
+                self.prev_row, self.prev_col = end
+                continue
+            elif tok_type in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                indent = indents[-1]
+                if start[1] >= len(indent):
+                    self.tokens.append(indent)
+                    self.prev_col = len(indent)
+                startline = False
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -60,6 +60,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
+  for tab-indented blocks.
+
 - Issue #24336: The contextmanager decorator now works with functions with
   keyword arguments called "func" and "self".  Patch by Martin Panter.
 
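As a sanity check, here is a minimal sketch (not part of the patch) of the round trip that the new TestRoundtrip case locks down, assuming a Python interpreter with this fix applied (3.4+): the exact whitespace may be mutated on the way through tokenize/untokenize, but both tab-indented body lines must come back with identical leading whitespace.

from io import BytesIO
from tokenize import tokenize, untokenize

# Same input as test_indentation_semantics_retained: two tab-indented
# statements inside an `if False:` block.
source = b"if False:\n\tx=3\n\tx=3\n"

# untokenize() returns bytes, encoded per the stream's ENCODING token.
result = untokenize(tokenize(BytesIO(source).readline)).decode('utf-8')

lines = result.split('\n')
# With the INDENT/DEDENT bookkeeping above, both body lines keep the
# same leading whitespace, so the block structure is preserved.
assert lines[1] == lines[2]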

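The bookkeeping works because an INDENT token's string is the literal leading whitespace of the block it opens, which untokenize() can now push onto the indents stack and replay at the start of each following logical line. The illustrative snippet below (again, not part of the patch) prints those tokens for the test input:

from io import BytesIO
from tokenize import tokenize
import token

source = b"if False:\n\tx=3\n\tx=3\n"
for tok in tokenize(BytesIO(source).readline):
    if tok.type in (token.INDENT, token.DEDENT):
        # Prints: INDENT '\t' then DEDENT '' -- the INDENT carries the
        # actual indent text that the patched untokenize() re-emits.
        print(token.tok_name[tok.type], repr(tok.string))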