author    Martin Panter <vadmium+py@gmail.com>    2015-10-10 10:15:21 +0000
committer Martin Panter <vadmium+py@gmail.com>    2015-10-10 10:15:21 +0000
commit    b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55 (patch)
tree      450aeb71ba9d9118732554bb4a7a4a22edc3734d /Lib/tokenize.py
parent    e013b9445b6f9caab1eb30b7785e3690d1bab4df (diff)
parent    b98a5f90775e5f59662e452b3b31e0c07efa3d2c (diff)
download  cpython-b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55.tar.gz
Issue #22413: Merge StringIO doc from 3.4 into 3.5
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r--    Lib/tokenize.py    78
1 file changed, 71 insertions, 7 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 4d93a83e29..65d06e53f3 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -91,7 +91,8 @@ EXACT_TOKEN_TYPES = {
'**=': DOUBLESTAREQUAL,
'//': DOUBLESLASH,
'//=': DOUBLESLASHEQUAL,
- '@': AT
+ '@': AT,
+ '@=': ATEQUAL,
}
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
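For illustration (not part of the patch), a minimal sketch of what the new table entry buys on a Python 3.5+ interpreter, where PEP 465 added the '@=' operator:

    import io
    import tokenize

    # '@=' now tokenizes as OP with exact_type ATEQUAL, courtesy of the
    # EXACT_TOKEN_TYPES entry added above.
    for tok in tokenize.tokenize(io.BytesIO(b"a @= b\n").readline):
        if tok.type == tokenize.OP:
            print(tok.string, tokenize.tok_name[tok.exact_type])
    # Prints: @= ATEQUAL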
@@ -150,7 +151,7 @@ String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
- r"[+\-*/%&|^=<>]=?",
+ r"[+\-*/%&@|^=<>]=?",
r"~")
Bracket = '[][(){}]'
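Adding '@' to the character class lets the single-character branch of the Operator pattern match both '@' and '@='. A simplified stand-in (the real pattern is assembled from several alternatives via group()):

    import re

    # Only the changed character class, in isolation.
    pattern = re.compile(r"[+\-*/%&@|^=<>]=?")
    print(pattern.match("@=").group())  # '@='  (augmented matrix multiply)
    print(pattern.match("@").group())   # '@'   (decorator / matmul operator)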
@@ -186,7 +187,6 @@ endpats = {"'": Single, '"': Double,
"rB'''": Single3, 'rB"""': Double3,
"RB'''": Single3, 'RB"""': Double3,
"u'''": Single3, 'u"""': Double3,
- "R'''": Single3, 'R"""': Double3,
"U'''": Single3, 'U"""': Double3,
'r': None, 'R': None, 'b': None, 'B': None,
'u': None, 'U': None}
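The removed pair is a duplicate key, not a behavior change: an identical "R'''"/'R"""' pair appears earlier in the full literal in the source file, and duplicate keys in a dict literal collapse to a single entry anyway. A tiny illustration:

    # Duplicate keys never produce a second entry, so deleting the
    # repeated pair leaves the resulting mapping unchanged.
    d = {"R'''": 1, "R'''": 1}
    print(len(d))  # 1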
@@ -291,7 +291,7 @@ class Untokenizer:
self.encoding = tokval
continue
- if toknum in (NAME, NUMBER):
+ if toknum in (NAME, NUMBER, ASYNC, AWAIT):
tokval += ' '
# Insert a space between two consecutive strings
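A sketch of why ASYNC and AWAIT need the same trailing-space treatment as NAME and NUMBER in Untokenizer's compatibility path (two-tuple input); this assumes a 3.5/3.6 interpreter, where ASYNC/AWAIT are distinct token types:

    import io
    import tokenize

    src = "async def f():\n    await x\n"
    two_tuples = [(t.type, t.string)
                  for t in tokenize.generate_tokens(io.StringIO(src).readline)]
    print(tokenize.untokenize(two_tuples))
    # Without this change, ASYNC/AWAIT tokens get no trailing space and
    # the round trip glues them to the next name: 'asyncdef f():', 'awaitx'.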
@@ -498,6 +498,12 @@ def _tokenize(readline, encoding):
contline = None
indents = [0]
+ # 'stashed' and 'async_*' are used for async/await parsing
+ stashed = None
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
+
if encoding is not None:
if encoding == "utf-8-sig":
# BOM will already have been stripped.
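The stashing machinery exists because 'async' was not yet a keyword in 3.5: the tokenizer holds the token back and only promotes it to ASYNC when 'def' follows. A sketch of the observable behavior on 3.5/3.6:

    import io
    import tokenize

    for src in ("async = 1\n", "async def f(): pass\n"):
        toks = tokenize.generate_tokens(io.StringIO(src).readline)
        print([tokenize.tok_name[t.type] for t in toks][:3])
    # ['NAME', 'OP', 'NUMBER']   -- stash flushed as a plain NAME
    # ['ASYNC', 'NAME', 'NAME']  -- stash promoted once 'def' is seen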
@@ -573,8 +579,19 @@ def _tokenize(readline, encoding):
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
+
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
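These resets end the async context once indentation drops back to the level where the 'async def' started, so 'await' after the function body is an ordinary name again (it was not reserved in 3.5). A sketch:

    import io
    import tokenize

    src = "async def f():\n    await x\nawait = 1\n"
    for t in tokenize.generate_tokens(io.StringIO(src).readline):
        if t.string == "await":
            print(tokenize.tok_name[t.type])
    # AWAIT  (inside the async def body)
    # NAME   (after the DEDENT clears async_def)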
@@ -593,10 +610,21 @@ def _tokenize(readline, encoding):
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, epos, line)
elif initial in '\r\n':
- yield TokenInfo(NL if parenlev > 0 else NEWLINE,
- token, spos, epos, line)
+ if stashed:
+ yield stashed
+ stashed = None
+ if parenlev > 0:
+ yield TokenInfo(NL, token, spos, epos, line)
+ else:
+ yield TokenInfo(NEWLINE, token, spos, epos, line)
+ if async_def:
+ async_def_nl = True
+
elif initial == '#':
assert not token.endswith("\n")
+ if stashed:
+ yield stashed
+ stashed = None
yield TokenInfo(COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = _compile(endpats[token])
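Flushing the stash before NL/NEWLINE and COMMENT keeps the token order intact when 'async' is the last thing on a line: at that point it can no longer be the start of 'async def'. A sketch (3.5/3.6):

    import io
    import tokenize

    for t in tokenize.generate_tokens(io.StringIO("async\n").readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # NAME 'async', then NEWLINE '\n', then ENDMARKER '' -- in that order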
@@ -624,7 +652,36 @@ def _tokenize(readline, encoding):
else: # ordinary string
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
- yield TokenInfo(NAME, token, spos, epos, line)
+ if token in ('async', 'await'):
+ if async_def:
+ yield TokenInfo(
+ ASYNC if token == 'async' else AWAIT,
+ token, spos, epos, line)
+ continue
+
+ tok = TokenInfo(NAME, token, spos, epos, line)
+ if token == 'async' and not stashed:
+ stashed = tok
+ continue
+
+ if token == 'def':
+ if (stashed
+ and stashed.type == NAME
+ and stashed.string == 'async'):
+
+ async_def = True
+ async_def_indent = indents[-1]
+
+ yield TokenInfo(ASYNC, stashed.string,
+ stashed.start, stashed.end,
+ stashed.line)
+ stashed = None
+
+ if stashed:
+ yield stashed
+ stashed = None
+
+ yield tok
elif initial == '\\': # continued stmt
continued = 1
else:
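Putting the branch above together: a lone 'async' is stashed; when 'def' arrives the stash is re-emitted as ASYNC and async_def is armed, after which 'async'/'await' inside the body map directly to ASYNC/AWAIT. A sketch of the resulting stream on 3.5/3.6:

    import io
    import tokenize

    src = "async def f():\n    await x\n"
    for t in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # Includes: ASYNC 'async', NAME 'def', NAME 'f', ...,
    #           AWAIT 'await', NAME 'x', ...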
@@ -632,12 +689,19 @@ def _tokenize(readline, encoding):
parenlev += 1
elif initial in ')]}':
parenlev -= 1
+ if stashed:
+ yield stashed
+ stashed = None
yield TokenInfo(OP, token, spos, epos, line)
else:
yield TokenInfo(ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos += 1
+ if stashed:
+ yield stashed
+ stashed = None
+
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
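The final flush covers a stashed 'async' that reaches end of input without ever meeting 'def'; it is still emitted, just before the trailing DEDENTs and ENDMARKER. A sketch (3.5/3.6; note the source deliberately has no trailing newline):

    import io
    import tokenize

    for t in tokenize.generate_tokens(io.StringIO("async").readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # Expected: NAME 'async' followed by ENDMARKER ''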