diff options
author | Martin Panter <vadmium+py@gmail.com> | 2015-10-10 10:15:21 +0000 |
---|---|---|
committer | Martin Panter <vadmium+py@gmail.com> | 2015-10-10 10:15:21 +0000 |
commit | b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55 (patch) | |
tree | 450aeb71ba9d9118732554bb4a7a4a22edc3734d /Lib/tokenize.py | |
parent | e013b9445b6f9caab1eb30b7785e3690d1bab4df (diff) | |
parent | b98a5f90775e5f59662e452b3b31e0c07efa3d2c (diff) | |
download | cpython-b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55.tar.gz |
Issue #22413: Merge StringIO doc from 3.4 into 3.5
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 78 |
1 file changed, 71 insertions, 7 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 4d93a83e29..65d06e53f3 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -91,7 +91,8 @@ EXACT_TOKEN_TYPES = { '**=': DOUBLESTAREQUAL, '//': DOUBLESLASH, '//=': DOUBLESLASHEQUAL, - '@': AT + '@': AT, + '@=': ATEQUAL, } class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): @@ -150,7 +151,7 @@ String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", # recognized as two instances of =). Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=", r"//=?", r"->", - r"[+\-*/%&|^=<>]=?", + r"[+\-*/%&@|^=<>]=?", r"~") Bracket = '[][(){}]' @@ -186,7 +187,6 @@ endpats = {"'": Single, '"': Double, "rB'''": Single3, 'rB"""': Double3, "RB'''": Single3, 'RB"""': Double3, "u'''": Single3, 'u"""': Double3, - "R'''": Single3, 'R"""': Double3, "U'''": Single3, 'U"""': Double3, 'r': None, 'R': None, 'b': None, 'B': None, 'u': None, 'U': None} @@ -291,7 +291,7 @@ class Untokenizer: self.encoding = tokval continue - if toknum in (NAME, NUMBER): + if toknum in (NAME, NUMBER, ASYNC, AWAIT): tokval += ' ' # Insert a space between two consecutive strings @@ -498,6 +498,12 @@ def _tokenize(readline, encoding): contline = None indents = [0] + # 'stashed' and 'async_*' are used for async/await parsing + stashed = None + async_def = False + async_def_indent = 0 + async_def_nl = False + if encoding is not None: if encoding == "utf-8-sig": # BOM will already have been stripped. 
@@ -573,8 +579,19 @@ def _tokenize(readline, encoding): "unindent does not match any outer indentation level", ("<tokenize>", lnum, pos, line)) indents = indents[:-1] + + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -593,10 +610,21 @@ def _tokenize(readline, encoding): (initial == '.' and token != '.' and token != '...')): yield TokenInfo(NUMBER, token, spos, epos, line) elif initial in '\r\n': - yield TokenInfo(NL if parenlev > 0 else NEWLINE, - token, spos, epos, line) + if stashed: + yield stashed + stashed = None + if parenlev > 0: + yield TokenInfo(NL, token, spos, epos, line) + else: + yield TokenInfo(NEWLINE, token, spos, epos, line) + if async_def: + async_def_nl = True + elif initial == '#': assert not token.endswith("\n") + if stashed: + yield stashed + stashed = None yield TokenInfo(COMMENT, token, spos, epos, line) elif token in triple_quoted: endprog = _compile(endpats[token]) @@ -624,7 +652,36 @@ def _tokenize(readline, encoding): else: # ordinary string yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name - yield TokenInfo(NAME, token, spos, epos, line) + if token in ('async', 'await'): + if async_def: + yield TokenInfo( + ASYNC if token == 'async' else AWAIT, + token, spos, epos, line) + continue + + tok = TokenInfo(NAME, token, spos, epos, line) + if token == 'async' and not stashed: + stashed = tok + continue + + if token == 'def': + if (stashed + and stashed.type == NAME + and stashed.string == 'async'): + + async_def = True + async_def_indent = indents[-1] + + yield TokenInfo(ASYNC, stashed.string, + stashed.start, stashed.end, + 
stashed.line) + stashed = None + + if stashed: + yield stashed + stashed = None + + yield tok elif initial == '\\': # continued stmt continued = 1 else: @@ -632,12 +689,19 @@ def _tokenize(readline, encoding): parenlev += 1 elif initial in ')]}': parenlev -= 1 + if stashed: + yield stashed + stashed = None yield TokenInfo(OP, token, spos, epos, line) else: yield TokenInfo(ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos+1), line) pos += 1 + if stashed: + yield stashed + stashed = None + for indent in indents[1:]: # pop remaining indent levels yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') |