From 96ec934e755355cfc5af036db8641646b7ddb45e Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Thu, 23 Jul 2015 15:01:58 +0300 Subject: Issue #24619: Simplify async/await tokenization. This commit simplifies async/await tokenization in tokenizer.c, tokenize.py & lib2to3/tokenize.py. Previous solution was to keep a stack of async-def & def blocks, whereas the new approach is just to remember position of the outermost async-def block. This change won't bring any parsing performance improvements, but it makes the code much easier to read and validate. --- Lib/tokenize.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'Lib/tokenize.py') diff --git a/Lib/tokenize.py b/Lib/tokenize.py index c3efdda528..65d06e53f3 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -498,10 +498,11 @@ def _tokenize(readline, encoding): contline = None indents = [0] - # 'stashed' and 'ctx' are used for async/await parsing + # 'stashed' and 'async_*' are used for async/await parsing stashed = None - ctx = [('sync', 0)] - in_async = 0 + async_def = False + async_def_indent = 0 + async_def_nl = False if encoding is not None: if encoding == "utf-8-sig": @@ -579,15 +580,18 @@ def _tokenize(readline, encoding): ("", lnum, pos, line)) indents = indents[:-1] - cur_indent = indents[-1] - while len(ctx) > 1 and ctx[-1][1] >= cur_indent: - if ctx[-1][0] == 'async': - in_async -= 1 - assert in_async >= 0 - ctx.pop() + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -609,8 +613,13 @@ def _tokenize(readline, encoding): if stashed: yield stashed stashed = None - yield TokenInfo(NL if parenlev > 0 else NEWLINE, - token, spos, epos, line) + if parenlev > 0: + yield TokenInfo(NL, token, spos, epos, line) + else: + yield TokenInfo(NEWLINE, token, spos, epos, line) + if async_def: + async_def_nl = True + elif initial == '#': assert not token.endswith("\n") if stashed: @@ -644,7 +653,7 @@ def _tokenize(readline, encoding): yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name if token in ('async', 'await'): - if in_async: + if async_def: yield TokenInfo( ASYNC if token == 'async' else AWAIT, token, spos, epos, line) @@ -660,15 +669,13 @@ def _tokenize(readline, encoding): and stashed.type == NAME and stashed.string == 'async'): - ctx.append(('async', indents[-1])) - in_async += 1 + async_def = True + async_def_indent = indents[-1] yield TokenInfo(ASYNC, stashed.string, stashed.start, stashed.end, stashed.line) stashed = None - else: - ctx.append(('sync', indents[-1])) if stashed: yield stashed -- cgit v1.2.1