author    Martin Panter <vadmium+py@gmail.com>    2015-10-10 10:15:21 +0000
committer Martin Panter <vadmium+py@gmail.com>    2015-10-10 10:15:21 +0000
commit    b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55 (patch)
tree      450aeb71ba9d9118732554bb4a7a4a22edc3734d /Lib/tokenize.py
parent    e013b9445b6f9caab1eb30b7785e3690d1bab4df (diff)
parent    b98a5f90775e5f59662e452b3b31e0c07efa3d2c (diff)
download  cpython-b4c49a7b40ae96dbdc079b40b6c317bd2cbdeb55.tar.gz
Issue #22413: Merge StringIO doc from 3.4 into 3.5
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r--    Lib/tokenize.py    78
1 file changed, 71 insertions, 7 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 4d93a83e29..65d06e53f3 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -91,7 +91,8 @@ EXACT_TOKEN_TYPES = {
'**=': DOUBLESTAREQUAL,
'//': DOUBLESLASH,
'//=': DOUBLESLASHEQUAL,
- '@': AT
+ '@': AT,
+ '@=': ATEQUAL,
}
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
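For illustration (not part of the patch), a minimal sketch of what the new table entry buys on a Python 3.5+ interpreter, where PEP 465 added the '@=' operator:

    import io
    import tokenize

    # '@=' now tokenizes as OP with exact_type ATEQUAL, courtesy of the
    # EXACT_TOKEN_TYPES entry added above.
    for tok in tokenize.tokenize(io.BytesIO(b"a @= b\n").readline):
        if tok.type == tokenize.OP:
            print(tok.string, tokenize.tok_name[tok.exact_type])
    # Prints: @= ATEQUAL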
@@ -150,7 +151,7 @@ String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
r"//=?", r"->",
- r"[+\-*/%&|^=<>]=?",
+ r"[+\-*/%&@|^=<>]=?",
r"~")
Bracket = '[][(){}]'
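Adding '@' to the character class lets the single-character branch of the Operator pattern match both '@' and '@='. A simplified stand-in (the real pattern is assembled from several alternatives via group()):

    import re

    # Only the changed character class, in isolation.
    pattern = re.compile(r"[+\-*/%&@|^=<>]=?")
    print(pattern.match("@=").group())  # '@='  (augmented matrix multiply)
    print(pattern.match("@").group())   # '@'   (decorator / matmul operator)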
@@ -186,7 +187,6 @@ endpats = {"'": Single, '"': Double,
"rB'''": Single3, 'rB"""': Double3,
"RB'''": Single3, 'RB"""': Double3,
"u'''": Single3, 'u"""': Double3,
- "R'''": Single3, 'R"""': Double3,
"U'''": Single3, 'U"""': Double3,
'r': None, 'R': None, 'b': None, 'B': None,
'u': None, 'U': None}
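The removed pair is a duplicate key, not a behavior change: an identical "R'''"/'R"""' pair appears earlier in the full literal in the source file, and duplicate keys in a dict literal collapse to a single entry anyway. A tiny illustration:

    # Duplicate keys never produce a second entry, so deleting the
    # repeated pair leaves the resulting mapping unchanged.
    d = {"R'''": 1, "R'''": 1}
    print(len(d))  # 1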
@@ -291,7 +291,7 @@ class Untokenizer:
self.encoding = tokval
continue
- if toknum in (NAME, NUMBER):
+ if toknum in (NAME, NUMBER, ASYNC, AWAIT):
tokval += ' '
# Insert a space between two consecutive strings
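A sketch of why ASYNC and AWAIT need the same trailing-space treatment as NAME and NUMBER in Untokenizer's compatibility path (two-tuple input); this assumes a 3.5/3.6 interpreter, where ASYNC/AWAIT are distinct token types:

    import io
    import tokenize

    src = "async def f():\n    await x\n"
    two_tuples = [(t.type, t.string)
                  for t in tokenize.generate_tokens(io.StringIO(src).readline)]
    print(tokenize.untokenize(two_tuples))
    # Without this change, ASYNC/AWAIT tokens get no trailing space and
    # the round trip glues them to the next name: 'asyncdef f():', 'awaitx'.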
@@ -498,6 +498,12 @@ def _tokenize(readline, encoding):
contline = None
indents = [0]
+ # 'stashed' and 'async_*' are used for async/await parsing
+ stashed = None
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
+
if encoding is not None:
if encoding == "utf-8-sig":
# BOM will already have been stripped.
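The stashing machinery exists because 'async' was not yet a keyword in 3.5: the tokenizer holds the token back and only promotes it to ASYNC when 'def' follows. A sketch of the observable behavior on 3.5/3.6:

    import io
    import tokenize

    for src in ("async = 1\n", "async def f(): pass\n"):
        toks = tokenize.generate_tokens(io.StringIO(src).readline)
        print([tokenize.tok_name[t.type] for t in toks][:3])
    # ['NAME', 'OP', 'NUMBER']   -- stash flushed as a plain NAME
    # ['ASYNC', 'NAME', 'NAME']  -- stash promoted once 'def' is seen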
@@ -573,8 +579,19 @@ def _tokenize(readline, encoding):
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
+
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
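These resets end the async context once indentation drops back to the level where the 'async def' started, so 'await' after the function body is an ordinary name again (it was not reserved in 3.5). A sketch:

    import io
    import tokenize

    src = "async def f():\n    await x\nawait = 1\n"
    for t in tokenize.generate_tokens(io.StringIO(src).readline):
        if t.string == "await":
            print(tokenize.tok_name[t.type])
    # AWAIT  (inside the async def body)
    # NAME   (after the DEDENT clears async_def)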
@@ -593,10 +610,21 @@ def _tokenize(readline, encoding):
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, epos, line)
elif initial in '\r\n':
- yield TokenInfo(NL if parenlev > 0 else NEWLINE,
- token, spos, epos, line)
+ if stashed:
+ yield stashed
+ stashed = None
+ if parenlev > 0:
+ yield TokenInfo(NL, token, spos, epos, line)
+ else:
+ yield TokenInfo(NEWLINE, token, spos, epos, line)
+ if async_def:
+ async_def_nl = True
+
elif initial == '#':
assert not token.endswith("\n")
+ if stashed:
+ yield stashed
+ stashed = None
yield TokenInfo(COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = _compile(endpats[token])
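Flushing the stash before NL/NEWLINE and COMMENT keeps the token order intact when 'async' is the last thing on a line: at that point it can no longer be the start of 'async def'. A sketch (3.5/3.6):

    import io
    import tokenize

    for t in tokenize.generate_tokens(io.StringIO("async\n").readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # NAME 'async', then NEWLINE '\n', then ENDMARKER '' -- in that order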
@@ -624,7 +652,36 @@ def _tokenize(readline, encoding):
else: # ordinary string
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
- yield TokenInfo(NAME, token, spos, epos, line)
+ if token in ('async', 'await'):
+ if async_def:
+ yield TokenInfo(
+ ASYNC if token == 'async' else AWAIT,
+ token, spos, epos, line)
+ continue
+
+ tok = TokenInfo(NAME, token, spos, epos, line)
+ if token == 'async' and not stashed:
+ stashed = tok
+ continue
+
+ if token == 'def':
+ if (stashed
+ and stashed.type == NAME
+ and stashed.string == 'async'):
+
+ async_def = True
+ async_def_indent = indents[-1]
+
+ yield TokenInfo(ASYNC, stashed.string,
+ stashed.start, stashed.end,
+ stashed.line)
+ stashed = None
+
+ if stashed:
+ yield stashed
+ stashed = None
+
+ yield tok
elif initial == '\\': # continued stmt
continued = 1
else:
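Putting the branch above together: a lone 'async' is stashed; when 'def' arrives the stash is re-emitted as ASYNC and async_def is armed, after which 'async'/'await' inside the body map directly to ASYNC/AWAIT. A sketch of the resulting stream on 3.5/3.6:

    import io
    import tokenize

    src = "async def f():\n    await x\n"
    for t in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # Includes: ASYNC 'async', NAME 'def', NAME 'f', ...,
    #           AWAIT 'await', NAME 'x', ...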
@@ -632,12 +689,19 @@ def _tokenize(readline, encoding):
parenlev += 1
elif initial in ')]}':
parenlev -= 1
+ if stashed:
+ yield stashed
+ stashed = None
yield TokenInfo(OP, token, spos, epos, line)
else:
yield TokenInfo(ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos += 1
+ if stashed:
+ yield stashed
+ stashed = None
+
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
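The final flush covers a stashed 'async' that reaches end of input without ever meeting 'def'; it is still emitted, just before the trailing DEDENTs and ENDMARKER. A sketch (3.5/3.6; note the source deliberately has no trailing newline):

    import io
    import tokenize

    for t in tokenize.generate_tokens(io.StringIO("async").readline):
        print(tokenize.tok_name[t.type], repr(t.string))
    # Expected: NAME 'async' followed by ENDMARKER ''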