summaryrefslogtreecommitdiff
path: root/Lib/token.py
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2018-12-22 11:18:40 +0200
committer: GitHub <noreply@github.com> 2018-12-22 11:18:40 +0200
commit: 8ac658114dec4964479baecfbc439fceb40eaa79 (patch)
tree: e66c4c3beda293a6fdf01763306697d15d0af157 /Lib/token.py
parent: c1b4b0f6160e1919394586f44b12538505fed300 (diff)
download: cpython-git-8ac658114dec4964479baecfbc439fceb40eaa79.tar.gz
bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (now containing some data moved from "Lib/tokenize.py") and the new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on a read-only source tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of being executable itself. Added new make targets "regen-token" and "regen-symbol", which are now dependencies of "regen-all". The documentation now contains strings for operators and punctuation tokens.
Diffstat (limited to 'Lib/token.py')
-rw-r--r--Lib/token.py134
1 files changed, 52 insertions, 82 deletions
diff --git a/Lib/token.py b/Lib/token.py
index ba132059ab..5af7e6b91e 100644
--- a/Lib/token.py
+++ b/Lib/token.py
@@ -1,15 +1,8 @@
-"""Token constants (from "token.h")."""
+"""Token constants."""
+# Auto-generated by Tools/scripts/generate_token.py
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
-# This file is automatically generated; please don't muck it up!
-#
-# To update the symbols in this file, 'cd' to the top directory of
-# the python source tree after building the interpreter and run:
-#
-# ./python Lib/token.py
-
-#--start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2
@@ -63,23 +56,70 @@ AT = 49
ATEQUAL = 50
RARROW = 51
ELLIPSIS = 52
-# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
OP = 53
-ERRORTOKEN = 54
# These aren't used by the C tokenizer but are needed for tokenize.py
+ERRORTOKEN = 54
COMMENT = 55
NL = 56
ENCODING = 57
N_TOKENS = 58
# Special definitions for cooperation with parser
NT_OFFSET = 256
-#--end constants--
tok_name = {value: name
for name, value in globals().items()
if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())
+EXACT_TOKEN_TYPES = {
+ '!=': NOTEQUAL,
+ '%': PERCENT,
+ '%=': PERCENTEQUAL,
+ '&': AMPER,
+ '&=': AMPEREQUAL,
+ '(': LPAR,
+ ')': RPAR,
+ '*': STAR,
+ '**': DOUBLESTAR,
+ '**=': DOUBLESTAREQUAL,
+ '*=': STAREQUAL,
+ '+': PLUS,
+ '+=': PLUSEQUAL,
+ ',': COMMA,
+ '-': MINUS,
+ '-=': MINEQUAL,
+ '->': RARROW,
+ '.': DOT,
+ '...': ELLIPSIS,
+ '/': SLASH,
+ '//': DOUBLESLASH,
+ '//=': DOUBLESLASHEQUAL,
+ '/=': SLASHEQUAL,
+ ':': COLON,
+ ';': SEMI,
+ '<': LESS,
+ '<<': LEFTSHIFT,
+ '<<=': LEFTSHIFTEQUAL,
+ '<=': LESSEQUAL,
+ '=': EQUAL,
+ '==': EQEQUAL,
+ '>': GREATER,
+ '>=': GREATEREQUAL,
+ '>>': RIGHTSHIFT,
+ '>>=': RIGHTSHIFTEQUAL,
+ '@': AT,
+ '@=': ATEQUAL,
+ '[': LSQB,
+ ']': RSQB,
+ '^': CIRCUMFLEX,
+ '^=': CIRCUMFLEXEQUAL,
+ '{': LBRACE,
+ '|': VBAR,
+ '|=': VBAREQUAL,
+ '}': RBRACE,
+ '~': TILDE,
+}
+
def ISTERMINAL(x):
return x < NT_OFFSET
@@ -88,73 +128,3 @@ def ISNONTERMINAL(x):
def ISEOF(x):
return x == ENDMARKER
-
-
-def _main():
- import re
- import sys
- args = sys.argv[1:]
- inFileName = args and args[0] or "Include/token.h"
- outFileName = "Lib/token.py"
- if len(args) > 1:
- outFileName = args[1]
- try:
- fp = open(inFileName)
- except OSError as err:
- sys.stdout.write("I/O error: %s\n" % str(err))
- sys.exit(1)
- with fp:
- lines = fp.read().split("\n")
- prog = re.compile(
- r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
- re.IGNORECASE)
- comment_regex = re.compile(
- r"^\s*/\*\s*(.+?)\s*\*/\s*$",
- re.IGNORECASE)
-
- tokens = {}
- prev_val = None
- for line in lines:
- match = prog.match(line)
- if match:
- name, val = match.group(1, 2)
- val = int(val)
- tokens[val] = {'token': name} # reverse so we can sort them...
- prev_val = val
- else:
- comment_match = comment_regex.match(line)
- if comment_match and prev_val is not None:
- comment = comment_match.group(1)
- tokens[prev_val]['comment'] = comment
- keys = sorted(tokens.keys())
- # load the output skeleton from the target:
- try:
- fp = open(outFileName)
- except OSError as err:
- sys.stderr.write("I/O error: %s\n" % str(err))
- sys.exit(2)
- with fp:
- format = fp.read().split("\n")
- try:
- start = format.index("#--start constants--") + 1
- end = format.index("#--end constants--")
- except ValueError:
- sys.stderr.write("target does not contain format markers")
- sys.exit(3)
- lines = []
- for key in keys:
- lines.append("%s = %d" % (tokens[key]["token"], key))
- if "comment" in tokens[key]:
- lines.append("# %s" % tokens[key]["comment"])
- format[start:end] = lines
- try:
- fp = open(outFileName, 'w')
- except OSError as err:
- sys.stderr.write("I/O error: %s\n" % str(err))
- sys.exit(4)
- with fp:
- fp.write("\n".join(format))
-
-
-if __name__ == "__main__":
- _main()