author    Pablo Galindo <Pablogsal@gmail.com>    2019-03-25 22:01:12 +0000
committer GitHub <noreply@github.com>    2019-03-25 22:01:12 +0000
commit    91759d98015e1d6d5e1367cff60592ab548e7806 (patch)
tree      903553ec0677b1fc9c3531799ce890fd7a019069 /Parser
parent    027b09c5a13aac9e14a3b43bb385298d549c3833 (diff)
download  cpython-git-91759d98015e1d6d5e1367cff60592ab548e7806.tar.gz
bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen (GH-12456)
Now that the parser generator is written in Python (Parser/pgen), we can use it to regenerate Lib/keyword.py, which contains the language keywords, instead of parsing the autogenerated grammar files. This also allows the CI to check that the autogenerated files are up to date.
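For context, the regenerated Lib/keyword.py keeps the same public API the module has always exposed (kwlist and iskeyword); only how it is produced changes. A minimal usage sketch, assuming the module has been regenerated as described in the docstring below:

    import keyword

    print(keyword.iskeyword("await"))  # True; "async"/"await" are appended via EXTRA_KEYWORDS
    print(keyword.iskeyword("spam"))   # False; not a language keyword
    print(keyword.kwlist[:5])          # first entries of the sorted keyword list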
Diffstat (limited to 'Parser')
-rw-r--r--  Parser/pgen/keywordgen.py  60
1 file changed, 60 insertions(+), 0 deletions(-)
diff --git a/Parser/pgen/keywordgen.py b/Parser/pgen/keywordgen.py
new file mode 100644
index 0000000000..eeb3ef739f
--- /dev/null
+++ b/Parser/pgen/keywordgen.py
@@ -0,0 +1,60 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
+
+import argparse
+
+from .pgen import ParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/Grammar")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+ python3 -m Parser.pgen.keywordgen Grammar/Grammar \
+ Grammar/Tokens \
+ Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+ {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate the Lib/keyword.py "
+ "file from the grammar.")
+ parser.add_argument(
+ "grammar", type=str, help="The file with the grammar definition in EBNF format"
+ )
+ parser.add_argument(
+ "tokens", type=str, help="The file with the token definitions"
+ )
+ parser.add_argument(
+ "keyword_file",
+ type=argparse.FileType('w'),
+ help="The path to write the keyword definitions",
+ )
+ args = parser.parse_args()
+ p = ParserGenerator(args.grammar, args.tokens)
+ grammar = p.make_grammar()
+
+ with args.keyword_file as thefile:
+ all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)
+
+ keywords = ",\n ".join(map(repr, all_keywords))
+ thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+ main()
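The commit message notes that this change lets the CI verify that the autogenerated files are up to date. A minimal sketch of such a freshness check, assuming it is run from the top of the CPython source tree; the comparison logic here is an illustration, not the actual Makefile/CI recipe:

    import filecmp
    import subprocess
    import tempfile

    # Regenerate the keyword file into a temporary location, using the same
    # invocation documented in the template docstring above.
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as tmp:
        regenerated = tmp.name

    subprocess.run(
        ["python3", "-m", "Parser.pgen.keywordgen",
         "Grammar/Grammar", "Grammar/Tokens", regenerated],
        check=True,
    )

    # Fail if the checked-in copy differs from what the generator produces.
    if not filecmp.cmp(regenerated, "Lib/keyword.py", shallow=False):
        raise SystemExit("Lib/keyword.py is out of date; run 'make regen-keyword'.")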