Add some helpful comments to the unicode_denormalizer.py example script

author: ptmcg <ptmcg@austin.rr.com> 2023-04-18 15:23:50 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2023-04-18 15:23:50 -0500
commit: cf4f1559503f0eb66343c96cb0144e86f68e9b1d (patch)
tree: ca6d06198b0860e02311bd4edc9f404c8f4d35d1
parent: 0b73a048c8987c88360b25d5da5d177e2f758a49 (diff)
download: pyparsing-git-cf4f1559503f0eb66343c96cb0144e86f68e9b1d.tar.gz
1 files changed, 18 insertions, 13 deletions
diff --git a/examples/unicode_denormalizer.py b/examples/unicode_denormalizer.py
index 5955c13..6eee875 100644
--- a/examples/unicode_denormalizer.py
+++ b/examples/unicode_denormalizer.py
@@ -31,9 +31,9 @@ ident_chars = (
     "0123456789" + _·
 )
 
-# build map of each ASCII character to a list of
+# build map of each ASCII character to a string of
 # all the characters in the Basic Multilingual Plane
-# that NFKC normalize back to that ASCII character
+# that NFKC normalizes back to that ASCII character
 ident_char_map = {}.fromkeys(ident_chars, "")
 for ch in ppu.BMP.identbodychars:
     normal = unicodedata.normalize("NFKC", ch)
@@ -58,18 +58,23 @@ ligature_map = {
     'ix': 'ix ⅸ',
     'xi': 'xi ⅺ',
 }
-ligature_transformer = pp.oneOf(ligature_map).add_parse_action(lambda t: random.choice(ligature_map[t[0]].split()))
+ligature_transformer = pp.oneOf(ligature_map).add_parse_action(
+    lambda t: random.choice(ligature_map[t[0]].split())
+)
 
 
 def make_mixed_font(t):
-    t_0 = t[0][0]
-    ret = ['_' if t_0 == '_' else random.choice(ident_char_map.get(t_0, t_0))]
-    t_rest = ligature_transformer.transform_string(t[0][1:])
+    t_0 = t[0]
+    # a leading '_' must be written using the ASCII character '_'
+    ret = ['_' if t_0[0] == '_'
+           else random.choice(ident_char_map.get(t_0[0], t_0[0]))]
+    t_rest = ligature_transformer.transform_string(t_0[1:])
     ret.extend(random.choice(ident_char_map.get(c, c)) for c in t_rest)
     return ''.join(ret)
 
 
-# define a pyparsing expression to match any identifier
+# define a pyparsing expression to match any identifier; add a parse
+# action to convert to mixed Unicode characters
 identifier = pp.pyparsing_common.identifier
 identifier.add_parse_action(make_mixed_font)
 
@@ -87,13 +92,13 @@ def mix_fstring_expressions(t):
     ret = t.f_string_prefix + fstring_arg.transform_string(t.quoted_string_body)
     return ret
 
-
+# add parse action to transform identifiers in f-strings
 python_quoted_string.add_parse_action(mix_fstring_expressions)
 
 # match keywords separately from identifiers - keywords must be kept in their
 # original ASCII
 any_keyword = pp.one_of(
-    keyword.kwlist + getattr(keyword, "softkwlist", []),
+    list(keyword.kwlist) + getattr(keyword, "softkwlist", []),
     as_keyword=True
 )
 
@@ -115,13 +120,13 @@ def demo():
     if __name__ == "__main__":
         hello()
     """)
-    source = hello_source
 
-    transformed = transformer.transform_string(source)
+    # use transformer to generate code with denormalized identifiers
+    transformed = transformer.transform_string(hello_source)
     print(transformed)
 
-    # does it really work?
-    code = compile(transformed, source, mode="exec")
+    # does it really work? compile the transformed code and run it!
+    code = compile(transformed, "inline source", mode="exec")
     exec(code)
 
     if 0:
author	ptmcg <ptmcg@austin.rr.com>	2023-04-18 15:23:50 -0500
committer	ptmcg <ptmcg@austin.rr.com>	2023-04-18 15:23:50 -0500
commit	cf4f1559503f0eb66343c96cb0144e86f68e9b1d (patch)
tree	ca6d06198b0860e02311bd4edc9f404c8f4d35d1
parent	0b73a048c8987c88360b25d5da5d177e2f758a49 (diff)
download	pyparsing-git-cf4f1559503f0eb66343c96cb0144e86f68e9b1d.tar.gz