Add pyparsing.unicode.identifier class property

author: ptmcg <ptmcg@austin.rr.com> 2023-01-13 05:24:37 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2023-01-13 05:24:37 -0600
commit: cc94b5a6d608e7f25be15c4487cbab25f606e0d8 (patch)
tree: 224ad44ae49d3527383287bb5b900ed896842ac5
parent: 318ec7e3b945068d36f5c98b1de81003c773c6c4 (diff)
download: pyparsing-git-cc94b5a6d608e7f25be15c4487cbab25f606e0d8.tar.gz
4 files changed, 110 insertions, 74 deletions
diff --git a/CHANGES b/CHANGES
index 36a09b8..18edee1 100644
--- a/CHANGES
+++ b/CHANGES
@@ -28,6 +28,19 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
 
   Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly.
 
+- Added new class property `identifier` to all Unicode set classes in `pyparsing.unicode`,
+  using the class's values for `cls.identchars` and `cls.identbodychars`. Unicode-aware
+  parsers that formerly wrote:
+
+      ppu = pyparsing.unicode
+      ident = Word(ppu.Greek.identchars, ppu.Greek.identbodychars)
+
+  can now write:
+
+      ident = ppu.Greek.identifier
+      # or
+      # ident = ppu.Ελληνικά.identifier
+
 - Reworked `delimited_list` function into the new `DelimitedList` class.
   `DelimitedList` has the same constructor interface as `delimited_list`, and
   in this release, `delimited_list` changes from a function to a synonym for
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index 18be13c..1557b60 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
 
 
 __version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "22 Dec 2022 08:16 UTC"
+__version_time__ = "13 Jan 2023 11:09 UTC"
 __version__ = __version_info__.__version__
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py
index 9bc5e1d..b0a87b2 100644
--- a/pyparsing/unicode.py
+++ b/pyparsing/unicode.py
@@ -64,27 +64,27 @@ class unicode_set:
 
     @_lazyclassproperty
     def printables(cls):
-        "all non-whitespace characters in this range"
+        """all non-whitespace characters in this range"""
         return "".join(filterfalse(str.isspace, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def alphas(cls):
-        "all alphabetic characters in this range"
+        """all alphabetic characters in this range"""
         return "".join(filter(str.isalpha, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def nums(cls):
-        "all numeric digit characters in this range"
+        """all numeric digit characters in this range"""
         return "".join(filter(str.isdigit, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def alphanums(cls):
-        "all alphanumeric characters in this range"
+        """all alphanumeric characters in this range"""
         return cls.alphas + cls.nums
 
     @_lazyclassproperty
     def identchars(cls):
-        "all characters in this range that are valid identifier characters, plus underscore '_'"
+        """all characters in this range that are valid identifier characters, plus underscore '_'"""
         return "".join(
             sorted(
                 set(
@@ -114,6 +114,16 @@ class unicode_set:
             )
         )
 
+    @_lazyclassproperty
+    def identifier(cls):
+        """
+        a pyparsing Word expression for an identifier using this range's definitions for
+        identchars and identbodychars
+        """
+        from pyparsing import Word
+
+        return Word(cls.identchars, cls.identbodychars)
+
 
 class pyparsing_unicode(unicode_set):
     """
@@ -128,32 +138,32 @@ class pyparsing_unicode(unicode_set):
     ]
 
     class BasicMultilingualPlane(unicode_set):
-        "Unicode set for the Basic Multilingual Plane"
+        """Unicode set for the Basic Multilingual Plane"""
         _ranges: UnicodeRangeList = [
             (0x0020, 0xFFFF),
         ]
 
     class Latin1(unicode_set):
-        "Unicode set for Latin-1 Unicode Character Range"
+        """Unicode set for Latin-1 Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0020, 0x007E),
             (0x00A0, 0x00FF),
         ]
 
     class LatinA(unicode_set):
-        "Unicode set for Latin-A Unicode Character Range"
+        """Unicode set for Latin-A Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0100, 0x017F),
         ]
 
     class LatinB(unicode_set):
-        "Unicode set for Latin-B Unicode Character Range"
+        """Unicode set for Latin-B Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0180, 0x024F),
         ]
 
     class Greek(unicode_set):
-        "Unicode set for Greek Unicode Character Ranges"
+        """Unicode set for Greek Unicode Character Ranges"""
         _ranges: UnicodeRangeList = [
             (0x0342, 0x0345),
             (0x0370, 0x0377),
@@ -193,7 +203,7 @@ class pyparsing_unicode(unicode_set):
         ]
 
     class Cyrillic(unicode_set):
-        "Unicode set for Cyrillic Unicode Character Range"
+        """Unicode set for Cyrillic Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0400, 0x052F),
             (0x1C80, 0x1C88),
@@ -206,7 +216,7 @@ class pyparsing_unicode(unicode_set):
         ]
 
     class Chinese(unicode_set):
-        "Unicode set for Chinese Unicode Character Range"
+        """Unicode set for Chinese Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x2E80, 0x2E99),
             (0x2E9B, 0x2EF3),
@@ -229,7 +239,7 @@ class pyparsing_unicode(unicode_set):
         ]
 
     class Japanese(unicode_set):
-        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
+        """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""
 
         class Kanji(unicode_set):
             "Unicode set for Kanji Unicode Character Range"
@@ -239,7 +249,7 @@ class pyparsing_unicode(unicode_set):
             ]
 
         class Hiragana(unicode_set):
-            "Unicode set for Hiragana Unicode Character Range"
+            """Unicode set for Hiragana Unicode Character Range"""
             _ranges: UnicodeRangeList = [
                 (0x3041, 0x3096),
                 (0x3099, 0x30A0),
@@ -251,7 +261,7 @@ class pyparsing_unicode(unicode_set):
             ]
 
         class Katakana(unicode_set):
-            "Unicode set for Katakana  Unicode Character Range"
+            """Unicode set for Katakana  Unicode Character Range"""
             _ranges: UnicodeRangeList = [
                 (0x3099, 0x309C),
                 (0x30A0, 0x30FF),
@@ -275,7 +285,7 @@ class pyparsing_unicode(unicode_set):
         )
 
     class Hangul(unicode_set):
-        "Unicode set for Hangul (Korean) Unicode Character Range"
+        """Unicode set for Hangul (Korean) Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x1100, 0x11FF),
             (0x302E, 0x302F),
@@ -297,17 +307,17 @@ class pyparsing_unicode(unicode_set):
     Korean = Hangul
 
     class CJK(Chinese, Japanese, Hangul):
-        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
+        """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""
 
     class Thai(unicode_set):
-        "Unicode set for Thai Unicode Character Range"
+        """Unicode set for Thai Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0E01, 0x0E3A),
             (0x0E3F, 0x0E5B)
         ]
 
     class Arabic(unicode_set):
-        "Unicode set for Arabic Unicode Character Range"
+        """Unicode set for Arabic Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0600, 0x061B),
             (0x061E, 0x06FF),
@@ -315,7 +325,7 @@ class pyparsing_unicode(unicode_set):
         ]
 
     class Hebrew(unicode_set):
-        "Unicode set for Hebrew Unicode Character Range"
+        """Unicode set for Hebrew Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0591, 0x05C7),
             (0x05D0, 0x05EA),
@@ -329,7 +339,7 @@ class pyparsing_unicode(unicode_set):
         ]
 
     class Devanagari(unicode_set):
-        "Unicode set for Devanagari Unicode Character Range"
+        """Unicode set for Devanagari Unicode Character Range"""
         _ranges: UnicodeRangeList = [
             (0x0900, 0x097F),
             (0xA8E0, 0xA8FF)
diff --git a/tests/test_unit.py b/tests/test_unit.py
index b50117b..34c2736 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -116,9 +116,7 @@ class TestCase(unittest.TestCase):
             yield
 
         if getattr(ar, "warning", None) is not None:
-            print(
-                f"Raised expected warning: {type(ar.warning).__name__}: {ar.warning}"
-            )
+            print(f"Raised expected warning: {type(ar.warning).__name__}: {ar.warning}")
         else:
             print(f"Expected {expected_warning_type.__name__} warning not raised")
         return ar
@@ -2931,7 +2929,10 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             with self.assertRaises(
                 TypeError, msg="ParserElement * (str, str) should raise error"
             ):
-                expr = pp.Word(pp.alphas)("first") + pp.Word(pp.nums)("second") * ("2", "3")
+                expr = pp.Word(pp.alphas)("first") + pp.Word(pp.nums)("second") * (
+                    "2",
+                    "3",
+                )
 
     def testParserElementMulByZero(self):
         alpwd = pp.Word(pp.alphas)
@@ -2956,12 +2957,16 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
 
         # ParserElement * str
         with self.subTest():
-            with self.assertRaises(TypeError, msg="ParserElement * str should raise error"):
+            with self.assertRaises(
+                TypeError, msg="ParserElement * str should raise error"
+            ):
                 expr = pp.Word(pp.alphas)("first") + pp.Word(pp.nums)("second") * "3"
 
         # str * ParserElement
         with self.subTest():
-            with self.assertRaises(TypeError, msg="str * ParserElement should raise error"):
+            with self.assertRaises(
+                TypeError, msg="str * ParserElement should raise error"
+            ):
                 expr = pp.Word(pp.alphas)("first") + "3" * pp.Word(pp.nums)("second")
 
         # ParserElement * int
@@ -8284,24 +8289,27 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             self.assertEqual("bool [, bool]...", str(bool_list2))
 
         with self.subTest():
-            street_address = pp.common.integer.set_name("integer") + pp.Word(pp.alphas)[1, ...].set_name("street_name")
+            street_address = pp.common.integer.set_name("integer") + pp.Word(pp.alphas)[
+                1, ...
+            ].set_name("street_name")
             self.assertEqual(
                 "{integer street_name} [, {integer street_name}]...",
-                str(pp.delimitedList(street_address))
+                str(pp.delimitedList(street_address)),
             )
 
         with self.subTest():
             operand = pp.Char(pp.alphas).set_name("var")
-            math = pp.infixNotation(operand,
-                                    [
-                                        (pp.one_of("+ -"), 2, pp.opAssoc.LEFT),
-                                    ])
+            math = pp.infixNotation(
+                operand,
+                [
+                    (pp.one_of("+ -"), 2, pp.opAssoc.LEFT),
+                ],
+            )
             self.assertEqual(
                 "Forward: + | - term [, Forward: + | - term]...",
-                str(pp.delimitedList(math))
+                str(pp.delimitedList(math)),
             )
 
-
     def testDelimitedListOfStrLiterals(self):
         expr = pp.delimitedList("ABC")
         print(str(expr))
@@ -8333,9 +8341,9 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
 
     def testDelimitedListParseActions1(self):
         # from issue #408
-        keyword = pp.Keyword('foobar')
+        keyword = pp.Keyword("foobar")
         untyped_identifier = ~keyword + pp.Word(pp.alphas)
-        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        dotted_vars = pp.delimited_list(untyped_identifier, delim=".")
         lvalue = pp.Opt(dotted_vars)
 
         # uncomment this line to see the problem
@@ -8344,27 +8352,29 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         # stmt = pp.Opt(dotted_vars)
 
         def parse_identifier(toks):
-            print('YAY!', toks)
+            print("YAY!", toks)
 
         untyped_identifier.set_parse_action(parse_identifier)
 
         save_stdout = StringIO()
         with contextlib.redirect_stdout(save_stdout):
-            dotted_vars.parse_string('B.C')
+            dotted_vars.parse_string("B.C")
 
         self.assertEqual(
-            dedent("""\
+            dedent(
+                """\
                 YAY! ['B']
                 YAY! ['C']
-                """),
-            save_stdout.getvalue()
+                """
+            ),
+            save_stdout.getvalue(),
         )
 
     def testDelimitedListParseActions2(self):
         # from issue #408
-        keyword = pp.Keyword('foobar')
+        keyword = pp.Keyword("foobar")
         untyped_identifier = ~keyword + pp.Word(pp.alphas)
-        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        dotted_vars = pp.delimited_list(untyped_identifier, delim=".")
         lvalue = pp.Opt(dotted_vars)
 
         # uncomment this line to see the problem
@@ -8373,27 +8383,29 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         # stmt = pp.Opt(dotted_vars)
 
         def parse_identifier(toks):
-            print('YAY!', toks)
+            print("YAY!", toks)
 
         untyped_identifier.set_parse_action(parse_identifier)
 
         save_stdout = StringIO()
         with contextlib.redirect_stdout(save_stdout):
-            dotted_vars.parse_string('B.C')
+            dotted_vars.parse_string("B.C")
 
         self.assertEqual(
-            dedent("""\
+            dedent(
+                """\
                 YAY! ['B']
                 YAY! ['C']
-                """),
-            save_stdout.getvalue()
+                """
+            ),
+            save_stdout.getvalue(),
         )
 
     def testDelimitedListParseActions3(self):
         # from issue #408
-        keyword = pp.Keyword('foobar')
+        keyword = pp.Keyword("foobar")
         untyped_identifier = ~keyword + pp.Word(pp.alphas)
-        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        dotted_vars = pp.delimited_list(untyped_identifier, delim=".")
         lvalue = pp.Opt(dotted_vars)
 
         # uncomment this line to see the problem
@@ -8402,20 +8414,22 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         stmt = pp.Opt(dotted_vars)
 
         def parse_identifier(toks):
-            print('YAY!', toks)
+            print("YAY!", toks)
 
         untyped_identifier.set_parse_action(parse_identifier)
 
         save_stdout = StringIO()
         with contextlib.redirect_stdout(save_stdout):
-            dotted_vars.parse_string('B.C')
+            dotted_vars.parse_string("B.C")
 
         self.assertEqual(
-            dedent("""\
+            dedent(
+                """\
                 YAY! ['B']
                 YAY! ['C']
-                """),
-            save_stdout.getvalue()
+                """
+            ),
+            save_stdout.getvalue(),
         )
 
     def testEnableDebugOnNamedExpressions(self):
@@ -8667,11 +8681,14 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
 
     def testSetDebugRecursivelyWithForward(self):
         expr = pp.Word(pp.alphas).set_name("innermost")
-        contained = pp.infix_notation(expr, [
-            ('NOT', 1, pp.opAssoc.RIGHT),
-            ('AND', 2, pp.opAssoc.LEFT),
-            ('OR', 2, pp.opAssoc.LEFT),
-        ])
+        contained = pp.infix_notation(
+            expr,
+            [
+                ("NOT", 1, pp.opAssoc.RIGHT),
+                ("AND", 2, pp.opAssoc.LEFT),
+                ("OR", 2, pp.opAssoc.LEFT),
+            ],
+        )
 
         contained.set_debug(recurse=True)
         self.assertTrue(expr.debug)
@@ -8925,17 +8942,11 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         ppu = pp.pyparsing_unicode
 
         latin_identifier = pp.Word(pp.identchars, pp.identbodychars)("latin*")
-        japanese_identifier = pp.Word(
-            ppu.Japanese.identchars, ppu.Japanese.identbodychars
-        )("japanese*")
-        cjk_identifier = pp.Word(ppu.CJK.identchars, ppu.CJK.identbodychars)("cjk*")
-        greek_identifier = pp.Word(ppu.Greek.identchars, ppu.Greek.identbodychars)(
-            "greek*"
-        )
-        cyrillic_identifier = pp.Word(
-            ppu.Cyrillic.identchars, ppu.Cyrillic.identbodychars
-        )("cyrillic*")
-        thai_identifier = pp.Word(ppu.Thai.identchars, ppu.Thai.identbodychars)("thai*")
+        japanese_identifier = ppu.Japanese.identifier("japanese*")
+        cjk_identifier = ppu.CJK.identifier("cjk*")
+        greek_identifier = ppu.Greek.identifier("greek*")
+        cyrillic_identifier = ppu.Cyrillic.identifier("cyrillic*")
+        thai_identifier = ppu.Thai.identifier("thai*")
         idents = (
             latin_identifier
             | japanese_identifier
@@ -9113,7 +9124,9 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         def testValidation(grmr, gnam, isValid):
             try:
                 grmr.streamline()
-                with self.assertWarns(DeprecationWarning, msg="failed to warn validate() is deprecated"):
+                with self.assertWarns(
+                    DeprecationWarning, msg="failed to warn validate() is deprecated"
+                ):
                     grmr.validate()
                 self.assertTrue(isValid, "validate() accepted invalid grammar " + gnam)
             except pp.RecursiveGrammarException as rge:
author	ptmcg <ptmcg@austin.rr.com>	2023-01-13 05:24:37 -0600
committer	ptmcg <ptmcg@austin.rr.com>	2023-01-13 05:24:37 -0600
commit	cc94b5a6d608e7f25be15c4487cbab25f606e0d8 (patch)
tree	224ad44ae49d3527383287bb5b900ed896842ac5
parent	318ec7e3b945068d36f5c98b1de81003c773c6c4 (diff)
download	pyparsing-git-cc94b5a6d608e7f25be15c4487cbab25f606e0d8.tar.gz