summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x dev/generate_new_unicode_numbers.py 45
-rw-r--r-- natsort/unicode_numeric_hex.py 17
2 files changed, 45 insertions, 17 deletions
diff --git a/dev/generate_new_unicode_numbers.py b/dev/generate_new_unicode_numbers.py
new file mode 100755
index 0000000..c59f4e4
--- /dev/null
+++ b/dev/generate_new_unicode_numbers.py
@@ -0,0 +1,45 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Generate the numeric hex list of unicode numerals
+
+Run from the project root; overwrites natsort/unicode_numeric_hex.py.
+"""
+import os
+import os.path
+import sys
+import unicodedata
+
+# This is intended to be called from project root. Enforce this.
+this_file = os.path.abspath(__file__)
+this_base = os.path.basename(this_file)
+cwd = os.path.abspath(os.getcwd())
+desired_this_file = os.path.join(cwd, "dev", this_base)
+if this_file != desired_this_file:
+    sys.exit(f"{this_base} must be called from project root")
+
+# We will write the new numeric hex collection to a natsort package file.
+target = os.path.join(cwd, "natsort", "unicode_numeric_hex.py")
+with open(target, "w", encoding="utf-8") as fl:
+    print(
+        '''# -*- coding: utf-8 -*-
+"""
+Contains all possible non-ASCII unicode numbers.
+"""
+
+# Rather than determine what unicode characters are numeric on the fly which
+# would incur a startup runtime penalty, the hex values are hard-coded below.
+numeric_hex = (''',
+        file=fl,
+    )
+
+    # Write out each individual hex value.
+    for i in range(0x110000):
+        # chr() accepts every value in range(0x110000), so no error guard.
+        a = chr(i)
+        if a in "0123456789":
+            continue
+        if unicodedata.numeric(a, None) is not None:
+            print(f" 0x{i:X},", file=fl)
+
+    print(")", file=fl)
diff --git a/natsort/unicode_numeric_hex.py b/natsort/unicode_numeric_hex.py
index ba5841b..c1e789d 100644
--- a/natsort/unicode_numeric_hex.py
+++ b/natsort/unicode_numeric_hex.py
@@ -1859,20 +1859,3 @@ numeric_hex = (
0x2626D,
0x2F890,
)
-
-# Some code that can be used to create the above list of hex numbers.
-if __name__ == "__main__":
- import unicodedata
-
- hex_chars = []
- for i in range(0x110000):
- try:
- a = chr(i)
- except ValueError:
- break
- if a in "0123456789":
- continue
- if unicodedata.numeric(a, None) is not None:
- hex_chars.append(i)
-
- print(", ".join(["0X{:X}".format(i) for i in hex_chars]))