diff options
-rwxr-xr-x | dev/generate_new_unicode_numbers.py | 45 | ||||
-rw-r--r-- | natsort/unicode_numeric_hex.py | 17 |
2 files changed, 45 insertions, 17 deletions
diff --git a/dev/generate_new_unicode_numbers.py b/dev/generate_new_unicode_numbers.py
new file mode 100755
index 0000000..c59f4e4
--- /dev/null
+++ b/dev/generate_new_unicode_numbers.py
@@ -0,0 +1,45 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Generate the numeric hex list of unicode numerals
+"""
+import os
+import os.path
+import sys
+import unicodedata
+
+# This is intended to be called from project root. Enforce this.
+this_file = os.path.abspath(__file__)
+this_base = os.path.basename(this_file)
+cwd = os.path.abspath(os.getcwd())
+desired_this_file = os.path.join(cwd, "dev", this_base)
+if this_file != desired_this_file:
+    sys.exit(f"{this_base} must be called from project root")
+
+# We will write the new numeric hex collection to a natsort package file.
+target = os.path.join(cwd, "natsort", "unicode_numeric_hex.py")
+with open(target, "w", encoding="utf-8") as fl:
+    print(
+        '''# -*- coding: utf-8 -*-
+"""
+Contains all possible non-ASCII unicode numbers.
+"""
+
+# Rather than determine what unicode characters are numeric on the fly which
+# would incur a startup runtime penalty, the hex values are hard-coded below.
+numeric_hex = (''',
+        file=fl,
+    )
+
+    # Write out each individual hex value. chr() accepts every code point in
+    # range(0x110000) on Python 3, so no ValueError guard is needed.
+    for i in range(0x110000):
+        a = chr(i)
+        # The generated module holds only non-ASCII numbers; skip 0-9.
+        if a in "0123456789":
+            continue
+        # unicodedata.numeric returns the default (None) for non-numerics.
+        if unicodedata.numeric(a, None) is not None:
+            print(f"    0x{i:X},", file=fl)
+
+    print(")", file=fl)
diff --git a/natsort/unicode_numeric_hex.py b/natsort/unicode_numeric_hex.py
index ba5841b..c1e789d 100644
--- a/natsort/unicode_numeric_hex.py
+++ b/natsort/unicode_numeric_hex.py
@@ -1859,20 +1859,3 @@ numeric_hex = (
     0x2626D,
     0x2F890,
 )
-
-# Some code that can be used to create the above list of hex numbers.
-if __name__ == "__main__":
-    import unicodedata
-
-    hex_chars = []
-    for i in range(0x110000):
-        try:
-            a = chr(i)
-        except ValueError:
-            break
-        if a in "0123456789":
-            continue
-        if unicodedata.numeric(a, None) is not None:
-            hex_chars.append(i)
-
-    print(", ".join(["0X{:X}".format(i) for i in hex_chars]))