diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2018-08-11 09:43:08 -0400 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2018-08-11 09:43:08 -0400 |
commit | ef9f871304731ad4cfbe6c5cbf2bb9c93894229c (patch) | |
tree | e0ced386667926059353345aa94e4090a1f63b3e | |
parent | f70b3a761ffd5320644b10c03746b8cecae6ccad (diff) | |
download | natsort-ef9f871304731ad4cfbe6c5cbf2bb9c93894229c.tar.gz |
Refactor regex_chooser into a function.
Previously, many module-level regular expressions were created, then
accessed through a dictionary. The regular expressions are now
encapsulated in a class, and the user accesses the dictionary through
a function call which can better handle cleaning up the key accessor.
-rw-r--r-- | natsort/__main__.py | 4 | ||||
-rw-r--r-- | natsort/natsort.py | 4 | ||||
-rw-r--r-- | natsort/utils.py | 117 | ||||
-rw-r--r-- | test_natsort/test_natsort_key.py | 4 | ||||
-rw-r--r-- | test_natsort/test_parse_string_function.py | 60 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 25 |
6 files changed, 125 insertions, 89 deletions
diff --git a/natsort/__main__.py b/natsort/__main__.py index 9b8dd70..a07297e 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -5,7 +5,7 @@ import sys import natsort from natsort.compat.py23 import py23_str -from natsort.utils import _regex_chooser +from natsort.utils import regex_chooser def main(): @@ -276,7 +276,7 @@ def sort_and_print_entries(entries, args): | natsort.ns.SIGNED * signed | natsort.ns.NOEXP * (not args.exp) ) - regex = _regex_chooser[inp_options] + regex = regex_chooser(inp_options) if args.filter is not None: lows, highs = ([f[0] for f in args.filter], [f[1] for f in args.filter]) entries = [ diff --git a/natsort/natsort.py b/natsort/natsort.py index 3cf2b91..d5a2a78 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -25,7 +25,7 @@ from natsort.utils import ( _parse_number_factory, _parse_path_factory, _parse_string_factory, - _regex_chooser, + regex_chooser, _string_component_transform_factory, ) @@ -197,7 +197,7 @@ def natsort_keygen(key=None, alg=0, **_kwargs): else: sep = natsort.compat.locale.null_string pre_sep = natsort.compat.locale.null_string - regex = _regex_chooser[alg & (ns.REAL | ns.NOEXP)] + regex = regex_chooser(alg) # Create the functions that will be used to split strings. input_transform = _input_string_transform_factory(alg) diff --git a/natsort/utils.py b/natsort/utils.py index 2ebccbf..cf8189a 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -69,40 +69,89 @@ from natsort.unicode_numbers import digits_no_decimals, numeric_no_decimals if PY_VERSION >= 3: long = int -# The regex that locates floats - include Unicode numerals. -_nnd = numeric_no_decimals -_exp = r"(?:[eE][-+]?\d+)?" 
-_num = r"(?:\d+\.?\d*|\.\d+)" -_float_sign_exp_re = r"([-+]?{0}{1}|[{2}])" -_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, _nnd) -_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) -_float_nosign_exp_re = r"({0}{1}|[{2}])" -_float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, _nnd) -_float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U) -_float_sign_noexp_re = r"([-+]?{0}|[{1}])" -_float_sign_noexp_re = _float_sign_noexp_re.format(_num, _nnd) -_float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) -_float_nosign_noexp_re = r"({0}|[{1}])" -_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, _nnd) -_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) - -# Integer regexes - include Unicode digits. -_int_nosign_re = r"(\d+|[{0}])".format(digits_no_decimals) -_int_nosign_re = re.compile(_int_nosign_re, flags=re.U) -_int_sign_re = r"([-+]?\d+|[{0}])".format(digits_no_decimals) -_int_sign_re = re.compile(_int_sign_re, flags=re.U) - -# This dict will help select the correct regex and number conversion function. -_regex_chooser = { - (ns.F | ns.S): _float_sign_exp_re, - (ns.F | ns.S | ns.N): _float_sign_noexp_re, - (ns.F | ns.U): _float_nosign_exp_re, - (ns.F | ns.U | ns.N): _float_nosign_noexp_re, - (ns.I | ns.S): _int_sign_re, - (ns.I | ns.S | ns.N): _int_sign_re, - (ns.I | ns.U): _int_nosign_re, - (ns.I | ns.U | ns.N): _int_nosign_re, -} + +class NumericalRegularExpressions(object): + """ + Container of regular expressions that match numbers. + + The numbers also account for unicode non-decimal characters. + + Not intended to be made an instance - use class methods only. + """ + + # All unicode numeric characters (minus the decimal characters). + numeric = numeric_no_decimals + # All unicode digit characters (minus the decimal characters). + digits = digits_no_decimals + # Regular expression to match exponential component of a float. + exp = r"(?:[eE][-+]?\d+)?" 
+ # Regular expression to match a floating point number. + float_num = r"(?:\d+\.?\d*|\.\d+)" + + @classmethod + def _construct_regex(cls, fmt): + """Given a format string, construct the regex with class attributes.""" + return re.compile(fmt.format(**vars(cls)), flags=re.U) + + @classmethod + def int_sign(cls): + """Regular expression to match a signed int.""" + return cls._construct_regex(r"([-+]?\d+|[{digits}])") + + @classmethod + def int_nosign(cls): + """Regular expression to match an unsigned int.""" + return cls._construct_regex(r"(\d+|[{digits}])") + + @classmethod + def float_sign_exp(cls): + """Regular expression to match a signed float with exponent.""" + return cls._construct_regex(r"([-+]?{float_num}{exp}|[{numeric}])") + + @classmethod + def float_nosign_exp(cls): + """Regular expression to match an unsigned float with exponent.""" + return cls._construct_regex(r"({float_num}{exp}|[{numeric}])") + + @classmethod + def float_sign_noexp(cls): + """Regular expression to match a signed float without exponent.""" + return cls._construct_regex(r"([-+]?{float_num}|[{numeric}])") + + @classmethod + def float_nosign_noexp(cls): + """Regular expression to match an unsigned float without exponent.""" + return cls._construct_regex(r"({float_num}|[{numeric}])") + + +def regex_chooser(alg): + """ + Select an appropriate regex for the type of number of interest. + + Parameters + ---------- + alg : ns enum + Used to indicate the regular expression to select. + + Returns + ------- + regex : compiled regex object + Regular expression object that matches the desired number type. 
+ + """ + if alg & ns.FLOAT: + alg &= ns.FLOAT | ns.SIGNED | ns.NOEXP + else: + alg &= ns.INT | ns.SIGNED + + return { + ns.INT: NumericalRegularExpressions.int_nosign(), + ns.FLOAT: NumericalRegularExpressions.float_nosign_exp(), + ns.INT | ns.SIGNED: NumericalRegularExpressions.int_sign(), + ns.FLOAT | ns.SIGNED: NumericalRegularExpressions.float_sign_exp(), + ns.FLOAT | ns.NOEXP: NumericalRegularExpressions.float_nosign_noexp(), + ns.FLOAT | ns.SIGNED | ns.NOEXP: NumericalRegularExpressions.float_sign_noexp(), + }[alg] def _no_op(x): diff --git a/test_natsort/test_natsort_key.py b/test_natsort/test_natsort_key.py index 55e0b58..35ee3bd 100644 --- a/test_natsort/test_natsort_key.py +++ b/test_natsort/test_natsort_key.py @@ -15,7 +15,7 @@ from natsort.utils import ( _parse_number_factory, _parse_path_factory, _parse_string_factory, - _regex_chooser, + regex_chooser, _string_component_transform_factory, ) @@ -23,7 +23,7 @@ if PY_VERSION >= 3: long = int -regex = _regex_chooser[ns.INT] +regex = regex_chooser(ns.INT) pre = _input_string_transform_factory(ns.INT) post = _string_component_transform_factory(ns.INT) after = _final_data_transform_factory(ns.INT, "", "") diff --git a/test_natsort/test_parse_string_function.py b/test_natsort/test_parse_string_function.py index 3737942..5cecd7b 100644 --- a/test_natsort/test_parse_string_function.py +++ b/test_natsort/test_parse_string_function.py @@ -7,16 +7,8 @@ from hypothesis.strategies import floats, integers, lists, text from natsort.compat.fastnumbers import fast_float, fast_int from natsort.compat.py23 import PY_VERSION, py23_str from natsort.ns_enum import ns, ns_DUMB -from natsort.utils import ( - _float_nosign_exp_re, - _float_nosign_noexp_re, - _float_sign_exp_re, - _float_sign_noexp_re, - _int_nosign_re, - _int_sign_re, - _parse_path_factory, - _parse_string_factory, -) +from natsort.utils import NumericalRegularExpressions as nre +from natsort.utils import _parse_path_factory, _parse_string_factory from pytest 
import raises from slow_splitters import float_splitter, int_splitter @@ -53,7 +45,7 @@ def tuple2(x, dummy): def test_parse_string_factory_raises_TypeError_if_given_a_number_example(): with raises(TypeError): assert _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 )(50.0) @@ -61,14 +53,14 @@ def test_parse_string_factory_raises_TypeError_if_given_a_number_example(): def test_parse_string_factory_raises_TypeError_if_given_a_number(x): with raises(TypeError): assert _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 )(x) def test_parse_string_factory_only_parses_digits_with_nosign_int_example(): - assert _parse_string_factory(0, "", _int_nosign_re.split, no_op, fast_int, tuple2)( - "a5+5.034e-1" - ) == ("a", 5, "+", 5, ".", 34, "e-", 1) + assert _parse_string_factory( + 0, "", nre.int_nosign().split, no_op, fast_int, tuple2 + )("a5+5.034e-1") == ("a", 5, "+", 5, ".", 34, "e-", 1) @given( @@ -87,13 +79,13 @@ def test_parse_string_factory_only_parses_digits_with_nosign_int_example(): ) def test_parse_string_factory_only_parses_digits_with_nosign_int(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _parse_string_factory(0, "", _int_nosign_re.split, no_op, fast_int, tuple2)( - s - ) == int_splitter(s, False, "") + assert _parse_string_factory( + 0, "", nre.int_nosign().split, no_op, fast_int, tuple2 + )(s) == int_splitter(s, False, "") def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example(): - assert _parse_string_factory(0, "", _int_sign_re.split, no_op, fast_int, tuple2)( + assert _parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)( "a5+5.034e-1" ) == ("a", 5, "", 5, ".", 34, "e", -1) @@ -107,14 +99,14 @@ def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example(): ) def 
test_parse_string_factory_parses_digit_with_sign_with_signed_int(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _parse_string_factory(0, "", _int_sign_re.split, no_op, fast_int, tuple2)( + assert _parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)( s ) == int_splitter(s, True, "") def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example(): assert _parse_string_factory( - 0, "", _float_nosign_noexp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2 )("a5+5.034e-1") == ("a", 5.0, "+", 5.034, "e-", 1.0) @@ -128,13 +120,13 @@ def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example( def test_parse_string_factory_only_parses_float_with_nosign_noexp_float(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) assert _parse_string_factory( - 0, "", _float_nosign_noexp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2 )(s) == float_splitter(s, False, False, "") def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float_example(): assert _parse_string_factory( - 0, "", _float_nosign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2 )("a5+5.034e-1") == ("a", 5.0, "+", 0.5034) @@ -148,13 +140,13 @@ def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_fl def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) assert _parse_string_factory( - 0, "", _float_nosign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2 )(s) == float_splitter(s, False, True, "") def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_example(): assert _parse_string_factory( - 0, "", 
_float_sign_noexp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2 )("a5+5.034e-1") == ("a", 5.0, "", 5.034, "e", -1.0) @@ -168,16 +160,16 @@ def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_ def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) assert _parse_string_factory( - 0, "", _float_sign_noexp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2 )(s) == float_splitter(s, True, False, "") def test_parse_string_factory_parses_float_with_sign_exp_float_example(): assert _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 )("a5+5.034e-1") == ("a", 5.0, "", 0.5034) assert _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 )("6a5+5.034e-1") == ("", 6.0, "a", 5.0, "", 0.5034) @@ -191,7 +183,7 @@ def test_parse_string_factory_parses_float_with_sign_exp_float_example(): def test_parse_string_factory_parses_float_with_sign_exp_float(x): s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x) assert _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 )(s) == float_splitter(s, True, True, "") @@ -201,18 +193,18 @@ def test_parse_string_factory_selects_pre_function_value_if_not_dumb(): return (orig[0], tuple(x)) assert _parse_string_factory( - 0, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2 + 0, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2 )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1)) assert _parse_string_factory( - ns_DUMB, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2 + ns_DUMB, 
"", nre.int_nosign().split, py23_str.upper, fast_float, tuple2 )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1)) assert _parse_string_factory( - ns.LOCALE, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2 + ns.LOCALE, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2 )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1)) assert _parse_string_factory( ns.LOCALE | ns_DUMB, "", - _int_nosign_re.split, + nre.int_nosign().split, py23_str.upper, fast_float, tuple2, @@ -221,7 +213,7 @@ def test_parse_string_factory_selects_pre_function_value_if_not_dumb(): def test_parse_path_function_parses_string_as_path_then_as_string(): splt = _parse_string_factory( - 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2 + 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2 ) assert _parse_path_factory(splt)("/p/Folder (10)/file34.5nm (2).tar.gz") == ( ("/",), diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 1dcdb4e..b729adf 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -15,15 +15,10 @@ from natsort.ns_enum import ns from natsort.utils import ( _args_to_enum, _do_decoding, - _float_nosign_exp_re, - _float_nosign_noexp_re, - _float_sign_exp_re, - _float_sign_noexp_re, _groupletters, - _int_nosign_re, - _int_sign_re, _path_splitter, - _regex_chooser, + NumericalRegularExpressions, + regex_chooser, _sep_inserter, chain_functions, ) @@ -105,14 +100,14 @@ def test_args_to_enum_converts_None_to_ns_IU(): def test_regex_chooser_returns_correct_regular_expression_object(): - assert _regex_chooser[ns.INT] is _int_nosign_re - assert _regex_chooser[ns.INT | ns.NOEXP] is _int_nosign_re - assert _regex_chooser[ns.INT | ns.SIGNED] is _int_sign_re - assert _regex_chooser[ns.INT | ns.SIGNED | ns.NOEXP] is _int_sign_re - assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re - assert _regex_chooser[ns.FLOAT | ns.NOEXP] is _float_nosign_noexp_re - assert _regex_chooser[ns.FLOAT | ns.SIGNED] 
is _float_sign_exp_re - assert _regex_chooser[ns.FLOAT | ns.SIGNED | ns.NOEXP] is _float_sign_noexp_re + assert regex_chooser(ns.INT) is NumericalRegularExpressions.int_nosign() + assert regex_chooser(ns.INT | ns.NOEXP) is NumericalRegularExpressions.int_nosign() + assert regex_chooser(ns.INT | ns.SIGNED) is NumericalRegularExpressions.int_sign() + assert regex_chooser(ns.INT | ns.SIGNED | ns.NOEXP) is NumericalRegularExpressions.int_sign() + assert regex_chooser(ns.FLOAT) is NumericalRegularExpressions.float_nosign_exp() + assert regex_chooser(ns.FLOAT | ns.NOEXP) is NumericalRegularExpressions.float_nosign_noexp() + assert regex_chooser(ns.FLOAT | ns.SIGNED) is NumericalRegularExpressions.float_sign_exp() + assert regex_chooser(ns.FLOAT | ns.SIGNED | ns.NOEXP) is NumericalRegularExpressions.float_sign_noexp() def test_ns_enum_values_have_are_as_expected(): |