summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Seth M Morton <seth.m.morton@gmail.com> 2018-08-11 09:43:08 -0400
committer Seth M Morton <seth.m.morton@gmail.com> 2018-08-11 09:43:08 -0400
commit ef9f871304731ad4cfbe6c5cbf2bb9c93894229c (patch)
tree e0ced386667926059353345aa94e4090a1f63b3e
parent f70b3a761ffd5320644b10c03746b8cecae6ccad (diff)
download natsort-ef9f871304731ad4cfbe6c5cbf2bb9c93894229c.tar.gz
Refactor regex_chooser into a function.
Previously, many module-level regular expressions were created, then accessed through a dictionary. The regular expressions are now encapsulated in a class, and the user accesses the dictionary through a function call, which can better handle cleaning up the key accessor.
-rw-r--r-- natsort/__main__.py 4
-rw-r--r-- natsort/natsort.py 4
-rw-r--r-- natsort/utils.py 117
-rw-r--r-- test_natsort/test_natsort_key.py 4
-rw-r--r-- test_natsort/test_parse_string_function.py 60
-rw-r--r-- test_natsort/test_utils.py 25
6 files changed, 125 insertions, 89 deletions
diff --git a/natsort/__main__.py b/natsort/__main__.py
index 9b8dd70..a07297e 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -5,7 +5,7 @@ import sys
import natsort
from natsort.compat.py23 import py23_str
-from natsort.utils import _regex_chooser
+from natsort.utils import regex_chooser
def main():
@@ -276,7 +276,7 @@ def sort_and_print_entries(entries, args):
| natsort.ns.SIGNED * signed
| natsort.ns.NOEXP * (not args.exp)
)
- regex = _regex_chooser[inp_options]
+ regex = regex_chooser(inp_options)
if args.filter is not None:
lows, highs = ([f[0] for f in args.filter], [f[1] for f in args.filter])
entries = [
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 3cf2b91..d5a2a78 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -25,7 +25,7 @@ from natsort.utils import (
_parse_number_factory,
_parse_path_factory,
_parse_string_factory,
- _regex_chooser,
+ regex_chooser,
_string_component_transform_factory,
)
@@ -197,7 +197,7 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
else:
sep = natsort.compat.locale.null_string
pre_sep = natsort.compat.locale.null_string
- regex = _regex_chooser[alg & (ns.REAL | ns.NOEXP)]
+ regex = regex_chooser(alg)
# Create the functions that will be used to split strings.
input_transform = _input_string_transform_factory(alg)
diff --git a/natsort/utils.py b/natsort/utils.py
index 2ebccbf..cf8189a 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -69,40 +69,89 @@ from natsort.unicode_numbers import digits_no_decimals, numeric_no_decimals
if PY_VERSION >= 3:
long = int
-# The regex that locates floats - include Unicode numerals.
-_nnd = numeric_no_decimals
-_exp = r"(?:[eE][-+]?\d+)?"
-_num = r"(?:\d+\.?\d*|\.\d+)"
-_float_sign_exp_re = r"([-+]?{0}{1}|[{2}])"
-_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, _nnd)
-_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U)
-_float_nosign_exp_re = r"({0}{1}|[{2}])"
-_float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, _nnd)
-_float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U)
-_float_sign_noexp_re = r"([-+]?{0}|[{1}])"
-_float_sign_noexp_re = _float_sign_noexp_re.format(_num, _nnd)
-_float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U)
-_float_nosign_noexp_re = r"({0}|[{1}])"
-_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, _nnd)
-_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U)
-
-# Integer regexes - include Unicode digits.
-_int_nosign_re = r"(\d+|[{0}])".format(digits_no_decimals)
-_int_nosign_re = re.compile(_int_nosign_re, flags=re.U)
-_int_sign_re = r"([-+]?\d+|[{0}])".format(digits_no_decimals)
-_int_sign_re = re.compile(_int_sign_re, flags=re.U)
-
-# This dict will help select the correct regex and number conversion function.
-_regex_chooser = {
- (ns.F | ns.S): _float_sign_exp_re,
- (ns.F | ns.S | ns.N): _float_sign_noexp_re,
- (ns.F | ns.U): _float_nosign_exp_re,
- (ns.F | ns.U | ns.N): _float_nosign_noexp_re,
- (ns.I | ns.S): _int_sign_re,
- (ns.I | ns.S | ns.N): _int_sign_re,
- (ns.I | ns.U): _int_nosign_re,
- (ns.I | ns.U | ns.N): _int_nosign_re,
-}
+
+class NumericalRegularExpressions(object):
+ """
+ Container of regular expressions that match numbers.
+
+ The numbers also account for unicode non-decimal characters.
+
+ Not intended to be made an instance - use class methods only.
+ """
+
+ # All unicode numeric characters (minus the decimal characters).
+ numeric = numeric_no_decimals
+ # All unicode digit characters (minus the decimal characters).
+ digits = digits_no_decimals
+ # Regular expression to match exponential component of a float.
+ exp = r"(?:[eE][-+]?\d+)?"
+ # Regular expression to match a floating point number.
+ float_num = r"(?:\d+\.?\d*|\.\d+)"
+
+ @classmethod
+ def _construct_regex(cls, fmt):
+ """Given a format string, construct the regex with class attributes."""
+ return re.compile(fmt.format(**vars(cls)), flags=re.U)
+
+ @classmethod
+ def int_sign(cls):
+ """Regular expression to match a signed int."""
+ return cls._construct_regex(r"([-+]?\d+|[{digits}])")
+
+ @classmethod
+ def int_nosign(cls):
+ """Regular expression to match an unsigned int."""
+ return cls._construct_regex(r"(\d+|[{digits}])")
+
+ @classmethod
+ def float_sign_exp(cls):
+ """Regular expression to match a signed float with exponent."""
+ return cls._construct_regex(r"([-+]?{float_num}{exp}|[{numeric}])")
+
+ @classmethod
+ def float_nosign_exp(cls):
+ """Regular expression to match an unsigned float with exponent."""
+ return cls._construct_regex(r"({float_num}{exp}|[{numeric}])")
+
+ @classmethod
+ def float_sign_noexp(cls):
+ """Regular expression to match a signed float without exponent."""
+ return cls._construct_regex(r"([-+]?{float_num}|[{numeric}])")
+
+ @classmethod
+ def float_nosign_noexp(cls):
+ """Regular expression to match an unsigned float without exponent."""
+ return cls._construct_regex(r"({float_num}|[{numeric}])")
+
+
+def regex_chooser(alg):
+ """
+ Select an appropriate regex for the type of number of interest.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Used to indicate the regular expression to select.
+
+ Returns
+ -------
+ regex : compiled regex object
+ Regular expression object that matches the desired number type.
+
+ """
+ if alg & ns.FLOAT:
+ alg &= ns.FLOAT | ns.SIGNED | ns.NOEXP
+ else:
+ alg &= ns.INT | ns.SIGNED
+
+ return {
+ ns.INT: NumericalRegularExpressions.int_nosign(),
+ ns.FLOAT: NumericalRegularExpressions.float_nosign_exp(),
+ ns.INT | ns.SIGNED: NumericalRegularExpressions.int_sign(),
+ ns.FLOAT | ns.SIGNED: NumericalRegularExpressions.float_sign_exp(),
+ ns.FLOAT | ns.NOEXP: NumericalRegularExpressions.float_nosign_noexp(),
+ ns.FLOAT | ns.SIGNED | ns.NOEXP: NumericalRegularExpressions.float_sign_noexp(),
+ }[alg]
def _no_op(x):
diff --git a/test_natsort/test_natsort_key.py b/test_natsort/test_natsort_key.py
index 55e0b58..35ee3bd 100644
--- a/test_natsort/test_natsort_key.py
+++ b/test_natsort/test_natsort_key.py
@@ -15,7 +15,7 @@ from natsort.utils import (
_parse_number_factory,
_parse_path_factory,
_parse_string_factory,
- _regex_chooser,
+ regex_chooser,
_string_component_transform_factory,
)
@@ -23,7 +23,7 @@ if PY_VERSION >= 3:
long = int
-regex = _regex_chooser[ns.INT]
+regex = regex_chooser(ns.INT)
pre = _input_string_transform_factory(ns.INT)
post = _string_component_transform_factory(ns.INT)
after = _final_data_transform_factory(ns.INT, "", "")
diff --git a/test_natsort/test_parse_string_function.py b/test_natsort/test_parse_string_function.py
index 3737942..5cecd7b 100644
--- a/test_natsort/test_parse_string_function.py
+++ b/test_natsort/test_parse_string_function.py
@@ -7,16 +7,8 @@ from hypothesis.strategies import floats, integers, lists, text
from natsort.compat.fastnumbers import fast_float, fast_int
from natsort.compat.py23 import PY_VERSION, py23_str
from natsort.ns_enum import ns, ns_DUMB
-from natsort.utils import (
- _float_nosign_exp_re,
- _float_nosign_noexp_re,
- _float_sign_exp_re,
- _float_sign_noexp_re,
- _int_nosign_re,
- _int_sign_re,
- _parse_path_factory,
- _parse_string_factory,
-)
+from natsort.utils import NumericalRegularExpressions as nre
+from natsort.utils import _parse_path_factory, _parse_string_factory
from pytest import raises
from slow_splitters import float_splitter, int_splitter
@@ -53,7 +45,7 @@ def tuple2(x, dummy):
def test_parse_string_factory_raises_TypeError_if_given_a_number_example():
with raises(TypeError):
assert _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)(50.0)
@@ -61,14 +53,14 @@ def test_parse_string_factory_raises_TypeError_if_given_a_number_example():
def test_parse_string_factory_raises_TypeError_if_given_a_number(x):
with raises(TypeError):
assert _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)(x)
def test_parse_string_factory_only_parses_digits_with_nosign_int_example():
- assert _parse_string_factory(0, "", _int_nosign_re.split, no_op, fast_int, tuple2)(
- "a5+5.034e-1"
- ) == ("a", 5, "+", 5, ".", 34, "e-", 1)
+ assert _parse_string_factory(
+ 0, "", nre.int_nosign().split, no_op, fast_int, tuple2
+ )("a5+5.034e-1") == ("a", 5, "+", 5, ".", 34, "e-", 1)
@given(
@@ -87,13 +79,13 @@ def test_parse_string_factory_only_parses_digits_with_nosign_int_example():
)
def test_parse_string_factory_only_parses_digits_with_nosign_int(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _parse_string_factory(0, "", _int_nosign_re.split, no_op, fast_int, tuple2)(
- s
- ) == int_splitter(s, False, "")
+ assert _parse_string_factory(
+ 0, "", nre.int_nosign().split, no_op, fast_int, tuple2
+ )(s) == int_splitter(s, False, "")
def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example():
- assert _parse_string_factory(0, "", _int_sign_re.split, no_op, fast_int, tuple2)(
+ assert _parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)(
"a5+5.034e-1"
) == ("a", 5, "", 5, ".", 34, "e", -1)
@@ -107,14 +99,14 @@ def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example():
)
def test_parse_string_factory_parses_digit_with_sign_with_signed_int(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _parse_string_factory(0, "", _int_sign_re.split, no_op, fast_int, tuple2)(
+ assert _parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)(
s
) == int_splitter(s, True, "")
def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example():
assert _parse_string_factory(
- 0, "", _float_nosign_noexp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2
)("a5+5.034e-1") == ("a", 5.0, "+", 5.034, "e-", 1.0)
@@ -128,13 +120,13 @@ def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example(
def test_parse_string_factory_only_parses_float_with_nosign_noexp_float(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
assert _parse_string_factory(
- 0, "", _float_nosign_noexp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2
)(s) == float_splitter(s, False, False, "")
def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float_example():
assert _parse_string_factory(
- 0, "", _float_nosign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2
)("a5+5.034e-1") == ("a", 5.0, "+", 0.5034)
@@ -148,13 +140,13 @@ def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_fl
def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
assert _parse_string_factory(
- 0, "", _float_nosign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2
)(s) == float_splitter(s, False, True, "")
def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_example():
assert _parse_string_factory(
- 0, "", _float_sign_noexp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2
)("a5+5.034e-1") == ("a", 5.0, "", 5.034, "e", -1.0)
@@ -168,16 +160,16 @@ def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_
def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
assert _parse_string_factory(
- 0, "", _float_sign_noexp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2
)(s) == float_splitter(s, True, False, "")
def test_parse_string_factory_parses_float_with_sign_exp_float_example():
assert _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)("a5+5.034e-1") == ("a", 5.0, "", 0.5034)
assert _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)("6a5+5.034e-1") == ("", 6.0, "a", 5.0, "", 0.5034)
@@ -191,7 +183,7 @@ def test_parse_string_factory_parses_float_with_sign_exp_float_example():
def test_parse_string_factory_parses_float_with_sign_exp_float(x):
s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
assert _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)(s) == float_splitter(s, True, True, "")
@@ -201,18 +193,18 @@ def test_parse_string_factory_selects_pre_function_value_if_not_dumb():
return (orig[0], tuple(x))
assert _parse_string_factory(
- 0, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2
+ 0, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
)("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
assert _parse_string_factory(
- ns_DUMB, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2
+ ns_DUMB, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
)("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
assert _parse_string_factory(
- ns.LOCALE, "", _int_nosign_re.split, py23_str.upper, fast_float, tuple2
+ ns.LOCALE, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
)("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
assert _parse_string_factory(
ns.LOCALE | ns_DUMB,
"",
- _int_nosign_re.split,
+ nre.int_nosign().split,
py23_str.upper,
fast_float,
tuple2,
@@ -221,7 +213,7 @@ def test_parse_string_factory_selects_pre_function_value_if_not_dumb():
def test_parse_path_function_parses_string_as_path_then_as_string():
splt = _parse_string_factory(
- 0, "", _float_sign_exp_re.split, no_op, fast_float, tuple2
+ 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
)
assert _parse_path_factory(splt)("/p/Folder (10)/file34.5nm (2).tar.gz") == (
("/",),
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 1dcdb4e..b729adf 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -15,15 +15,10 @@ from natsort.ns_enum import ns
from natsort.utils import (
_args_to_enum,
_do_decoding,
- _float_nosign_exp_re,
- _float_nosign_noexp_re,
- _float_sign_exp_re,
- _float_sign_noexp_re,
_groupletters,
- _int_nosign_re,
- _int_sign_re,
_path_splitter,
- _regex_chooser,
+ NumericalRegularExpressions,
+ regex_chooser,
_sep_inserter,
chain_functions,
)
@@ -105,14 +100,14 @@ def test_args_to_enum_converts_None_to_ns_IU():
def test_regex_chooser_returns_correct_regular_expression_object():
- assert _regex_chooser[ns.INT] is _int_nosign_re
- assert _regex_chooser[ns.INT | ns.NOEXP] is _int_nosign_re
- assert _regex_chooser[ns.INT | ns.SIGNED] is _int_sign_re
- assert _regex_chooser[ns.INT | ns.SIGNED | ns.NOEXP] is _int_sign_re
- assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re
- assert _regex_chooser[ns.FLOAT | ns.NOEXP] is _float_nosign_noexp_re
- assert _regex_chooser[ns.FLOAT | ns.SIGNED] is _float_sign_exp_re
- assert _regex_chooser[ns.FLOAT | ns.SIGNED | ns.NOEXP] is _float_sign_noexp_re
+ assert regex_chooser(ns.INT) is NumericalRegularExpressions.int_nosign()
+ assert regex_chooser(ns.INT | ns.NOEXP) is NumericalRegularExpressions.int_nosign()
+ assert regex_chooser(ns.INT | ns.SIGNED) is NumericalRegularExpressions.int_sign()
+ assert regex_chooser(ns.INT | ns.SIGNED | ns.NOEXP) is NumericalRegularExpressions.int_sign()
+ assert regex_chooser(ns.FLOAT) is NumericalRegularExpressions.float_nosign_exp()
+ assert regex_chooser(ns.FLOAT | ns.NOEXP) is NumericalRegularExpressions.float_nosign_noexp()
+ assert regex_chooser(ns.FLOAT | ns.SIGNED) is NumericalRegularExpressions.float_sign_exp()
+ assert regex_chooser(ns.FLOAT | ns.SIGNED | ns.NOEXP) is NumericalRegularExpressions.float_sign_noexp()
def test_ns_enum_values_have_are_as_expected():