diff options
-rw-r--r-- | docs/api.rst | 9 | ||||
-rw-r--r-- | docs/examples.rst | 50 | ||||
-rw-r--r-- | natsort/__init__.py | 2 | ||||
-rw-r--r-- | natsort/natsort.py | 19 | ||||
-rw-r--r-- | tests/conftest.py | 10 | ||||
-rw-r--r-- | tests/test_regex.py | 20 | ||||
-rw-r--r-- | tox.ini | 6 |
7 files changed, 112 insertions, 4 deletions
diff --git a/docs/api.rst b/docs/api.rst index 5e7a482..8052606 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -91,7 +91,14 @@ Help With Creating Function Keys If you need to create a complicated *key* argument to (for example) :func:`natsorted` that is actually multiple functions called one after the other, the following function can help you easily perform this action. It is -used internally to :mod:`natsort`, and has been exposed publically for +used internally to :mod:`natsort`, and has been exposed publicly for the convenience of the user. .. autofunction:: chain_functions + +If you need to be able to search your input for numbers using the same definition +as :mod:`natsort`, you can do so using the following function. Given your chosen +algorithm (selected using the :class:`~natsort.ns` enum), the corresponding regular +expression to locate numbers will be returned. + +.. autofunction:: numeric_regex_chooser diff --git a/docs/examples.rst b/docs/examples.rst index e44aa1c..04ca632 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -245,6 +245,56 @@ sort key so that: >>> natsorted(b, key=attrgetter('bar')) [Foo('num2'), Foo('num3'), Foo('num5')] +.. _unit_sorting: + +Accounting for Units When Sorting ++++++++++++++++++++++++++++++++++ + +:mod:`natsort` does not come with any pre-built mechanism to sort units, +but you can write your own `key` to do this. Below, I will demonstrate sorting +imperial lengths (e.g. feet an inches), but of course you can extend this to any +set of units you need. This example is based on code from +`this issue <https://github.com/SethMMorton/natsort/issues/100#issuecomment-530659310>`_, +and uses the function :func:`natsort.numeric_regex_chooser` to build a regular +expression that will parse numbers in the same manner as :mod:`natsort` itself. + +.. code-block:: pycon + + >>> import re + >>> import natsort + >>> + >>> # Define how each unit will be transformed + >>> conversion_mapping = { + ... "in": 1, + ... "inch": 1, + ... "inches": 1, + ... "ft": 12, + ... "feet": 12, + ... "foot": 12, + ... } + >>> + >>> # This regular expression searches for numbers and units + >>> all_units = "|".join(conversion_mapping.keys()) + >>> float_re = natsort.numeric_regex_chooser(natsort.FLOAT | natsort.SIGNED) + >>> unit_finder = re.compile(r"({})\s*({})".format(float_re, all_units), re.IGNORECASE) + >>> + >>> def unit_replacer(matchobj): + ... """ + ... Given a regex match object, return a replacement string where units are modified + ... """ + ... number = matchobj.group(1) + ... unit = matchobj.group(2) + ... new_number = float(number) * conversion_mapping[unit] + ... return "{} in".format(new_number) + ... + >>> # Demo time! + >>> data = ['1 ft', '5 in', '10 ft', '2 in'] + >>> [unit_finder.sub(unit_replacer, x) for x in data] + ['12.0 in', '5.0 in', '120.0 in', '2.0 in'] + >>> + >>> natsort.natsorted(data, key=lambda x: unit_finder.sub(unit_replacer, x)) + ['2 in', '5 in', '1 ft', '10 ft'] + Generating a Natsort Key ------------------------ diff --git a/natsort/__init__.py b/natsort/__init__.py index da23650..561164b 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -15,6 +15,7 @@ from natsort.natsort import ( natsort_keygen, natsorted, ns, + numeric_regex_chooser, order_by_index, realsorted, ) @@ -41,6 +42,7 @@ __all__ = [ "as_utf8", "ns", "chain_functions", + "numeric_regex_chooser", ] # Add the ns keys to this namespace for convenience. diff --git a/natsort/natsort.py b/natsort/natsort.py index e597815..58641e4 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -601,6 +601,25 @@ def order_by_index(seq, index, iter=False): return (seq[i] for i in index) if iter else [seq[i] for i in index] +def numeric_regex_chooser(alg): + """ + Select an appropriate regex for the type of number of interest. + + Parameters + ---------- + alg : ns enum + Used to indicate the regular expression to select. + + Returns + ------- + regex : str + Regular expression string that matches the desired number type. + + """ + # Remove the leading and trailing parens + return utils.regex_chooser(alg).pattern[1:-1] + + if float(sys.version[:3]) < 3: # pylint: disable=unused-variable # noinspection PyUnresolvedReferences,PyPep8Naming diff --git a/tests/conftest.py b/tests/conftest.py index 8a7412b..d584789 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,9 +4,19 @@ Fixtures for pytest. import locale +import hypothesis import pytest +# This disables the "too slow" hypothesis heath check globally. +# For some reason it thinks that the text/binary generation is too +# slow then causes the tests to fail. +hypothesis.settings.register_profile( + "slow-tests", + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) + + def load_locale(x): """Convenience to load a locale, trying ISO8859-1 first.""" try: diff --git a/tests/test_regex.py b/tests/test_regex.py index d3fe617..a6da62d 100644 --- a/tests/test_regex.py +++ b/tests/test_regex.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import pytest +from natsort import ns, numeric_regex_chooser from natsort.utils import NumericalRegularExpressions as NumRegex @@ -98,3 +99,22 @@ labels = ["{}-{}".format(given, regex_names[regex]) for given, _, regex in regex def test_regex_splits_correctly(x, expected, regex): # noinspection PyUnresolvedReferences assert regex.split(x) == expected + + +@pytest.mark.parametrize( + "given, expected", + [ + (ns.INT, NumRegex.int_nosign()), + (ns.INT | ns.UNSIGNED, NumRegex.int_nosign()), + (ns.INT | ns.SIGNED, NumRegex.int_sign()), + (ns.INT | ns.NOEXP, NumRegex.int_nosign()), + (ns.FLOAT, NumRegex.float_nosign_exp()), + (ns.FLOAT | ns.UNSIGNED, NumRegex.float_nosign_exp()), + (ns.FLOAT | ns.SIGNED, NumRegex.float_sign_exp()), + (ns.FLOAT | ns.NOEXP, NumRegex.float_nosign_noexp()), + (ns.FLOAT | ns.SIGNED | ns.NOEXP, NumRegex.float_sign_noexp()), + (ns.FLOAT | ns.UNSIGNED | ns.NOEXP, NumRegex.float_nosign_noexp()), + ], +) +def test_regex_chooser(given, expected): + assert numeric_regex_chooser(given) == expected.pattern[1:-1] # remove parens @@ -24,11 +24,11 @@ extras = commands = # Only run How It Works doctest on Python 3.6. py36: {envpython} -m doctest -o IGNORE_EXCEPTION_DETAIL docs/howitworks.rst - # Other doctests are run for all pythons. - pytest README.rst docs/intro.rst docs/examples.rst + # Other doctests are run for all pythons except 2.7. + !py27: pytest README.rst docs/intro.rst docs/examples.rst pytest --doctest-modules {envsitepackagesdir}/natsort # Full test suite. Allow the user to pass command-line objects. - pytest --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:} + pytest --hypothesis-profile=slow-tests --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:} # Check code quality. [testenv:flake8] |