summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/api.rst9
-rw-r--r--docs/examples.rst50
-rw-r--r--natsort/__init__.py2
-rw-r--r--natsort/natsort.py19
-rw-r--r--tests/conftest.py10
-rw-r--r--tests/test_regex.py20
-rw-r--r--tox.ini6
7 files changed, 112 insertions, 4 deletions
diff --git a/docs/api.rst b/docs/api.rst
index 5e7a482..8052606 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -91,7 +91,14 @@ Help With Creating Function Keys
If you need to create a complicated *key* argument to (for example)
:func:`natsorted` that is actually multiple functions called one after the other,
the following function can help you easily perform this action. It is
-used internally to :mod:`natsort`, and has been exposed publically for
+used internally to :mod:`natsort`, and has been exposed publicly for
the convenience of the user.
.. autofunction:: chain_functions
+
+If you need to be able to search your input for numbers using the same definition
+as :mod:`natsort`, you can do so using the following function. Given your chosen
+algorithm (selected using the :class:`~natsort.ns` enum), the corresponding regular
+expression to locate numbers will be returned.
+
+.. autofunction:: numeric_regex_chooser
diff --git a/docs/examples.rst b/docs/examples.rst
index e44aa1c..04ca632 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -245,6 +245,56 @@ sort key so that:
>>> natsorted(b, key=attrgetter('bar'))
[Foo('num2'), Foo('num3'), Foo('num5')]
+.. _unit_sorting:
+
+Accounting for Units When Sorting
++++++++++++++++++++++++++++++++++
+
+:mod:`natsort` does not come with any pre-built mechanism to sort units,
+but you can write your own `key` to do this. Below, I will demonstrate sorting
+imperial lengths (e.g. feet an inches), but of course you can extend this to any
+set of units you need. This example is based on code from
+`this issue <https://github.com/SethMMorton/natsort/issues/100#issuecomment-530659310>`_,
+and uses the function :func:`natsort.numeric_regex_chooser` to build a regular
+expression that will parse numbers in the same manner as :mod:`natsort` itself.
+
+.. code-block:: pycon
+
+ >>> import re
+ >>> import natsort
+ >>>
+ >>> # Define how each unit will be transformed
+ >>> conversion_mapping = {
+ ... "in": 1,
+ ... "inch": 1,
+ ... "inches": 1,
+ ... "ft": 12,
+ ... "feet": 12,
+ ... "foot": 12,
+ ... }
+ >>>
+ >>> # This regular expression searches for numbers and units
+ >>> all_units = "|".join(conversion_mapping.keys())
+ >>> float_re = natsort.numeric_regex_chooser(natsort.FLOAT | natsort.SIGNED)
+ >>> unit_finder = re.compile(r"({})\s*({})".format(float_re, all_units), re.IGNORECASE)
+ >>>
+ >>> def unit_replacer(matchobj):
+ ... """
+ ... Given a regex match object, return a replacement string where units are modified
+ ... """
+ ... number = matchobj.group(1)
+ ... unit = matchobj.group(2)
+ ... new_number = float(number) * conversion_mapping[unit]
+ ... return "{} in".format(new_number)
+ ...
+ >>> # Demo time!
+ >>> data = ['1 ft', '5 in', '10 ft', '2 in']
+ >>> [unit_finder.sub(unit_replacer, x) for x in data]
+ ['12.0 in', '5.0 in', '120.0 in', '2.0 in']
+ >>>
+ >>> natsort.natsorted(data, key=lambda x: unit_finder.sub(unit_replacer, x))
+ ['2 in', '5 in', '1 ft', '10 ft']
+
Generating a Natsort Key
------------------------
diff --git a/natsort/__init__.py b/natsort/__init__.py
index da23650..561164b 100644
--- a/natsort/__init__.py
+++ b/natsort/__init__.py
@@ -15,6 +15,7 @@ from natsort.natsort import (
natsort_keygen,
natsorted,
ns,
+ numeric_regex_chooser,
order_by_index,
realsorted,
)
@@ -41,6 +42,7 @@ __all__ = [
"as_utf8",
"ns",
"chain_functions",
+ "numeric_regex_chooser",
]
# Add the ns keys to this namespace for convenience.
diff --git a/natsort/natsort.py b/natsort/natsort.py
index e597815..58641e4 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -601,6 +601,25 @@ def order_by_index(seq, index, iter=False):
return (seq[i] for i in index) if iter else [seq[i] for i in index]
+def numeric_regex_chooser(alg):
+ """
+ Select an appropriate regex for the type of number of interest.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Used to indicate the regular expression to select.
+
+ Returns
+ -------
+ regex : str
+ Regular expression string that matches the desired number type.
+
+ """
+ # Remove the leading and trailing parens
+ return utils.regex_chooser(alg).pattern[1:-1]
+
+
if float(sys.version[:3]) < 3:
# pylint: disable=unused-variable
# noinspection PyUnresolvedReferences,PyPep8Naming
diff --git a/tests/conftest.py b/tests/conftest.py
index 8a7412b..d584789 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,9 +4,19 @@ Fixtures for pytest.
import locale
+import hypothesis
import pytest
+# This disables the "too slow" hypothesis heath check globally.
+# For some reason it thinks that the text/binary generation is too
+# slow then causes the tests to fail.
+hypothesis.settings.register_profile(
+ "slow-tests",
+ suppress_health_check=[hypothesis.HealthCheck.too_slow],
+)
+
+
def load_locale(x):
"""Convenience to load a locale, trying ISO8859-1 first."""
try:
diff --git a/tests/test_regex.py b/tests/test_regex.py
index d3fe617..a6da62d 100644
--- a/tests/test_regex.py
+++ b/tests/test_regex.py
@@ -3,6 +3,7 @@
from __future__ import unicode_literals
import pytest
+from natsort import ns, numeric_regex_chooser
from natsort.utils import NumericalRegularExpressions as NumRegex
@@ -98,3 +99,22 @@ labels = ["{}-{}".format(given, regex_names[regex]) for given, _, regex in regex
def test_regex_splits_correctly(x, expected, regex):
# noinspection PyUnresolvedReferences
assert regex.split(x) == expected
+
+
+@pytest.mark.parametrize(
+ "given, expected",
+ [
+ (ns.INT, NumRegex.int_nosign()),
+ (ns.INT | ns.UNSIGNED, NumRegex.int_nosign()),
+ (ns.INT | ns.SIGNED, NumRegex.int_sign()),
+ (ns.INT | ns.NOEXP, NumRegex.int_nosign()),
+ (ns.FLOAT, NumRegex.float_nosign_exp()),
+ (ns.FLOAT | ns.UNSIGNED, NumRegex.float_nosign_exp()),
+ (ns.FLOAT | ns.SIGNED, NumRegex.float_sign_exp()),
+ (ns.FLOAT | ns.NOEXP, NumRegex.float_nosign_noexp()),
+ (ns.FLOAT | ns.SIGNED | ns.NOEXP, NumRegex.float_sign_noexp()),
+ (ns.FLOAT | ns.UNSIGNED | ns.NOEXP, NumRegex.float_nosign_noexp()),
+ ],
+)
+def test_regex_chooser(given, expected):
+ assert numeric_regex_chooser(given) == expected.pattern[1:-1] # remove parens
diff --git a/tox.ini b/tox.ini
index 9382ada..2919f69 100644
--- a/tox.ini
+++ b/tox.ini
@@ -24,11 +24,11 @@ extras =
commands =
# Only run How It Works doctest on Python 3.6.
py36: {envpython} -m doctest -o IGNORE_EXCEPTION_DETAIL docs/howitworks.rst
- # Other doctests are run for all pythons.
- pytest README.rst docs/intro.rst docs/examples.rst
+ # Other doctests are run for all pythons except 2.7.
+ !py27: pytest README.rst docs/intro.rst docs/examples.rst
pytest --doctest-modules {envsitepackagesdir}/natsort
# Full test suite. Allow the user to pass command-line objects.
- pytest --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:}
+ pytest --hypothesis-profile=slow-tests --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:}
# Check code quality.
[testenv:flake8]