summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth M Morton <seth.m.morton@gmail.com>2014-09-02 18:35:02 -0700
committerSeth M Morton <seth.m.morton@gmail.com>2014-09-02 18:35:02 -0700
commitafc8e69ccbcf6304a10751d97149f2c8435fb09f (patch)
tree4833decc5c881bcfba181d410b220ed09a9a5265
parent514a60f24137a3435bfad8f7f448235aa4139da2 (diff)
parent4d297c1452d76b9f442bf95bae7f10a1a6deeb24 (diff)
downloadnatsort-afc8e69ccbcf6304a10751d97149f2c8435fb09f.tar.gz
natsort version 3.5.0 release3.5.0
- Added the 'alg' argument to the 'natsort' functions. This argument accepts an enum that is used to indicate the options the user wishes to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe' options are being deprecated and will become (undocumented) keyword-only options in natsort version 4.0.0. - The user can now modify how 'natsort' handles the case of non-numeric characters. - The user can now instruct 'natsort' to use locale-aware sorting, which allows 'natsort' to perform true "human sorting". - The `humansorted` convenience function has been included to make this easier. - Updated shell script with locale functionality.
-rw-r--r--.travis.yml13
-rw-r--r--MANIFEST.in4
-rw-r--r--README.rst78
-rw-r--r--docs/source/api.rst3
-rw-r--r--docs/source/changelog.rst18
-rw-r--r--docs/source/conf.py1
-rw-r--r--docs/source/examples.rst143
-rw-r--r--docs/source/humansorted.rst8
-rw-r--r--docs/source/index_humansorted.rst8
-rw-r--r--docs/source/intro.rst29
-rw-r--r--docs/source/ns_class.rst8
-rw-r--r--natsort/__init__.py7
-rw-r--r--natsort/__main__.py37
-rw-r--r--natsort/_version.py2
-rw-r--r--natsort/locale_help.py129
-rw-r--r--natsort/natsort.py740
-rw-r--r--setup.cfg2
-rw-r--r--setup.py3
-rw-r--r--test_natsort/test_locale_help.py44
-rw-r--r--test_natsort/test_main.py1
-rw-r--r--test_natsort/test_natsort.py277
21 files changed, 1195 insertions, 360 deletions
diff --git a/.travis.yml b/.travis.yml
index 797241c..a3a12f0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,13 +6,18 @@ python:
- 3.3
- 3.4
env:
-- WITH_FASTNUMBERS=true
-- WITH_FASTNUMBERS=false
+- WITH_OPTIONS=true
+- WITH_OPTIONS=false
+before_install:
+- sudo apt-get update
+- sudo locale-gen de_DE.UTF-8
install:
+- if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi
+- if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi
+- if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi
+- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi
- pip install pytest-cov pytest-flakes pytest-pep8
- pip install coveralls
-- if [[ $WITH_FASTNUMBERS == true ]]; then pip install fastnumbers; fi
-- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi
script:
- python -m pytest --cov natsort --flakes --pep8
- python -m pytest --doctest-modules natsort
diff --git a/MANIFEST.in b/MANIFEST.in
index be74ec3..ad8cd5a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,9 +5,13 @@ include natsort/_version.py
include natsort/__main__.py
include natsort/__init__.py
include natsort/py23compat.py
+include natsort/locale_help.py
+include natsort/fake_fastnumbers.py
include test_natsort/profile_natsorted.py
include test_natsort/stress_natsort.py
include test_natsort/test_natsort.py
+include test_natsort/test_locale_help.py
+include test_natsort/test_fake_fastnumbers.py
include test_natsort/test_main.py
include setup.py
include setup.cfg
diff --git a/README.rst b/README.rst
index 5db4219..f6e4cbf 100644
--- a/README.rst
+++ b/README.rst
@@ -49,6 +49,26 @@ Sorting version numbers is just as easy with the ``versorted`` function::
>>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work
['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10']
+You can also perform locale-aware sorting (or "human sorting"), where the
+non-numeric characters are ordered based on their meaning, not on their
+ordinal value; this can be achieved with the ``humansorted`` function::
+
+ >>> a = ['Apple', 'Banana', 'apple', 'banana']
+ >>> natsorted(a)
+ ['Apple', 'Banana', 'apple', 'banana']
+ >>> import locale
+ >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+ 'en_US.UTF-8'
+ >>> from natsort import humansorted
+ >>> humansorted(a)
+ ['apple', 'Apple', 'banana', 'Banana']
+
+You may find you need to explicitly set the locale to get this to work
+(as shown in the example).
+Please see the `following caveat <http://pythonhosted.org//natsort/examples.html#bug-note>`_
+and the "Optional Dependencies" section
+below before using the ``humansorted`` function.
+
You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
when you sort::
@@ -61,6 +81,7 @@ when you sort::
The natsort algorithm does other fancy things like
- recursively descend into lists of lists
+ - control the case-sensitivity
- sort file paths correctly
- allow custom sorting keys
- exposes a natsort_key generator to pass to list.sort
@@ -84,19 +105,37 @@ Requirements
(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the
`argparse <https://pypi.python.org/pypi/argparse>`_ module is required.
-Optional Dependency
--------------------
+Optional Dependencies
+---------------------
+
+fastnumbers
+'''''''''''
The most efficient sorting can occur if you install the
`fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps
with the string to number conversions.) ``natsort`` will still run (efficiently)
without the package, but if you need to squeeze out that extra juice it is
recommended you include this as a dependency. ``natsort`` will not require (or
-check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed.
+check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed
+at installation.
+
+PyICU
+'''''
+
+On some systems, Python's ``locale`` library can be buggy (I have found this to be
+the case on Mac OS X), so ``natsort`` will use
+`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed
+on your computer; this will give more reliable results. ``natsort`` will not
+require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed
+at installation.
Depreciation Notices
--------------------
+ - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``,
+ ``as_path``, and ``py3_safe`` options will be removed from the (documented)
+ API, in favor of the ``alg`` option and ``ns`` enum. They will remain as
+ keyword-only arguments after that (for the foreseeable future).
- In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed
from the public API. All future development should use ``natsort_keygen``
in preparation for this.
@@ -118,6 +157,24 @@ History
These are the last three entries of the changelog. See the package documentation
for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_.
+09-02-2014 v. 3.5.0
+'''''''''''''''''''
+
+ - Added the 'alg' argument to the 'natsort' functions. This argument
+ accepts an enum that is used to indicate the options the user wishes
+ to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe'
+ options are being deprecated and will become (undocumented)
+ keyword-only options in natsort version 4.0.0.
+ - The user can now modify how 'natsort' handles the case of non-numeric
+ characters.
+ - The user can now instruct 'natsort' to use locale-aware sorting, which
+ allows 'natsort' to perform true "human sorting".
+
+ - The `humansorted` convenience function has been included to make this
+ easier.
+
+ - Updated shell script with locale functionality.
+
08-12-2014 v. 3.4.1
'''''''''''''''''''
@@ -155,18 +212,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_.
- Reworked the documentation, moving most of it to PyPI's hosting platform.
- Added support for coveralls.io.
- Entire codebase is now PyFlakes and PEP8 compliant.
-
-06-28-2014 v. 3.3.0
-'''''''''''''''''''
-
- - Added a 'versorted' method for more convenient sorting of versions.
- - Updated command-line tool --number_type option with 'version' and 'ver'
- to make it more clear how to sort version numbers.
- - Moved unit-testing mechanism from being docstring-based to actual unit tests
- in actual functions.
-
- - This has provided the ability determine the coverage of the unit tests (99%).
- - This also makes the pydoc documentation a bit more clear.
-
- - Made docstrings for public functions mirror the README API.
- - Connected natsort development to Travis-CI to help ensure quality releases.
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 7546de6..80b7edd 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -13,6 +13,9 @@ natsort API
natsort_key.rst
natsorted.rst
versorted.rst
+ humansorted.rst
index_natsorted.rst
index_versorted.rst
+ index_humansorted.rst
order_by_index.rst
+ ns_class.rst
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index c4f8c30..542f5ad 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -3,6 +3,24 @@
Changelog
---------
+09-02-2014 v. 3.5.0
+'''''''''''''''''''
+
+ - Added the 'alg' argument to the 'natsort' functions. This argument
+ accepts an enum that is used to indicate the options the user wishes
+ to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe'
+ options are being deprecated and will become (undocumented)
+ keyword-only options in natsort version 4.0.0.
+ - The user can now modify how 'natsort' handles the case of non-numeric
+ characters.
+ - The user can now instruct 'natsort' to use locale-aware sorting, which
+ allows 'natsort' to perform true "human sorting".
+
+ - The `humansorted` convenience function has been included to make this
+ easier.
+
+ - Updated shell script with locale functionality.
+
08-12-2014 v. 3.4.1
'''''''''''''''''''
diff --git a/docs/source/conf.py b/docs/source/conf.py
index abeb7da..fa8d749 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -43,6 +43,7 @@ def current_version():
# ones.
extensions = [
'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
'numpydoc',
]
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 5176dd1..b0dfe27 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -18,27 +18,10 @@ it as you would :func:`sorted`::
>>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300']
>>> sorted(a)
['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.']
- >>> from natsort import natsorted
+ >>> from natsort import natsorted, ns
>>> natsorted(a)
['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
-Customizing Float Definition
-----------------------------
-
-By default :func:`~natsorted` searches for any float that would be
-a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc.
-Perhaps you don't want to search for signed numbers, or you don't
-want to search for exponential notation, and the ``signed`` and
-``exp`` options allow you to do this::
-
- >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300']
- >>> natsorted(a)
- ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
- >>> natsorted(a, signed=False)
- ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4']
- >>> natsorted(a, exp=False)
- ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.']
-
Sort Version Numbers
--------------------
@@ -49,17 +32,17 @@ literals, not floats. This can be achieved in three ways, as shown below::
>>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1']
>>> natsorted(a) # This gives incorrect results
['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1']
- >>> natsorted(a, number_type=int, signed=False)
+ >>> natsorted(a, alg=ns.INT | ns.UNSIGNED)
['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
- >>> natsorted(a, number_type=None)
+ >>> natsorted(a, alg=ns.VERSION)
['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
>>> from natsort import versorted
>>> versorted(a)
['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
-You can see that ``number_type=None`` is a shortcut for ``number_type=int``
-and ``signed=False``, and the :func:`~versorted` is a shortcut for
-``natsorted(number_type=None)``. The recommend manner to sort version
+You can see that ``alg=ns.VERSION`` is a shortcut for
+``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for
+``natsorted(alg=ns.VERSION)``. The recommended manner to sort version
numbers is to use :func:`~versorted`.
Sorting with Alpha, Beta, and Release Candidates
@@ -68,15 +51,21 @@ Sorting with Alpha, Beta, and Release Candidates
By default, if you wish to sort versions with a non-strict versioning
scheme, you may not get the results you expect::
- >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta', '1.2alpha', '1.2.1', '1.1', '1.3']
+ >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3']
>>> versorted(a)
- ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.3']
+ ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3']
To make the '1.2' pre-releases come before '1.2.1', you need to use the following
recipe::
>>> versorted(a, key=lambda x: x.replace('.', '~'))
- ['1.1', '1.2', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.2.1', '1.3']
+ ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3']
+
+If you also want '1.2' after all the alpha, beta, and rc candidates, you can
+modify the above recipe::
+
+ >>> versorted(a, key=lambda x: x.replace('.', '~')+'z')
+ ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3']
Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to
see why this works.
@@ -86,7 +75,7 @@ Sort OS-Generated Paths
In some cases when sorting file paths with OS-Generated names, the default
:mod:`~natsorted` algorithm may not be sufficient. In cases like these,
-you may need to use the ``as_path`` option::
+you may need to use the ``ns.PATH`` option::
>>> a = ['./folder/file (1).txt',
... './folder/file.txt',
@@ -94,9 +83,102 @@ you may need to use the ``as_path`` option::
... './folder (10)/file.txt']
>>> natsorted(a)
['./folder (1)/file.txt', './folder (10)/file.txt', './folder/file (1).txt', './folder/file.txt']
- >>> natsorted(a, as_path=True)
+ >>> natsorted(a, alg=ns.PATH)
['./folder/file.txt', './folder/file (1).txt', './folder (1)/file.txt', './folder (10)/file.txt']
+Locale-Aware Sorting (Human Sorting)
+------------------------------------
+
+You can instruct :mod:`natsort` to use locale-aware sorting with the
+``ns.LOCALE`` option. In addition to making this understand non-ASCII
+characters, it will also properly interpret non-'.' decimal separators
+and also properly order case. It may be more convenient to just use
+the :func:`humansorted` function::
+
+ >>> from natsort import humansorted
+ >>> import locale
+ >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+ 'en_US.UTF-8'
+ >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ >>> natsorted(a, alg=ns.LOCALE)
+ ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+ >>> humansorted(a)
+ ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+
+You may find that if you do not explicitly set the locale your results may not
+be as you expect... I have found that it depends on the system you are on.
+If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see below) then
+you should not need to do this.
+
+.. _bug_note:
+
+A Note For Bugs With Locale-Aware Sorting
++++++++++++++++++++++++++++++++++++++++++
+
+If you find that ``ns.LOCALE`` (or :func:`~humansorted`) does not give
+the results you expect, before filing a bug report please try to first install
+`PyICU <https://pypi.python.org/pypi/PyICU>`_. There are some known bugs
+with the `locale` module from the standard library that are solved when
+using `PyICU <https://pypi.python.org/pypi/PyICU>`_.
+
+Controlling Case When Sorting
+-----------------------------
+
+For non-numbers, by default :mod:`natsort` uses ordinal sorting (i.e.
+it sorts by the character's value in the ASCII table). For example::
+
+ >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ >>> natsorted(a)
+ ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
+
+There are times when you wish to ignore the case when sorting;
+you can easily do this with the ``ns.IGNORECASE`` option::
+
+ >>> natsorted(a, alg=ns.IGNORECASE)
+ ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
+
+Note that since Python's sorting is stable, the order of equivalent
+elements after lowering the case is the same order they appear in the
+original list.
+
+Upper-case letters appear first in the ASCII table, but many natural
+sorting methods place lower-case first. To do this, use
+``ns.LOWERCASEFIRST``::
+
+ >>> natsorted(a, alg=ns.LOWERCASEFIRST)
+ ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
+
+It may be undesirable to have the upper-case letters grouped together
+and the lower-case letters grouped together; most would expect all
+"a"s to be together regardless of case, and all "b"s, and so on. To
+achieve this, use ``ns.GROUPLETTERS``::
+
+ >>> natsorted(a, alg=ns.GROUPLETTERS)
+ ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
+
+You might combine this with ``ns.LOWERCASEFIRST`` to get what most
+would expect to be "natural" sorting::
+
+ >>> natsorted(a, alg=ns.G | ns.LF)
+ ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+
+Customizing Float Definition
+----------------------------
+
+By default :func:`~natsorted` searches for any float that would be
+a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc.
+Perhaps you don't want to search for signed numbers, or you don't
+want to search for exponential notation; the ``ns.UNSIGNED`` and
+``ns.NOEXP`` options allow you to do this::
+
+ >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300']
+ >>> natsorted(a)
+ ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
+ >>> natsorted(a, alg=ns.UNSIGNED)
+ ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4']
+ >>> natsorted(a, alg=ns.NOEXP)
+ ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.']
+
Using a Custom Sorting Key
--------------------------
@@ -129,13 +211,14 @@ need to pass a key to the :meth:`list.sort` method. The function
>>> a.sort(key=natsort_key)
>>> a
['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
- >>> versort_key = natsort_keygen(number_type=None)
+ >>> versort_key = natsort_keygen(alg=ns.VERSION)
>>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1']
>>> a.sort(key=versort_key)
>>> a
['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
-:func:`~natsort_keygen` has the same API as :func:`~natsorted`.
+:func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the
+`reverse` option).
Sorting Multiple Lists According to a Single List
-------------------------------------------------
diff --git a/docs/source/humansorted.rst b/docs/source/humansorted.rst
new file mode 100644
index 0000000..35be19b
--- /dev/null
+++ b/docs/source/humansorted.rst
@@ -0,0 +1,8 @@
+.. default-domain:: py
+.. currentmodule:: natsort
+
+:func:`~natsort.humansorted`
+============================
+
+.. autofunction:: humansorted
+
diff --git a/docs/source/index_humansorted.rst b/docs/source/index_humansorted.rst
new file mode 100644
index 0000000..e143b67
--- /dev/null
+++ b/docs/source/index_humansorted.rst
@@ -0,0 +1,8 @@
+.. default-domain:: py
+.. currentmodule:: natsort
+
+:func:`~natsort.index_humansorted`
+==================================
+
+.. autofunction:: index_humansorted
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
index 3348356..ace8355 100644
--- a/docs/source/intro.rst
+++ b/docs/source/intro.rst
@@ -59,6 +59,25 @@ Sorting version numbers is just as easy with :func:`~versorted`::
>>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work
['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10']
+You can also perform locale-aware sorting (or "human sorting"), where the
+non-numeric characters are ordered based on their meaning, not on their
+ordinal value; this can be achieved with the ``humansorted`` function::
+
+ >>> a = ['Apple', 'Banana', 'apple', 'banana']
+ >>> natsorted(a)
+ ['Apple', 'Banana', 'apple', 'banana']
+ >>> import locale
+ >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+ 'en_US.UTF-8'
+ >>> from natsort import humansorted
+ >>> humansorted(a)
+ ['apple', 'Apple', 'banana', 'Banana']
+
+You may find you need to explicitly set the locale to get this to work
+(as shown in the example).
+Please see :ref:`bug_note` and the Installation section
+below before using the ``humansorted`` function.
+
You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
when you sort::
@@ -71,9 +90,10 @@ when you sort::
The natsort algorithm does other fancy things like
- recursively descend into lists of lists
+ - control the case-sensitivity
- sort file paths correctly
- allow custom sorting keys
- - allow exposed a natsort_key generator to pass to list.sort
+ - exposes a natsort_key generator to pass to list.sort
Please see the :ref:`examples` for a quick start guide, or the :ref:`api`
for more details.
@@ -119,6 +139,13 @@ without the package, but if you need to squeeze out that extra juice it is
recommended you include this as a dependency. ``natsort`` will not require (or
check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed.
+On some systems, Python's ``locale`` library can be buggy (I have found this to be
+the case on Mac OS X), so ``natsort`` will use
+`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed
+on your computer; this will give more reliable results. ``natsort`` will not
+require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed
+at installation.
+
:mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called
from the command line with ``python -m natsort``. The command line script is
only installed onto your ``PATH`` if you don't install via a wheel. There is
diff --git a/docs/source/ns_class.rst b/docs/source/ns_class.rst
new file mode 100644
index 0000000..f604e3d
--- /dev/null
+++ b/docs/source/ns_class.rst
@@ -0,0 +1,8 @@
+.. default-domain:: py
+.. currentmodule:: natsort
+
+:class:`~natsort.ns`
+====================
+
+.. autoclass:: ns
+
diff --git a/natsort/__init__.py b/natsort/__init__.py
index ac8171d..02f8d30 100644
--- a/natsort/__init__.py
+++ b/natsort/__init__.py
@@ -2,9 +2,9 @@
from __future__ import (print_function, division,
unicode_literals, absolute_import)
-from .natsort import (natsort_key, natsort_keygen, natsorted,
+from .natsort import (natsort_key, natsort_keygen, natsorted, humansorted,
index_natsorted, versorted, index_versorted,
- order_by_index)
+ index_humansorted, order_by_index, ns)
from ._version import __version__
__all__ = [
@@ -12,7 +12,10 @@ __all__ = [
'natsort_keygen',
'natsorted',
'versorted'
+ 'humansorted',
'index_natsorted',
'index_versorted',
+ 'index_humansorted',
'order_by_index',
+ 'ns',
]
diff --git a/natsort/__main__.py b/natsort/__main__.py
index af8ef63..f55c3fe 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -4,7 +4,7 @@ from __future__ import (print_function, division,
import sys
-from .natsort import natsorted, regex_and_num_function_chooser
+from .natsort import natsorted, _regex_and_num_function_chooser, ns
from ._version import __version__
from .py23compat import py23_str
@@ -63,6 +63,11 @@ def main():
'would be considered as 1, "e", and 4, not as 10000. This only '
'effects the --number-type=float.')
parser.add_argument(
+ '--locale', '-l', action='store_true', default=False,
+ help='Causes natsort to use locale-aware sorting. On some systems, '
+ 'the underlying C library is broken, so if you get results that '
+ 'you do not expect please install PyICU and try again.')
+ parser.add_argument(
'entries', nargs='*', default=sys.stdin,
help='The entries to sort. Taken from stdin if nothing is given on '
'the command line.', )
@@ -135,23 +140,29 @@ def sort_and_print_entries(entries, args):
"""Sort the entries, applying the filters first if necessary."""
# Extract the proper number type.
- kwargs = {'number_type': {'digit': None,
- 'version': None,
- 'ver': None,
- 'int': int,
- 'float': float}[args.number_type],
- 'signed': args.signed,
- 'exp': args.exp,
- 'as_path': args.paths,
- 'reverse': args.reverse, }
+ num_type = {'digit': None,
+ 'version': None,
+ 'ver': None,
+ 'int': int,
+ 'float': float}[args.number_type]
+ unsigned = not args.signed or num_type is None
+ alg = (ns.INT * int(num_type in (int, None)) |
+ ns.UNSIGNED * unsigned |
+ ns.NOEXP * (not args.exp) |
+ ns.PATH * args.paths |
+ ns.LOCALE * args.locale)
# Pre-remove entries that don't pass the filtering criteria
# Make sure we use the same searching algorithm for filtering
# as for sorting.
do_filter = args.filter is not None or args.reverse_filter is not None
if do_filter or args.exclude:
- inp_options = (kwargs['number_type'], args.signed, args.exp)
- regex, num_function = regex_and_num_function_chooser[inp_options]
+ inp_options = (ns.INT * int(num_type in (int, None)) |
+ ns.UNSIGNED * unsigned |
+ ns.NOEXP * (not args.exp),
+ '.'
+ )
+ regex, num_function = _regex_and_num_function_chooser[inp_options]
if args.filter is not None:
lows, highs = ([f[0] for f in args.filter],
[f[1] for f in args.filter])
@@ -171,7 +182,7 @@ def sort_and_print_entries(entries, args):
num_function, regex)]
# Print off the sorted results
- for entry in natsorted(entries, **kwargs):
+ for entry in natsorted(entries, reverse=args.reverse, alg=alg):
print(entry)
diff --git a/natsort/_version.py b/natsort/_version.py
index d364806..b490955 100644
--- a/natsort/_version.py
+++ b/natsort/_version.py
@@ -2,4 +2,4 @@
from __future__ import (print_function, division,
unicode_literals, absolute_import)
-__version__ = '3.4.1'
+__version__ = '3.5.0'
diff --git a/natsort/locale_help.py b/natsort/locale_help.py
new file mode 100644
index 0000000..748a7cb
--- /dev/null
+++ b/natsort/locale_help.py
@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+"""\
+This module is intended to help combine some locale functions
+together for natsort consumption. It also accounts for Python2
+and Python3 differences.
+"""
+from __future__ import (print_function, division,
+ unicode_literals, absolute_import)
+
+import sys
+from itertools import chain
+from locale import localeconv
+
+from .py23compat import py23_zip
+
+# We need cmp_to_key for Python2 because strxfrm is broken for unicode.
+if sys.version[:3] == '2.7':
+ from functools import cmp_to_key
+# cmp_to_key was not created till 2.7.
+elif sys.version[:3] == '2.6':
+ def cmp_to_key(mycmp):
+ """Convert a cmp= function into a key= function"""
+ class K(object):
+ __slots__ = ['obj']
+
+ def __init__(self, obj):
+ self.obj = obj
+
+ def __lt__(self, other):
+ return mycmp(self.obj, other.obj) < 0
+
+ def __gt__(self, other):
+ return mycmp(self.obj, other.obj) > 0
+
+ def __eq__(self, other):
+ return mycmp(self.obj, other.obj) == 0
+
+ def __le__(self, other):
+ return mycmp(self.obj, other.obj) <= 0
+
+ def __ge__(self, other):
+ return mycmp(self.obj, other.obj) >= 0
+
+ def __ne__(self, other):
+ return mycmp(self.obj, other.obj) != 0
+
+ def __hash__(self):
+ raise TypeError('hash not implemented')
+
+ return K
+
+# Make the strxfrm function from strcoll on Python2
+# It can be buggy, so prefer PyICU if available.
+try:
+ import PyICU
+ from locale import getlocale
+
+ # If using PyICU, get the locale from the current global locale,
+ # then create a sort key from that
+ def get_pyicu_transform(l, _d={}):
+ if l not in _d:
+ if l == (None, None):
+ c = PyICU.Collator.createInstance(PyICU.Locale())
+ else:
+ loc = '.'.join(l)
+ c = PyICU.Collator.createInstance(PyICU.Locale(loc))
+ _d[l] = c.getSortKey
+ return _d[l]
+ use_pyicu = True
+except ImportError:
+ if sys.version[0] == '2':
+ from locale import strcoll
+ strxfrm = cmp_to_key(strcoll)
+ else:
+ from locale import strxfrm
+ use_pyicu = False
+
+# Convenience functions.
+lowercase = lambda x: x.lower()
+swapcase = lambda x: x.swapcase()
+
+# This little lambda doubles all characters, making letters lowercase.
+groupletters = lambda x: ''.join(chain(*py23_zip(lowercase(x), x)))
+
+
+def grouper(val, func):
+ """\
+ Attempt to convert a string to a number. If the conversion
+ was not possible, run it through the letter grouper
+ to make the sorting work as requested.
+ """
+ # Return the number or transformed string.
+ # If the input is identical to the output, then no conversion happened.
+ s = func(val)
+ return groupletters(s) if val is s else s
+
+
+def locale_convert(val, func, group):
+ """\
+ Attempt to convert a string to a number, first converting
+ the decimal place character if needed. Then, if the conversion
+ was not possible, run it through strxfrm to make the sorting
+ as requested, possibly grouping first.
+ """
+
+ # Format the number so that the conversion function can interpret it.
+ radix = localeconv()['decimal_point']
+ s = val.replace(radix, '.') if radix != '.' else val
+
+ # Perform the conversion
+ t = func(s)
+
+ # Return the number or transformed string.
+ # If the input is identical to the output, then no conversion happened.
+ # In this case, we don't want to return the function output because it
+ # may have had characters modified from the above 'replace' call,
+ # so we return the input.
+ if group:
+ if use_pyicu:
+ xfrm = get_pyicu_transform(getlocale())
+ return xfrm(groupletters(val)) if s is t else t
+ else:
+ return strxfrm(groupletters(val)) if s is t else t
+ else:
+ if use_pyicu:
+ xfrm = get_pyicu_transform(getlocale())
+ return xfrm(val) if s is t else t
+ else:
+ return strxfrm(val) if s is t else t
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 5974199..82b84df 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -22,6 +22,7 @@ from operator import itemgetter
from functools import partial
from itertools import islice
from warnings import warn
+from locale import localeconv
# If the user has fastnumbers installed, they will get great speed
# benefits. If not, we simulate the functions here.
@@ -29,53 +30,230 @@ try:
from fastnumbers import fast_float, fast_int, isreal
except ImportError:
from .fake_fastnumbers import fast_float, fast_int, isreal
-
+from .locale_help import locale_convert, grouper, lowercase, swapcase
from .py23compat import u_format, py23_str, py23_zip
# Make sure the doctest works for either python2 or python3
__doc__ = u_format(__doc__)
+
+class ns(object):
+ """
+ Enum to control the `natsort` algorithm.
+
+ This class acts like an enum to control the `natsort` algorithm. The
+ user may select several options simultaneously by or'ing the options
+ together. For example, to choose ``ns.INT``, ``ns.PATH``, and
+ ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``.
+
+ Each option has a shortened 1- or 2-letter form.
+
+ .. warning:: On some systems, the underlying C library that
+ Python's locale module uses is broken. On these
+ systems it is recommended that you install
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ if you wish to use `LOCALE`.
+ Please validate that `LOCALE` works as
+ expected on your target system, and if not you
+ should add
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ as a dependency.
+
+ Attributes
+ ----------
+ FLOAT, F
+ The default - parse numbers as floats.
+ INT, I
+ Tell `natsort` to parse numbers as ints.
+ UNSIGNED, U
+ Tell `natsort` to ignore any sign (i.e. "-" or "+") to the
+ immediate left of a number. It is the same as setting the old
+ `signed` option to `False`.
+ VERSION, V
+ This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful
+ when attempting to sort version numbers. It is the same as
+ setting the old `number_type` option to `None`.
+ DIGIT, D
+ Same as `VERSION` above.
+ NOEXP, N
+ Tell `natsort` to not search for exponents as part of the number.
+ For example, with `NOEXP` the number "5.6E5" would be interpreted
+ as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp`
+ option to `False`.
+ PATH, P
+ Tell `natsort` to interpret strings as filesystem paths, so they
+ will be split according to the filesystem separator
+ (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
+ file extension, if any. Without this, lists of file paths like
+ ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted
+ properly; 'Folder/' will be placed at the end, not at the front.
+ It is the same as setting the old `as_path` option to `True`.
+ LOCALE, L
+ Tell `natsort` to be locale-aware when sorting strings (everything
+ that was not converted to a number). Your sorting results will vary
+ depending on your current locale. Generally, the `GROUPLETTERS`
+ option is not needed with `LOCALE` because the `locale` library
+ groups the letters in the same manner (although you may still
+ need `GROUPLETTERS` if there are numbers in your strings).
+ IGNORECASE, IC
+ Tell `natsort` to ignore case when sorting. For example,
+ ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
+ ``['apple', 'Apple', 'Banana', 'banana']``.
+ LOWERCASEFIRST, LF
+ Tell `natsort` to put lowercase letters before uppercase letters
+ when sorting. For example,
+ ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
+ ``['apple', 'banana', 'Apple', 'Banana']`` (the default order
+ would be ``['Apple', 'Banana', 'apple', 'banana']`` which is
+ the order from a purely ordinal sort).
+ Useless when used with `IGNORECASE`.
+ GROUPLETTERS, G
+ Tell `natsort` to group lowercase and uppercase letters together
+ when sorting. For example,
+ ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
+ ``['Apple', 'apple', 'Banana', 'banana']``.
+ Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST`
+ to reverse the order of upper and lower case.
+ TYPESAFE, T
+ Try hard to avoid "unorderable types" error on Python 3. It
+ is the same as setting the old `py3_safe` option to `True`.
+
+ Notes
+ -----
+ If using `LOCALE`, your results may not be what you expect unless
+ you explicitly set the locale; whether this is necessary depends on
+ the system you are on. Setting the locale is straightforward (the
+ example below uses 'en_US.UTF-8', but you should use your own
+ locale)::
+
+ >>> import locale
+ >>> # The 'str' call is only to get around a bug on Python 2.x
+ >>> # where 'setlocale' does not expect unicode strings (ironic,
+ >>> # right?)
+ >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ 'en_US.UTF-8'
+
+ It is preferred that you do this before importing `natsort`.
+ If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
+ above) then you should not need to do this.
+
+ """
+ pass
+
+
+# Sort algorithm "enum" values.
+_nsdict = {'FLOAT': 0, 'F': 0,
+ 'INT': 1, 'I': 1,
+ 'UNSIGNED': 2, 'U': 2,
+ 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED
+ 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED
+ 'NOEXP': 4, 'N': 4,
+ 'PATH': 8, 'P': 8,
+ 'LOCALE': 16, 'L': 16,
+ 'IGNORECASE': 32, 'IC': 32,
+ 'LOWERCASEFIRST': 64, 'LF': 64,
+ 'GROUPLETTERS': 128, 'G': 128,
+ 'TYPESAFE': 1024, 'T': 1024,
+ }
+# Populate the ns class with the _nsdict values.
+for x, y in _nsdict.items():
+ setattr(ns, x, y)
+
+# Group algorithm types for easy extraction
+_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP
+_CASE_ALGORITHMS = ns.IGNORECASE | ns.LOWERCASEFIRST | ns.GROUPLETTERS
+_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L |
+ ns.IC | ns.LF | ns.G | ns.TYPESAFE)
+
# The regex that locates floats
-float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)')
-float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)')
-float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)')
-float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)')
+_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
+_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
+_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U)
+_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U)
+_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
+_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
+_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U)
+_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U)
+
# Integer regexes
-int_nosign_re = re.compile(r'(\d+)')
-int_sign_re = re.compile(r'([-+]?\d+)')
+_int_nosign_re = re.compile(r'(\d+)', re.U)
+_int_sign_re = re.compile(r'([-+]?\d+)', re.U)
+
# This dict will help select the correct regex and number conversion function.
-regex_and_num_function_chooser = {
- (float, True, True): (float_sign_exp_re, fast_float),
- (float, True, False): (float_sign_noexp_re, fast_float),
- (float, False, True): (float_nosign_exp_re, fast_float),
- (float, False, False): (float_nosign_noexp_re, fast_float),
- (int, True, True): (int_sign_re, fast_int),
- (int, True, False): (int_sign_re, fast_int),
- (int, False, True): (int_nosign_re, fast_int),
- (int, False, False): (int_nosign_re, fast_int),
- (None, True, True): (int_nosign_re, fast_int),
- (None, True, False): (int_nosign_re, fast_int),
- (None, False, True): (int_nosign_re, fast_int),
- (None, False, False): (int_nosign_re, fast_int),
+_regex_and_num_function_chooser = {
+ (ns.F, '.'): (_float_sign_exp_re, fast_float),
+ (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float),
+ (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float),
+ (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float),
+ (ns.I, '.'): (_int_sign_re, fast_int),
+ (ns.I | ns.N, '.'): (_int_sign_re, fast_int),
+ (ns.I | ns.U, '.'): (_int_nosign_re, fast_int),
+ (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int),
+ (ns.F, ','): (_float_sign_exp_re_c, fast_float),
+ (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float),
+ (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float),
+ (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float),
+ (ns.I, ','): (_int_sign_re, fast_int),
+ (ns.I | ns.N, ','): (_int_sign_re, fast_int),
+ (ns.I | ns.U, ','): (_int_nosign_re, fast_int),
+ (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int),
}
-def _number_finder(s, regex, numconv, py3_safe):
- """Helper to split numbers"""
-
- # Split the input string by numbers. If there are no splits, return now.
+def _args_to_enum(number_type, signed, exp, as_path, py3_safe):
+ """A function to convert input booleans to an enum-type argument."""
+ alg = 0
+ if number_type is not float:
+ msg = "The 'number_type' argument is depreciated as of 3.5.0, "
+ msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'"
+ warn(msg, DeprecationWarning)
+ alg |= (_nsdict['INT'] * bool(number_type in (int, None)))
+ alg |= (_nsdict['UNSIGNED'] * (number_type is None))
+ if signed is not None:
+ msg = "The 'signed' argument is depreciated as of 3.5.0, "
+ msg += "please use 'alg=ns.UNSIGNED'."
+ warn(msg, DeprecationWarning)
+ alg |= (_nsdict['UNSIGNED'] * (not signed))
+ if exp is not None:
+ msg = "The 'exp' argument is depreciated as of 3.5.0, "
+ msg += "please use 'alg=ns.NOEXP'."
+ warn(msg, DeprecationWarning)
+ alg |= (_nsdict['NOEXP'] * (not exp))
+ if as_path is not None:
+ msg = "The 'as_path' argument is depreciated as of 3.5.0, "
+ msg += "please use 'alg=ns.PATH'."
+ warn(msg, DeprecationWarning)
+ alg |= (_nsdict['PATH'] * as_path)
+ if py3_safe is not None:
+ msg = "The 'py3_safe' argument is depreciated as of 3.5.0, "
+ msg += "please use 'alg=ns.TYPESAFE'."
+ warn(msg, DeprecationWarning)
+ alg |= (_nsdict['TYPESAFE'] * py3_safe)
+ return alg
+
+
+def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters):
+ """Helper to parse the string input into numbers and strings."""
+
+ # Split the input string by numbers.
# If the input is not a string, TypeError is raised.
s = regex.split(s)
- if len(s) == 1:
- return tuple(s)
# Now convert the numbers to numbers, and leave strings as strings.
+ # Take into account locale if needed, and group letters if needed.
# Remove empty strings from the list.
- s = [numconv(x) for x in s if x]
+ if use_locale:
+ s = [locale_convert(x, numconv, group_letters) for x in s if x]
+ elif group_letters:
+ s = [grouper(x, numconv) for x in s if x]
+ else:
+ s = [numconv(x) for x in s if x]
# If the list begins with a number, lead with an empty string.
# This is used to get around the "unorderable types" issue.
- if isreal(s[0]):
+ if not s: # Return empty tuple for empty results.
+ return ()
+ elif isreal(s[0]):
s = [''] + s
# The _py3_safe function inserts "" between numbers in the list,
@@ -145,8 +323,7 @@ def _py3_safe(parsed_list):
return new_list
-def _natsort_key(val, key=None, number_type=float, signed=True, exp=True,
- as_path=False, py3_safe=False):
+def _natsort_key(val, key, alg):
"""\
Key to sort strings and numbers naturally.
@@ -157,12 +334,8 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True,
Parameters
----------
val : {str, unicode}
- key : callable, optional
- number_type : {None, float, int}, optional
- signed : {True, False}, optional
- exp : {True, False}, optional
- as_path : {True, False}, optional
- py3_safe : {True, False}, optional
+ key : callable
+ alg : ns enum
Returns
-------
@@ -172,20 +345,24 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True,
"""
# Convert the arguments to the proper input tuple
- inp_options = (number_type, signed, exp)
try:
- regex, num_function = regex_and_num_function_chooser[inp_options]
- except KeyError:
- # Report errors properly
- if number_type not in (float, int) and number_type is not None:
- raise ValueError("_natsort_key: 'number_type' parameter "
- "'{0}' invalid".format(py23_str(number_type)))
- elif signed not in (True, False):
- raise ValueError("_natsort_key: 'signed' parameter "
- "'{0}' invalid".format(py23_str(signed)))
- elif exp not in (True, False):
- raise ValueError("_natsort_key: 'exp' parameter "
- "'{0}' invalid".format(py23_str(exp)))
+ use_locale = alg & _nsdict['LOCALE']
+ inp_options = (alg & _NUMBER_ALGORITHMS,
+ localeconv()['decimal_point'] if use_locale else '.')
+ except TypeError:
+ msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
+ raise ValueError(msg+', got {0}'.format(py23_str(alg)))
+
+ # Get the proper regex and conversion function.
+ try:
+ regex, num_function = _regex_and_num_function_chooser[inp_options]
+ except KeyError: # pragma: no cover
+ if inp_options[1] not in ('.', ','): # pragma: no cover
+ raise ValueError("_natsort_key: currently natsort only supports "
+ "the decimal separators '.' and ','. "
+ "Please file a bug report.")
+ else:
+ raise
else:
# Apply key if needed.
if key is not None:
@@ -194,41 +371,47 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True,
# If this is a path, convert it.
# An AttrubuteError is raised if not a string.
split_as_path = False
- if as_path:
+ if alg & _nsdict['PATH']:
try:
val = _path_splitter(val)
except AttributeError:
pass
else:
# Record that this string was split as a path so that
- # we can set as_path to False in the recursive call.
+ # we don't set PATH in the recursive call.
split_as_path = True
# Assume the input are strings, which is the most common case.
+ # Apply the string modification if needed.
try:
- return tuple(_number_finder(val, regex, num_function, py3_safe))
+ if alg & _nsdict['LOWERCASEFIRST']:
+ val = swapcase(val)
+ if alg & _nsdict['IGNORECASE']:
+ val = lowercase(val)
+ return tuple(_input_parser(val,
+ regex,
+ num_function,
+ alg & _nsdict['TYPESAFE'],
+ use_locale,
+ alg & _nsdict['GROUPLETTERS']))
except TypeError:
# If not strings, assume it is an iterable that must
# be parsed recursively. Do not apply the key recursively.
- # If this string was split as a path, set as_path to False.
+ # If this string was split as a path, turn off 'PATH'.
try:
- return tuple([_natsort_key(x, None, number_type, signed,
- exp, as_path and not split_as_path,
- py3_safe) for x in val])
+ was_path = alg & _nsdict['PATH']
+ newalg = alg & _ALL_BUT_PATH
+ newalg |= (was_path * (not split_as_path))
+ return tuple([_natsort_key(x, None, newalg) for x in val])
# If there is still an error, it must be a number.
# Return as-is, with a leading empty string.
- # Waiting for two raised errors instead of calling
- # isinstance at the opening of the function is slower
- # for numbers but much faster for strings, and since
- # numbers are not a common input to natsort this is
- # an acceptable sacrifice.
except TypeError:
- return (('', val,),) if as_path else ('', val,)
+ return (('', val,),) if alg & _nsdict['PATH'] else ('', val,)
@u_format
-def natsort_key(val, key=None, number_type=float, signed=True, exp=True,
- as_path=False, py3_safe=False):
+def natsort_key(val, key=None, number_type=float, signed=None, exp=None,
+ as_path=None, py3_safe=None, alg=0):
"""\
Key to sort strings and numbers naturally.
@@ -257,39 +440,39 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True,
It should accept a single argument and return a single value.
number_type : {{None, float, int}}, optional
- The types of number to sort on: `float` searches for floating
- point numbers, `int` searches for integers, and `None` searches
- for digits (like integers but does not take into account
- negative sign). `None` is a shortcut for `number_type = int`
- and `signed = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
signed : {{True, False}}, optional
- By default a '+' or '-' before a number is taken to be the sign
- of the number. If `signed` is `False`, any '+' or '-' will not
- be considered to be part of the number, but as part part of the
- string.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
exp : {{True, False}}, optional
- This option only applies to `number_type = float`. If
- `exp = True`, a string like "3.5e5" will be interpreted as
- 350000, i.e. the exponential part is considered to be part of
- the number. If `exp = False`, "3.5e5" is interpreted as
- ``(3.5, "e", 5)``. The default behavior is `exp = True`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
- file extension, if any. Without this, lists of file paths like
- ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
py3_safe : {{True, False}}, optional
- This will make the string parsing algorithm be more careful by
- placing an empty string between two adjacent numbers after the
- parsing algorithm. This will prevent the "unorderable types"
- error.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -348,12 +531,13 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True,
"""
msg = "natsort_key is depreciated as of 3.4.0, please use natsort_keygen"
warn(msg, DeprecationWarning)
- return _natsort_key(val, key, number_type, signed, exp, as_path, py3_safe)
+ alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg
+ return _natsort_key(val, key, alg)
@u_format
-def natsort_keygen(key=None, number_type=float, signed=True, exp=True,
- as_path=False, py3_safe=False):
+def natsort_keygen(key=None, number_type=float, signed=None, exp=None,
+ as_path=None, py3_safe=None, alg=0):
"""\
Generate a key to sort strings and numbers naturally.
@@ -373,39 +557,39 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True,
It should accept a single argument and return a single value.
number_type : {{None, float, int}}, optional
- The types of number to sort on: `float` searches for floating
- point numbers, `int` searches for integers, and `None` searches
- for digits (like integers but does not take into account
- negative sign). `None` is a shortcut for `number_type = int`
- and `signed = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
signed : {{True, False}}, optional
- By default a '+' or '-' before a number is taken to be the sign
- of the number. If `signed` is `False`, any '+' or '-' will not
- be considered to be part of the number, but as part part of the
- string.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
exp : {{True, False}}, optional
- This option only applies to `number_type = float`. If
- `exp = True`, a string like "3.5e5" will be interpreted as
- 350000, i.e. the exponential part is considered to be part of
- the number. If `exp = False`, "3.5e5" is interpreted as
- ``(3.5, "e", 5)``. The default behavior is `exp = True`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. `/` on UNIX, `\\\\` on Windows), as well as splitting on the
- file extension, if any. Without this, lists with file paths like
- ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
py3_safe : {{True, False}}, optional
- This will make the string parsing algorithm be more careful by
- placing an empty string between two adjacent numbers after the
- parsing algorithm. This will prevent the "unorderable types"
- error.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -440,18 +624,13 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True,
True
"""
- return partial(_natsort_key,
- key=key,
- number_type=number_type,
- signed=signed,
- exp=exp,
- as_path=as_path,
- py3_safe=py3_safe)
+ alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg
+ return partial(_natsort_key, key=key, alg=alg)
@u_format
-def natsorted(seq, key=None, number_type=float, signed=True, exp=True,
- reverse=False, as_path=False):
+def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
+ reverse=False, as_path=None, alg=0):
"""\
Sorts a sequence naturally.
@@ -470,37 +649,37 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True,
It should accept a single argument and return a single value.
number_type : {{None, float, int}}, optional
- The types of number to sort on: `float` searches for floating
- point numbers, `int` searches for integers, and `None` searches
- for digits (like integers but does not take into account
- negative sign). `None` is a shortcut for `number_type = int`
- and `signed = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
signed : {{True, False}}, optional
- By default a '+' or '-' before a number is taken to be the sign
- of the number. If `signed` is `False`, any '+' or '-' will not
- be considered to be part of the number, but as part part of the
- string.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
exp : {{True, False}}, optional
- This option only applies to `number_type = float`. If
- `exp = True`, a string like "3.5e5" will be interpreted as
- 350000, i.e. the exponential part is considered to be part of
- the number. If `exp = False`, "3.5e5" is interpreted as
- ``(3.5, "e", 5)``. The default behavior is `exp = True`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
reverse : {{True, False}}, optional
Return the list in reversed sorted order. The default is
`False`.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
- file extension, if any. Without this, lists of file paths like
- ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -522,30 +701,29 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True,
[{u}'num2', {u}'num3', {u}'num5']
"""
+ alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg
try:
return sorted(seq, reverse=reverse,
- key=natsort_keygen(key, number_type,
- signed, exp, as_path))
- except TypeError as e:
+ key=natsort_keygen(key, alg=alg))
+ except TypeError as e: # pragma: no cover
# In the event of an unresolved "unorderable types" error
# attempt to sort again, being careful to prevent this error.
if 'unorderable types' in str(e):
return sorted(seq, reverse=reverse,
- key=natsort_keygen(key, number_type,
- signed, exp, as_path,
- True))
+ key=natsort_keygen(key,
+ alg=alg | _nsdict['TYPESAFE']))
else:
# Re-raise if the problem was not "unorderable types"
raise
@u_format
-def versorted(seq, key=None, reverse=False, as_path=False):
+def versorted(seq, key=None, reverse=False, as_path=None, alg=0):
"""\
Convenience function to sort version numbers.
Convenience function to sort version numbers. This is a wrapper
- around ``natsorted(seq, number_type=None)``.
+ around ``natsorted(seq, alg=ns.VERSION)``.
Parameters
----------
@@ -562,13 +740,15 @@ def versorted(seq, key=None, reverse=False, as_path=False):
`False`.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
- file extension, if any. Without this, lists of file paths like
- ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -588,12 +768,93 @@ def versorted(seq, key=None, reverse=False, as_path=False):
[{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2']
"""
- return natsorted(seq, key, None, reverse=reverse, as_path=as_path)
+ alg = _args_to_enum(float, None, None, as_path, None) | alg
+ return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION)
+
+
+@u_format
+def humansorted(seq, key=None, reverse=False, alg=0):
+ """\
+ Convenience function to properly sort non-numeric characters.
+
+ Convenience function to properly sort non-numeric characters
+ in a locale-aware fashion (a.k.a "human sorting"). This is a
+ wrapper around ``natsorted(seq, alg=ns.LOCALE)``.
+
+ .. warning:: On some systems, the underlying C library that
+ Python's locale module uses is broken. On these
+ systems it is recommended that you install
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_.
+ Please validate that this function works as
+ expected on your target system, and if not you
+ should add
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ as a dependency.
+
+ Parameters
+ ----------
+ seq : iterable
+ The sequence to sort.
+
+ key : callable, optional
+ A key used to determine how to sort each element of the sequence.
+ It is **not** applied recursively.
+ It should accept a single argument and return a single value.
+
+ reverse : {{True, False}}, optional
+ Return the list in reversed sorted order. The default is
+ `False`.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
+
+ Returns
+ -------
+ out : list
+ The sorted sequence.
+
+ See Also
+ --------
+ index_humansorted : Returns the sorted indexes from `humansorted`.
+
+ Notes
+ -----
+ Your results may not be what you expect unless you explicitly set
+ the locale; whether this is necessary depends on the system you
+ are on. Setting the locale is straightforward (the example below
+ uses 'en_US.UTF-8', but you should use your own
+ locale)::
+
+ >>> import locale
+ >>> # The 'str' call is only to get around a bug on Python 2.x
+ >>> # where 'setlocale' does not expect unicode strings (ironic,
+ >>> # right?)
+ >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ 'en_US.UTF-8'
+
+ It is preferred that you do this before importing `natsort`.
+ If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
+ above) then you should not need to do this.
+
+ Examples
+ --------
+ Use `humansorted` just like the builtin `sorted`::
+
+ >>> a = ['Apple', 'Banana', 'apple', 'banana']
+ >>> natsorted(a)
+ [{u}'Apple', {u}'Banana', {u}'apple', {u}'banana']
+ >>> humansorted(a)
+ [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana']
+
+ """
+ return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE)
@u_format
-def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True,
- reverse=False, as_path=False):
+def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None,
+ reverse=False, as_path=None, alg=0):
"""\
Return the list of the indexes used to sort the input sequence.
@@ -613,37 +874,37 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True,
It should accept a single argument and return a single value.
number_type : {{None, float, int}}, optional
- The types of number to sort on: `float` searches for floating
- point numbers, `int` searches for integers, and `None` searches
- for digits (like integers but does not take into account
- negative sign). `None` is a shortcut for `number_type = int`
- and `signed = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
signed : {{True, False}}, optional
- By default a '+' or '-' before a number is taken to be the sign
- of the number. If `signed` is `False`, any '+' or '-' will not
- be considered to be part of the number, but as part part of the
- string.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
exp : {{True, False}}, optional
- This option only applies to `number_type = float`. If
- `exp = True`, a string like "3.5e5" will be interpreted as
- 350000, i.e. the exponential part is considered to be part of
- the number. If `exp = False`, "3.5e5" is interpreted as
- ``(3.5, "e", 5)``. The default behavior is `exp = True`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
reverse : {{True, False}}, optional
Return the list in reversed sorted order. The default is
`False`.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
- file extension, if any. Without this, lists of file paths like
- ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details into these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -673,6 +934,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True,
[{u}'baz', {u}'foo', {u}'bar']
"""
+ alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg
if key is None:
newkey = itemgetter(1)
else:
@@ -681,16 +943,14 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True,
index_seq_pair = [[x, y] for x, y in enumerate(seq)]
try:
index_seq_pair.sort(reverse=reverse,
- key=natsort_keygen(newkey, number_type,
- signed, exp, as_path))
- except TypeError as e:
+ key=natsort_keygen(newkey, alg=alg))
+ except TypeError as e: # pragma: no cover
# In the event of an unresolved "unorderable types" error
# attempt to sort again, being careful to prevent this error.
if 'unorderable types' in str(e):
index_seq_pair.sort(reverse=reverse,
- key=natsort_keygen(newkey, number_type,
- signed, exp, as_path,
- True))
+ key=natsort_keygen(newkey,
+ alg=alg | ns.TYPESAFE))
else:
# Re-raise if the problem was not "unorderable types"
raise
@@ -698,12 +958,12 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True,
@u_format
-def index_versorted(seq, key=None, reverse=False, as_path=False):
+def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0):
"""\
Return the list of the indexes used to sort the input sequence
of version numbers.
- Sorts a sequence naturally, but returns a list of sorted the
+ Sorts a sequence of versions, but returns a list of the sorted
indexes and not the sorted list. This list of indexes can be
used to sort multiple lists by the sorted order of the given
sequence.
@@ -725,13 +985,15 @@ def index_versorted(seq, key=None, reverse=False, as_path=False):
`False`.
as_path : {{True, False}}, optional
- This option will force strings to be interpreted as filesystem
- paths, so they will be split according to the filesystem separator
- (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the
- file extension, if any. Without this, lists of file paths like
- ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted
- properly; ``'Folder'`` will be placed at the end, not at the front.
- The default behavior is `as_path = False`.
+ Deprecated as of version 3.5.0 and will become an undocumented
+ keyword-only argument in 4.0.0. Please use the `alg` argument
+ for all future development. See :class:`ns` class documentation for
+ details.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details on these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
Returns
-------
@@ -752,7 +1014,81 @@ def index_versorted(seq, key=None, reverse=False, as_path=False):
[1, 2, 0]
"""
- return index_natsorted(seq, key, None, reverse=reverse, as_path=as_path)
+ alg = _args_to_enum(float, None, None, as_path, None) | alg
+ return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION)
+
+
+@u_format
+def index_humansorted(seq, key=None, reverse=False, alg=0):
+ """\
+ Return the list of the indexes used to sort the input sequence
+ in a locale-aware manner.
+
+ Sorts a sequence in a locale-aware manner, but returns a list
+ of the sorted indexes and not the sorted list. This list of
+ indexes can be used to sort multiple lists by the sorted order
+ of the given sequence.
+
+ This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``.
+
+ Parameters
+ ----------
+ seq: iterable
+ The sequence to sort.
+
+ key: callable, optional
+ A key used to determine how to sort each element of the sequence.
+ It is **not** applied recursively.
+ It should accept a single argument and return a single value.
+
+ reverse : {{True, False}}, optional
+ Return the list in reversed sorted order. The default is
+ `False`.
+
+ alg : ns enum, optional
+ This option is used to control which algorithm `natsort`
+ uses when sorting. For details on these options, please see
+ the :class:`ns` class documentation. The default is `ns.FLOAT`.
+
+ Returns
+ -------
+ out : list
+ The ordered indexes of the sequence.
+
+ See Also
+ --------
+ humansorted
+ order_by_index
+
+ Notes
+ -----
+ If you do not explicitly set the locale, your results may not be
+ what you expect; I have found that this depends on the system you
+ are on. Setting the locale is straightforward (in the example
+ below I use 'en_US.UTF-8', but you should use your own
+ locale)::
+
+ >>> import locale
+ >>> # The 'str' call is only to get around a bug on Python 2.x
+ >>> # where 'setlocale' does not expect unicode strings (ironic,
+ >>> # right?)
+ >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ 'en_US.UTF-8'
+
+ It is preferred that you do this before importing `natsort`.
+ If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
+ above) then you should not need to do this.
+
+ Examples
+ --------
+ Use `index_humansorted` just like the builtin `sorted`::
+
+ >>> a = ['Apple', 'Banana', 'apple', 'banana']
+ >>> index_humansorted(a)
+ [2, 0, 3, 1]
+
+ """
+ return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE)
@u_format
diff --git a/setup.cfg b/setup.cfg
index a5b0345..fe8cd93 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -9,6 +9,8 @@ flakes-ignore =
natsort/py23compat.py UndefinedName
natsort/__init__.py UnusedImport
docs/source/conf.py ALL
+ test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused
+ test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused
pep8ignore =
test_natsort/test_natsort.py E501 E241 E221
diff --git a/setup.py b/setup.py
index 10e52b5..cf42e6c 100644
--- a/setup.py
+++ b/setup.py
@@ -67,7 +67,8 @@ setup(
install_requires=REQUIRES,
packages=['natsort'],
entry_points={'console_scripts': ['natsort = natsort.__main__:main']},
- tests_require=['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'],
+ tests_require=['pytest', 'pytest-pep8',
+ 'pytest-flakes', 'pytest-cov'],
cmdclass={'test': PyTest},
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
diff --git a/test_natsort/test_locale_help.py b/test_natsort/test_locale_help.py
new file mode 100644
index 0000000..c654fdd
--- /dev/null
+++ b/test_natsort/test_locale_help.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""\
+Test the locale help module.
+"""
+import locale
+from natsort.fake_fastnumbers import fast_float
+from natsort.locale_help import grouper, locale_convert, use_pyicu
+
+if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+else:
+ from natsort.locale_help import strxfrm
+
+
+def test_grouper():
+ assert grouper('HELLO', fast_float) == 'hHeElLlLoO'
+ assert grouper('hello', fast_float) == 'hheelllloo'
+ assert grouper('45.8e-2', fast_float) == 45.8e-2
+
+
+def test_locale_convert():
+ locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert locale_convert('45.8', fast_float, False) == 45.8
+ assert locale_convert('45,8', fast_float, False) == strxfrm('45,8')
+ assert locale_convert('hello', fast_float, False) == strxfrm('hello')
+ assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo')
+ assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88')
+
+ locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8')
+ if use_pyicu:
+ strxfrm = get_pyicu_transform(getlocale())
+ assert locale_convert('45.8', fast_float, False) == 45.8
+ assert locale_convert('45,8', fast_float, False) == 45.8
+ assert locale_convert('hello', fast_float, False) == strxfrm('hello')
+ assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo')
+
+ locale.setlocale(locale.LC_NUMERIC, '')
diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py
index 8157c3e..2323d59 100644
--- a/test_natsort/test_main.py
+++ b/test_natsort/test_main.py
@@ -206,6 +206,7 @@ def test_sort_and_print_entries(capsys):
self.signed = True
self.exp = True
self.paths = as_path
+ self.locale = 0
entries = ['tmp/a57/path2',
'tmp/a23/path1',
diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py
index 264b508..afe8662 100644
--- a/test_natsort/test_natsort.py
+++ b/test_natsort/test_natsort.py
@@ -3,13 +3,18 @@
Here are a collection of examples of how this module can be used.
See the README or the natsort homepage for more details.
"""
+from __future__ import unicode_literals
import warnings
+import locale
from operator import itemgetter
from pytest import raises
-from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen, order_by_index
-from natsort.natsort import _number_finder, _py3_safe, _natsort_key
-from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re
-from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re
+from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted
+from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index
+from natsort.natsort import _input_parser, _py3_safe, _natsort_key, _args_to_enum
+from natsort.natsort import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re
+from natsort.natsort import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re
+from natsort.natsort import ns
+from natsort.locale_help import use_pyicu
try:
from fastnumbers import fast_float, fast_int
@@ -17,24 +22,67 @@ except ImportError:
from natsort.fake_fastnumbers import fast_float, fast_int
-def test_number_finder():
-
- assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['a', 5.0, 0.5034]
- assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, False) == ['a', 5.0, '+', 0.5034]
- assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, False) == ['a', 5.0, 5.034, 'e', -1.0]
- assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, False) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
- assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, False) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
- assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, False) == ['a', 5, 5, '.', 34, 'e', -1]
-
- assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['a', 5.0, '', 0.5034]
- assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, True) == ['a', 5.0, '+', 0.5034]
- assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, True) == ['a', 5.0, '', 5.034, 'e', -1.0]
- assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, True) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
- assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, True) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
- assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, True) == ['a', 5, '', 5, '.', 34, 'e', -1]
-
- assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['', 6.0, 'a', 5.0, 0.5034]
- assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['', 6.0, 'a', 5.0, '', 0.5034]
+def test_args_to_enum():
+
+ assert _args_to_enum(float, True, True, False, False) == ns.F
+ assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N
+ assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U
+ assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N
+ assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T
+ assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P
+ assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T
+ assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U
+
+
+def test_input_parser():
+
+ # fttt = (fast_float, True, True, True)
+ # fttf = (fast_float, True, True, False)
+ ftft = (fast_float, True, False, True)
+ ftff = (fast_float, True, False, False)
+ # fftt = (fast_float, False, True, True)
+ # ffft = (fast_float, False, False, True)
+ # fftf = (fast_float, False, True, False)
+ ffff = (fast_float, False, False, False)
+ ittt = (fast_int, True, True, True)
+ ittf = (fast_int, True, True, False)
+ itft = (fast_int, True, False, True)
+ itff = (fast_int, True, False, False)
+ # iftt = (fast_int, False, True, True)
+ # ifft = (fast_int, False, False, True)
+ # iftf = (fast_int, False, True, False)
+ ifff = (fast_int, False, False, False)
+
+ assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034]
+ assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034]
+ assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0]
+ assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
+ assert _input_parser('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
+ assert _input_parser('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1]
+
+ assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034]
+ assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034]
+ assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ftff) == ['a', 5.0, '', 5.034, 'e', -1.0]
+ assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ftff) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
+ assert _input_parser('a5+5.034e-1', _int_nosign_re, *itff) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
+ assert _input_parser('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1]
+
+ assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034]
+ assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034]
+
+ assert _input_parser('A5+5.034E-1', _float_sign_exp_re, *ftft) == ['aA', 5.0, '', 0.5034]
+ assert _input_parser('A5+5.034E-1', _int_nosign_re, *itft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1]
+
+ locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1]
+ assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1]
+ locale.setlocale(locale.LC_NUMERIC, str(''))
def test_py3_safe():
@@ -47,56 +95,74 @@ def test_py3_safe():
def test_natsort_key_private():
- a = ['num3', 'num5', 'num2']
- a.sort(key=_natsort_key)
- assert a == ['num2', 'num3', 'num5']
-
# The below illustrates how the key works, and how the different options affect sorting.
- assert _natsort_key('a-5.034e2') == ('a', -503.4)
- assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=True) == ('a', -503.4)
- assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 2.0)
- assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=True) == ('a-', 503.4)
- assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 2.0)
- assert _natsort_key('a-5.034e2', number_type=int) == ('a', -5, '.', 34, 'e', 2)
- assert _natsort_key('a-5.034e2', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 2)
- assert _natsort_key('a-5.034e2', number_type=None) == _natsort_key('a-5.034e2', number_type=int, signed=False)
- assert _natsort_key('a-5.034e2', key=lambda x: x.upper()) == ('A', -503.4)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.F) == ('a', -503.4)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT) == ('a', -503.4)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.NOEXP) == ('a', -5.034, 'e', 2.0)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED) == ('a-', 503.4)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.INT) == ('a', -5, '.', 34, 'e', 2)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) == _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED)
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.DIGIT) == _natsort_key('a-5.034e2', key=None, alg=ns.VERSION)
+ assert _natsort_key('a-5.034e2', key=lambda x: x.upper(), alg=ns.F) == ('A', -503.4)
# Iterables are parsed recursively so you can sort lists of lists.
- assert _natsort_key(('a1', 'a-5.034e2')) == (('a', 1.0), ('a', -503.4))
- assert _natsort_key(('a1', 'a-5.034e2'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 2))
+ assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.F) == (('a', 1.0), ('a', -503.4))
+ assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2))
# A key is applied before recursion, but not in the recursive calls.
- assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1)) == ('a', -503.4)
+ assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1), alg=ns.F) == ('a', -503.4)
# Strings that lead with a number get an empty string at the front of the tuple.
# This is designed to get around the "unorderable types" issue.
- assert _natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0))
- assert _natsort_key(10) == ('', 10)
+ assert _natsort_key(('15a', '6'), key=None, alg=ns.F) == (('', 15.0, 'a'), ('', 6.0))
+ assert _natsort_key(10, key=None, alg=ns.F) == ('', 10)
# Turn on as_path to split a file path into components
- assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', as_path=True) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
- assert _natsort_key('../Folder (10)/file (2).tar.gz', as_path=True) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
- assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', as_path=True) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
+ assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
+ assert _natsort_key('../Folder (10)/file (2).tar.gz', key=None, alg=ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
+ assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
# It gracefully handles as_path for numeric input by putting an extra tuple around it
# so it will sort against the other as_path results.
- assert _natsort_key(10, as_path=True) == (('', 10),)
+ assert _natsort_key(10, key=None, alg=ns.PATH) == (('', 10),)
# as_path also handles recursion well.
- assert _natsort_key(('/Folder', '/Folder (1)'), as_path=True) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')')))
+ assert _natsort_key(('/Folder', '/Folder (1)'), key=None, alg=ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')')))
# Turn on py3_safe to put a '' between adjacent numbers
- assert _natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0)
+ assert _natsort_key('43h7+3', key=None, alg=ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0)
# Invalid arguments give the correct response
with raises(ValueError) as err:
- _natsort_key('a', number_type='float')
- assert str(err.value) == "_natsort_key: 'number_type' parameter 'float' invalid"
- with raises(ValueError) as err:
- _natsort_key('a', signed='True')
- assert str(err.value) == "_natsort_key: 'signed' parameter 'True' invalid"
- with raises(ValueError) as err:
- _natsort_key('a', exp='False')
- assert str(err.value) == "_natsort_key: 'exp' parameter 'False' invalid"
+ _natsort_key('a', key=None, alg='1')
+ assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1"
+
+ # Changing the sort order of strings
+ assert _natsort_key('Apple56', key=None, alg=ns.F) == ('Apple', 56.0)
+ assert _natsort_key('Apple56', key=None, alg=ns.IGNORECASE) == ('apple', 56.0)
+ assert _natsort_key('Apple56', key=None, alg=ns.LOWERCASEFIRST) == ('aPPLE', 56.0)
+ assert _natsort_key('Apple56', key=None, alg=ns.GROUPLETTERS) == ('aAppppllee', 56.0)
+ assert _natsort_key('Apple56', key=None, alg=ns.G | ns.LF) == ('aapPpPlLeE', 56.0)
+
+ # Locale aware sorting
+ locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5)
+ assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0)
+
+ locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8'))
+ if use_pyicu:
+ strxfrm = get_pyicu_transform(getlocale())
+ assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5)
+ assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5)
+ locale.setlocale(locale.LC_NUMERIC, str(''))
def test_natsort_key_public():
@@ -105,10 +171,11 @@ def test_natsort_key_public():
# But it raises a depreciation warning
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
- assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2')
+ assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F)
assert len(w) == 1
assert "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" in str(w[-1].message)
- assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False)
+ assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N)
+ assert natsort_key('a-5.034e2', alg=ns.F | ns.U | ns.N) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N)
# It is called for each element in a list when sorting
with warnings.catch_warnings(record=True) as w:
@@ -122,25 +189,25 @@ def test_natsort_keygen():
# Creates equivalent natsort keys
a = 'a-5.034e1'
- assert natsort_keygen()(a) == _natsort_key(a)
- assert natsort_keygen(signed=False)(a) == _natsort_key(a, signed=False)
- assert natsort_keygen(exp=False)(a) == _natsort_key(a, exp=False)
- assert natsort_keygen(signed=False, exp=False)(a) == _natsort_key(a, signed=False, exp=False)
- assert natsort_keygen(number_type=int)(a) == _natsort_key(a, number_type=int)
- assert natsort_keygen(number_type=int, signed=False)(a) == _natsort_key(a, number_type=int, signed=False)
- assert natsort_keygen(number_type=None)(a) == _natsort_key(a, number_type=None)
- assert natsort_keygen(as_path=True)(a) == _natsort_key(a, as_path=True)
+ assert natsort_keygen()(a) == _natsort_key(a, key=None, alg=ns.F)
+ assert natsort_keygen(alg=ns.UNSIGNED)(a) == _natsort_key(a, key=None, alg=ns.U)
+ assert natsort_keygen(alg=ns.NOEXP)(a) == _natsort_key(a, key=None, alg=ns.N)
+ assert natsort_keygen(alg=ns.U | ns.N)(a) == _natsort_key(a, key=None, alg=ns.U | ns.N)
+ assert natsort_keygen(alg=ns.INT)(a) == _natsort_key(a, key=None, alg=ns.INT)
+ assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, key=None, alg=ns.I | ns.U)
+ assert natsort_keygen(alg=ns.VERSION)(a) == _natsort_key(a, key=None, alg=ns.V)
+ assert natsort_keygen(alg=ns.PATH)(a) == _natsort_key(a, key=None, alg=ns.PATH)
# Custom keys are more straightforward with keygen
f1 = natsort_keygen(key=lambda x: x.upper())
- f2 = lambda x: _natsort_key(x, key=lambda y: y.upper())
+ f2 = lambda x: _natsort_key(x, key=lambda y: y.upper(), alg=ns.F)
assert f1(a) == f2(a)
# It also makes sorting lists in-place easier (no lambdas!)
a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
b = a[:]
- a.sort(key=natsort_keygen(number_type=int))
- assert a == natsorted(b, number_type=int)
+ a.sort(key=natsort_keygen(alg=ns.I))
+ assert a == natsorted(b, alg=ns.I)
def test_natsorted():
@@ -151,20 +218,20 @@ def test_natsorted():
# Number types
a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
- assert natsorted(a, number_type=float, exp=False) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.']
- assert natsorted(a, number_type=int) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
- assert natsorted(a, number_type=None) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
+ assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
+ assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.']
+ assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
+ assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
# Signed option
a = ['a-5', 'a7', 'a+2']
- assert natsorted(a) == ['a-5', 'a+2', 'a7']
- assert natsorted(a, signed=False) == ['a7', 'a+2', 'a-5']
+ assert natsorted(a) == ['a-5', 'a+2', 'a7']
+ assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5']
# Number type == None
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b']
- assert natsorted(a, number_type=None) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
+ assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b']
+ assert natsorted(a, alg=ns.DIGIT) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
# You can mix types with natsorted. This can get around the new
# 'unorderable types' issue with Python 3.
@@ -203,20 +270,38 @@ def test_natsorted():
'/p/Folder (1)/file.tar.gz',
'/p/Folder (10)/file.tar.gz',
'/p/Folder/file.tar.gz']
- assert natsorted(a, as_path=True) == ['/p/Folder/file.tar.gz',
- '/p/Folder (1)/file.tar.gz',
- '/p/Folder (1)/file (1).tar.gz',
- '/p/Folder (10)/file.tar.gz']
+ assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz',
+ '/p/Folder (1)/file.tar.gz',
+ '/p/Folder (1)/file (1).tar.gz',
+ '/p/Folder (10)/file.tar.gz']
# You can sort paths and numbers, not that you'd want to
a = ['/Folder (9)/file.exe', 43]
- assert natsorted(a, as_path=True) == [43, '/Folder (9)/file.exe']
+ assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe']
+
+ # You can modify how case is interpreted in your sorting.
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
+ assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
+ assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
+ assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
+ assert natsorted(a, alg=ns.G | ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+
+ # You can also do locale-aware sorting
+ locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+ a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
+ assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c']
+
+ locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8'))
+ assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
+ locale.setlocale(locale.LC_ALL, str(''))
def test_versorted():
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert versorted(a) == natsorted(a, number_type=None)
+ assert versorted(a) == natsorted(a, alg=ns.VERSION)
assert versorted(a, reverse=True) == versorted(a)[::-1]
a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'),
('a', '1.11.4'), ('a', '1.10.1')]
@@ -232,10 +317,18 @@ def test_versorted():
'/p/Folder (1)/file1.1.0.tar.gz',
'/p/Folder (10)/file1.1.0.tar.gz',
'/p/Folder/file1.1.0.tar.gz']
- assert versorted(a, as_path=True) == ['/p/Folder/file1.1.0.tar.gz',
- '/p/Folder (1)/file1.1.0.tar.gz',
- '/p/Folder (1)/file1.1.0 (1).tar.gz',
- '/p/Folder (10)/file1.1.0.tar.gz']
+ assert versorted(a, alg=ns.PATH) == ['/p/Folder/file1.1.0.tar.gz',
+ '/p/Folder (1)/file1.1.0.tar.gz',
+ '/p/Folder (1)/file1.1.0 (1).tar.gz',
+ '/p/Folder (10)/file1.1.0.tar.gz']
+
+
+def test_humansorted():
+
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ assert humansorted(a) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
+ assert humansorted(a) == natsorted(a, alg=ns.LOCALE)
+ assert humansorted(a, reverse=True) == humansorted(a)[::-1]
def test_index_natsorted():
@@ -265,13 +358,13 @@ def test_index_natsorted():
a = ['/p/Folder (10)/',
'/p/Folder/',
'/p/Folder (1)/']
- assert index_natsorted(a, as_path=True) == [1, 2, 0]
+ assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0]
def test_index_versorted():
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert index_versorted(a) == index_natsorted(a, number_type=None)
+ assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION)
assert index_versorted(a, reverse=True) == index_versorted(a)[::-1]
a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'),
('a', '1.11.4'), ('a', '1.10.1')]
@@ -282,7 +375,15 @@ def test_index_versorted():
'/p/Folder/file1.1.0.tar.gz',
'/p/Folder (1)/file1.1.0 (1).tar.gz',
'/p/Folder (1)/file1.1.0.tar.gz']
- assert index_versorted(a, as_path=True) == [1, 3, 2, 0]
+ assert index_versorted(a, alg=ns.PATH) == [1, 3, 2, 0]
+
+
+def test_index_humansorted():
+
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ assert index_humansorted(a) == [4, 0, 5, 3, 1, 2]
+ assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE)
+ assert index_humansorted(a, reverse=True) == index_humansorted(a)[::-1]
def test_order_by_index():