diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2014-09-02 18:35:02 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2014-09-02 18:35:02 -0700 |
commit | afc8e69ccbcf6304a10751d97149f2c8435fb09f (patch) | |
tree | 4833decc5c881bcfba181d410b220ed09a9a5265 | |
parent | 514a60f24137a3435bfad8f7f448235aa4139da2 (diff) | |
parent | 4d297c1452d76b9f442bf95bae7f10a1a6deeb24 (diff) | |
download | natsort-afc8e69ccbcf6304a10751d97149f2c8435fb09f.tar.gz |
natsort version 3.5.0 release (tag: 3.5.0)
- Added the 'alg' argument to the 'natsort' functions. This argument
accepts an enum that is used to indicate the options the user wishes
to use. The 'number_type', 'signed', 'exp', 'as_path', and
'py3_safe' options are being deprecated and will become
(undocumented) keyword-only options in natsort version 4.0.0.
- The user can now modify how 'natsort' handles the case of non-numeric
characters.
- The user can now instruct 'natsort' to use locale-aware sorting,
which allows 'natsort' to perform true "human sorting".
- The `humansorted` convenience function has been included to make
this easier.
- Updated shell script with locale functionality.
-rw-r--r-- | .travis.yml | 13 | ||||
-rw-r--r-- | MANIFEST.in | 4 | ||||
-rw-r--r-- | README.rst | 78 | ||||
-rw-r--r-- | docs/source/api.rst | 3 | ||||
-rw-r--r-- | docs/source/changelog.rst | 18 | ||||
-rw-r--r-- | docs/source/conf.py | 1 | ||||
-rw-r--r-- | docs/source/examples.rst | 143 | ||||
-rw-r--r-- | docs/source/humansorted.rst | 8 | ||||
-rw-r--r-- | docs/source/index_humansorted.rst | 8 | ||||
-rw-r--r-- | docs/source/intro.rst | 29 | ||||
-rw-r--r-- | docs/source/ns_class.rst | 8 | ||||
-rw-r--r-- | natsort/__init__.py | 7 | ||||
-rw-r--r-- | natsort/__main__.py | 37 | ||||
-rw-r--r-- | natsort/_version.py | 2 | ||||
-rw-r--r-- | natsort/locale_help.py | 129 | ||||
-rw-r--r-- | natsort/natsort.py | 740 | ||||
-rw-r--r-- | setup.cfg | 2 | ||||
-rw-r--r-- | setup.py | 3 | ||||
-rw-r--r-- | test_natsort/test_locale_help.py | 44 | ||||
-rw-r--r-- | test_natsort/test_main.py | 1 | ||||
-rw-r--r-- | test_natsort/test_natsort.py | 277 |
21 files changed, 1195 insertions, 360 deletions
diff --git a/.travis.yml b/.travis.yml index 797241c..a3a12f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,13 +6,18 @@ python: - 3.3 - 3.4 env: -- WITH_FASTNUMBERS=true -- WITH_FASTNUMBERS=false +- WITH_OPTIONS=true +- WITH_OPTIONS=false +before_install: +- sudo apt-get update +- sudo locale-gen de_DE.UTF-8 install: +- if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi +- if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi +- if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi +- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi - pip install pytest-cov pytest-flakes pytest-pep8 - pip install coveralls -- if [[ $WITH_FASTNUMBERS == true ]]; then pip install fastnumbers; fi -- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi script: - python -m pytest --cov natsort --flakes --pep8 - python -m pytest --doctest-modules natsort diff --git a/MANIFEST.in b/MANIFEST.in index be74ec3..ad8cd5a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,9 +5,13 @@ include natsort/_version.py include natsort/__main__.py include natsort/__init__.py include natsort/py23compat.py +include natsort/locale_help.py +include natsort/fake_fastnumbers.py include test_natsort/profile_natsorted.py include test_natsort/stress_natsort.py include test_natsort/test_natsort.py +include test_natsort/test_locale_help.py +include test_natsort/test_fake_fastnumbers.py include test_natsort/test_main.py include setup.py include setup.cfg @@ -49,6 +49,26 @@ Sorting version numbers is just as easy with the ``versorted`` function:: >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] +You can also perform locale-aware sorting (or "human sorting"), where the +non-numeric characters are ordered based on their meaning, not on their +ordinal value; this can be achieved with the ``humansorted`` function:: + + >>> a = ['Apple', 'Banana', 
'apple', 'banana'] + >>> natsorted(a) + ['Apple', 'Banana', 'apple', 'banana'] + >>> import locale + >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + 'en_US.UTF-8' + >>> from natsort import humansorted + >>> humansorted(a) + ['apple', 'Apple', 'banana', 'Banana'] + +You may find you need to explicitly set the locale to get this to work +(as shown in the example). +Please see the `following caveat <http://pythonhosted.org//natsort/examples.html#bug-note>`_ +and the "Optional Dependencies" section +below before using the ``humansorted`` function. + You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort:: @@ -61,6 +81,7 @@ when you sort:: The natsort algorithm does other fancy things like - recursively descend into lists of lists + - control the case-sensitivity - sort file paths correctly - allow custom sorting keys - exposes a natsort_key generator to pass to list.sort @@ -84,19 +105,37 @@ Requirements (this includes python 3.x). To run version 2.6, 3.0, or 3.1 the `argparse <https://pypi.python.org/pypi/argparse>`_ module is required. -Optional Dependency -------------------- +Optional Dependencies +--------------------- + +fastnumbers +''''''''''' The most efficient sorting can occur if you install the `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps with the string to number conversions.) ``natsort`` will still run (efficiently) without the package, but if you need to squeeze out that extra juice it is recommended you include this as a dependency. ``natsort`` will not require (or -check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. +check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed +at installation. 
+ +PyICU +''''' + +On some systems, Python's ``locale`` library can be buggy (I have found this to be +the case on Mac OS X), so ``natsort`` will use +`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed +on your computer; this will give more reliable results. ``natsort`` will not +require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed +at installation. Depreciation Notices -------------------- + - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, + ``as_path``, and ``py3_safe`` options will be removed from the (documented) + API, in favor of the ``alg`` option and ``ns`` enum. They will remain as + keyword-only arguments after that (for the foreseeable future). - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed from the public API. All future development should use ``natsort_keygen`` in preparation for this. @@ -118,6 +157,24 @@ History These are the last three entries of the changelog. See the package documentation for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. +09-02-2014 v. 3.5.0 +''''''''''''''''''' + + - Added the 'alg' argument to the 'natsort' functions. This argument + accepts an enum that is used to indicate the options the user wishes + to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe' + options are being depreciated and will become (undocumented) + keyword-only options in natsort version 4.0.0. + - The user can now modify how 'natsort' handles the case of non-numeric + characters. + - The user can now instruct 'natsort' to use locale-aware sorting, which + allows 'natsort' to perform true "human sorting". + + - The `humansorted` convenience function has been included to make this + easier. + + - Updated shell script with locale functionality. + 08-12-2014 v. 3.4.1 ''''''''''''''''''' @@ -155,18 +212,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. 
- Reworked the documentation, moving most of it to PyPI's hosting platform. - Added support for coveralls.io. - Entire codebase is now PyFlakes and PEP8 compliant. - -06-28-2014 v. 3.3.0 -''''''''''''''''''' - - - Added a 'versorted' method for more convenient sorting of versions. - - Updated command-line tool --number_type option with 'version' and 'ver' - to make it more clear how to sort version numbers. - - Moved unit-testing mechanism from being docstring-based to actual unit tests - in actual functions. - - - This has provided the ability determine the coverage of the unit tests (99%). - - This also makes the pydoc documentation a bit more clear. - - - Made docstrings for public functions mirror the README API. - - Connected natsort development to Travis-CI to help ensure quality releases. diff --git a/docs/source/api.rst b/docs/source/api.rst index 7546de6..80b7edd 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -13,6 +13,9 @@ natsort API natsort_key.rst natsorted.rst versorted.rst + humansorted.rst index_natsorted.rst index_versorted.rst + index_humansorted.rst order_by_index.rst + ns_class.rst diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index c4f8c30..542f5ad 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,24 @@ Changelog --------- +09-02-2014 v. 3.5.0 +''''''''''''''''''' + + - Added the 'alg' argument to the 'natsort' functions. This argument + accepts an enum that is used to indicate the options the user wishes + to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe' + options are being depreciated and will become (undocumented) + keyword-only options in natsort version 4.0.0. + - The user can now modify how 'natsort' handles the case of non-numeric + characters. + - The user can now instruct 'natsort' to use locale-aware sorting, which + allows 'natsort' to perform true "human sorting". 
+ + - The `humansorted` convenience function has been included to make this + easier. + + - Updated shell script with locale functionality. + 08-12-2014 v. 3.4.1 ''''''''''''''''''' diff --git a/docs/source/conf.py b/docs/source/conf.py index abeb7da..fa8d749 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,6 +43,7 @@ def current_version(): # ones. extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'numpydoc', ] diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 5176dd1..b0dfe27 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -18,27 +18,10 @@ it as you would :func:`sorted`:: >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] >>> sorted(a) ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] - >>> from natsort import natsorted + >>> from natsort import natsorted, ns >>> natsorted(a) ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] -Customizing Float Definition ----------------------------- - -By default :func:`~natsorted` searches for any float that would be -a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. -Perhaps you don't want to search for signed numbers, or you don't -want to search for exponential notation, and the ``signed`` and -``exp`` options allow you to do this:: - - >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] - >>> natsorted(a) - ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] - >>> natsorted(a, signed=False) - ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] - >>> natsorted(a, exp=False) - ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] - Sort Version Numbers -------------------- @@ -49,17 +32,17 @@ literals, not floats. 
This can be achieved in three ways, as shown below:: >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] >>> natsorted(a) # This gives incorrect results ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1'] - >>> natsorted(a, number_type=int, signed=False) + >>> natsorted(a, alg=ns.INT | ns.UNSIGNED) ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - >>> natsorted(a, number_type=None) + >>> natsorted(a, alg=ns.VERSION) ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] >>> from natsort import versorted >>> versorted(a) ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] -You can see that ``number_type=None`` is a shortcut for ``number_type=int`` -and ``signed=False``, and the :func:`~versorted` is a shortcut for -``natsorted(number_type=None)``. The recommend manner to sort version +You can see that ``alg=ns.VERSION`` is a shortcut for +``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for +``natsorted(alg=ns.VERSION)``. The recommend manner to sort version numbers is to use :func:`~versorted`. 
Sorting with Alpha, Beta, and Release Candidates @@ -68,15 +51,21 @@ Sorting with Alpha, Beta, and Release Candidates By default, if you wish to sort versions with a non-strict versioning scheme, you may not get the results you expect:: - >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta', '1.2alpha', '1.2.1', '1.1', '1.3'] + >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] >>> versorted(a) - ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.3'] + ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] To make the '1.2' pre-releases come before '1.2.1', you need to use the following recipe:: >>> versorted(a, key=lambda x: x.replace('.', '~')) - ['1.1', '1.2', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] + ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] + +If you also want '1.2' after all the alpha, beta, and rc candidates, you can +modify the above recipe:: + + >>> versorted(a, key=lambda x: x.replace('.', '~')+'z') + ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to see why this works. @@ -86,7 +75,7 @@ Sort OS-Generated Paths In some cases when sorting file paths with OS-Generated names, the default :mod:`~natsorted` algorithm may not be sufficient. In cases like these, -you may need to use the ``as_path`` option:: +you may need to use the ``ns.PATH`` option:: >>> a = ['./folder/file (1).txt', ... './folder/file.txt', @@ -94,9 +83,102 @@ you may need to use the ``as_path`` option:: ... 
'./folder (10)/file.txt'] >>> natsorted(a) ['./folder (1)/file.txt', './folder (10)/file.txt', './folder/file (1).txt', './folder/file.txt'] - >>> natsorted(a, as_path=True) + >>> natsorted(a, alg=ns.PATH) ['./folder/file.txt', './folder/file (1).txt', './folder (1)/file.txt', './folder (10)/file.txt'] +Locale-Aware Sorting (Human Sorting) +------------------------------------ + +You can instruct :mod:`natsort` to use locale-aware sorting with the +``ns.LOCALE`` option. In addition to making this understand non-ASCII +characters, it will also properly interpret non-'.' decimal separators +and also properly order case. It may be more convenient to just use +the :func:`humansorted` function:: + + >>> from natsort import humansorted + >>> import locale + >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + 'en_US.UTF-8' + >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + >>> natsorted(a, alg=ns.LOCALE) + ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + >>> humansorted(a) + ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + +You may find that if you do not explicitly set the locale your results may not +be as you expect... I have found that it depends on the system you are on. +If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see below) then +you should not need to do this. + +.. _bug_note: + +A Note For Bugs With Locale-Aware Sorting ++++++++++++++++++++++++++++++++++++++++++ + +If you find that ``ns.LOCALE`` (or :func:`~humansorted`) does not give +the results you expect, before filing a bug report please try to first install +`PyICU <https://pypi.python.org/pypi/PyICU>`_. There are some known bugs +with the `locale` module from the standard library that are solved when +using `PyICU <https://pypi.python.org/pypi/PyICU>`_. + +Controlling Case When Sorting +----------------------------- + +For non-numbers, by default :mod:`natsort` used ordinal sorting (i.e. +it sorts by the character's value in the ASCII table). 
For example:: + + >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + >>> natsorted(a) + ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] + +There are times when you wish to ignore the case when sorting, +you can easily do this with the ``ns.IGNORECASE`` option:: + + >>> natsorted(a, alg=ns.IGNORECASE) + ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] + +Note that since Python's sorting is stable, the order of equivalent +elements after lowering the case is the same order they appear in the +original list. + +Upper-case letters appear first in the ASCII table, but many natural +sorting methods place lower-case first. To do this, use +``ns.LOWERCASEFIRST``:: + + >>> natsorted(a, alg=ns.LOWERCASEFIRST) + ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] + +It may be undesirable to have the upper-case letters grouped together +and the lower-case letters grouped together; most would expect all +"a"s to be together regardless of case, and all "b"s, and so on. To +achieve this, use ``ns.GROUPLETTERS``:: + + >>> natsorted(a, alg=ns.GROUPLETTERS) + ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] + +You might combine this with ``ns.LOWERCASEFIRST`` to get what most +would expect to be "natural" sorting:: + + >>> natsorted(a, alg=ns.G | ns.LF) + ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + +Customizing Float Definition +---------------------------- + +By default :func:`~natsorted` searches for any float that would be +a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. 
+Perhaps you don't want to search for signed numbers, or you don't +want to search for exponential notation, the ``ns.UNSIGNED`` and +``ns.NOEXP`` options allow you to do this:: + + >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] + >>> natsorted(a) + ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] + >>> natsorted(a, alg=ns.UNSIGNED) + ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] + >>> natsorted(a, alg=ns.NOEXP) + ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] + Using a Custom Sorting Key -------------------------- @@ -129,13 +211,14 @@ need to pass a key to the :meth:`list.sort` method. The function >>> a.sort(key=natsort_key) >>> a ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] - >>> versort_key = natsort_keygen(number_type=None) + >>> versort_key = natsort_keygen(alg=ns.VERSION) >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] >>> a.sort(key=versort_key) >>> a ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] -:func:`~natsort_keygen` has the same API as :func:`~natsorted`. +:func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the +`reverse` option). Sorting Multiple Lists According to a Single List ------------------------------------------------- diff --git a/docs/source/humansorted.rst b/docs/source/humansorted.rst new file mode 100644 index 0000000..35be19b --- /dev/null +++ b/docs/source/humansorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.humansorted` +============================ + +.. autofunction:: humansorted + diff --git a/docs/source/index_humansorted.rst b/docs/source/index_humansorted.rst new file mode 100644 index 0000000..e143b67 --- /dev/null +++ b/docs/source/index_humansorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.index_humansorted` +================================== + +.. 
autofunction:: index_humansorted + diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 3348356..ace8355 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -59,6 +59,25 @@ Sorting version numbers is just as easy with :func:`~versorted`:: >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] +You can also perform locale-aware sorting (or "human sorting"), where the +non-numeric characters are ordered based on their meaning, not on their +ordinal value; this can be achieved with the ``humansorted`` function:: + + >>> a = ['Apple', 'Banana', 'apple', 'banana'] + >>> natsorted(a) + ['Apple', 'Banana', 'apple', 'banana'] + >>> import locale + >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + 'en_US.UTF-8' + >>> from natsort import humansorted + >>> humansorted(a) + ['apple', 'Apple', 'banana', 'Banana'] + +You may find you need to explicitly set the locale to get this to work +(as shown in the example). +Please see :ref:`bug_note` and the Installation section +below before using the ``humansorted`` function. + You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort:: @@ -71,9 +90,10 @@ when you sort:: The natsort algorithm does other fancy things like - recursively descend into lists of lists + - control the case-sensitivity - sort file paths correctly - allow custom sorting keys - - allow exposed a natsort_key generator to pass to list.sort + - exposes a natsort_key generator to pass to list.sort Please see the :ref:`examples` for a quick start guide, or the :ref:`api` for more details. @@ -119,6 +139,13 @@ without the package, but if you need to squeeze out that extra juice it is recommended you include this as a dependency. ``natsort`` will not require (or check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. 
+On some systems, Python's ``locale`` library can be buggy (I have found this to be +the case on Mac OS X), so ``natsort`` will use +`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed +on your computer; this will give more reliable results. ``natsort`` will not +require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed +at installation. + :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called from the command line with ``python -m natsort``. The command line script is only installed onto your ``PATH`` if you don't install via a wheel. There is diff --git a/docs/source/ns_class.rst b/docs/source/ns_class.rst new file mode 100644 index 0000000..f604e3d --- /dev/null +++ b/docs/source/ns_class.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:class:`~natsort.ns` +==================== + +.. autoclass:: ns + diff --git a/natsort/__init__.py b/natsort/__init__.py index ac8171d..02f8d30 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -2,9 +2,9 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -from .natsort import (natsort_key, natsort_keygen, natsorted, +from .natsort import (natsort_key, natsort_keygen, natsorted, humansorted, index_natsorted, versorted, index_versorted, - order_by_index) + index_humansorted, order_by_index, ns) from ._version import __version__ __all__ = [ @@ -12,7 +12,10 @@ __all__ = [ 'natsort_keygen', 'natsorted', 'versorted' + 'humansorted', 'index_natsorted', 'index_versorted', + 'index_humansorted', 'order_by_index', + 'ns', ] diff --git a/natsort/__main__.py b/natsort/__main__.py index af8ef63..f55c3fe 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -4,7 +4,7 @@ from __future__ import (print_function, division, import sys -from .natsort import natsorted, regex_and_num_function_chooser +from .natsort import natsorted, _regex_and_num_function_chooser, ns from ._version 
import __version__ from .py23compat import py23_str @@ -63,6 +63,11 @@ def main(): 'would be considered as 1, "e", and 4, not as 10000. This only ' 'effects the --number-type=float.') parser.add_argument( + '--locale', '-l', action='store_true', default=False, + help='Causes natsort to use locale-aware sorting. On some systems, ' + 'the underlying C library is broken, so if you get results that ' + 'you do not expect please install PyICU and try again.') + parser.add_argument( 'entries', nargs='*', default=sys.stdin, help='The entries to sort. Taken from stdin if nothing is given on ' 'the command line.', ) @@ -135,23 +140,29 @@ def sort_and_print_entries(entries, args): """Sort the entries, applying the filters first if necessary.""" # Extract the proper number type. - kwargs = {'number_type': {'digit': None, - 'version': None, - 'ver': None, - 'int': int, - 'float': float}[args.number_type], - 'signed': args.signed, - 'exp': args.exp, - 'as_path': args.paths, - 'reverse': args.reverse, } + num_type = {'digit': None, + 'version': None, + 'ver': None, + 'int': int, + 'float': float}[args.number_type] + unsigned = not args.signed or num_type is None + alg = (ns.INT * int(num_type in (int, None)) | + ns.UNSIGNED * unsigned | + ns.NOEXP * (not args.exp) | + ns.PATH * args.paths | + ns.LOCALE * args.locale) # Pre-remove entries that don't pass the filtering criteria # Make sure we use the same searching algorithm for filtering # as for sorting. do_filter = args.filter is not None or args.reverse_filter is not None if do_filter or args.exclude: - inp_options = (kwargs['number_type'], args.signed, args.exp) - regex, num_function = regex_and_num_function_chooser[inp_options] + inp_options = (ns.INT * int(num_type in (int, None)) | + ns.UNSIGNED * unsigned | + ns.NOEXP * (not args.exp), + '.' 
+ ) + regex, num_function = _regex_and_num_function_chooser[inp_options] if args.filter is not None: lows, highs = ([f[0] for f in args.filter], [f[1] for f in args.filter]) @@ -171,7 +182,7 @@ def sort_and_print_entries(entries, args): num_function, regex)] # Print off the sorted results - for entry in natsorted(entries, **kwargs): + for entry in natsorted(entries, reverse=args.reverse, alg=alg): print(entry) diff --git a/natsort/_version.py b/natsort/_version.py index d364806..b490955 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -2,4 +2,4 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -__version__ = '3.4.1' +__version__ = '3.5.0' diff --git a/natsort/locale_help.py b/natsort/locale_help.py new file mode 100644 index 0000000..748a7cb --- /dev/null +++ b/natsort/locale_help.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +"""\ +This module is intended to help combine some locale functions +together for natsort consumption. It also accounts for Python2 +and Python3 differences. +""" +from __future__ import (print_function, division, + unicode_literals, absolute_import) + +import sys +from itertools import chain +from locale import localeconv + +from .py23compat import py23_zip + +# We need cmp_to_key for Python2 because strxfrm is broken for unicode. +if sys.version[:3] == '2.7': + from functools import cmp_to_key +# cmp_to_key was not created till 2.7. 
+elif sys.version[:3] == '2.6': + def cmp_to_key(mycmp): + """Convert a cmp= function into a key= function""" + class K(object): + __slots__ = ['obj'] + + def __init__(self, obj): + self.obj = obj + + def __lt__(self, other): + return mycmp(self.obj, other.obj) < 0 + + def __gt__(self, other): + return mycmp(self.obj, other.obj) > 0 + + def __eq__(self, other): + return mycmp(self.obj, other.obj) == 0 + + def __le__(self, other): + return mycmp(self.obj, other.obj) <= 0 + + def __ge__(self, other): + return mycmp(self.obj, other.obj) >= 0 + + def __ne__(self, other): + return mycmp(self.obj, other.obj) != 0 + + def __hash__(self): + raise TypeError('hash not implemented') + + return K + +# Make the strxfrm function from strcoll on Python2 +# It can be buggy, so prefer PyICU if available. +try: + import PyICU + from locale import getlocale + + # If using PyICU, get the locale from the current global locale, + # then create a sort key from that + def get_pyicu_transform(l, _d={}): + if l not in _d: + if l == (None, None): + c = PyICU.Collator.createInstance(PyICU.Locale()) + else: + loc = '.'.join(l) + c = PyICU.Collator.createInstance(PyICU.Locale(loc)) + _d[l] = c.getSortKey + return _d[l] + use_pyicu = True +except ImportError: + if sys.version[0] == '2': + from locale import strcoll + strxfrm = cmp_to_key(strcoll) + else: + from locale import strxfrm + use_pyicu = False + +# Convenience functions. +lowercase = lambda x: x.lower() +swapcase = lambda x: x.swapcase() + +# This little lambda doubles all characters, making letters lowercase. +groupletters = lambda x: ''.join(chain(*py23_zip(lowercase(x), x))) + + +def grouper(val, func): + """\ + Attempt to convert a string to a number. If the conversion + was not possible, run it through the letter grouper + to make the sorting work as requested. + """ + # Return the number or transformed string. + # If the input is identical to the output, then no conversion happened. 
+ s = func(val) + return groupletters(s) if val is s else s + + +def locale_convert(val, func, group): + """\ + Attempt to convert a string to a number, first converting + the decimal place character if needed. Then, if the conversion + was not possible, run it through strxfrm to make the sorting + as requested, possibly grouping first. + """ + + # Format the number so that the conversion function can interpret it. + radix = localeconv()['decimal_point'] + s = val.replace(radix, '.') if radix != '.' else val + + # Perform the conversion + t = func(s) + + # Return the number or transformed string. + # If the input is identical to the output, then no conversion happened. + # In this case, we don't want to return the function output because it + # may have had characters modified from the above 'replace' call, + # so we return the input. + if group: + if use_pyicu: + xfrm = get_pyicu_transform(getlocale()) + return xfrm(groupletters(val)) if s is t else t + else: + return strxfrm(groupletters(val)) if s is t else t + else: + if use_pyicu: + xfrm = get_pyicu_transform(getlocale()) + return xfrm(val) if s is t else t + else: + return strxfrm(val) if s is t else t diff --git a/natsort/natsort.py b/natsort/natsort.py index 5974199..82b84df 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -22,6 +22,7 @@ from operator import itemgetter from functools import partial from itertools import islice from warnings import warn +from locale import localeconv # If the user has fastnumbers installed, they will get great speed # benefits. If not, we simulate the functions here. 
@@ -29,53 +30,230 @@ try: from fastnumbers import fast_float, fast_int, isreal except ImportError: from .fake_fastnumbers import fast_float, fast_int, isreal - +from .locale_help import locale_convert, grouper, lowercase, swapcase from .py23compat import u_format, py23_str, py23_zip # Make sure the doctest works for either python2 or python3 __doc__ = u_format(__doc__) + +class ns(object): + """ + Enum to control the `natsort` algorithm. + + This class acts like an enum to control the `natsort` algorithm. The + user may select several options simultaneously by or'ing the options + together. For example, to choose ``ns.INT``, `ns.PATH``, and + ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. + + Each option has a shortened 1- or 2-letter form. + + .. warning:: On some systems, the underlying C library that + Python's locale module uses is broken. On these + systems it is recommended that you install + `PyICU <https://pypi.python.org/pypi/PyICU>`_ + if you wish to use `LOCALE`. + Please validate that `LOCALE` works as + expected on your target system, and if not you + should add + `PyICU <https://pypi.python.org/pypi/PyICU>`_ + as a dependency. + + Attributes + ---------- + FLOAT, F + The default - parse numbers as floats. + INT, I + Tell `natsort` to parse numbers as ints. + UNSIGNED, U + Tell `natsort` to ignore any sign (i.e. "-" or "+") to the + immediate left of a number. It is the same as setting the old + `signed` option to `False`. + VERSION, V + This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful + when attempting to sort version numbers. It is the same as + setting the old `number_type` option to `None`. + DIGIT, D + Same as `VERSION` above. + NOEXP, N + Tell `natsort` to not search for exponents as part of the number. + For example, with `NOEXP` the number "5.6E5" would be interpreted + as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` + option to `False`. 
+ PATH, P + Tell `natsort` to interpret strings as filesystem paths, so they + will be split according to the filesystem separator + (i.e. ‘/’ on UNIX, ‘\’ on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted + properly; 'Folder/' will be placed at the end, not at the front. + It is the same as setting the old `as_path` option to `True`. + LOCALE, L + Tell `natsort` to be locale-aware when sorting strings (everything + that was not converted to a number). Your sorting results will vary + depending on your current locale. Generally, the `GROUPLETTERS` + option is not needed with `LOCALE` because the `locale` library + groups the letters in the same manner (although you may still + need `GROUPLETTERS` if there are numbers in your strings). + IGNORECASE, IC + Tell `natsort` to ignore case when sorting. For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['apple', 'Apple', 'Banana', 'banana']``. + LOWERCASEFIRST, LF + Tell `natsort` to put lowercase letters before uppercase letters + when sorting. For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['apple', 'banana', 'Apple', 'Banana']`` (the default order + would be ``['Apple', 'Banana', 'apple', 'banana']`` which is + the order from a purely ordinal sort). + Useless when used with `IGNORECASE`. + GROUPLETTERS, G + Tell `natsort` to group lowercase and uppercase letters together + when sorting. For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['Apple', 'apple', 'Banana', 'banana']``. + Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` + to reverse the order of upper and lower case. + TYPESAFE, T + Try hard to avoid "unorderable types" error on Python 3. It + is the same as setting the old `py3_safe` option to `True`. 
+ + Notes + ----- + If using `LOCALE`, you may find that if you do not explicitly set + the locale your results may not be as you expect... I have found that + it depends on the system you are on. To do this is straightforward + (in the below example I use 'en_US.UTF-8', but you should use your + locale):: + + >>> import locale + >>> # The 'str' call is only to get around a bug on Python 2.x + >>> # where 'setlocale' does not expect unicode strings (ironic, + >>> # right?) + >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + 'en_US.UTF-8' + + It is preferred that you do this before importing `natsort`. + If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning + above) then you should not need to do this. + + """ + pass + + +# Sort algorithm "enum" values. +_nsdict = {'FLOAT': 0, 'F': 0, + 'INT': 1, 'I': 1, + 'UNSIGNED': 2, 'U': 2, + 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED + 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED + 'NOEXP': 4, 'N': 4, + 'PATH': 8, 'P': 8, + 'LOCALE': 16, 'L': 16, + 'IGNORECASE': 32, 'IC': 32, + 'LOWERCASEFIRST': 64, 'LF': 64, + 'GROUPLETTERS': 128, 'G': 128, + 'TYPESAFE': 1024, 'T': 1024, + } +# Populate the ns class with the _nsdict values. 
+for x, y in _nsdict.items(): + setattr(ns, x, y) + +# Group algorithm types for easy extraction +_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP +_CASE_ALGORITHMS = ns.IGNORECASE | ns.LOWERCASEFIRST | ns.GROUPLETTERS +_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | + ns.IC | ns.LF | ns.G | ns.TYPESAFE) + # The regex that locates floats -float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') -float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)') -float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)') -float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)') +_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) +_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) +_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) +_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) + # Integer regexes -int_nosign_re = re.compile(r'(\d+)') -int_sign_re = re.compile(r'([-+]?\d+)') +_int_nosign_re = re.compile(r'(\d+)', re.U) +_int_sign_re = re.compile(r'([-+]?\d+)', re.U) + # This dict will help select the correct regex and number conversion function. 
-regex_and_num_function_chooser = { - (float, True, True): (float_sign_exp_re, fast_float), - (float, True, False): (float_sign_noexp_re, fast_float), - (float, False, True): (float_nosign_exp_re, fast_float), - (float, False, False): (float_nosign_noexp_re, fast_float), - (int, True, True): (int_sign_re, fast_int), - (int, True, False): (int_sign_re, fast_int), - (int, False, True): (int_nosign_re, fast_int), - (int, False, False): (int_nosign_re, fast_int), - (None, True, True): (int_nosign_re, fast_int), - (None, True, False): (int_nosign_re, fast_int), - (None, False, True): (int_nosign_re, fast_int), - (None, False, False): (int_nosign_re, fast_int), +_regex_and_num_function_chooser = { + (ns.F, '.'): (_float_sign_exp_re, fast_float), + (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), + (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), + (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), + (ns.I, '.'): (_int_sign_re, fast_int), + (ns.I | ns.N, '.'): (_int_sign_re, fast_int), + (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), + (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), + (ns.F, ','): (_float_sign_exp_re_c, fast_float), + (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), + (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), + (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), + (ns.I, ','): (_int_sign_re, fast_int), + (ns.I | ns.N, ','): (_int_sign_re, fast_int), + (ns.I | ns.U, ','): (_int_nosign_re, fast_int), + (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), } -def _number_finder(s, regex, numconv, py3_safe): - """Helper to split numbers""" - - # Split the input string by numbers. If there are no splits, return now. 
+def _args_to_enum(number_type, signed, exp, as_path, py3_safe): + """A function to convert input booleans to an enum-type argument.""" + alg = 0 + if number_type is not float: + msg = "The 'number_type' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" + warn(msg, DeprecationWarning) + alg |= (_nsdict['INT'] * bool(number_type in (int, None))) + alg |= (_nsdict['UNSIGNED'] * (number_type is None)) + if signed is not None: + msg = "The 'signed' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.UNSIGNED'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['UNSIGNED'] * (not signed)) + if exp is not None: + msg = "The 'exp' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.NOEXP'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['NOEXP'] * (not exp)) + if as_path is not None: + msg = "The 'as_path' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.PATH'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['PATH'] * as_path) + if py3_safe is not None: + msg = "The 'py3_safe' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.TYPESAFE'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['TYPESAFE'] * py3_safe) + return alg + + +def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters): + """Helper to parse the string input into numbers and strings.""" + + # Split the input string by numbers. # If the input is not a string, TypeError is raised. s = regex.split(s) - if len(s) == 1: - return tuple(s) # Now convert the numbers to numbers, and leave strings as strings. + # Take into account locale if needed, and group letters if needed. # Remove empty strings from the list. 
- s = [numconv(x) for x in s if x] + if use_locale: + s = [locale_convert(x, numconv, group_letters) for x in s if x] + elif group_letters: + s = [grouper(x, numconv) for x in s if x] + else: + s = [numconv(x) for x in s if x] # If the list begins with a number, lead with an empty string. # This is used to get around the "unorderable types" issue. - if isreal(s[0]): + if not s: # Return empty tuple for empty results. + return () + elif isreal(s[0]): s = [''] + s # The _py3_safe function inserts "" between numbers in the list, @@ -145,8 +323,7 @@ def _py3_safe(parsed_list): return new_list -def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, - as_path=False, py3_safe=False): +def _natsort_key(val, key, alg): """\ Key to sort strings and numbers naturally. @@ -157,12 +334,8 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, Parameters ---------- val : {str, unicode} - key : callable, optional - number_type : {None, float, int}, optional - signed : {True, False}, optional - exp : {True, False}, optional - as_path : {True, False}, optional - py3_safe : {True, False}, optional + key : callable + alg : ns enum Returns ------- @@ -172,20 +345,24 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, """ # Convert the arguments to the proper input tuple - inp_options = (number_type, signed, exp) try: - regex, num_function = regex_and_num_function_chooser[inp_options] - except KeyError: - # Report errors properly - if number_type not in (float, int) and number_type is not None: - raise ValueError("_natsort_key: 'number_type' parameter " - "'{0}' invalid".format(py23_str(number_type))) - elif signed not in (True, False): - raise ValueError("_natsort_key: 'signed' parameter " - "'{0}' invalid".format(py23_str(signed))) - elif exp not in (True, False): - raise ValueError("_natsort_key: 'exp' parameter " - "'{0}' invalid".format(py23_str(exp))) + use_locale = alg & _nsdict['LOCALE'] + inp_options = (alg & 
_NUMBER_ALGORITHMS, + localeconv()['decimal_point'] if use_locale else '.') + except TypeError: + msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" + raise ValueError(msg+', got {0}'.format(py23_str(alg))) + + # Get the proper regex and conversion function. + try: + regex, num_function = _regex_and_num_function_chooser[inp_options] + except KeyError: # pragma: no cover + if inp_options[1] not in ('.', ','): # pragma: no cover + raise ValueError("_natsort_key: currently natsort only supports " + "the decimal separators '.' and ','. " + "Please file a bug report.") + else: + raise else: # Apply key if needed. if key is not None: @@ -194,41 +371,47 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, # If this is a path, convert it. # An AttrubuteError is raised if not a string. split_as_path = False - if as_path: + if alg & _nsdict['PATH']: try: val = _path_splitter(val) except AttributeError: pass else: # Record that this string was split as a path so that - # we can set as_path to False in the recursive call. + # we don't set PATH in the recursive call. split_as_path = True # Assume the input are strings, which is the most common case. + # Apply the string modification if needed. try: - return tuple(_number_finder(val, regex, num_function, py3_safe)) + if alg & _nsdict['LOWERCASEFIRST']: + val = swapcase(val) + if alg & _nsdict['IGNORECASE']: + val = lowercase(val) + return tuple(_input_parser(val, + regex, + num_function, + alg & _nsdict['TYPESAFE'], + use_locale, + alg & _nsdict['GROUPLETTERS'])) except TypeError: # If not strings, assume it is an iterable that must # be parsed recursively. Do not apply the key recursively. - # If this string was split as a path, set as_path to False. + # If this string was split as a path, turn off 'PATH'. 
try: - return tuple([_natsort_key(x, None, number_type, signed, - exp, as_path and not split_as_path, - py3_safe) for x in val]) + was_path = alg & _nsdict['PATH'] + newalg = alg & _ALL_BUT_PATH + newalg |= (was_path * (not split_as_path)) + return tuple([_natsort_key(x, None, newalg) for x in val]) # If there is still an error, it must be a number. # Return as-is, with a leading empty string. - # Waiting for two raised errors instead of calling - # isinstance at the opening of the function is slower - # for numbers but much faster for strings, and since - # numbers are not a common input to natsort this is - # an acceptable sacrifice. except TypeError: - return (('', val,),) if as_path else ('', val,) + return (('', val,),) if alg & _nsdict['PATH'] else ('', val,) @u_format -def natsort_key(val, key=None, number_type=float, signed=True, exp=True, - as_path=False, py3_safe=False): +def natsort_key(val, key=None, number_type=float, signed=None, exp=None, + as_path=None, py3_safe=None, alg=0): """\ Key to sort strings and numbers naturally. @@ -257,39 +440,39 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, It should accept a single argument and return a single value. number_type : {{None, float, int}}, optional - The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None` searches - for digits (like integers but does not take into account - negative sign). `None` is a shortcut for `number_type = int` - and `signed = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. signed : {{True, False}}, optional - By default a '+' or '-' before a number is taken to be the sign - of the number. If `signed` is `False`, any '+' or '-' will not - be considered to be part of the number, but as part part of the - string. 
+ Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. exp : {{True, False}}, optional - This option only applies to `number_type = float`. If - `exp = True`, a string like "3.5e5" will be interpreted as - 350000, i.e. the exponential part is considered to be part of - the number. If `exp = False`, "3.5e5" is interpreted as - ``(3.5, "e", 5)``. The default behavior is `exp = True`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. py3_safe : {{True, False}}, optional - This will make the string parsing algorithm be more careful by - placing an empty string between two adjacent numbers after the - parsing algorithm. This will prevent the "unorderable types" - error. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. 
+ + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. Returns ------- @@ -348,12 +531,13 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, """ msg = "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" warn(msg, DeprecationWarning) - return _natsort_key(val, key, number_type, signed, exp, as_path, py3_safe) + alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg + return _natsort_key(val, key, alg) @u_format -def natsort_keygen(key=None, number_type=float, signed=True, exp=True, - as_path=False, py3_safe=False): +def natsort_keygen(key=None, number_type=float, signed=None, exp=None, + as_path=None, py3_safe=None, alg=0): """\ Generate a key to sort strings and numbers naturally. @@ -373,39 +557,39 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, It should accept a single argument and return a single value. number_type : {{None, float, int}}, optional - The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None` searches - for digits (like integers but does not take into account - negative sign). `None` is a shortcut for `number_type = int` - and `signed = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. signed : {{True, False}}, optional - By default a '+' or '-' before a number is taken to be the sign - of the number. If `signed` is `False`, any '+' or '-' will not - be considered to be part of the number, but as part part of the - string. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. 
See :class:`ns` class documentation for + details. exp : {{True, False}}, optional - This option only applies to `number_type = float`. If - `exp = True`, a string like "3.5e5" will be interpreted as - 350000, i.e. the exponential part is considered to be part of - the number. If `exp = False`, "3.5e5" is interpreted as - ``(3.5, "e", 5)``. The default behavior is `exp = True`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. `/` on UNIX, `\\\\` on Windows), as well as splitting on the - file extension, if any. Without this, lists with file paths like - ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. py3_safe : {{True, False}}, optional - This will make the string parsing algorithm be more careful by - placing an empty string between two adjacent numbers after the - parsing algorithm. This will prevent the "unorderable types" - error. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. 
Returns ------- @@ -440,18 +624,13 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, True """ - return partial(_natsort_key, - key=key, - number_type=number_type, - signed=signed, - exp=exp, - as_path=as_path, - py3_safe=py3_safe) + alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg + return partial(_natsort_key, key=key, alg=alg) @u_format -def natsorted(seq, key=None, number_type=float, signed=True, exp=True, - reverse=False, as_path=False): +def natsorted(seq, key=None, number_type=float, signed=None, exp=None, + reverse=False, as_path=None, alg=0): """\ Sorts a sequence naturally. @@ -470,37 +649,37 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, It should accept a single argument and return a single value. number_type : {{None, float, int}}, optional - The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None` searches - for digits (like integers but does not take into account - negative sign). `None` is a shortcut for `number_type = int` - and `signed = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. signed : {{True, False}}, optional - By default a '+' or '-' before a number is taken to be the sign - of the number. If `signed` is `False`, any '+' or '-' will not - be considered to be part of the number, but as part part of the - string. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. exp : {{True, False}}, optional - This option only applies to `number_type = float`. If - `exp = True`, a string like "3.5e5" will be interpreted as - 350000, i.e. 
the exponential part is considered to be part of - the number. If `exp = False`, "3.5e5" is interpreted as - ``(3.5, "e", 5)``. The default behavior is `exp = True`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. Returns ------- @@ -522,30 +701,29 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, [{u}'num2', {u}'num3', {u}'num5'] """ + alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg try: return sorted(seq, reverse=reverse, - key=natsort_keygen(key, number_type, - signed, exp, as_path)) - except TypeError as e: + key=natsort_keygen(key, alg=alg)) + except TypeError as e: # pragma: no cover # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. 
if 'unorderable types' in str(e): return sorted(seq, reverse=reverse, - key=natsort_keygen(key, number_type, - signed, exp, as_path, - True)) + key=natsort_keygen(key, + alg=alg | _nsdict['TYPESAFE'])) else: # Re-raise if the problem was not "unorderable types" raise @u_format -def versorted(seq, key=None, reverse=False, as_path=False): +def versorted(seq, key=None, reverse=False, as_path=None, alg=0): """\ Convenience function to sort version numbers. Convenience function to sort version numbers. This is a wrapper - around ``natsorted(seq, number_type=None)``. + around ``natsorted(seq, alg=ns.VERSION)``. Parameters ---------- @@ -562,13 +740,15 @@ def versorted(seq, key=None, reverse=False, as_path=False): `False`. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. 
Returns ------- @@ -588,12 +768,93 @@ def versorted(seq, key=None, reverse=False, as_path=False): [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] """ - return natsorted(seq, key, None, reverse=reverse, as_path=as_path) + alg = _args_to_enum(float, None, None, as_path, None) | alg + return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) + + +@u_format +def humansorted(seq, key=None, reverse=False, alg=0): + """\ + Convenience function to properly sort non-numeric characters. + + Convenience function to properly sort non-numeric characters + in a locale-aware fashion (a.k.a "human sorting"). This is a + wrapper around ``natsorted(seq, alg=ns.LOCALE)``. + + .. warning:: On some systems, the underlying C library that + Python's locale module uses is broken. On these + systems it is recommended that you install + `PyICU <https://pypi.python.org/pypi/PyICU>`_. + Please validate that this function works as + expected on your target system, and if not you + should add + `PyICU <https://pypi.python.org/pypi/PyICU>`_ + as a dependency. + + Parameters + ---------- + seq : iterable + The sequence to sort. + + key : callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. + + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. + + Returns + ------- + out : list + The sorted sequence. + + See Also + -------- + index_humansorted : Returns the sorted indexes from `humansorted`. + + Notes + ----- + You may find that if you do not explicitly set + the locale your results may not be as you expect... I have found that + it depends on the system you are on. 
To do this is straightforward + (in the below example I use 'en_US.UTF-8', but you should use your + locale):: + + >>> import locale + >>> # The 'str' call is only to get around a bug on Python 2.x + >>> # where 'setlocale' does not expect unicode strings (ironic, + >>> # right?) + >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + 'en_US.UTF-8' + + It is preferred that you do this before importing `natsort`. + If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning + above) then you should not need to do this. + + Examples + -------- + Use `humansorted` just like the builtin `sorted`:: + + >>> a = ['Apple', 'Banana', 'apple', 'banana'] + >>> natsorted(a) + [{u}'Apple', {u}'Banana', {u}'apple', {u}'banana'] + >>> humansorted(a) + [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana'] + + """ + return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) @u_format -def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, - reverse=False, as_path=False): +def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, + reverse=False, as_path=None, alg=0): """\ Return the list of the indexes used to sort the input sequence. @@ -613,37 +874,37 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, It should accept a single argument and return a single value. number_type : {{None, float, int}}, optional - The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None` searches - for digits (like integers but does not take into account - negative sign). `None` is a shortcut for `number_type = int` - and `signed = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. 
signed : {{True, False}}, optional - By default a '+' or '-' before a number is taken to be the sign - of the number. If `signed` is `False`, any '+' or '-' will not - be considered to be part of the number, but as part part of the - string. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. exp : {{True, False}}, optional - This option only applies to `number_type = float`. If - `exp = True`, a string like "3.5e5" will be interpreted as - 350000, i.e. the exponential part is considered to be part of - the number. If `exp = False`, "3.5e5" is interpreted as - ``(3.5, "e", 5)``. The default behavior is `exp = True`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. 
The default is `ns.FLOAT`. Returns ------- @@ -673,6 +934,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, [{u}'baz', {u}'foo', {u}'bar'] """ + alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg if key is None: newkey = itemgetter(1) else: @@ -681,16 +943,14 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, index_seq_pair = [[x, y] for x, y in enumerate(seq)] try: index_seq_pair.sort(reverse=reverse, - key=natsort_keygen(newkey, number_type, - signed, exp, as_path)) - except TypeError as e: + key=natsort_keygen(newkey, alg=alg)) + except TypeError as e: # pragma: no cover # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. if 'unorderable types' in str(e): index_seq_pair.sort(reverse=reverse, - key=natsort_keygen(newkey, number_type, - signed, exp, as_path, - True)) + key=natsort_keygen(newkey, + alg=alg | ns.TYPESAFE)) else: # Re-raise if the problem was not "unorderable types" raise @@ -698,12 +958,12 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, @u_format -def index_versorted(seq, key=None, reverse=False, as_path=False): +def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0): """\ Return the list of the indexes used to sort the input sequence of version numbers. - Sorts a sequence naturally, but returns a list of sorted the + Sorts a sequence of version, but returns a list of sorted the indexes and not the sorted list. This list of indexes can be used to sort multiple lists by the sorted order of the given sequence. @@ -725,13 +985,15 @@ def index_versorted(seq, key=None, reverse=False, as_path=False): `False`. as_path : {{True, False}}, optional - This option will force strings to be interpreted as filesystem - paths, so they will be split according to the filesystem separator - (i.e. 
'/' on UNIX, '\\\\' on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted - properly; ``'Folder'`` will be placed at the end, not at the front. - The default behavior is `as_path = False`. + Depreciated as of version 3.5.0 and will become an undocumented + keyword-only argument in 4.0.0. Please use the `alg` argument + for all future development. See :class:`ns` class documentation for + details. + + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. Returns ------- @@ -752,7 +1014,81 @@ def index_versorted(seq, key=None, reverse=False, as_path=False): [1, 2, 0] """ - return index_natsorted(seq, key, None, reverse=reverse, as_path=as_path) + alg = _args_to_enum(float, None, None, as_path, None) | alg + return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) + + +@u_format +def index_humansorted(seq, key=None, reverse=False, alg=0): + """\ + Return the list of the indexes used to sort the input sequence + in a locale-aware manner. + + Sorts a sequence in a locale-aware manner, but returns a list + of sorted the indexes and not the sorted list. This list of + indexes can be used to sort multiple lists by the sorted order + of the given sequence. + + This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. + + Parameters + ---------- + seq: iterable + The sequence to sort. + + key: callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. + + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. 
+ + alg : ns enum, optional + This option is used to control which algorithm `natsort` + uses when sorting. For details into these options, please see + the :class:`ns` class documentation. The default is `ns.FLOAT`. + + Returns + ------- + out : tuple + The ordered indexes of the sequence. + + See Also + -------- + humansorted + order_by_index + + Notes + ----- + You may find that if you do not explicitly set + the locale your results may not be as you expect... I have found that + it depends on the system you are on. To do this is straightforward + (in the below example I use 'en_US.UTF-8', but you should use your + locale):: + + >>> import locale + >>> # The 'str' call is only to get around a bug on Python 2.x + >>> # where 'setlocale' does not expect unicode strings (ironic, + >>> # right?) + >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + 'en_US.UTF-8' + + It is preferred that you do this before importing `natsort`. + If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning + above) then you should not need to do this. 
+ + Examples + -------- + Use `index_humansorted` just like the builtin `sorted`:: + + >>> a = ['Apple', 'Banana', 'apple', 'banana'] + >>> index_humansorted(a) + [2, 0, 3, 1] + + """ + return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) @u_format @@ -9,6 +9,8 @@ flakes-ignore = natsort/py23compat.py UndefinedName natsort/__init__.py UnusedImport docs/source/conf.py ALL + test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused + test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused pep8ignore = test_natsort/test_natsort.py E501 E241 E221 @@ -67,7 +67,8 @@ setup( install_requires=REQUIRES, packages=['natsort'], entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, - tests_require=['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'], + tests_require=['pytest', 'pytest-pep8', + 'pytest-flakes', 'pytest-cov'], cmdclass={'test': PyTest}, description=DESCRIPTION, long_description=LONG_DESCRIPTION, diff --git a/test_natsort/test_locale_help.py b/test_natsort/test_locale_help.py new file mode 100644 index 0000000..c654fdd --- /dev/null +++ b/test_natsort/test_locale_help.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +"""\ +Test the locale help module module. 
+""" +import locale +from natsort.fake_fastnumbers import fast_float +from natsort.locale_help import grouper, locale_convert, use_pyicu + +if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale +else: + from natsort.locale_help import strxfrm + + +def test_grouper(): + assert grouper('HELLO', fast_float) == 'hHeElLlLoO' + assert grouper('hello', fast_float) == 'hheelllloo' + assert grouper('45.8e-2', fast_float) == 45.8e-2 + + +def test_locale_convert(): + locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert locale_convert('45.8', fast_float, False) == 45.8 + assert locale_convert('45,8', fast_float, False) == strxfrm('45,8') + assert locale_convert('hello', fast_float, False) == strxfrm('hello') + assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') + assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88') + + locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8') + if use_pyicu: + strxfrm = get_pyicu_transform(getlocale()) + assert locale_convert('45.8', fast_float, False) == 45.8 + assert locale_convert('45,8', fast_float, False) == 45.8 + assert locale_convert('hello', fast_float, False) == strxfrm('hello') + assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') + + locale.setlocale(locale.LC_NUMERIC, '') diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py index 8157c3e..2323d59 100644 --- a/test_natsort/test_main.py +++ b/test_natsort/test_main.py @@ -206,6 +206,7 @@ def test_sort_and_print_entries(capsys): self.signed = True self.exp = True self.paths = as_path + self.locale = 0 entries = ['tmp/a57/path2', 'tmp/a23/path1', diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 264b508..afe8662 100644 --- 
a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -3,13 +3,18 @@ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. """ +from __future__ import unicode_literals import warnings +import locale from operator import itemgetter from pytest import raises -from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen, order_by_index -from natsort.natsort import _number_finder, _py3_safe, _natsort_key -from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re -from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re +from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted +from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index +from natsort.natsort import _input_parser, _py3_safe, _natsort_key, _args_to_enum +from natsort.natsort import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re +from natsort.natsort import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re +from natsort.natsort import ns +from natsort.locale_help import use_pyicu try: from fastnumbers import fast_float, fast_int @@ -17,24 +22,67 @@ except ImportError: from natsort.fake_fastnumbers import fast_float, fast_int -def test_number_finder(): - - assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['a', 5.0, 0.5034] - assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, False) == ['a', 5.0, '+', 0.5034] - assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, False) == ['a', 5.0, 5.034, 'e', -1.0] - assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, False) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, False) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, False) == 
['a', 5, 5, '.', 34, 'e', -1] - - assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['a', 5.0, '', 0.5034] - assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, True) == ['a', 5.0, '+', 0.5034] - assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, True) == ['a', 5.0, '', 5.034, 'e', -1.0] - assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, True) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, True) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, True) == ['a', 5, '', 5, '.', 34, 'e', -1] - - assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['', 6.0, 'a', 5.0, 0.5034] - assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['', 6.0, 'a', 5.0, '', 0.5034] +def test_args_to_enum(): + + assert _args_to_enum(float, True, True, False, False) == ns.F + assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N + assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U + assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N + assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T + assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P + assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T + assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U + + +def test_input_parser(): + + # fttt = (fast_float, True, True, True) + # fttf = (fast_float, True, True, False) + ftft = (fast_float, True, False, True) + ftff = (fast_float, True, False, False) + # fftt = (fast_float, False, True, True) + # ffft = (fast_float, False, False, True) + # fftf = (fast_float, False, True, False) + ffff = (fast_float, False, False, False) + ittt = (fast_int, True, True, True) + ittf = (fast_int, True, True, False) + itft = (fast_int, True, False, 
True) + itff = (fast_int, True, False, False) + # iftt = (fast_int, False, True, True) + # ifft = (fast_int, False, False, True) + # iftf = (fast_int, False, True, False) + ifff = (fast_int, False, False, False) + + assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] + assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] + assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] + assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _input_parser('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _input_parser('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] + + assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] + assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] + assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ftff) == ['a', 5.0, '', 5.034, 'e', -1.0] + assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ftff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _input_parser('a5+5.034e-1', _int_nosign_re, *itff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _input_parser('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] + + assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] + assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] + + assert _input_parser('A5+5.034E-1', _float_sign_exp_re, *ftft) == ['aA', 5.0, '', 0.5034] + assert _input_parser('A5+5.034E-1', _int_nosign_re, *itft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] + + locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + 
from natsort.locale_help import strxfrm + assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] + assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] + locale.setlocale(locale.LC_NUMERIC, str('')) def test_py3_safe(): @@ -47,56 +95,74 @@ def test_py3_safe(): def test_natsort_key_private(): - a = ['num3', 'num5', 'num2'] - a.sort(key=_natsort_key) - assert a == ['num2', 'num3', 'num5'] - # The below illustrates how the key works, and how the different options affect sorting. - assert _natsort_key('a-5.034e2') == ('a', -503.4) - assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=True) == ('a', -503.4) - assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 2.0) - assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=True) == ('a-', 503.4) - assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 2.0) - assert _natsort_key('a-5.034e2', number_type=int) == ('a', -5, '.', 34, 'e', 2) - assert _natsort_key('a-5.034e2', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 2) - assert _natsort_key('a-5.034e2', number_type=None) == _natsort_key('a-5.034e2', number_type=int, signed=False) - assert _natsort_key('a-5.034e2', key=lambda x: x.upper()) == ('A', -503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.F) == ('a', -503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT) == ('a', -503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0) + assert _natsort_key('a-5.034e2', key=None, alg=ns.NOEXP) == ('a', -5.034, 'e', 2.0) + assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED) == ('a-', 503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) + assert 
_natsort_key('a-5.034e2', key=None, alg=ns.INT) == ('a', -5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) == _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) + assert _natsort_key('a-5.034e2', key=None, alg=ns.DIGIT) == _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) + assert _natsort_key('a-5.034e2', key=lambda x: x.upper(), alg=ns.F) == ('A', -503.4) # Iterables are parsed recursively so you can sort lists of lists. - assert _natsort_key(('a1', 'a-5.034e2')) == (('a', 1.0), ('a', -503.4)) - assert _natsort_key(('a1', 'a-5.034e2'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) + assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.F) == (('a', 1.0), ('a', -503.4)) + assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) # A key is applied before recursion, but not in the recursive calls. - assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1)) == ('a', -503.4) + assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1), alg=ns.F) == ('a', -503.4) # Strings that lead with a number get an empty string at the front of the tuple. # This is designed to get around the "unorderable types" issue. 
- assert _natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) - assert _natsort_key(10) == ('', 10) + assert _natsort_key(('15a', '6'), key=None, alg=ns.F) == (('', 15.0, 'a'), ('', 6.0)) + assert _natsort_key(10, key=None, alg=ns.F) == ('', 10) # Turn on as_path to split a file path into components - assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', as_path=True) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - assert _natsort_key('../Folder (10)/file (2).tar.gz', as_path=True) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) - assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', as_path=True) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('../Folder (10)/file (2).tar.gz', key=None, alg=ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) # It gracefully handles as_path for numeric input by putting an extra tuple around it # so it will sort against the other as_path results. - assert _natsort_key(10, as_path=True) == (('', 10),) + assert _natsort_key(10, key=None, alg=ns.PATH) == (('', 10),) # as_path also handles recursion well. 
- assert _natsort_key(('/Folder', '/Folder (1)'), as_path=True) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) + assert _natsort_key(('/Folder', '/Folder (1)'), key=None, alg=ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) # Turn on py3_safe to put a '' between adjacent numbers - assert _natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) + assert _natsort_key('43h7+3', key=None, alg=ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) # Invalid arguments give the correct response with raises(ValueError) as err: - _natsort_key('a', number_type='float') - assert str(err.value) == "_natsort_key: 'number_type' parameter 'float' invalid" - with raises(ValueError) as err: - _natsort_key('a', signed='True') - assert str(err.value) == "_natsort_key: 'signed' parameter 'True' invalid" - with raises(ValueError) as err: - _natsort_key('a', exp='False') - assert str(err.value) == "_natsort_key: 'exp' parameter 'False' invalid" + _natsort_key('a', key=None, alg='1') + assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" + + # Changing the sort order of strings + assert _natsort_key('Apple56', key=None, alg=ns.F) == ('Apple', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.IGNORECASE) == ('apple', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.LOWERCASEFIRST) == ('aPPLE', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.GROUPLETTERS) == ('aAppppllee', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.G | ns.LF) == ('aapPpPlLeE', 56.0) + + # Locale aware sorting + locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == 
(strxfrm('Apple'), 56.0, strxfrm(','), 5.0) + + locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) + if use_pyicu: + strxfrm = get_pyicu_transform(getlocale()) + assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + locale.setlocale(locale.LC_NUMERIC, str('')) def test_natsort_key_public(): @@ -105,10 +171,11 @@ def test_natsort_key_public(): # But it raises a depreciation warning with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2') + assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F) assert len(w) == 1 assert "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) - assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) + assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N) + assert natsort_key('a-5.034e2', alg=ns.F | ns.U | ns.N) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N) # It is called for each element in a list when sorting with warnings.catch_warnings(record=True) as w: @@ -122,25 +189,25 @@ def test_natsort_keygen(): # Creates equivalent natsort keys a = 'a-5.034e1' - assert natsort_keygen()(a) == _natsort_key(a) - assert natsort_keygen(signed=False)(a) == _natsort_key(a, signed=False) - assert natsort_keygen(exp=False)(a) == _natsort_key(a, exp=False) - assert natsort_keygen(signed=False, exp=False)(a) == _natsort_key(a, signed=False, exp=False) - assert natsort_keygen(number_type=int)(a) == _natsort_key(a, number_type=int) - assert natsort_keygen(number_type=int, signed=False)(a) == _natsort_key(a, number_type=int, signed=False) - assert natsort_keygen(number_type=None)(a) == 
_natsort_key(a, number_type=None) - assert natsort_keygen(as_path=True)(a) == _natsort_key(a, as_path=True) + assert natsort_keygen()(a) == _natsort_key(a, key=None, alg=ns.F) + assert natsort_keygen(alg=ns.UNSIGNED)(a) == _natsort_key(a, key=None, alg=ns.U) + assert natsort_keygen(alg=ns.NOEXP)(a) == _natsort_key(a, key=None, alg=ns.N) + assert natsort_keygen(alg=ns.U | ns.N)(a) == _natsort_key(a, key=None, alg=ns.U | ns.N) + assert natsort_keygen(alg=ns.INT)(a) == _natsort_key(a, key=None, alg=ns.INT) + assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, key=None, alg=ns.I | ns.U) + assert natsort_keygen(alg=ns.VERSION)(a) == _natsort_key(a, key=None, alg=ns.V) + assert natsort_keygen(alg=ns.PATH)(a) == _natsort_key(a, key=None, alg=ns.PATH) # Custom keys are more straightforward with keygen f1 = natsort_keygen(key=lambda x: x.upper()) - f2 = lambda x: _natsort_key(x, key=lambda y: y.upper()) + f2 = lambda x: _natsort_key(x, key=lambda y: y.upper(), alg=ns.F) assert f1(a) == f2(a) # It also makes sorting lists in-place easier (no lambdas!) 
a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] b = a[:] - a.sort(key=natsort_keygen(number_type=int)) - assert a == natsorted(b, number_type=int) + a.sort(key=natsort_keygen(alg=ns.I)) + assert a == natsorted(b, alg=ns.I) def test_natsorted(): @@ -151,20 +218,20 @@ def test_natsorted(): # Number types a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] - assert natsorted(a, number_type=float, exp=False) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] - assert natsorted(a, number_type=int) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] - assert natsorted(a, number_type=None) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] + assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] + assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] + assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] + assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] # Signed option a = ['a-5', 'a7', 'a+2'] - assert natsorted(a) == ['a-5', 'a+2', 'a7'] - assert natsorted(a, signed=False) == ['a7', 'a+2', 'a-5'] + assert natsorted(a) == ['a-5', 'a+2', 'a7'] + assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5'] # Number type == None a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] - assert natsorted(a, number_type=None) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] + assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] + assert natsorted(a, alg=ns.DIGIT) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] # You can mix types with natsorted. This can get around the new # 'unorderable types' issue with Python 3. 
@@ -203,20 +270,38 @@ def test_natsorted(): '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz', '/p/Folder/file.tar.gz'] - assert natsorted(a, as_path=True) == ['/p/Folder/file.tar.gz', - '/p/Folder (1)/file.tar.gz', - '/p/Folder (1)/file (1).tar.gz', - '/p/Folder (10)/file.tar.gz'] + assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz', + '/p/Folder (1)/file.tar.gz', + '/p/Folder (1)/file (1).tar.gz', + '/p/Folder (10)/file.tar.gz'] # You can sort paths and numbers, not that you'd want to a = ['/Folder (9)/file.exe', 43] - assert natsorted(a, as_path=True) == [43, '/Folder (9)/file.exe'] + assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe'] + + # You can modify how case is interpreted in your sorting. + a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] + assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] + assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] + assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] + assert natsorted(a, alg=ns.G | ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + + # You can also do locale-aware sorting + locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] + assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] + + locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8')) + assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] + locale.setlocale(locale.LC_ALL, str('')) def test_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert versorted(a) == natsorted(a, number_type=None) + assert versorted(a) == natsorted(a, alg=ns.VERSION) assert versorted(a, reverse=True) == versorted(a)[::-1] 
a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] @@ -232,10 +317,18 @@ def test_versorted(): '/p/Folder (1)/file1.1.0.tar.gz', '/p/Folder (10)/file1.1.0.tar.gz', '/p/Folder/file1.1.0.tar.gz'] - assert versorted(a, as_path=True) == ['/p/Folder/file1.1.0.tar.gz', - '/p/Folder (1)/file1.1.0.tar.gz', - '/p/Folder (1)/file1.1.0 (1).tar.gz', - '/p/Folder (10)/file1.1.0.tar.gz'] + assert versorted(a, alg=ns.PATH) == ['/p/Folder/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0 (1).tar.gz', + '/p/Folder (10)/file1.1.0.tar.gz'] + + +def test_humansorted(): + + a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + assert humansorted(a) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + assert humansorted(a) == natsorted(a, alg=ns.LOCALE) + assert humansorted(a, reverse=True) == humansorted(a)[::-1] def test_index_natsorted(): @@ -265,13 +358,13 @@ def test_index_natsorted(): a = ['/p/Folder (10)/', '/p/Folder/', '/p/Folder (1)/'] - assert index_natsorted(a, as_path=True) == [1, 2, 0] + assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0] def test_index_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert index_versorted(a) == index_natsorted(a, number_type=None) + assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION) assert index_versorted(a, reverse=True) == index_versorted(a)[::-1] a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] @@ -282,7 +375,15 @@ def test_index_versorted(): '/p/Folder/file1.1.0.tar.gz', '/p/Folder (1)/file1.1.0 (1).tar.gz', '/p/Folder (1)/file1.1.0.tar.gz'] - assert index_versorted(a, as_path=True) == [1, 3, 2, 0] + assert index_versorted(a, alg=ns.PATH) == [1, 3, 2, 0] + + +def test_index_humansorted(): + + a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + assert index_humansorted(a) == [4, 0, 5, 3, 1, 2] + assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE) + assert 
index_humansorted(a, reverse=True) == index_humansorted(a)[::-1] def test_order_by_index(): |