summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth M Morton <seth.m.morton@gmail.com>2015-05-17 19:36:10 -0700
committerSeth M Morton <seth.m.morton@gmail.com>2015-05-17 19:36:10 -0700
commit3cbedc17c805fb7a077e8b69fd3066aea7f3a38e (patch)
treed8dc973b8bff99489f3481b5a283f3bb6706852b
parent72867093bce4c2cabe2ea53415fabfb6238ae7ea (diff)
parent7df020b5ddec957c86cdae3d6adede7a4dbb93a6 (diff)
downloadnatsort-3cbedc17c805fb7a077e8b69fd3066aea7f3a38e.tar.gz
natsort release version 4.0.0.
- Made default behavior of 'natsort' search for unsigned ints, rather than signed floats. This is a backwards-incompatible change but in 99% of use cases it should not require any end-user changes. - Improved handling of locale-aware sorting on systems where the underlying locale library is broken. - Greatly improved all unit tests by adding the hypothesis library.
-rw-r--r--.gitignore3
-rw-r--r--.hgignore3
-rw-r--r--.travis.yml7
-rw-r--r--README.rst125
-rw-r--r--docs/source/api.rst1
-rw-r--r--docs/source/changelog.rst11
-rw-r--r--docs/source/examples.rst112
-rw-r--r--docs/source/intro.rst71
-rw-r--r--docs/source/natsort_key.rst8
-rw-r--r--docs/source/shell.rst38
-rw-r--r--natsort/__main__.py37
-rw-r--r--natsort/_version.py2
-rw-r--r--natsort/fake_fastnumbers.py45
-rw-r--r--natsort/locale_help.py47
-rw-r--r--natsort/natsort.py439
-rw-r--r--natsort/ns_enum.py63
-rw-r--r--natsort/py23compat.py6
-rw-r--r--natsort/unicode_numbers.py183
-rw-r--r--natsort/utils.py176
-rw-r--r--setup.py11
-rw-r--r--test_natsort/slow_splitters.py156
-rw-r--r--test_natsort/test_fake_fastnumbers.py124
-rw-r--r--test_natsort/test_locale_help.py124
-rw-r--r--test_natsort/test_main.py184
-rw-r--r--test_natsort/test_natsort.py128
-rw-r--r--test_natsort/test_unicode_numbers.py47
-rw-r--r--test_natsort/test_utils.py686
27 files changed, 1887 insertions, 950 deletions
diff --git a/.gitignore b/.gitignore
index 15cdd0e..c9d78bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
# Packages
*.egg
+*.eggs
*.egg-info
dist
build
@@ -20,9 +21,11 @@ MANIFEST
pip-log.txt
# Unit test / coverage reports
+.hypothesis
.coverage
.tox
.cache
+.pytest
#Translations
*.mo
diff --git a/.hgignore b/.hgignore
index 2607b61..e0d8d84 100644
--- a/.hgignore
+++ b/.hgignore
@@ -4,6 +4,7 @@ syntax: glob
# Packages
*.egg
+*.eggs
*.egg-info
dist
build
@@ -22,9 +23,11 @@ MANIFEST
pip-log.txt
# Unit test / coverage reports
+.hypothesis
.coverage
.tox
.cache
+.pytest
#Translations
*.mo
diff --git a/.travis.yml b/.travis.yml
index 1d064bb..bebdb51 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,6 @@
language: python
python:
-- 2.6
- 2.7
-- 3.2
- 3.3
- 3.4
env:
@@ -13,13 +11,14 @@ before_install:
- sudo locale-gen de_DE.UTF-8
- sudo apt-get install bc
install:
+- pip install -U pip
- if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi
- if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi
- if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi
-- if [[ $WITH_OPTIONS == true && 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi
+- if [[ 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi
- if [[ $(echo "$TRAVIS_PYTHON_VERSION < 3.3" | bc -l) ]]; then pip install mock; fi
-- pip install pytest-cov pytest-flakes pytest-pep8
+- pip install pytest-cov pytest-flakes pytest-pep8 hypothesis
- pip install coveralls
script:
- python -m pytest --cov natsort --flakes --pep8
diff --git a/README.rst b/README.rst
index c268032..7e8316a 100644
--- a/README.rst
+++ b/README.rst
@@ -11,10 +11,10 @@ Natural sorting for python.
- Source Code: https://github.com/SethMMorton/natsort
- Downloads: https://pypi.python.org/pypi/natsort
- - Documentation: http://pythonhosted.org/natsort/
+ - Documentation: http://pythonhosted.org/natsort
-Please see `Deprecation Notices`_ for an `important` backwards incompatibility notice
-for ``natsort`` version 4.0.0.
+Please see `Moving from older Natsort versions`_ to see if this update requires
+you to modify your ``natsort`` calls in your code (99% of users will not).
Quick Description
-----------------
@@ -47,16 +47,16 @@ Using ``natsorted`` is simple:
``natsorted`` identifies real numbers anywhere in a string and sorts them
naturally.
-Sorting version numbers is just as easy with the ``versorted`` function:
+Sorting versions is handled properly by default (as of ``natsort`` version >= 4.0.0):
.. code-block:: python
- >>> from natsort import versorted
>>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10']
- >>> versorted(a)
+ >>> natsorted(a)
['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0']
- >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work
- ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10']
+
+If you need to sort release candidates, please see
+`this useful hack <http://pythonhosted.org//natsort/examples.html#rc-sorting>`_ .
You can also perform locale-aware sorting (or "human sorting"), where the
non-numeric characters are ordered based on their meaning, not on their
@@ -81,6 +81,19 @@ and the `Optional Dependencies`_ section
below before using the ``humansorted`` function, *especially* if you are on a
BSD-based system (like Mac OS X).
+You can sort signed floats (i.e. real numbers) using the ``realsorted`` function; this is
+useful in scientific data analysis. This was the default behavior of ``natsorted``
+for ``natsort`` version < 4.0.0.
+
+.. code-block:: python
+
+ >>> from natsort import realsorted
+ >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
+ >>> natsorted(a)
+ ['num2', 'num5.3', 'num5.10', 'num-3']
+ >>> realsorted(a)
+ ['num-3', 'num2', 'num5.10', 'num5.3']
+
You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
when you sort:
@@ -129,9 +142,9 @@ from the command line with ``python -m natsort``.
Requirements
------------
-``natsort`` requires python version 2.6 or greater
-(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the
-`argparse <https://pypi.python.org/pypi/argparse>`_ module is required.
+``natsort`` requires Python version 2.7 or greater or Python 3.3 or greater.
+Python 2.6 and 3.2 are no longer officially supported (no unit tests are performed)
+but it should work.
.. _optional:
@@ -153,35 +166,56 @@ PyICU
'''''
On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library
-can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use
-`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed
-on your computer; this will give more reliable cross-platform results.
-``natsort`` will not require (or check) that
-`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation
-since in Linux-based systems and Windows systems ``locale`` should work just fine.
-Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and
-how to install on Mac OS X.
+can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is
+used for the ``ns.LOCALE`` option and ``humansorted`` function. To remedy this,
+one can
+
+ 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8"
+ encoding. These encodings do not suffer from as many problems as "UTF-8"
+ and thus should give expected results.
+ 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
+ will use it under the hood; this will give more
+ reliable cross-platform results in the long run. ``natsort`` will not
+ require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ is installed at installation. Please visit
+ https://github.com/SethMMorton/natsort/issues/21 for more details and
+ how to install on Mac OS X. **Please note** that using
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
+ guarantee correct results for all input on BSD-based systems, since
+ every other suggestion is a workaround.
+ 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
+ to compensate for a broken ``locale`` library in terms of case-handling;
+ if you do not need to be able to properly handle non-ASCII characters
+ then this may be the best option for you.
+
+Note that the above solutions *should not* be required for Windows or
+Linux since in Linux-based systems and Windows systems ``locale`` *should* work
+just fine.
.. _deprecate:
-Deprecation Notices
--------------------
-
- - The default sorting algorithm for ``natsort`` will change in version 4.0.0
- from signed floats (with exponents) to unsigned integers. The motivation
- for this change is that it will cause ``natsort`` to return results that
- pass the "least astonishment" test for the most common use case, which is
- sorting version numbers. If you currently rely on the default behavior
- to be signed floats, it is recommend that you add ``alg=ns.F`` to your
- ``natsort`` calls or switch to the new ``realsorted`` function which
- behaves identically to the current ``natsorted`` with default values.
- - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``,
- ``as_path``, and ``py3_safe`` options will be removed from the (documented)
- API, in favor of the ``alg`` option and ``ns`` enum. They will remain as
- keyword-only arguments after that (for the foreseeable future).
- - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed
- from the public API. All future development should use ``natsort_keygen``
- in preparation for this.
+Moving from older Natsort versions
+----------------------------------
+
+ - The default sorting algorithm for ``natsort`` has changed in version 4.0.0
+ from signed floats (with exponents) to unsigned integers. The motivation
+ for this change is that it will cause ``natsort`` to return results that
+ pass the "least astonishment" test for the most common use case, which is
+ sorting version numbers. If you relied on the default behavior
+ to be signed floats, it is recommended that you add ``alg=ns.F | ns.S`` to your
+ ``natsort`` calls or switch to the new ``realsorted`` function which
+ behaves identically to the pre-4.0.0 ``natsorted`` with default values.
+ For 99% of users this will have no effect... it is only expected that this
+ will affect users using ``natsort`` for science and engineering. What it
+ will do is make it so you no longer need ``ns.V`` or ``ns.I | ns.U`` to sort
+ version-like strings.
+ This will also affect the default behavior of the ``natsort`` shell script.
+ - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``,
+ ``as_path``, and ``py3_safe`` options have been removed from the (documented)
+ API in favor of the ``alg`` option and ``ns`` enum.
+ - In ``natsort`` version 4.0.0, the ``natsort_key`` function has been removed
+ from the public API.
Author
------
@@ -194,6 +228,17 @@ History
These are the last three entries of the changelog. See the package documentation
for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_.
+05-17-2015 v. 4.0.0
+'''''''''''''''''''
+
+ - Made default behavior of 'natsort' search for unsigned ints,
+ rather than signed floats. This is a backwards-incompatible
+ change but in 99% of use cases it should not require any
+ end-user changes.
+ - Improved handling of locale-aware sorting on systems where the
+ underlying locale library is broken.
+ - Greatly improved all unit tests by adding the hypothesis library.
+
04-06-2015 v. 3.5.6
'''''''''''''''''''
@@ -208,9 +253,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_.
- Added 'realsorted' and 'index_realsorted' functions for
forward-compatibility with >= 4.0.0.
- Made explanation of when to use "TYPESAFE" more clear in the docs.
-
-04-02-2015 v. 3.5.4
-'''''''''''''''''''
-
- - Fixed bug where a 'TypeError' was raised if a string containing a leading
- number was sorted with alpha-only strings when 'LOCALE' is used.
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 4084720..48728ee 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -10,7 +10,6 @@ natsort API
:maxdepth: 2
natsort_keygen.rst
- natsort_key.rst
natsorted.rst
versorted.rst
humansorted.rst
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 2803377..834373a 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -3,6 +3,17 @@
Changelog
---------
+05-17-2015 v. 4.0.0
+'''''''''''''''''''
+
+ - Made default behavior of 'natsort' search for unsigned ints,
+ rather than signed floats. This is a backwards-incompatible
+ change but in 99% of use cases it should not require any
+ end-user changes.
+ - Improved handling of locale-aware sorting on systems where the
+ underlying locale library is broken.
+ - Greatly improved all unit tests by adding the hypothesis library.
+
04-06-2015 v. 3.5.6
'''''''''''''''''''
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index a995bb4..02783f4 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -15,35 +15,21 @@ Basic Usage
In the most basic use case, simply import :func:`~natsorted` and use
it as you would :func:`sorted`::
- >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300']
+ >>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
>>> sorted(a)
- ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.']
+ ['a1', 'a10', 'a2', 'a4', 'a9']
>>> from natsort import natsorted, ns
>>> natsorted(a)
- ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
+ ['a1', 'a2', 'a4', 'a9', 'a10']
Sort Version Numbers
--------------------
-With default options, :func:`~natsorted` will not sort version numbers
-well. Version numbers are best sorted by searching for valid unsigned int
-literals, not floats. This can be achieved in three ways, as shown below::
-
- >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1']
- >>> natsorted(a) # This gives incorrect results
- ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1']
- >>> natsorted(a, alg=ns.INT | ns.UNSIGNED)
- ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
- >>> natsorted(a, alg=ns.VERSION)
- ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
- >>> from natsort import versorted
- >>> versorted(a)
- ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
-
-You can see that ``alg=ns.VERSION`` is a shortcut for
-``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for
-``natsorted(alg=ns.VERSION)``. The recommend manner to sort version
-numbers is to use :func:`~versorted`.
+As of :mod:`natsort` version >= 4.0.0, :func:`~natsorted` will now properly
+sort version numbers. The old function :func:`~versorted` exists for
+backwards compatibility but new development should use :func:`~natsorted`.
+
+.. _rc_sorting:
Sorting with Alpha, Beta, and Release Candidates
++++++++++++++++++++++++++++++++++++++++++++++++
@@ -52,19 +38,19 @@ By default, if you wish to sort versions with a non-strict versioning
scheme, you may not get the results you expect::
>>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3']
- >>> versorted(a)
+ >>> natsorted(a)
['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3']
To make the '1.2' pre-releases come before '1.2.1', you need to use the following
recipe::
- >>> versorted(a, key=lambda x: x.replace('.', '~'))
+ >>> natsorted(a, key=lambda x: x.replace('.', '~'))
['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3']
If you also want '1.2' after all the alpha, beta, and rc candidates, you can
modify the above recipe::
- >>> versorted(a, key=lambda x: x.replace('.', '~')+'z')
+ >>> natsorted(a, key=lambda x: x.replace('.', '~')+'z')
['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3']
Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to
@@ -123,6 +109,32 @@ with the ``locale`` module from the standard library that are solved when
using `PyICU <https://pypi.python.org/pypi/PyICU>`_; you can read about
them here: http://bugs.python.org/issue23195.
+If you have problems with ``ns.LOCALE`` (or :func:`~humansorted`),
+especially on BSD-based systems, you can try the following:
+
+ 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8"
+ encoding. These encodings do not suffer from as many problems as "UTF-8"
+ and thus should give expected results.
+ 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
+ will use it under the hood; this will give more
+ reliable cross-platform results in the long run. ``natsort`` will not
+ require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ is installed at installation. Please visit
+ https://github.com/SethMMorton/natsort/issues/21 for more details and
+ how to install on Mac OS X. **Please note** that using
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
+ guarantee correct results for all input on BSD-based systems, since
+ every other suggestion is a workaround.
+ 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
+ to compensate for a broken ``locale`` library in terms of case-handling;
+ if you do not need to be able to properly handle non-ASCII characters
+ then this may be the best option for you.
+
+Note that the above solutions *should not* be required for Windows or
+Linux since in Linux-based systems and Windows systems ``locale`` *should* work
+just fine.
+
Controlling Case When Sorting
-----------------------------
@@ -167,20 +179,32 @@ would expect to be "natural" sorting::
Customizing Float Definition
----------------------------
-By default :func:`~natsorted` searches for any float that would be
+You can make :func:`~natsorted` search for any float that would be
a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc.
-Perhaps you don't want to search for signed numbers, or you don't
-want to search for exponential notation, the ``ns.UNSIGNED`` and
-``ns.NOEXP`` options allow you to do this::
+using the ``ns.FLOAT`` key. You can disable the exponential component
+of the number with ``ns.NOEXP``. ::
>>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300']
- >>> natsorted(a)
- ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
- >>> natsorted(a, alg=ns.UNSIGNED)
+ >>> natsorted(a, alg=ns.FLOAT)
['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4']
- >>> natsorted(a, alg=ns.NOEXP)
+ >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED)
+ ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
+ >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED | ns.NOEXP)
['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.']
+For convenience, the ``ns.REAL`` option is provided which is a shortcut
+for ``ns.FLOAT | ns.SIGNED`` and can be used to sort on real numbers.
+This can be easily accessed with the :func:`~realsorted` convenience
+function. Please note that the behavior of the :func:`~realsorted` function
+was the default behavior of :func:`~natsorted` for :mod:`natsort`
+version < 4.0.0::
+
+ >>> natsorted(a, alg=ns.REAL)
+ ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
+ >>> from natsort import realsorted
+ >>> realsorted(a)
+ ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
+
Using a Custom Sorting Key
--------------------------
@@ -209,15 +233,10 @@ need to pass a key to the :meth:`list.sort` method. The function
>>> from natsort import natsort_keygen
>>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300']
- >>> natsort_key = natsort_keygen()
+ >>> natsort_key = natsort_keygen(alg=ns.FLOAT)
>>> a.sort(key=natsort_key)
>>> a
['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
- >>> versort_key = natsort_keygen(alg=ns.VERSION)
- >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1']
- >>> a.sort(key=versort_key)
- >>> a
- ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
:func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the
`reverse` option).
@@ -227,8 +246,8 @@ Sorting Multiple Lists According to a Single List
Sometimes you have multiple lists, and you want to sort one of those
lists and reorder the other lists according to how the first was sorted.
-To achieve this you would use the :func:`~index_natsorted` or
-:func:`~index_versorted` in combination with the convenience function
+To achieve this you could use the :func:`~index_natsorted` in combination
+with the convenience function
:func:`~order_by_index`::
>>> from natsort import index_natsorted, order_by_index
@@ -297,3 +316,14 @@ If you need a codec different from ASCII or UTF-8, you can use
>>> a = [b'a56', b'a5', b'a6', b'a40']
>>> natsorted(a, key=decoder('latin1')) == [b'a5', b'a6', b'a40', b'a56']
True
+
+Sorting a Pandas DataFrame
+--------------------------
+
+As of Pandas version 0.16.0, the sorting methods do not accept a ``key`` argument,
+so you cannot simply pass :func:`natsort_keygen` to a Pandas DataFrame and sort.
+This request has been made to the Pandas devs; see
+`issue 3942 <https://github.com/pydata/pandas/issues/3942>`_ if you are interested.
+If you need to sort a Pandas DataFrame, please check out
+`this answer on StackOverflow <http://stackoverflow.com/a/29582718/1399279>`_
+for ways to do this without the ``key`` argument to ``sort``.
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
index b79aec9..d454094 100644
--- a/docs/source/intro.rst
+++ b/docs/source/intro.rst
@@ -47,21 +47,23 @@ or as versions. Using :func:`~natsorted` is simple::
>>> natsorted(a)
['a1', 'a2', 'a4', 'a9', 'a10']
-:func:`~natsorted` identifies real numbers anywhere in a string and sorts them
+:func:`~natsorted` identifies numbers anywhere in a string and sorts them
naturally.
-Sorting version numbers is just as easy with :func:`~versorted`::
+Sorting versions is handled properly by default (as of :mod:`natsort` version >= 4.0.0):
+
+.. code-block:: python
- >>> from natsort import versorted
>>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10']
- >>> versorted(a)
+ >>> natsorted(a)
['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0']
- >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work
- ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10']
+
+If you need to sort release candidates, please see :ref:`rc_sorting` for
+a useful hack.
You can also perform locale-aware sorting (or "human sorting"), where the
non-numeric characters are ordered based on their meaning, not on their
-ordinal value; this can be achieved with the ``humansorted`` function::
+ordinal value; this can be achieved with the :func:`~humansorted` function::
>>> a = ['Apple', 'Banana', 'apple', 'banana']
>>> natsorted(a)
@@ -76,7 +78,20 @@ ordinal value; this can be achieved with the ``humansorted`` function::
You may find you need to explicitly set the locale to get this to work
(as shown in the example).
Please see :ref:`bug_note` and the Installation section
-below before using the ``humansorted`` function.
+below before using the :func:`~humansorted` function.
+
+You can sort signed floats (i.e. real numbers) using the :func:`~realsorted` function;
+this is useful in scientific data analysis. This was the default behavior of
+:func:`~natsorted` for :mod:`natsort` version < 4.0.0.
+
+.. code-block:: python
+
+ >>> from natsort import realsorted
+ >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
+ >>> natsorted(a)
+ ['num2', 'num5.3', 'num5.10', 'num-3']
+ >>> realsorted(a)
+ ['num-3', 'num2', 'num5.10', 'num5.3']
You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
when you sort::
@@ -143,9 +158,9 @@ If you want to build this documentation, enter::
python setup.py build_sphinx
-:mod:`natsort` requires python version 2.6 or greater
-(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the
-`argparse <https://pypi.python.org/pypi/argparse>`_ module is required.
+:mod:`natsort` requires Python version 2.7 or greater or Python 3.3 or greater.
+Python 2.6 and 3.2 are no longer officially supported (no unit tests are performed)
+but it should work.
The most efficient sorting can occur if you install the
`fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps
@@ -155,14 +170,32 @@ recommended you include this as a dependency. ``natsort`` will not require (or
check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed.
On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library
-can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use
-`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed
-on your computer; this will give more reliable cross-platform results.
-``natsort`` will not require (or check) that
-`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation
-since in Linux-based systems and Windows systems ``locale`` should work just fine.
-Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and
-how to install on Mac OS X.
+can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is
+used for the ``ns.LOCALE`` option and ``humansorted`` function. To remedy this,
+one can
+
+ 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8"
+ encoding. These encodings do not suffer from as many problems as "UTF-8"
+ and thus should give expected results.
+ 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
+ will use it under the hood; this will give more
+ reliable cross-platform results in the long run. ``natsort`` will not
+ require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
+ is installed at installation. Please visit
+ https://github.com/SethMMorton/natsort/issues/21 for more details and
+ how to install on Mac OS X. **Please note** that using
+ `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
+ guarantee correct results for all input on BSD-based systems, since
+ every other suggestion is a workaround.
+ 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
+ to compensate for a broken ``locale`` library in terms of case-handling;
+ if you do not need to be able to properly handle non-ASCII characters
+ then this may be the best option for you.
+
+Note that the above solutions *should not* be required for Windows or
+Linux since in Linux-based systems and Windows systems ``locale`` *should* work
+just fine.
:mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called
from the command line with ``python -m natsort``. The command line script is
diff --git a/docs/source/natsort_key.rst b/docs/source/natsort_key.rst
deleted file mode 100644
index 351b351..0000000
--- a/docs/source/natsort_key.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-.. default-domain:: py
-.. currentmodule:: natsort
-
-:func:`~natsort.natsort_key`
-============================
-
-.. autofunction:: natsort_key
-
diff --git a/docs/source/shell.rst b/docs/source/shell.rst
index 65cfc76..78dc3dc 100644
--- a/docs/source/shell.rst
+++ b/docs/source/shell.rst
@@ -48,20 +48,26 @@ Usage
Used to exclude an entry that contains a specific
number.
-r, --reverse Returns in reversed order.
- -t {digit,int,float,version,ver}, --number-type {digit,int,float,version,ver}
+ -t {digit,int,float,version,ver,real,f,i,r,d},
+ --number-type {digit,int,float,version,ver,real,f,i,r,d},
+ --number_type {digit,int,float,version,ver,real,f,i,r,d}
Choose the type of number to search for. "float" will
search for floating-point numbers. "int" will only
search for integers. "digit", "version", and "ver" are
- shortcuts for "int" with --nosign.
+ synonyms for "int"."real" is a shortcut for "float"
+ with --sign. "i" and "d" are synonyms for "int", "f"
+ is a synonym for "float", and "r" is a synonym for
+ "real".The default is int.
--nosign Do not consider "+" or "-" as part of a number, i.e.
- do not take sign into consideration.
+ do not take sign into consideration. This is the
+ default.
+ -s, --sign Consider "+" or "-" as part of a number, i.e. take
+ sign into consideration. The default is unsigned.
--noexp Do not consider an exponential as part of a number,
i.e. 1e4, would be considered as 1, "e", and 4, not as
10000. This only effects the --number-type=float.
- --locale, -l Causes natsort to use locale-aware sorting. On some
- systems, the underlying C library is broken, so if you
- get results that you do not expect please install
- PyICU and try again.
+ -l, --locale Causes natsort to use locale-aware sorting. You will
+ get the best results if you install PyICU.
Description
-----------
@@ -84,18 +90,18 @@ to bad analysis. To remedy this, use ``natsort``::
mode943.54.out
mode1000.35.out
mode1243.34.out
- $ natsort *.out | xargs your_program
+ $ natsort -t r *.out | xargs your_program
-You can also place natsort in the middle of a pipe::
+``-t r`` is short for ``--number-type real``. You can also place natsort in
+the middle of a pipe::
- $ find . -name "*.out" | natsort | xargs your_program
+ $ find . -name "*.out" | natsort -t r | xargs your_program
-To sort version numbers, use the ``--number-type version`` option
-(or ``-t ver`` for short)::
+To sort version numbers, use the default ``--number-type``::
$ ls *
prog-1.10.zip prog-1.9.zip prog-2.0.zip
- $ natsort -t ver *
+ $ natsort *
prog-1.9.zip
prog-1.10.zip
prog-2.0.zip
@@ -106,13 +112,13 @@ options. These three options are used as follows::
$ ls *.out
mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out
- $ natsort *.out -f 900 1100 # Select only numbers between 900-1100
+ $ natsort -t r *.out -f 900 1100 # Select only numbers between 900-1100
mode943.54.out
mode1000.35.out
- $ natsort *.out -F 900 1100 # Select only numbers NOT between 900-1100
+ $ natsort -t r *.out -F 900 1100 # Select only numbers NOT between 900-1100
mode744.43.out
mode1243.34.out
- $ natsort *.out -e 1000.35 # Exclude 1000.35 from search
+ $ natsort -t r *.out -e 1000.35 # Exclude 1000.35 from search
mode744.43.out
mode943.54.out
mode1243.34.out
diff --git a/natsort/__main__.py b/natsort/__main__.py
index 85edba3..e86097d 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -51,22 +51,31 @@ def main():
help='Returns in reversed order.')
parser.add_argument(
'-t', '--number-type', '--number_type', dest='number_type',
- choices=('digit', 'int', 'float', 'version', 'ver'), default='float',
+ choices=('digit', 'int', 'float', 'version', 'ver',
+ 'real', 'f', 'i', 'r', 'd'),
+ default='int',
help='Choose the type of number to search for. "float" will search '
'for floating-point numbers. "int" will only search for '
- 'integers. "digit", "version", and "ver" are shortcuts for "int" '
- 'with --nosign.')
+ 'integers. "digit", "version", and "ver" are synonyms for "int".'
+ '"real" is a shortcut for "float" with --sign. '
+ '"i" and "d" are synonyms for "int", "f" is a synonym for '
+ '"float", and "r" is a synonym for "real".'
+ 'The default is %(default)s.')
parser.add_argument(
- '--nosign', default=True, action='store_false', dest='signed',
+ '--nosign', default=False, action='store_false', dest='signed',
help='Do not consider "+" or "-" as part of a number, i.e. do not '
- 'take sign into consideration.')
+ 'take sign into consideration. This is the default.')
+ parser.add_argument(
+ '-s', '--sign', default=False, action='store_true', dest='signed',
+ help='Consider "+" or "-" as part of a number, i.e. '
+ 'take sign into consideration. The default is unsigned.')
parser.add_argument(
'--noexp', default=True, action='store_false', dest='exp',
help='Do not consider an exponential as part of a number, i.e. 1e4, '
'would be considered as 1, "e", and 4, not as 10000. This only '
'effects the --number-type=float.')
parser.add_argument(
- '--locale', '-l', action='store_true', default=False,
+ '-l', '--locale', action='store_true', default=False,
help='Causes natsort to use locale-aware sorting. You will get the '
'best results if you install PyICU.')
parser.add_argument(
@@ -143,14 +152,10 @@ def sort_and_print_entries(entries, args):
"""Sort the entries, applying the filters first if necessary."""
# Extract the proper number type.
- num_type = {'digit': None,
- 'version': None,
- 'ver': None,
- 'int': int,
- 'float': float}[args.number_type]
- unsigned = not args.signed or num_type is None
- alg = (ns.INT * int(num_type in (int, None)) |
- ns.UNSIGNED * unsigned |
+ is_float = args.number_type in ('float', 'real', 'f', 'r')
+ signed = args.signed or args.number_type in ('real', 'r')
+ alg = (ns.FLOAT * is_float |
+ ns.SIGNED * signed |
ns.NOEXP * (not args.exp) |
ns.PATH * args.paths |
ns.LOCALE * args.locale)
@@ -160,8 +165,8 @@ def sort_and_print_entries(entries, args):
# as for sorting.
do_filter = args.filter is not None or args.reverse_filter is not None
if do_filter or args.exclude:
- inp_options = (ns.INT * int(num_type in (int, None)) |
- ns.UNSIGNED * unsigned |
+ inp_options = (ns.FLOAT * is_float |
+ ns.SIGNED * signed |
ns.NOEXP * (not args.exp),
'.'
)
diff --git a/natsort/_version.py b/natsort/_version.py
index eea91d6..cc26564 100644
--- a/natsort/_version.py
+++ b/natsort/_version.py
@@ -2,4 +2,4 @@
from __future__ import (print_function, division,
unicode_literals, absolute_import)
-__version__ = '3.5.6'
+__version__ = '4.0.0'
diff --git a/natsort/fake_fastnumbers.py b/natsort/fake_fastnumbers.py
index 116bab1..e934313 100644
--- a/natsort/fake_fastnumbers.py
+++ b/natsort/fake_fastnumbers.py
@@ -8,24 +8,49 @@ from __future__ import (print_function, division,
unicode_literals, absolute_import)
# Std. lib imports.
+import sys
import re
+import unicodedata
+float_re = re.compile(r'[-+]?(\d*\.?\d+(?:[eE][-+]?\d+)?|inf(?:inity)?|nan)$')
+if sys.version[0] == '2':
+ int_re = re.compile(r'[-+]?\d+[lL]?$')
+else:
+ int_re = re.compile(r'[-+]?\d+$')
+ long = int
+ unicode = str
-float_re = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$')
-int_re = re.compile(r'[-+]?\d+$')
-
-def fast_float(x, regex_matcher=float_re.match):
+def fast_float(x, regex_matcher=float_re.match, uni=unicodedata.numeric):
"""Convert a string to a float quickly"""
- return float(x) if regex_matcher(x) else x
+ if type(x) in (int, long, float):
+ return float(x)
+ elif regex_matcher(x):
+ return float(x)
+ elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None:
+ return uni(x)
+ else:
+ return x
-def fast_int(x, regex_matcher=int_re.match):
+def fast_int(x, regex_matcher=int_re.match, uni=unicodedata.digit):
"""\
Convert a string to a int quickly, return input as-is if not possible.
"""
- return int(x) if regex_matcher(x) else x
+ if type(x) in (int, long, float):
+ return int(x)
+ elif regex_matcher(x):
+ return int(x.rstrip('Ll'))
+ elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None:
+ return uni(x)
+ else:
+ return x
+
+
+def isfloat(x, num_only=False):
+ """Returns true if the input is a float, false otherwise."""
+ return type(x) == float
-def isreal(x, ntypes=set([int, float])):
- """Returns true if the input is a real number, false otherwise."""
- return type(x) in ntypes
+def isint(x, num_only=False):
+ """Returns true if the input is an int, false otherwise."""
+ return type(x) in set([int, long])
diff --git a/natsort/locale_help.py b/natsort/locale_help.py
index 9a5b656..789b50b 100644
--- a/natsort/locale_help.py
+++ b/natsort/locale_help.py
@@ -13,21 +13,14 @@ from itertools import chain
from locale import localeconv
# Local imports.
-from natsort.py23compat import py23_zip
-
-# If the user has fastnumbers installed, they will get great speed
-# benefits. If not, we simulate the functions here.
-try:
- from fastnumbers import isreal
-except ImportError:
- from natsort.fake_fastnumbers import isreal
+from natsort.py23compat import PY_VERSION
# We need cmp_to_key for Python2 because strxfrm is broken for unicode.
-if sys.version[:3] == '2.7':
+try:
from functools import cmp_to_key
# cmp_to_key was not created till 2.7.
-elif sys.version[:3] == '2.6':
- def cmp_to_key(mycmp): # pragma: no cover
+except ImportError: # pragma: no cover
+ def cmp_to_key(mycmp):
"""Convert a cmp= function into a key= function"""
class K(object):
__slots__ = ['obj']
@@ -78,6 +71,9 @@ try:
return _d[l]
use_pyicu = True
null_string = b''
+
+ def dumb_sort():
+ return False
except ImportError:
if sys.version[0] == '2':
from locale import strcoll
@@ -88,10 +84,23 @@ except ImportError:
null_string = ''
use_pyicu = False
+ # On some systems, locale is broken and does not sort in the expected
+ # order. We will try to detect this and compensate.
+ def dumb_sort():
+ return strxfrm('A') < strxfrm('a')
+
+
+if PY_VERSION >= 3.3:
+ def _low(x):
+ return x.casefold()
+else:
+ def _low(x):
+ return x.lower()
+
def groupletters(x):
"""Double all characters, making doubled letters lowercase."""
- return ''.join(chain(*py23_zip(x.lower(), x)))
+ return ''.join(chain.from_iterable([_low(y), y] for y in x))
def grouper(val, func):
@@ -102,8 +111,8 @@ def grouper(val, func):
"""
# Return the number or transformed string.
# If the input is identical to the output, then no conversion happened.
- s = func(val)
- return groupletters(s) if val is s else s
+ s = func[0](val)
+ return groupletters(s) if not func[1](s) else s
def locale_convert(val, func, group):
@@ -119,7 +128,7 @@ def locale_convert(val, func, group):
s = val.replace(radix, '.') if radix != '.' else val
# Perform the conversion
- t = func(s)
+ t = func[0](s)
# Return the number or transformed string.
# If the input is identical to the output, then no conversion happened.
@@ -129,12 +138,12 @@ def locale_convert(val, func, group):
if group:
if use_pyicu:
xfrm = get_pyicu_transform(getlocale())
- return xfrm(groupletters(val)) if not isreal(t) else t
+ return xfrm(groupletters(val)) if not func[1](t) else t
else:
- return strxfrm(groupletters(val)) if not isreal(t) else t
+ return strxfrm(groupletters(val)) if not func[1](t) else t
else:
if use_pyicu:
xfrm = get_pyicu_transform(getlocale())
- return xfrm(val) if not isreal(t) else t
+ return xfrm(val) if not func[1](t) else t
else:
- return strxfrm(val) if not isreal(t) else t
+ return strxfrm(val) if not func[1](t) else t
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 8fb6754..78c0c24 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -122,135 +122,15 @@ def as_utf8(s):
return _do_decoding(s, 'utf-8')
-@u_format
-def natsort_key(val, key=None, number_type=float, signed=None, exp=None,
- as_path=None, py3_safe=None, alg=0):
- """\
- Key to sort strings and numbers naturally.
-
- Key to sort strings and numbers naturally, not lexicographically.
- It is designed for use in passing to the 'sorted' builtin or
- 'sort' attribute of lists.
-
- .. note:: Deprecated since version 3.4.0.
- This function remains in the publicly exposed API for
- backwards-compatibility reasons, but future development
- should use the newer `natsort_keygen` function. It is
- planned to remove this from the public API in natsort
- version 4.0.0. A DeprecationWarning will be raised
- via the warnings module; set warnings.simplefilter("always")
- to raise them to see if your code will work in version
- 4.0.0.
-
- Parameters
- ----------
- val : {{str, unicode}}
- The value used by the sorting algorithm
-
- key : callable, optional
- A key used to manipulate the input value before parsing for
- numbers. It is **not** applied recursively.
- It should accept a single argument and return a single value.
-
- number_type : {{None, float, int}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- signed : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- exp : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- py3_safe : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- alg : ns enum, optional
- This option is used to control which algorithm `natsort`
- uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.FLOAT`.
-
- Returns
- -------
- out : tuple
- The modified value with numbers extracted.
-
- See Also
- --------
- natsort_keygen : Generates a properly wrapped `natsort_key`.
-
- Examples
- --------
- Using natsort_key is just like any other sorting key in python::
-
- >>> a = ['num3', 'num5', 'num2']
- >>> a.sort(key=natsort_key)
- >>> a
- [{u}'num2', {u}'num3', {u}'num5']
-
- It works by separating out the numbers from the strings::
-
- >>> natsort_key('num2')
- ({u}'num', 2.0)
-
- If you need to call natsort_key with the number_type argument, or get a
- special attribute or item of each element of the sequence, please use
- the `natsort_keygen` function. Actually, please just use the
- `natsort_keygen` function.
-
- Notes
- -----
- Iterables are parsed recursively so you can sort lists of lists::
-
- >>> natsort_key(('a1', 'a10'))
- (({u}'a', 1.0), ({u}'a', 10.0))
-
- Strings that lead with a number get an empty string at the front of the
- tuple. This is designed to get around the "unorderable types" issue of
- Python3::
-
- >>> natsort_key('15a')
- ({u}'', 15.0, {u}'a')
-
- You can give bare numbers, too::
-
- >>> natsort_key(10)
- ({u}'', 10)
-
- If you have a case where one of your string has two numbers in a row,
- you can turn on the "py3_safe" option to try to add a "" between sets
- of two numbers::
-
- >>> natsort_key('43h7+3', py3_safe=True)
- ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0)
-
- """
+def natsort_key(val, key=None, alg=0, **_kwargs):
+ """Undocumented, kept for backwards-compatibility."""
msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen"
warn(msg, DeprecationWarning)
- alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg
- return _natsort_key(val, key, alg)
+ return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg)
@u_format
-def natsort_keygen(key=None, number_type=float, signed=None, exp=None,
- as_path=None, py3_safe=None, alg=0):
+def natsort_keygen(key=None, alg=0, **_kwargs):
"""\
Generate a key to sort strings and numbers naturally.
@@ -269,40 +149,10 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None,
numbers. It is **not** applied recursively.
It should accept a single argument and return a single value.
- number_type : {{None, float, int}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- signed : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- exp : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- py3_safe : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
alg : ns enum, optional
This option is used to control which algorithm `natsort`
uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.FLOAT`.
+ the :class:`ns` class documentation. The default is `ns.INT`.
Returns
-------
@@ -311,6 +161,10 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None,
suitable for passing as the `key` argument to functions
such as `sorted`.
+ See Also
+ --------
+ natsorted
+
Examples
--------
`natsort_keygen` is a convenient way to create a custom key
@@ -318,32 +172,16 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None,
will return a plain `natsort_key` instance::
>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
- >>> b = a[:]
- >>> a.sort(key=natsort_key)
- >>> b.sort(key=natsort_keygen())
- >>> a == b
- True
-
- The power of `natsort_keygen` is when you want to want to pass
- arguments to the `natsort_key`. Consider the following
- equivalent examples; which is more clear? ::
-
- >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
- >>> b = a[:]
- >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(),
- ... signed=False))
- >>> b.sort(key=natsort_keygen(key=lambda x: x.upper(), signed=False))
- >>> a == b
- True
+ >>> a.sort(key=natsort_keygen(alg=ns.REAL))
+ >>> a
+ [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']
"""
- alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg
- return partial(_natsort_key, key=key, alg=alg)
+ return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg)
@u_format
-def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
- reverse=False, as_path=None, alg=0):
+def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
"""\
Sorts a sequence naturally.
@@ -361,38 +199,14 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
It is **not** applied recursively.
It should accept a single argument and return a single value.
- number_type : {{None, float, int}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- signed : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- exp : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
reverse : {{True, False}}, optional
Return the list in reversed sorted order. The default is
`False`.
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
alg : ns enum, optional
This option is used to control which algorithm `natsort`
uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.FLOAT`.
+ the :class:`ns` class documentation. The default is `ns.INT`.
Returns
-------
@@ -402,8 +216,7 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
See Also
--------
natsort_keygen : Generates the key that makes natural sorting possible.
- versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``.
- realsorted : Identical to ``natsorted(seq)``; for forwards-compatibility.
+ realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``.
humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``.
index_natsorted : Returns the sorted indexes from `natsorted`.
@@ -416,10 +229,9 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
[{u}'num2', {u}'num3', {u}'num5']
"""
- alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg
+ alg = _args_to_enum(**_kwargs) | alg
try:
- return sorted(seq, reverse=reverse,
- key=natsort_keygen(key, alg=alg))
+ return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg=alg))
except TypeError as e: # pragma: no cover
# In the event of an unresolved "unorderable types" error
# for string to number type comparisons (not str/bytes),
@@ -435,58 +247,21 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
@u_format
-def versorted(seq, key=None, reverse=False, as_path=None, alg=0):
+def versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
"""\
- Convenience function to sort version numbers.
-
- Convenience function to sort version numbers. This is a wrapper
- around ``natsorted(seq, alg=ns.VERSION)``.
-
- Parameters
- ----------
- seq : iterable
- The sequence to sort.
-
- key : callable, optional
- A key used to determine how to sort each element of the sequence.
- It is **not** applied recursively.
- It should accept a single argument and return a single value.
-
- reverse : {{True, False}}, optional
- Return the list in reversed sorted order. The default is
- `False`.
-
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
+ Identical to :func:`natsorted`.
- alg : ns enum, optional
- This option is used to control which algorithm `natsort`
- uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.VERSION`.
+ This function exists for backwards compatibility with `natsort`
+ version < 4.0.0. Future development should use :func:`natsorted`.
- Returns
- -------
- out : list
- The sorted sequence.
+ Please see the :func:`natsorted` documentation for use.
See Also
--------
- index_versorted : Returns the sorted indexes from `versorted`.
-
- Examples
- --------
- Use `versorted` just like the builtin `sorted`::
-
- >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2']
- >>> versorted(a)
- [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2']
+ natsorted
"""
- alg = _args_to_enum(float, None, None, as_path, None) | alg
- return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION)
+ return natsorted(seq, key, reverse, alg, **_kwargs)
@u_format
@@ -502,7 +277,8 @@ def humansorted(seq, key=None, reverse=False, alg=0):
C library that Python's locale module uses is broken.
On these systems it is recommended that you install
`PyICU <https://pypi.python.org/pypi/PyICU>`_
- if you wish to use ``humansorted``. If you are on
+ if you wish to use ``humansorted``, especially if you need
+ to handle non-ASCII characters. If you are on
one of systems and get unexpected results, please try
using `PyICU <https://pypi.python.org/pypi/PyICU>`_
before filing a bug report to `natsort`.
@@ -538,10 +314,11 @@ def humansorted(seq, key=None, reverse=False, alg=0):
Notes
-----
You may find that if you do not explicitly set
- the locale your results may not be as you expect... I have found that
- it depends on the system you are on. To do this is straightforward
- (in the below example I use 'en_US.UTF-8', but you should use your
- locale)::
+ the locale your results may not be as you expect, although
+ as of ``natsort`` version 4.0.0 the sorting algorithm has been
+ updated to account for a buggy ``locale`` installation.
+ In the below example 'en_US.UTF-8' is used, but you should use your
+ locale::
>>> import locale
>>> # The 'str' call is only to get around a bug on Python 2.x
@@ -552,7 +329,7 @@ def humansorted(seq, key=None, reverse=False, alg=0):
It is preferred that you do this before importing `natsort`.
If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
- above) then you should not need to do this.
+ above) then you should not need to explicitly set a locale.
Examples
--------
@@ -565,20 +342,21 @@ def humansorted(seq, key=None, reverse=False, alg=0):
[{u}'apple', {u}'Apple', {u}'banana', {u}'Banana']
"""
- return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE)
+ return natsorted(seq, key, reverse, alg | ns.LOCALE)
@u_format
def realsorted(seq, key=None, reverse=False, alg=0):
"""\
- Identical to :func:`natsorted`.
+ Convenience function to properly sort signed floats.
+
+ Convenience function to properly sort signed floats within
+ strings (i.e. "a-5.7"). This is a wrapper around
+ ``natsorted(seq, alg=ns.REAL)``.
- This is provided for forward-compatibility with :mod:`natsort`
- version >= 4.0.0. If you are relying on the default sorting
- behavior of :func:`natsorted` to sort by signed floats,
- you should consider using this function as the default sorting
- behavior of :func:`natsorted` will changed to unsigned
- integers in :mod:`natsort` version >= 4.0.0.
+ The behavior of :func:`realsorted` for `natsort` version >= 4.0.0
+ was the default behavior of :func:`natsorted` for `natsort`
+ version < 4.0.0.
Parameters
----------
@@ -597,7 +375,7 @@ def realsorted(seq, key=None, reverse=False, alg=0):
alg : ns enum, optional
This option is used to control which algorithm `natsort`
uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.FLOAT`.
+ the :class:`ns` class documentation. The default is `ns.REAL`.
Returns
-------
@@ -613,16 +391,17 @@ def realsorted(seq, key=None, reverse=False, alg=0):
Use `realsorted` just like the builtin `sorted`::
>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
+ >>> natsorted(a)
+ [{u}'num2', {u}'num5.3', {u}'num5.10', {u}'num-3']
>>> realsorted(a)
[{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']
"""
- return natsorted(seq, key=key, reverse=reverse, alg=alg)
+ return natsorted(seq, key, reverse, alg | ns.REAL)
@u_format
-def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None,
- reverse=False, as_path=None, alg=0):
+def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
"""\
Return the list of the indexes used to sort the input sequence.
@@ -641,38 +420,14 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None,
It is **not** applied recursively.
It should accept a single argument and return a single value.
- number_type : {{None, float, int}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- signed : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
- exp : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
reverse : {{True, False}}, optional
Return the list in reversed sorted order. The default is
`False`.
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
-
alg : ns enum, optional
This option is used to control which algorithm `natsort`
uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.FLOAT`.
+ the :class:`ns` class documentation. The default is `ns.INT`.
Returns
-------
@@ -702,7 +457,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None,
[{u}'baz', {u}'foo', {u}'bar']
"""
- alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg
+ alg = _args_to_enum(**_kwargs) | alg
if key is None:
newkey = itemgetter(1)
else:
@@ -727,64 +482,22 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None,
@u_format
-def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0):
+def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
"""\
- Return the list of the indexes used to sort the input sequence
- of version numbers.
-
- Sorts a sequence of version, but returns a list of sorted the
- indexes and not the sorted list. This list of indexes can be
- used to sort multiple lists by the sorted order of the given
- sequence.
-
- This is a wrapper around ``index_natsorted(seq, number_type=None)``.
-
- Parameters
- ----------
- seq: iterable
- The sequence to sort.
-
- key: callable, optional
- A key used to determine how to sort each element of the sequence.
- It is **not** applied recursively.
- It should accept a single argument and return a single value.
-
- reverse : {{True, False}}, optional
- Return the list in reversed sorted order. The default is
- `False`.
+ Identical to :func:`index_natsorted`.
- as_path : {{True, False}}, optional
- Deprecated as of version 3.5.0 and will become an undocumented
- keyword-only argument in 4.0.0. Please use the `alg` argument
- for all future development. See :class:`ns` class documentation for
- details.
+ This function exists for backwards compatibility with
+ ``natsort`` version < 4.0.0. Future development should use
+ :func:`index_natsorted`.
- alg : ns enum, optional
- This option is used to control which algorithm `natsort`
- uses when sorting. For details into these options, please see
- the :class:`ns` class documentation. The default is `ns.VERSION`.
-
- Returns
- -------
- out : tuple
- The ordered indexes of the sequence.
+ Please see the :func:`index_natsorted` documentation for use.
See Also
--------
- versorted
- order_by_index
-
- Examples
- --------
- Use `index_versorted` just like the builtin `sorted`::
-
- >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2']
- >>> index_versorted(a)
- [1, 2, 0]
+ index_natsorted
"""
- alg = _args_to_enum(float, None, None, as_path, None) | alg
- return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION)
+ return index_natsorted(seq, key, reverse, alg, **_kwargs)
@u_format
@@ -799,6 +512,8 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
of the given sequence.
This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``.
+ Please see the ``humansorted`` documentation for caveats of
+ using ``index_humansorted``.
Parameters
----------
@@ -832,10 +547,11 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
Notes
-----
You may find that if you do not explicitly set
- the locale your results may not be as you expect... I have found that
- it depends on the system you are on. To do this is straightforward
- (in the below example I use 'en_US.UTF-8', but you should use your
- locale)::
+ the locale your results may not be as you expect, although
+ as of ``natsort`` version 4.0.0 the sorting algorithm has been
+ updated to account for a buggy ``locale`` installation.
+ In the below example 'en_US.UTF-8' is used, but you should use your
+ locale::
>>> import locale
>>> # The 'str' call is only to get around a bug on Python 2.x
@@ -846,7 +562,7 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
It is preferred that you do this before importing `natsort`.
If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
- above) then you should not need to do this.
+ above) then you should not need to explicitly set a locale.
Examples
--------
@@ -857,20 +573,25 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
[2, 0, 3, 1]
"""
- return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE)
+ return index_natsorted(seq, key, reverse, alg | ns.LOCALE)
@u_format
def index_realsorted(seq, key=None, reverse=False, alg=0):
"""\
- Identical to :func:`index_natsorted`.
+ Return the list of the indexes used to sort the input sequence
+ while treating numbers as signed floats.
+
+ Sorts a sequence treating numbers as signed floats, but returns a list
+ of the sorted indexes and not the sorted list. This list of
+ indexes can be used to sort multiple lists by the sorted order
+ of the given sequence.
+
+ This is a wrapper around ``index_natsorted(seq, alg=ns.REAL)``.
- This is provided for forward-compatibility with :mod:`natsort`
- version >= 4.0.0. If you are relying on the default sorting
- behavior of :func:`index_natsorted` to sort by signed floats,
- you should consider using this function as the default sorting
- behavior of :func:`index_natsorted` will changed to unsigned
- integers in :mod:`natsort` version >= 4.0.0.
+ The behavior of :func:`index_realsorted` in `natsort` version >= 4.0.0
+ was the default behavior of :func:`index_natsorted` for `natsort`
+ version < 4.0.0.
Parameters
----------
@@ -889,7 +610,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0):
alg : ns enum, optional
This option is used to control which algorithm `natsort`
uses when sorting. For details into these options, please see
- the :class:`ns` class documentation.
+ the :class:`ns` class documentation. The default is `ns.REAL`.
Returns
-------
@@ -910,7 +631,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0):
[1, 3, 0, 2]
"""
- return index_natsorted(seq, key=key, reverse=reverse, alg=alg)
+ return index_natsorted(seq, key, reverse, alg | ns.REAL)
@u_format
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index f568382..8b9d794 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -20,40 +20,51 @@ class ns(object):
C library that Python's locale module uses is broken.
On these systems it is recommended that you install
`PyICU <https://pypi.python.org/pypi/PyICU>`_
- if you wish to use ``LOCALE``. If you are on one of
+ if you wish to use ``LOCALE``, especially if you need
+ to handle non-ASCII characters. If you are on one of
systems and get unexpected results, please try using
`PyICU <https://pypi.python.org/pypi/PyICU>`_ before
filing a bug report to ``natsort``.
Attributes
----------
+ INT, I (default)
+ The default - parse numbers as integers.
FLOAT, F
- The default - parse numbers as floats.
- INT, I
- Tell `natsort` to parse numbers as ints.
- UNSIGNED, U
- Tell `natsort` to ignore any sign (i.e. "-" or "+") to the
- immediate left of a number. It is the same as setting the old
- `signed` option to `False`.
+ Tell `natsort` to parse numbers as floats.
+ UNSIGNED, U (default)
+ Tell `natsort` to ignore any sign (i.e. "-" or "+") to the immediate
+ left of a number. It is the same as setting the old `signed` option
+ to `False`. This is the default.
+ SIGNED, S
+ Tell `natsort` to take into account any sign (i.e. "-" or "+")
+ to the immediate left of a number. It is the same as setting
+ the old `signed` option to `True`.
VERSION, V
This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful
when attempting to sort version numbers. It is the same as
- setting the old `number_type` option to `None`.
+ setting the old `number_type` option to `None`. Since
+ ``ns.INT | ns.UNSIGNED`` is the default, this is
+ unnecessary.
DIGIT, D
Same as `VERSION` above.
+ REAL, R
+ This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful
+ when attempting to sort real numbers.
NOEXP, N
Tell `natsort` to not search for exponents as part of the number.
For example, with `NOEXP` the number "5.6E5" would be interpreted
- as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp`
- option to `False`.
+ as `5.6`, `"E"`, and `5`. It is the same as setting the old
+ `exp` option to `False`.
PATH, P
Tell `natsort` to interpret strings as filesystem paths, so they
will be split according to the filesystem separator
(i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the
file extension, if any. Without this, lists of file paths like
- ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted
- properly; 'Folder/' will be placed at the end, not at the front.
- It is the same as setting the old `as_path` option to `True`.
+ ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be
+ sorted properly; 'Folder/' will be placed at the end, not at the
+ front. It is the same as setting the old `as_path` option to
+ `True`.
LOCALE, L
Tell `natsort` to be locale-aware when sorting strings (everything
that was not converted to a number). Your sorting results will vary
@@ -72,7 +83,11 @@ class ns(object):
``['apple', 'banana', 'Apple', 'Banana']`` (the default order
would be ``['Apple', 'Banana', 'apple', 'banana']`` which is
the order from a purely ordinal sort).
- Useless when used with `IGNORECASE`.
+ Useless when used with `IGNORECASE`. Please note that if used
+ with ``LOCALE``, this actually has the reverse effect and will
+ put uppercase first (this is because ``LOCALE`` already puts
+ lowercase first); you may use this to your advantage if you
+ need to modify the order returned with ``LOCALE``.
GROUPLETTERS, G
Tell `natsort` to group lowercase and uppercase letters together
when sorting. For example,
@@ -90,9 +105,8 @@ class ns(object):
TYPESAFE, T
Try hard to avoid "unorderable types" error on Python 3. It
is the same as setting the old `py3_safe` option to `True`.
- This is only needed if not using ``UNSIGNED`` or if
- sorting by ``FLOAT``.
- You shouldn't need to use this unless you are using
+ This is only needed if using ``SIGNED`` or if sorting by
+ ``FLOAT``. You shouldn't need to use this unless you are using
``natsort_keygen``. *NOTE:* It cannot resolve the ``TypeError``
from trying to compare `str` and `bytes`.
@@ -120,11 +134,14 @@ class ns(object):
# Sort algorithm "enum" values.
-_ns = {'FLOAT': 0, 'F': 0,
- 'INT': 1, 'I': 1,
- 'UNSIGNED': 2, 'U': 2,
- 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED
- 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED
+_ns = {
+ 'INT': 0, 'I': 0,
+ 'FLOAT': 1, 'F': 1,
+ 'UNSIGNED': 0, 'U': 0,
+ 'SIGNED': 2, 'S': 2,
+ 'VERSION': 0, 'V': 0, # Shortcut for INT | UNSIGNED
+ 'DIGIT': 0, 'D': 0, # Shortcut for INT | UNSIGNED
+ 'REAL': 3, 'R': 3, # Shortcut for FLOAT | SIGNED
'NOEXP': 4, 'N': 4,
'PATH': 8, 'P': 8,
'LOCALE': 16, 'L': 16,
diff --git a/natsort/py23compat.py b/natsort/py23compat.py
index 3f3fb92..3c9f88b 100644
--- a/natsort/py23compat.py
+++ b/natsort/py23compat.py
@@ -9,6 +9,9 @@ import sys
# python2 and python3. This code is pretty much lifted from the iPython
# project's py3compat.py file. Credit to the iPython devs.
+# Numeric form of version
+PY_VERSION = float(sys.version[:3])
+
# Assume all strings are Unicode in Python 2
py23_str = str if sys.version[0] == '3' else unicode
@@ -18,6 +21,9 @@ py23_range = range if sys.version[0] == '3' else xrange
# Uniform base string type
py23_basestring = str if sys.version[0] == '3' else basestring
+# unichr function
+py23_unichr = chr if sys.version[0] == '3' else unichr
+
# zip as an iterator
if sys.version[0] == '3':
py23_zip = zip
diff --git a/natsort/unicode_numbers.py b/natsort/unicode_numbers.py
new file mode 100644
index 0000000..a0e8359
--- /dev/null
+++ b/natsort/unicode_numbers.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+"""
+Contains all possible non-ASCII unicode numbers.
+
+"""
+
+from __future__ import (print_function, division,
+ unicode_literals, absolute_import)
+
+# Std. lib imports.
+import unicodedata
+
+# Local imports.
+from natsort.py23compat import py23_unichr
+
+
+# Rather than determine this on the fly, which would incur a startup
+# runtime penalty, the hex values of the Unicode numeric characters
+# are hard-coded below.
+numeric_hex = [
+ 0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, 0X663, 0X664,
+ 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, 0X6F1, 0X6F2, 0X6F3, 0X6F4,
+ 0X6F5, 0X6F6, 0X6F7, 0X6F8, 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4,
+ 0X7C5, 0X7C6, 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A,
+ 0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, 0X9E9, 0X9EA,
+ 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, 0X9F5, 0X9F6, 0X9F7, 0X9F8,
+ 0X9F9, 0XA66, 0XA67, 0XA68, 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E,
+ 0XA6F, 0XAE6, 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE,
+ 0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, 0XB6D, 0XB6E,
+ 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, 0XB77, 0XBE6, 0XBE7, 0XBE8,
+ 0XBE9, 0XBEA, 0XBEB, 0XBEC, 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2,
+ 0XC66, 0XC67, 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F,
+ 0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, 0XCE7, 0XCE8,
+ 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, 0XCEF, 0XD66, 0XD67, 0XD68,
+ 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72,
+ 0XD73, 0XD74, 0XD75, 0XE50, 0XE51, 0XE52, 0XE53, 0XE54, 0XE55, 0XE56,
+ 0XE57, 0XE58, 0XE59, 0XED0, 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6,
+ 0XED7, 0XED8, 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26,
+ 0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, 0XF2F, 0XF30,
+ 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, 0X1043, 0X1044, 0X1045,
+ 0X1046, 0X1047, 0X1048, 0X1049, 0X1090, 0X1091, 0X1092, 0X1093, 0X1094,
+ 0X1095, 0X1096, 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C,
+ 0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, 0X1374, 0X1375,
+ 0X1376, 0X1377, 0X1378, 0X1379, 0X137A, 0X137B, 0X137C, 0X16EE, 0X16EF,
+ 0X16F0, 0X17E0, 0X17E1, 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7,
+ 0X17E8, 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, 0X17F6,
+ 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, 0X1813, 0X1814, 0X1815,
+ 0X1816, 0X1817, 0X1818, 0X1819, 0X1946, 0X1947, 0X1948, 0X1949, 0X194A,
+ 0X194B, 0X194C, 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3,
+ 0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, 0X1A80, 0X1A81,
+ 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, 0X1A87, 0X1A88, 0X1A89, 0X1A90,
+ 0X1A91, 0X1A92, 0X1A93, 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99,
+ 0X1B50, 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, 0X1B58,
+ 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, 0X1BB5, 0X1BB6, 0X1BB7,
+ 0X1BB8, 0X1BB9, 0X1C40, 0X1C41, 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46,
+ 0X1C47, 0X1C48, 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55,
+ 0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, 0X2076, 0X2077,
+ 0X2078, 0X2079, 0X2080, 0X2081, 0X2082, 0X2083, 0X2084, 0X2085, 0X2086,
+ 0X2087, 0X2088, 0X2089, 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155,
+ 0X2156, 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, 0X215E,
+ 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, 0X2165, 0X2166, 0X2167,
+ 0X2168, 0X2169, 0X216A, 0X216B, 0X216C, 0X216D, 0X216E, 0X216F, 0X2170,
+ 0X2171, 0X2172, 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179,
+ 0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, 0X2181, 0X2182,
+ 0X2185, 0X2186, 0X2187, 0X2188, 0X2189, 0X2460, 0X2461, 0X2462, 0X2463,
+ 0X2464, 0X2465, 0X2466, 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C,
+ 0X246D, 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, 0X2475,
+ 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, 0X247C, 0X247D, 0X247E,
+ 0X247F, 0X2480, 0X2481, 0X2482, 0X2483, 0X2484, 0X2485, 0X2486, 0X2487,
+ 0X2488, 0X2489, 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490,
+ 0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, 0X2498, 0X2499,
+ 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, 0X24ED, 0X24EE, 0X24EF, 0X24F0,
+ 0X24F1, 0X24F2, 0X24F3, 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9,
+ 0X24FA, 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, 0X2778,
+ 0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E, 0X277F, 0X2780, 0X2781,
+ 0X2782, 0X2783, 0X2784, 0X2785, 0X2786, 0X2787, 0X2788, 0X2789, 0X278A,
+ 0X278B, 0X278C, 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793,
+ 0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, 0X3026, 0X3027,
+ 0X3028, 0X3029, 0X3038, 0X3039, 0X303A, 0X3192, 0X3193, 0X3194, 0X3195,
+ 0X3220, 0X3221, 0X3222, 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228,
+ 0X3229, 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, 0X324F,
+ 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, 0X3257, 0X3258, 0X3259,
+ 0X325A, 0X325B, 0X325C, 0X325D, 0X325E, 0X325F, 0X3280, 0X3281, 0X3282,
+ 0X3283, 0X3284, 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2,
+ 0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, 0X32BA, 0X32BB,
+ 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, 0X3483, 0X382A, 0X3B4D, 0X4E00,
+ 0X4E03, 0X4E07, 0X4E09, 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0,
+ 0X4EDF, 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, 0X516D,
+ 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, 0X53C2, 0X53C3, 0X53C4,
+ 0X56DB, 0X58F1, 0X58F9, 0X5E7A, 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E,
+ 0X5F10, 0X62FE, 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C,
+ 0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, 0X96F6, 0XA620,
+ 0XA621, 0XA622, 0XA623, 0XA624, 0XA625, 0XA626, 0XA627, 0XA628, 0XA629,
+ 0XA6E6, 0XA6E7, 0XA6E8, 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE,
+ 0XA6EF, 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, 0XA8D1,
+ 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, 0XA8D8, 0XA8D9, 0XA900,
+ 0XA901, 0XA902, 0XA903, 0XA904, 0XA905, 0XA906, 0XA907, 0XA908, 0XA909,
+ 0XA9D0, 0XA9D1, 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8,
+ 0XA9D9, 0XAA50, 0XAA51, 0XAA52, 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57,
+ 0XAA58, 0XAA59, 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6,
+ 0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, 0XF9D1, 0XF9D3,
+ 0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13, 0XFF14, 0XFF15, 0XFF16, 0XFF17,
+ 0XFF18, 0XFF19, 0X10107, 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C,
+ 0X1010D, 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, 0X10114,
+ 0X10115, 0X10116, 0X10117, 0X10118, 0X10119, 0X1011A, 0X1011B, 0X1011C,
+ 0X1011D, 0X1011E, 0X1011F, 0X10120, 0X10121, 0X10122, 0X10123, 0X10124,
+ 0X10125, 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, 0X1012C,
+ 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, 0X10132, 0X10133, 0X10140,
+ 0X10141, 0X10142, 0X10143, 0X10144, 0X10145, 0X10146, 0X10147, 0X10148,
+ 0X10149, 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, 0X10150,
+ 0X10151, 0X10152, 0X10153, 0X10154, 0X10155, 0X10156, 0X10157, 0X10158,
+ 0X10159, 0X1015A, 0X1015B, 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160,
+ 0X10161, 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, 0X10168,
+ 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, 0X1016E, 0X1016F, 0X10170,
+ 0X10171, 0X10172, 0X10173, 0X10174, 0X10175, 0X10176, 0X10177, 0X10178,
+ 0X1018A, 0X10320, 0X10321, 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1,
+ 0X103D2, 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, 0X104A3,
+ 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, 0X104A9, 0X10858, 0X10859,
+ 0X1085A, 0X1085B, 0X1085C, 0X1085D, 0X1085E, 0X1085F, 0X10916, 0X10917,
+ 0X10918, 0X10919, 0X1091A, 0X1091B, 0X10A40, 0X10A41, 0X10A42, 0X10A43,
+ 0X10A44, 0X10A45, 0X10A46, 0X10A47, 0X10A7D, 0X10A7E, 0X10B58, 0X10B59,
+ 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, 0X10B5F, 0X10B78, 0X10B79,
+ 0X10B7A, 0X10B7B, 0X10B7C, 0X10B7D, 0X10B7E, 0X10B7F, 0X10E60, 0X10E61,
+ 0X10E62, 0X10E63, 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69,
+ 0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, 0X10E70, 0X10E71,
+ 0X10E72, 0X10E73, 0X10E74, 0X10E75, 0X10E76, 0X10E77, 0X10E78, 0X10E79,
+ 0X10E7A, 0X10E7B, 0X10E7C, 0X10E7D, 0X10E7E, 0X11052, 0X11053, 0X11054,
+ 0X11055, 0X11056, 0X11057, 0X11058, 0X11059, 0X1105A, 0X1105B, 0X1105C,
+ 0X1105D, 0X1105E, 0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064,
+ 0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, 0X1106B, 0X1106C,
+ 0X1106D, 0X1106E, 0X1106F, 0X110F0, 0X110F1, 0X110F2, 0X110F3, 0X110F4,
+ 0X110F5, 0X110F6, 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138,
+ 0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, 0X1113F, 0X111D0,
+ 0X111D1, 0X111D2, 0X111D3, 0X111D4, 0X111D5, 0X111D6, 0X111D7, 0X111D8,
+ 0X111D9, 0X116C0, 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6,
+ 0X116C7, 0X116C8, 0X116C9, 0X12400, 0X12401, 0X12402, 0X12403, 0X12404,
+ 0X12405, 0X12406, 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C,
+ 0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, 0X12413, 0X12414,
+ 0X12415, 0X12416, 0X12417, 0X12418, 0X12419, 0X1241A, 0X1241B, 0X1241C,
+ 0X1241D, 0X1241E, 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424,
+ 0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, 0X1242B, 0X1242C,
+ 0X1242D, 0X1242E, 0X1242F, 0X12430, 0X12431, 0X12432, 0X12433, 0X12434,
+ 0X12435, 0X12436, 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C,
+ 0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, 0X12443, 0X12444,
+ 0X12445, 0X12446, 0X12447, 0X12448, 0X12449, 0X1244A, 0X1244B, 0X1244C,
+ 0X1244D, 0X1244E, 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454,
+ 0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, 0X1245B, 0X1245C,
+ 0X1245D, 0X1245E, 0X1245F, 0X12460, 0X12461, 0X12462, 0X1D360, 0X1D361,
+ 0X1D362, 0X1D363, 0X1D364, 0X1D365, 0X1D366, 0X1D367, 0X1D368, 0X1D369,
+ 0X1D36A, 0X1D36B, 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371,
+ 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, 0X1D7D3, 0X1D7D4, 0X1D7D5,
+ 0X1D7D6, 0X1D7D7, 0X1D7D8, 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD,
+ 0X1D7DE, 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, 0X1D7E5,
+ 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, 0X1D7EB, 0X1D7EC, 0X1D7ED,
+ 0X1D7EE, 0X1D7EF, 0X1D7F0, 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5,
+ 0X1D7F6, 0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD,
+ 0X1D7FE, 0X1D7FF, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105,
+ 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X20001, 0X20064, 0X200E2,
+ 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19,
+ 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890,
+]
+
+# Convert each hex into the literal Unicode character.
+# Stop if a ValueError is raised in case of a narrow Unicode build.
+# The extra check with unicodedata is in case this Python version
+# does not support some characters.
+numeric_chars = []
+for a in numeric_hex:
+ try:
+ l = py23_unichr(a)
+ except ValueError:
+ break
+ if unicodedata.numeric(l, None) is None:
+ continue
+ numeric_chars.append(l)
+
+# The digit characters are a subset of the numerals.
+digit_chars = [a for a in numeric_chars
+ if unicodedata.digit(a, None) is not None]
+
+# Create a single string with the above data.
+digits = ''.join(digit_chars)
+numeric = ''.join(numeric_chars)
diff --git a/natsort/utils.py b/natsort/utils.py
index 3e756b7..a272f29 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -17,66 +17,93 @@ from itertools import islice
from locale import localeconv
# Local imports.
-from natsort.locale_help import locale_convert, grouper, null_string
-from natsort.py23compat import py23_str, py23_zip
+from natsort.locale_help import (locale_convert, grouper,
+ null_string, use_pyicu, dumb_sort)
+from natsort.py23compat import py23_str, py23_zip, PY_VERSION
from natsort.ns_enum import ns, _ns
+from natsort.unicode_numbers import digits, numeric
# If the user has fastnumbers installed, they will get great speed
# benefits. If not, we simulate the functions here.
try:
- from fastnumbers import fast_float, fast_int, isreal
+ from fastnumbers import fast_float, fast_int, isint, isfloat
+ import fastnumbers
+ v = list(map(int, fastnumbers.__version__.split('.')))
+ if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0.
+ raise ImportError
except ImportError:
- from natsort.fake_fastnumbers import fast_float, fast_int, isreal
+ from natsort.fake_fastnumbers import fast_float, fast_int, isint, isfloat
# If the user has pathlib installed, the ns.PATH option will convert
# Path objects to str before sorting.
try:
from pathlib import PurePath # PurePath is the base object for Paths.
-except ImportError:
+except ImportError: # pragma: no cover
PurePath = object # To avoid NameErrors.
has_pathlib = False
else:
has_pathlib = True
# Group algorithm types for easy extraction
-_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP
-_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L |
+_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.SIGNED | ns.NOEXP
+_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.S | ns.N | ns.L |
ns.IC | ns.LF | ns.G | ns.UG | ns.TYPESAFE)
-# The regex that locates floats
-_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U)
-_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U)
-_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U)
-_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U)
-
-# Integer regexes
-_int_nosign_re = re.compile(r'(\d+)', re.U)
-_int_sign_re = re.compile(r'([-+]?\d+)', re.U)
+# The regex that locates floats - include Unicode numerals.
+_float_sign_exp_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])'
+_float_sign_exp_re = _float_sign_exp_re.format(numeric)
+_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U)
+_float_nosign_exp_re = r'([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])'
+_float_nosign_exp_re = _float_nosign_exp_re.format(numeric)
+_float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U)
+_float_sign_noexp_re = r'([-+]?[0-9]*\.?[0-9]+|[{}])'
+_float_sign_noexp_re = _float_sign_noexp_re.format(numeric)
+_float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U)
+_float_nosign_noexp_re = r'([0-9]*\.?[0-9]+|[{}])'
+_float_nosign_noexp_re = _float_nosign_noexp_re.format(numeric)
+_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U)
+_float_sign_exp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?)|[{}]'
+_float_sign_exp_re_c = _float_sign_exp_re_c.format(numeric)
+_float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U)
+_float_nosign_exp_re_c = r'([0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])'
+_float_nosign_exp_re_c = _float_nosign_exp_re_c.format(numeric)
+_float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U)
+_float_sign_noexp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+|[{}])'
+_float_sign_noexp_re_c = _float_sign_noexp_re_c.format(numeric)
+_float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U)
+_float_nosign_noexp_re_c = r'([0-9]*[.,]?[0-9]+|[{}])'
+_float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(numeric)
+_float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U)
+
+# Integer regexes - include Unicode digits.
+_int_nosign_re = r'([0-9]+|[{}])'.format(digits)
+_int_nosign_re = re.compile(_int_nosign_re, flags=re.U)
+_int_sign_re = r'([-+]?[0-9]+|[{}])'.format(digits)
+_int_sign_re = re.compile(_int_sign_re, flags=re.U)
# This dict will help select the correct regex and number conversion function.
_regex_and_num_function_chooser = {
- (ns.F, '.'): (_float_sign_exp_re, fast_float),
- (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float),
+ (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float),
+ (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float),
(ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float),
(ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float),
- (ns.I, '.'): (_int_sign_re, fast_int),
- (ns.I | ns.N, '.'): (_int_sign_re, fast_int),
+ (ns.I | ns.S, '.'): (_int_sign_re, fast_int),
+ (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int),
(ns.I | ns.U, '.'): (_int_nosign_re, fast_int),
(ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int),
- (ns.F, ','): (_float_sign_exp_re_c, fast_float),
- (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float),
+ (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float),
+ (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float),
(ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float),
(ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float),
- (ns.I, ','): (_int_sign_re, fast_int),
- (ns.I | ns.N, ','): (_int_sign_re, fast_int),
+ (ns.I | ns.S, ','): (_int_sign_re, fast_int),
+ (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int),
(ns.I | ns.U, ','): (_int_nosign_re, fast_int),
(ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int),
}
+# Dict to select checker function from converter function
+_conv_to_check = {fast_float: isfloat, fast_int: isint}
+
def _do_decoding(s, encoding):
"""A function to decode a bytes string, or return the object as-is."""
@@ -88,40 +115,46 @@ def _do_decoding(s, encoding):
return s
-def _args_to_enum(number_type, signed, exp, as_path, py3_safe):
+def _args_to_enum(**kwargs):
"""A function to convert input booleans to an enum-type argument."""
alg = 0
- if number_type is not float:
+ keys = ('number_type', 'signed', 'exp', 'as_path', 'py3_safe')
+ if any(x not in keys for x in kwargs):
+ x = set(kwargs) - set(keys)
+ raise TypeError('Invalid argument(s): ' + ', '.join(x))
+ if 'number_type' in kwargs and kwargs['number_type'] is not int:
msg = "The 'number_type' argument is deprecated as of 3.5.0, "
msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'"
warn(msg, DeprecationWarning)
- alg |= (_ns['INT'] * bool(number_type in (int, None)))
- alg |= (_ns['UNSIGNED'] * (number_type is None))
- if signed is not None:
+ alg |= (_ns['FLOAT'] * bool(kwargs['number_type'] is float))
+ alg |= (_ns['INT'] * bool(kwargs['number_type'] in (int, None)))
+ alg |= (_ns['SIGNED'] * (kwargs['number_type'] not in (float, None)))
+ if 'signed' in kwargs and kwargs['signed'] is not None:
msg = "The 'signed' argument is deprecated as of 3.5.0, "
- msg += "please use 'alg=ns.UNSIGNED'."
+ msg += "please use 'alg=ns.SIGNED'."
warn(msg, DeprecationWarning)
- alg |= (_ns['UNSIGNED'] * (not signed))
- if exp is not None:
+ alg |= (_ns['SIGNED'] * bool(kwargs['signed']))
+ if 'exp' in kwargs and kwargs['exp'] is not None:
msg = "The 'exp' argument is deprecated as of 3.5.0, "
msg += "please use 'alg=ns.NOEXP'."
warn(msg, DeprecationWarning)
- alg |= (_ns['NOEXP'] * (not exp))
- if as_path is not None:
+ alg |= (_ns['NOEXP'] * (not kwargs['exp']))
+ if 'as_path' in kwargs and kwargs['as_path'] is not None:
msg = "The 'as_path' argument is deprecated as of 3.5.0, "
msg += "please use 'alg=ns.PATH'."
warn(msg, DeprecationWarning)
- alg |= (_ns['PATH'] * as_path)
- if py3_safe is not None:
+ alg |= (_ns['PATH'] * kwargs['as_path'])
+ if 'py3_safe' in kwargs and kwargs['py3_safe'] is not None:
msg = "The 'py3_safe' argument is deprecated as of 3.5.0, "
msg += "please use 'alg=ns.TYPESAFE'."
warn(msg, DeprecationWarning)
- alg |= (_ns['TYPESAFE'] * py3_safe)
+ alg |= (_ns['TYPESAFE'] * kwargs['py3_safe'])
return alg
def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters):
"""Helper to separate the string input into numbers and strings."""
+ conv_check = (numconv, _conv_to_check[numconv])
# Split the input string by numbers.
# If the input is not a string, TypeError is raised.
@@ -131,24 +164,24 @@ def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters):
# Take into account locale if needed, and group letters if needed.
# Remove empty strings from the list.
if use_locale:
- s = [locale_convert(x, numconv, group_letters) for x in s if x]
+ s = [locale_convert(x, conv_check, group_letters) for x in s if x]
elif group_letters:
- s = [grouper(x, numconv) for x in s if x]
+ s = [grouper(x, conv_check) for x in s if x]
else:
s = [numconv(x) for x in s if x]
# If the list begins with a number, lead with an empty string.
# This is used to get around the "unorderable types" issue.
- if not s: # Return empty tuple for empty results.
- return ()
- elif isreal(s[0]):
+ if not s: # Return empty list for empty results.
+ return []
+ elif conv_check[1](s[0], num_only=True):
s = [null_string if use_locale else ''] + s
# The _py3_safe function inserts "" between numbers in the list,
# and is used to get around "unorderable types" in complex cases.
# It is a separate function that needs to be requested specifically
# because it is expensive to call.
- return _py3_safe(s, use_locale) if py3_safe else s
+ return _py3_safe(s, use_locale, conv_check[1]) if py3_safe else s
def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
@@ -158,7 +191,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
# Convert a pathlib PurePath object to a string.
if has_pathlib and isinstance(s, PurePath):
path_location = str(s)
- else:
+ else: # pragma: no cover
path_location = s
# Continue splitting the path from the back until we have reached
@@ -199,7 +232,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
return path_parts + base_parts
-def _py3_safe(parsed_list, use_locale):
+def _py3_safe(parsed_list, use_locale, check):
"""Insert '' between two numbers."""
length = len(parsed_list)
if length < 2:
@@ -209,7 +242,7 @@ def _py3_safe(parsed_list, use_locale):
nl_append = new_list.append
for before, after in py23_zip(islice(parsed_list, 0, length-1),
islice(parsed_list, 1, None)):
- if isreal(before) and isreal(after):
+ if check(before, num_only=True) and check(after, num_only=True):
nl_append(null_string if use_locale else '')
nl_append(after)
return new_list
@@ -275,24 +308,45 @@ def _natsort_key(val, key, alg):
# Assume the input are strings, which is the most common case.
# Apply the string modification if needed.
+ orig_val = val
try:
- if alg & _ns['LOWERCASEFIRST']:
+ lowfirst = alg & _ns['LOWERCASEFIRST']
+ dumb = dumb_sort() if use_locale else False
+ if use_locale and dumb and not lowfirst:
+ val = val.swapcase() # Compensate for bad locale lib.
+ elif lowfirst and not (use_locale and dumb):
val = val.swapcase()
if alg & _ns['IGNORECASE']:
- val = val.lower()
- if use_locale and alg & _ns['UNGROUPLETTERS'] and val[0].isupper():
- val = ' ' + val
- return tuple(_number_extracter(val,
- regex,
- num_function,
- alg & _ns['TYPESAFE'],
- use_locale,
- alg & _ns['GROUPLETTERS']))
+ val = val.casefold() if PY_VERSION >= 3.3 else val.lower()
+ gl = alg & _ns['GROUPLETTERS']
+ ret = tuple(_number_extracter(val,
+ regex,
+ num_function,
+ alg & _ns['TYPESAFE'],
+ use_locale,
+ gl or (use_locale and dumb)))
+ # For UNGROUPLETTERS, so the high level grouping can occur
+ # based on the first letter of the string.
+ # Do no locale transformation of the characters.
+ if use_locale and alg & _ns['UNGROUPLETTERS']:
+ if not ret:
+ return (ret, ret)
+ elif ret[0] == null_string:
+ return ((b'' if use_pyicu else '',), ret)
+ elif dumb:
+ if lowfirst:
+ return ((orig_val[0].swapcase(),), ret)
+ else:
+ return ((orig_val[0],), ret)
+ else:
+ return ((val[0],), ret)
+ else:
+ return ret
except (TypeError, AttributeError):
# Check if it is a bytes type, and if so return as a
# one element tuple.
if type(val) in (bytes,):
- return (val,)
+ return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,)
# If not strings, assume it is an iterable that must
# be parsed recursively. Do not apply the key recursively.
# If this string was split as a path, turn off 'PATH'.
diff --git a/setup.py b/setup.py
index a78bd14..c8c4d21 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,11 @@ class PyTest(TestCommand):
import pytest
err1 = pytest.main(['--cov', 'natsort',
'--cov-report', 'term-missing',
- '--flakes', '--pep8'])
+ '--flakes',
+ '--pep8',
+ # '--failed',
+ # '-v',
+ ])
err2 = pytest.main(['--doctest-modules', 'natsort'])
err3 = pytest.main(['README.rst',
'docs/source/intro.rst',
@@ -56,9 +60,12 @@ except IOError:
REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else ''
# Testing needs pytest, and mock if less than python 3.3
-TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov']
+TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes',
+ 'pytest-cov', 'hypothesis']
if sys.version[0] == 2 or (sys.version[3] == '3' and int(sys.version[2]) < 3):
TESTS_REQUIRE.append('mock')
+if sys.version[0] == 2 or (sys.version[3] == '3' and int(sys.version[2]) < 4):
+ TESTS_REQUIRE.append('pathlib')
# The setup parameters
setup(
diff --git a/test_natsort/slow_splitters.py b/test_natsort/slow_splitters.py
new file mode 100644
index 0000000..6352dd7
--- /dev/null
+++ b/test_natsort/slow_splitters.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+"""Alternate versions of the splitting functions for testing."""
+from __future__ import unicode_literals
+
+import unicodedata
+from natsort.py23compat import PY_VERSION
+
+if PY_VERSION >= 3.0:
+ long = int
+
+
+def int_splitter(x, signed, safe, sep):
+ """Alternate (slow) method to split a string into numbers."""
+ if not x:
+ return []
+ all_digits = set('0123456789')
+ full_list, strings, nums = [], [], []
+ input_len = len(x)
+ for i, char in enumerate(x):
+ # If this character is a sign and the next is a number,
+ # start a new number.
+ if (i+1 < input_len and signed and
+ (char in '-+') and (x[i+1] in all_digits)):
+ # Reset any current string or number.
+ if strings:
+ full_list.append(''.join(strings))
+ if nums:
+ full_list.append(int(''.join(nums)))
+ strings = []
+ nums = [char]
+ # If this is a number, add to the number list.
+ elif char in all_digits:
+ nums.append(char)
+ # Reset any string.
+ if strings:
+ full_list.append(''.join(strings))
+ strings = []
+ # If this is a unicode digit, append directly to the full list.
+ elif char.isdigit():
+ # Reset any string or number.
+ if strings:
+ full_list.append(''.join(strings))
+ if nums:
+ full_list.append(int(''.join(nums)))
+ strings = []
+ nums = []
+ full_list.append(unicodedata.digit(char))
+ # Otherwise add to the string.
+ else:
+ strings.append(char)
+ # Reset any number.
+ if nums:
+ full_list.append(int(''.join(nums)))
+ nums = []
+ if nums:
+ full_list.append(int(''.join(nums)))
+ elif strings:
+ full_list.append(''.join(strings))
+ if safe:
+ full_list = sep_inserter(full_list, (int, long), sep)
+ if type(full_list[0]) in (int, long):
+ return [sep] + full_list
+ else:
+ return full_list
+
+
+def float_splitter(x, signed, exp, safe, sep):
+ """Alternate (slow) method to split a string into numbers."""
+ if not x:
+ return []
+ all_digits = set('0123456789')
+ full_list, strings, nums = [], [], []
+ input_len = len(x)
+ for i, char in enumerate(x):
+ # If this character is a sign and the next is a number,
+ # start a new number.
+ if (i+1 < input_len and
+ (signed or (i > 1 and exp and x[i-1] in 'eE' and
+ x[i-2] in all_digits)) and
+ (char in '-+') and (x[i+1] in all_digits)):
+ # Reset any current string or number.
+ if strings:
+ full_list.append(''.join(strings))
+ if nums and i > 0 and x[i-1] not in 'eE':
+ full_list.append(float(''.join(nums)))
+ nums = [char]
+ else:
+ nums.append(char)
+ strings = []
+ # If this is a number, add to the number list.
+ elif char in all_digits:
+ nums.append(char)
+ # Reset any string.
+ if strings:
+ full_list.append(''.join(strings))
+ strings = []
+ # If this is a decimal, add to the number list.
+ elif (i + 1 < input_len and char == '.' and x[i+1] in all_digits):
+ if nums and '.' in nums:
+ full_list.append(float(''.join(nums)))
+ nums = []
+ nums.append(char)
+ if strings:
+ full_list.append(''.join(strings))
+ strings = []
+ # If this is an exponent, add to the number list.
+ elif (i > 0 and i + 1 < input_len and exp and char in 'eE' and
+ x[i-1] in all_digits and x[i+1] in all_digits | set('+-')):
+ if 'e' in nums or 'E' in nums:
+ strings = [char]
+ full_list.append(float(''.join(nums)))
+ nums = []
+ else:
+ nums.append(char)
+ # If this is a unicode numeral, append directly to the full list.
+ elif unicodedata.numeric(char, None) is not None:
+ # Reset any string or number.
+ if strings:
+ full_list.append(''.join(strings))
+ if nums:
+ full_list.append(float(''.join(nums)))
+ strings = []
+ nums = []
+ full_list.append(unicodedata.numeric(char))
+ # Otherwise add to the string.
+ else:
+ strings.append(char)
+ # Reset any number.
+ if nums:
+ full_list.append(float(''.join(nums)))
+ nums = []
+ if nums:
+ full_list.append(float(''.join(nums)))
+ elif strings:
+ full_list.append(''.join(strings))
+ # Fix a float that looks like a string.
+ fstrings = ('inf', 'infinity', '-inf', '-infinity',
+ '+inf', '+infinity', 'nan')
+ full_list = [float(y) if type(y) != float and y.lower() in fstrings else y
+ for y in full_list]
+ if safe:
+ full_list = sep_inserter(full_list, (float,), sep)
+ if type(full_list[0]) == float:
+ return [sep] + full_list
+ else:
+ return full_list
+
+
+def sep_inserter(x, t, sep):
+ # Simulates the py3_safe function.
+ ret = [x[0]]
+ for i, y in enumerate(x[1:]):
+ if type(y) in t and type(x[i]) in t:
+ ret.append(sep)
+ ret.append(y)
+ return ret
diff --git a/test_natsort/test_fake_fastnumbers.py b/test_natsort/test_fake_fastnumbers.py
index 5aedadb..ff7e42c 100644
--- a/test_natsort/test_fake_fastnumbers.py
+++ b/test_natsort/test_fake_fastnumbers.py
@@ -2,34 +2,136 @@
"""\
Test the fake fastnumbers module.
"""
-from natsort.fake_fastnumbers import fast_float, fast_int, isreal
+from __future__ import unicode_literals
+import unicodedata
+from math import isnan
+from hypothesis import given, assume
+from natsort.fake_fastnumbers import fast_float, fast_int, isfloat, isint
+from natsort.py23compat import py23_str
-def test_fast_float_converts_float_string_to_float():
+
+def is_float(x):
+    """Return True when *x* converts with float() or is a Unicode numeric."""
+    try:
+        float(x)
+        return True
+    except ValueError:
+        pass
+    # float() rejected the value; fall back to the Unicode database, which
+    # raises ValueError for a non-numeric character and TypeError when x is
+    # not a single character.
+    try:
+        unicodedata.numeric(x)
+    except (ValueError, TypeError):
+        return False
+    return True
+
+
+def is_int(x):
+    """Return True when *x* converts with int() or is a single Unicode digit."""
+    try:
+        int(x)
+        return True
+    except ValueError:
+        pass
+    # int() rejected the value; fall back to the Unicode database, which
+    # raises ValueError for a non-digit character and TypeError when x is
+    # not a single character.
+    try:
+        unicodedata.digit(x)
+    except (ValueError, TypeError):
+        return False
+    return True
+
+
+# Each test has an "example" version for demonstrative purposes,
+# and a test that uses the hypothesis module.
+
+def test_fast_float_converts_float_string_to_float_example():
assert fast_float('45.8') == 45.8
assert fast_float('-45') == -45.0
assert fast_float('45.8e-2') == 45.8e-2
+ assert isnan(fast_float('nan'))
+
+
+@given(float)
+def test_fast_float_converts_float_string_to_float(x):
+ assume(not isnan(x)) # But inf is included
+ assert fast_float(repr(x)) == x
-def test_fast_float_leaves_string_as_is():
+def test_fast_float_leaves_string_as_is_example():
assert fast_float('invalid') == 'invalid'
-def test_fast_int_leaves_float_string_as_is():
+@given(py23_str)
+def test_fast_float_leaves_string_as_is(x):
+ assume(not is_float(x))
+ assert fast_float(x) == x
+
+
+def test_fast_int_leaves_float_string_as_is_example():
assert fast_int('45.8') == '45.8'
+ assert fast_int('nan') == 'nan'
+ assert fast_int('inf') == 'inf'
+
+
+@given(float)
+def test_fast_int_leaves_float_string_as_is(x):
+ assume(not x.is_integer())
+ assert fast_int(repr(x)) == repr(x)
-def test_fast_int_converts_int_string_to_int():
+def test_fast_int_converts_int_string_to_int_example():
assert fast_int('-45') == -45
assert fast_int('+45') == 45
-def test_fast_int_leaves_string_as_is():
+@given(int)
+def test_fast_int_converts_int_string_to_int(x):
+ assert fast_int(repr(x)) == x
+
+
+def test_fast_int_leaves_string_as_is_example():
assert fast_int('invalid') == 'invalid'
-def test_isreal_returns_True_for_real_numbers_False_for_strings():
- assert isreal(-45)
- assert isreal(45.8e-2)
- assert not isreal('45.8')
- assert not isreal('invalid')
+@given(py23_str)
+def test_fast_int_leaves_string_as_is(x):
+ assume(not is_int(x))
+ assert fast_int(x) == x
+
+
+def test_isfloat_returns_True_for_real_numbers_example():
+ assert isfloat(-45.0)
+ assert isfloat(45.8e-2)
+
+
+@given(float)
+def test_isfloat_returns_True_for_real_numbers(x):
+ assert isfloat(x)
+
+
+def test_isfloat_returns_False_for_strings_example():
+ assert not isfloat('45.8')
+ assert not isfloat('invalid')
+
+
+@given(py23_str)
+def test_isfloat_returns_False_for_strings(x):
+ assert not isfloat(x)
+
+
+def test_isint_returns_True_for_real_numbers_example():
+ assert isint(-45)
+ assert isint(45)
+
+
+@given(int)
+def test_isint_returns_True_for_real_numbers(x):
+ assert isint(x)
+
+
+def test_isint_returns_False_for_strings_example():
+ assert not isint('45')
+ assert not isint('invalid')
+
+
+@given(py23_str)
+def test_isint_returns_False_for_strings(x):
+ assert not isint(x)
diff --git a/test_natsort/test_locale_help.py b/test_natsort/test_locale_help.py
index 5d69408..95c3000 100644
--- a/test_natsort/test_locale_help.py
+++ b/test_natsort/test_locale_help.py
@@ -2,9 +2,15 @@
"""\
Test the locale help module module.
"""
+from __future__ import unicode_literals
+
import locale
-from natsort.fake_fastnumbers import fast_float
+from math import isnan
+from itertools import chain
+from natsort.fake_fastnumbers import fast_float, isfloat, isint
from natsort.locale_help import grouper, locale_convert, use_pyicu
+from natsort.py23compat import py23_str
+from hypothesis import given, assume, example
if use_pyicu:
from natsort.locale_help import get_pyicu_transform
@@ -14,49 +20,125 @@ else:
from natsort.locale_help import strxfrm
-def test_grouper_returns_letters_with_lowercase_transform_of_letter():
- assert grouper('HELLO', fast_float) == 'hHeElLlLoO'
- assert grouper('hello', fast_float) == 'hheelllloo'
+def load_locale(x):
+    """Set LC_ALL to locale *x*, trying ISO8859-1 first and then UTF-8."""
+    try:
+        locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x)))
+    except locale.Error:
+        # Only a failed locale lookup triggers the fallback; a bare except
+        # would also swallow KeyboardInterrupt and genuine programming errors.
+        locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x)))
+
+
+# Each test has an "example" version for demonstrative purposes,
+# and a test that uses the hypothesis module.
+
+
+def test_grouper_returns_letters_with_lowercase_transform_of_letter_example():
+ assert grouper('HELLO', (fast_float, isfloat)) == 'hHeElLlLoO'
+ assert grouper('hello', (fast_float, isfloat)) == 'hheelllloo'
+
+
+@given(py23_str)
+def test_grouper_returns_letters_with_lowercase_transform_of_letter(x):
+ assume(type(fast_float(x)) is not float)
+ try:
+ low = py23_str.casefold
+ except AttributeError:
+ low = py23_str.lower
+ assert grouper(x, (fast_float, isfloat)) == ''.join(chain.from_iterable([low(y), y] for y in x))
+
+
+def test_grouper_returns_float_string_as_float_example():
+ assert grouper('45.8e-2', (fast_float, isfloat)) == 45.8e-2
-def test_grouper_returns_float_string_as_float():
- assert grouper('45.8e-2', fast_float) == 45.8e-2
+@given(float)
+def test_grouper_returns_float_string_as_float(x):
+ assume(not isnan(x))
+ assert grouper(repr(x), (fast_float, isfloat)) == x
-def test_locale_convert_transforms_float_string_to_float():
- locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
- assert locale_convert('45.8', fast_float, False) == 45.8
+def test_locale_convert_transforms_float_string_to_float_example():
+ load_locale('en_US')
+ assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
locale.setlocale(locale.LC_NUMERIC, str(''))
-def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string():
- locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
+@given(float)
+def test_locale_convert_transforms_float_string_to_float(x):
+ assume(not isnan(x))
+ load_locale('en_US')
+ assert locale_convert(repr(x), (fast_float, isfloat), False) == x
+ locale.setlocale(locale.LC_NUMERIC, str(''))
+
+
+def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string_example():
+ load_locale('en_US')
if use_pyicu:
from natsort.locale_help import get_pyicu_transform
from locale import getlocale
strxfrm = get_pyicu_transform(getlocale())
else:
from natsort.locale_help import strxfrm
- assert locale_convert('45,8', fast_float, False) == strxfrm('45,8')
- assert locale_convert('hello', fast_float, False) == strxfrm('hello')
+ assert locale_convert('45,8', (fast_float, isfloat), False) == strxfrm('45,8')
+ assert locale_convert('hello', (fast_float, isfloat), False) == strxfrm('hello')
locale.setlocale(locale.LC_NUMERIC, str(''))
-def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters():
- locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
+@given(py23_str)
+def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(x):
+ assume(type(fast_float(x)) is not float)
+ load_locale('en_US')
if use_pyicu:
from natsort.locale_help import get_pyicu_transform
from locale import getlocale
strxfrm = get_pyicu_transform(getlocale())
else:
from natsort.locale_help import strxfrm
- assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo')
- assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88')
+ assert locale_convert(x, (fast_float, isfloat), False) == strxfrm(x)
+ locale.setlocale(locale.LC_NUMERIC, str(''))
+
+
+def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters_example():
+ load_locale('en_US')
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert locale_convert('hello', (fast_float, isfloat), True) == strxfrm('hheelllloo')
+ assert locale_convert('45,8', (fast_float, isfloat), True) == strxfrm('4455,,88')
+ locale.setlocale(locale.LC_NUMERIC, str(''))
+
+
+@given(py23_str)
+def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(x):
+ assume(type(fast_float(x)) is not float)
+ load_locale('en_US')
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ try:
+ low = py23_str.casefold
+ except AttributeError:
+ low = py23_str.lower
+ assert locale_convert(x, (fast_float, isfloat), True) == strxfrm(''.join(chain.from_iterable([low(y), y] for y in x)))
+ locale.setlocale(locale.LC_NUMERIC, str(''))
+
+
+def test_locale_convert_transforms_float_string_to_float_with_de_locale_example():
+ load_locale('de_DE')
+ assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
+ assert locale_convert('45,8', (fast_float, isfloat), False) == 45.8
locale.setlocale(locale.LC_NUMERIC, str(''))
-def test_locale_convert_transforms_float_string_to_float_with_de_locale():
- locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8')
- assert locale_convert('45.8', fast_float, False) == 45.8
- assert locale_convert('45,8', fast_float, False) == 45.8
+@given(float)
+def test_locale_convert_transforms_float_string_to_float_with_de_locale(x):
+ assume(not isnan(x))
+ load_locale('de_DE')
+ assert locale_convert(repr(x), (fast_float, isfloat), False) == x
+ assert locale_convert(repr(x).replace('.', ','), (fast_float, isfloat), False) == x
locale.setlocale(locale.LC_NUMERIC, str(''))
diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py
index 0416c89..0735f6d 100644
--- a/test_natsort/test_main.py
+++ b/test_natsort/test_main.py
@@ -2,10 +2,12 @@
"""\
Test the natsort command-line tool functions.
"""
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
import re
import sys
from pytest import raises
+from hypothesis import given, assume
+from hypothesis.specifiers import integers_in_range, integers_from, sampled_from
try:
from unittest.mock import patch, call
except ImportError:
@@ -13,6 +15,7 @@ except ImportError:
from natsort.__main__ import main, range_check, check_filter
from natsort.__main__ import keep_entry_range, exclude_entry
from natsort.__main__ import sort_and_print_entries
+from natsort.py23compat import py23_str
def test_main_passes_default_arguments_with_no_command_line_options():
@@ -25,8 +28,8 @@ def test_main_passes_default_arguments_with_no_command_line_options():
assert args.reverse_filter is None
assert args.exclude is None
assert not args.reverse
- assert args.number_type == 'float'
- assert args.signed
+ assert args.number_type == 'int'
+ assert not args.signed
assert args.exp
assert not args.locale
@@ -36,8 +39,7 @@ def test_main_passes_arguments_with_all_command_line_options():
sys.argv[1:] = ['--paths', '--reverse', '--locale',
'--filter', '4', '10',
'--reverse-filter', '100', '110',
- '--number-type', 'int',
- '--nosign', '--noexp',
+ '--number-type', 'float', '--noexp', '--sign',
'--exclude', '34', '--exclude', '35',
'num-2', 'num-6', 'num-1']
main()
@@ -47,60 +49,12 @@ def test_main_passes_arguments_with_all_command_line_options():
assert args.reverse_filter == [(100.0, 110.0)]
assert args.exclude == [34, 35]
assert args.reverse
- assert args.number_type == 'int'
- assert not args.signed
+ assert args.number_type == 'float'
+ assert args.signed
assert not args.exp
assert args.locale
-def test_range_check_returns_range_as_is_but_with_floats():
- assert range_check(10, 11) == (10.0, 11.0)
- assert range_check(6.4, 30) == (6.4, 30.0)
-
-
-def test_range_check_raises_ValueError_if_range_is_invalid():
- with raises(ValueError) as err:
- range_check(7, 2)
- assert str(err.value) == 'low >= high'
-
-
-def test_check_filter_returns_None_if_filter_evaluates_to_False():
- assert check_filter(()) is None
- assert check_filter(False) is None
- assert check_filter(None) is None
-
-
-def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid():
- assert check_filter([(6, 7)]) == [(6.0, 7.0)]
- assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)]
-
-
-def test_check_filter_raises_ValueError_if_filter_is_invalid():
- with raises(ValueError) as err:
- check_filter([(7, 2)])
- assert str(err.value) == 'Error in --filter: low >= high'
-
-
-def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds():
- assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+'))
-
-
-def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds():
- assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+'))
-
-
-def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds():
- assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+'))
-
-
-def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input():
- assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+'))
-
-
-def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input():
- assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+'))
-
-
class Args:
"""A dummy class to simulate the argparse Namespace object"""
def __init__(self, filter, reverse_filter, exclude, as_path, reverse):
@@ -198,3 +152,123 @@ def test_sort_and_print_entries_reverses_order_with_reverse_option():
sort_and_print_entries(entries, Args(None, None, False, True, True))
e = [call(entries[i]) for i in reversed([2, 3, 1, 0, 5, 6, 4])]
p.assert_has_calls(e)
+
+
+# Each test has an "example" version for demonstrative purposes,
+# and a test that uses the hypothesis module.
+
+def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second_example():
+ assert range_check(10, 11) == (10.0, 11.0)
+ assert range_check(6.4, 30) == (6.4, 30.0)
+
+
+@given(x=int, y=int)
+def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, y):
+ assume(x < y)
+ assert range_check(x, y) == (float(x), float(y))
+
+
+@given(x=float, y=float)
+def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, y):
+ assume(x < y)
+ assert range_check(x, y) == (x, y)
+
+
+def test_range_check_raises_ValueError_if_second_is_less_than_first_example():
+ with raises(ValueError) as err:
+ range_check(7, 2)
+ assert str(err.value) == 'low >= high'
+
+
+@given(x=float, y=float)
+def test_range_check_raises_ValueError_if_second_is_less_than_first(x, y):
+    # Keep only pairs with low >= high; any comparison involving NaN is
+    # False, so those draws are discarded by assume() too.
+    assume(x >= y)
+    with raises(ValueError) as err:
+        # Pass both generated values so the (low, high) pair is really
+        # exercised, not just the trivial low == high case.
+        range_check(x, y)
+    assert str(err.value) == 'low >= high'
+
+
+def test_check_filter_returns_None_if_filter_evaluates_to_False():
+ assert check_filter(()) is None
+ assert check_filter(False) is None
+ assert check_filter(None) is None
+
+
+def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid_example():
+ assert check_filter([(6, 7)]) == [(6.0, 7.0)]
+ assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)]
+
+
+@given(x=(int, int, float, float), y=(int, float, float, int))
+def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(x, y):
+ assume(all(i < j for i, j in zip(x, y)))
+ assert check_filter(list(zip(x, y))) == [(float(i), float(j)) for i, j in zip(x, y)]
+
+
+def test_check_filter_raises_ValueError_if_filter_is_invalid_example():
+ with raises(ValueError) as err:
+ check_filter([(7, 2)])
+ assert str(err.value) == 'Error in --filter: low >= high'
+
+
+@given(x=(int, int, float, float), y=(int, float, float, int))
+def test_check_filter_raises_ValueError_if_filter_is_invalid(x, y):
+ assume(any(i >= j for i, j in zip(x, y)))
+ with raises(ValueError) as err:
+ check_filter(list(zip(x, y)))
+ assert str(err.value) == 'Error in --filter: low >= high'
+
+
+def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds_example():
+ assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+'))
+
+
+@given((py23_str, integers_in_range(1, 99), py23_str, integers_in_range(1, 99), py23_str))
+def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(x):
+ s = ''.join(map(py23_str, x))
+ assume(any(0 < int(i) < 100 for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
+ assert keep_entry_range(s, [0], [100], int, re.compile(r'\d+'))
+
+
+def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds_example():
+ assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+'))
+
+
+@given((py23_str, integers_in_range(2, 89), py23_str, integers_in_range(2, 89), py23_str))
+def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(x):
+ s = ''.join(map(py23_str, x))
+ assume(any((1 < int(i) < 20) or (88 < int(i) < 90) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
+ assert keep_entry_range(s, [1, 88], [20, 90], int, re.compile(r'\d+'))
+
+
+def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds_example():
+ assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+'))
+
+
+@given((py23_str, integers_from(21), py23_str, integers_from(21), py23_str))
+def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(x):
+ s = ''.join(map(py23_str, x))
+ assume(all(not (1 <= int(i) <= 20) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
+ assert not keep_entry_range(s, [1], [20], int, re.compile(r'\d+'))
+
+
+def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example():
+ assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+'))
+
+
+@given((py23_str, integers_from(0), py23_str, integers_from(0), py23_str))
+def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(x):
+ s = ''.join(map(py23_str, x))
+ assume(not any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
+ assert exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+'))
+
+
+def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example():
+ assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+'))
+
+
+@given((py23_str, sampled_from([23, 45, 87]), py23_str, sampled_from([23, 45, 87]), py23_str))
+def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(x):
+ s = ''.join(map(py23_str, x))
+ assume(any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
+ assert not exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+'))
diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py
index 78a3eaa..329d39f 100644
--- a/test_natsort/test_natsort.py
+++ b/test_natsort/test_natsort.py
@@ -15,6 +15,13 @@ from natsort import realsorted, index_realsorted, decoder, as_ascii, as_utf8
from natsort.utils import _natsort_key
+def load_locale(x):
+    """Set LC_ALL to locale *x*, trying ISO8859-1 first and then UTF-8."""
+    try:
+        locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x)))
+    except locale.Error:
+        # Only a failed locale lookup triggers the fallback; a bare except
+        # would also swallow KeyboardInterrupt and genuine programming errors.
+        locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x)))
+
+
def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is():
f = decoder('latin1')
a = 'bytes'
@@ -41,7 +48,7 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called():
# But it raises a deprecation warning
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
- assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F)
+ assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I)
assert len(w) == 1
assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message)
# It is called for each element in a list when sorting
@@ -54,8 +61,8 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called():
def test_natsort_keygen_returns_natsort_key_with_alg_option():
a = 'a-5.034e1'
- assert natsort_keygen()(a) == _natsort_key(a, None, ns.F)
- assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, None, ns.I | ns.U)
+ assert natsort_keygen()(a) == _natsort_key(a, None, ns.I)
+ assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S)
def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key():
@@ -63,15 +70,15 @@ def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_
f1 = natsort_keygen(key=lambda x: x.upper())
def f2(x):
- return _natsort_key(x, lambda y: y.upper(), ns.F)
+ return _natsort_key(x, lambda y: y.upper(), ns.I)
assert f1(a) == f2(a)
def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted():
a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
b = a[:]
- a.sort(key=natsort_keygen(alg=ns.I))
- assert a == natsorted(b, alg=ns.I)
+ a.sort(key=natsort_keygen(alg=ns.F))
+ assert a == natsorted(b, alg=ns.F)
def test_natsorted_returns_strings_with_numbers_in_ascending_order():
@@ -80,42 +87,48 @@ def test_natsorted_returns_strings_with_numbers_in_ascending_order():
def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
-def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_without_exponents_with_NOEXP_option():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.']
+def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert natsorted(a, alg=ns.N | ns.F | ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
+ # UNSIGNED is default
+ assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
-def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_INT_option():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
+def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
+ # INT is default
+ assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
-def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_option():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
+def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
+ assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
-def test_natsorted_returns_list_of_numbers_sorted_without_accounting_for_sign_with_UNSIGNED_option():
- a = ['a-5', 'a7', 'a+2']
- assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5']
+def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
-def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_without_UNSIGNED_option():
+def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option():
a = ['a-5', 'a7', 'a+2']
- assert natsorted(a) == ['a-5', 'a+2', 'a7']
+ assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7']
-def test_natsorted_returns_list_of_version_numbers_improperly_sorted_without_VERSION_option():
- a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b']
+def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option():
+ a = ['a-5', 'a7', 'a+2']
+ assert natsorted(a) == ['a7', 'a+2', 'a-5']
-def test_natsorted_returns_sorted_list_of_version_numbers_with_VERSION_option():
+def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option():
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
+ assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
@@ -233,27 +246,48 @@ def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input
def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters():
a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
- locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ load_locale('en_US')
assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
locale.setlocale(locale.LC_ALL, str(''))
+def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped():
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ load_locale('en_US')
+ assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
+ locale.setlocale(locale.LC_ALL, str(''))
+
+
+def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters():
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ load_locale('en_US')
+ assert natsorted(a, alg=ns.LOCALE | ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
+ locale.setlocale(locale.LC_ALL, str(''))
+
+
+def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped():
+ a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
+ load_locale('en_US')
+ assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST | ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
+ locale.setlocale(locale.LC_ALL, str(''))
+
+
def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language():
- locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ load_locale('en_US')
a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
- assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c']
+ assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,6', 'a5,50', 'ä', 'b', 'c']
locale.setlocale(locale.LC_ALL, str(''))
def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
- locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8'))
+ load_locale('de_DE')
a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
- assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
+ assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
locale.setlocale(locale.LC_ALL, str(''))
def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
- locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
+ load_locale('en_US')
a = ['0', 'Á', '2', 'Z']
assert natsorted(a) == ['0', '2', 'Z', 'Á']
a = ['2', 'ä', 'b', 1.5, 3]
@@ -261,9 +295,15 @@ def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_er
locale.setlocale(locale.LC_ALL, str(''))
-def test_versorted_returns_results_identical_to_natsorted_with_VERSION():
+def test_versorted_returns_results_identical_to_natsorted():
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert versorted(a) == natsorted(a, alg=ns.VERSION)
+ # versorted is retained for backwards compatibility
+ assert versorted(a) == natsorted(a)
+
+
+def test_realsorted_returns_results_identical_to_natsorted_with_REAL():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert realsorted(a) == natsorted(a, alg=ns.REAL)
def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE():
@@ -271,11 +311,6 @@ def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE():
assert humansorted(a) == natsorted(a, alg=ns.LOCALE)
-def test_realsorted_returns_results_identical_to_natsorted():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert realsorted(a) == natsorted(a)
-
-
def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list():
a = ['num3', 'num5', 'num2']
b = ['foo', 'bar', 'baz']
@@ -312,9 +347,15 @@ def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_wi
assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0]
-def test_index_versorted_returns_results_identical_to_index_natsorted_with_VERSION():
+def test_index_versorted_returns_results_identical_to_index_natsorted():
a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
- assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION)
+ # index_versorted is retained for backwards compatibility
+ assert index_versorted(a) == index_natsorted(a)
+
+
+def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL():
+ a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
+ assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL)
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
@@ -322,11 +363,6 @@ def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOC
assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE)
-def test_index_realsorted_returns_results_identical_to_index_natsorted():
- a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
- assert index_realsorted(a) == index_natsorted(a)
-
-
def test_order_by_index_sorts_list_according_to_order_of_integer_list():
a = ['num3', 'num5', 'num2']
index = [2, 0, 1]
diff --git a/test_natsort/test_unicode_numbers.py b/test_natsort/test_unicode_numbers.py
new file mode 100644
index 0000000..f3e8de7
--- /dev/null
+++ b/test_natsort/test_unicode_numbers.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+"""\
+Test the Unicode numbers module.
+"""
+from __future__ import unicode_literals
+import unicodedata
+from natsort.py23compat import py23_range, py23_unichr
+from natsort.unicode_numbers import numeric_chars, numeric, digit_chars, digits
+
+
+def test_numeric_chars_contains_only_valid_unicode_numeric_characters():
+ for a in numeric_chars:
+ assert unicodedata.numeric(a, None) is not None
+
+
+def test_digit_chars_contains_only_valid_unicode_digit_characters():
+ for a in digit_chars:
+ assert unicodedata.digit(a, None) is not None
+
+
+def test_numeric_chars_contains_all_valid_unicode_numeric_characters():
+    # ASCII digits are skipped below; build the lookup set once instead of
+    # once per code point.
+    ascii_digits = set('0123456789')
+    for i in py23_range(0X10FFFF):
+        try:
+            a = py23_unichr(i)
+        except ValueError:
+            # Presumably the interpreter cannot represent this code point
+            # (e.g. a narrow build); stop scanning.
+            break
+        if a in ascii_digits:
+            continue
+        if unicodedata.numeric(a, None) is not None:
+            assert a in numeric_chars
+
+
+def test_digit_chars_contains_all_valid_unicode_digit_characters():
+    # ASCII digits are skipped below; build the lookup set once instead of
+    # once per code point.
+    ascii_digits = set('0123456789')
+    for i in py23_range(0X10FFFF):
+        try:
+            a = py23_unichr(i)
+        except ValueError:
+            # Presumably the interpreter cannot represent this code point
+            # (e.g. a narrow build); stop scanning.
+            break
+        if a in ascii_digits:
+            continue
+        if unicodedata.digit(a, None) is not None:
+            assert a in digit_chars
+
+
+def test_combined_string_contains_all_characters_in_list():
+ assert numeric == ''.join(numeric_chars)
+ assert digits == ''.join(digit_chars)
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 01fe6a3..16cf411 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -4,26 +4,43 @@ from __future__ import unicode_literals
import sys
import locale
+import pathlib
+import string
+from math import isnan
from operator import itemgetter
+from itertools import chain
from pytest import raises
+from hypothesis import given, assume, example
+from hypothesis.specifiers import sampled_from
from natsort.ns_enum import ns
from natsort.utils import _number_extracter, _py3_safe, _natsort_key, _args_to_enum
from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re
from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re, _do_decoding
-from natsort.locale_help import use_pyicu, null_string
+from natsort.utils import _path_splitter
+from natsort.locale_help import use_pyicu, null_string, locale_convert, dumb_sort
from natsort.py23compat import py23_str
+from slow_splitters import int_splitter, float_splitter, sep_inserter
try:
- from fastnumbers import fast_float, fast_int
+ from fastnumbers import fast_float, fast_int, isint
+ import fastnumbers
+ v = list(map(int, fastnumbers.__version__.split('.')))
+ if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0.
+ raise ImportError
except ImportError:
- from natsort.fake_fastnumbers import fast_float, fast_int
+ from natsort.fake_fastnumbers import fast_float, fast_int, isint
+
+if sys.version[0] == '3':
+ long = int
+
+ichain = chain.from_iterable
-try:
- import pathlib
-except ImportError:
- has_pathlib = False
-else:
- has_pathlib = True
+
+def load_locale(x):
+ try:
+ locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x)))
+ except:
+ locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x)))
def test_do_decoding_decodes_bytes_string_to_unicode():
@@ -32,280 +49,524 @@ def test_do_decoding_decodes_bytes_string_to_unicode():
assert _do_decoding(b'bytes', 'ascii') == b'bytes'.decode('ascii')
+def test_args_to_enum_raises_TypeError_for_invalid_argument():
+ with raises(TypeError):
+ _args_to_enum(**{'alf': 0})
+
+
def test_args_to_enum_converts_signed_exp_float_to_ns_F():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(float, True, True, False, False) == ns.F
+ assert _args_to_enum(**{'number_type': float,
+ 'signed': True,
+ 'exp': True}) == ns.F | ns.S
def test_args_to_enum_converts_signed_noexp_float_to_ns_FN():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N
+ assert _args_to_enum(**{'number_type': float,
+ 'signed': True,
+ 'exp': False}) == ns.F | ns.N | ns.S
def test_args_to_enum_converts_unsigned_exp_float_to_ns_FU():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U
+ assert _args_to_enum(**{'number_type': float,
+ 'signed': False,
+ 'exp': True}) == ns.F | ns.U
+ # unsigned is default
+ assert _args_to_enum(**{'number_type': float,
+ 'signed': False,
+ 'exp': True}) == ns.F
def test_args_to_enum_converts_unsigned_unexp_float_to_ns_FNU():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N
+ assert _args_to_enum(**{'number_type': float,
+ 'signed': False,
+ 'exp': False}) == ns.F | ns.U | ns.N
-def test_args_to_enum_converts_signed_exp_float_and_path_and_py3safe_to_ns_FPT():
+def test_args_to_enum_converts_float_and_path_and_py3safe_to_ns_FPT():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T
+ assert _args_to_enum(**{'number_type': float,
+ 'as_path': True,
+ 'py3_safe': True}) == ns.F | ns.P | ns.T
-def test_args_to_enum_converts_singed_int_and_path_to_ns_IP():
+def test_args_to_enum_converts_int_and_path_to_ns_IP():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P
+ assert _args_to_enum(**{'number_type': int, 'as_path': True}) == ns.I | ns.P
def test_args_to_enum_converts_unsigned_int_and_py3safe_to_ns_IUT():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T
+ assert _args_to_enum(**{'number_type': int,
+ 'signed': False,
+ 'py3_safe': True}) == ns.I | ns.U | ns.T
def test_args_to_enum_converts_None_to_ns_IU():
# number_type, signed, exp, as_path, py3_safe
- assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U
-
-# fttt = (fast_float, True, True, True)
-# fttf = (fast_float, True, True, False)
-ftft = (fast_float, True, False, True)
-ftff = (fast_float, True, False, False)
-# fftt = (fast_float, False, True, True)
-ffft = (fast_float, False, False, True)
-# fftf = (fast_float, False, True, False)
-ffff = (fast_float, False, False, False)
-ittt = (fast_int, True, True, True)
-ittf = (fast_int, True, True, False)
-itft = (fast_int, True, False, True)
-itff = (fast_int, True, False, False)
-# iftt = (fast_int, False, True, True)
-ifft = (fast_int, False, False, True)
-# iftf = (fast_int, False, True, False)
-ifff = (fast_int, False, False, False)
-
-
-def test_number_extracter_raises_TypeError_if_given_a_number():
- with raises(TypeError):
- assert _number_extracter(50.0, _float_sign_exp_re, *ffff)
+ assert _args_to_enum(**{'number_type': None,
+ 'exp': True}) == ns.I | ns.U
+float_nosafe_locale_group = (fast_float, False, True, True)
+float_nosafe_locale_nogroup = (fast_float, False, True, False)
+float_safe_nolocale_nogroup = (fast_float, True, False, False)
+float_nosafe_nolocale_group = (fast_float, False, False, True)
+float_nosafe_nolocale_nogroup = (fast_float, False, False, False)
+int_safe_locale_group = (fast_int, True, True, True)
+int_safe_locale_nogroup = (fast_int, True, True, False)
+int_safe_nolocale_group = (fast_int, True, False, True)
+int_safe_nolocale_nogroup = (fast_int, True, False, False)
+int_nosafe_locale_group = (fast_int, False, True, True)
+int_nosafe_locale_nogroup = (fast_int, False, True, False)
+int_nosafe_nolocale_group = (fast_int, False, False, True)
+int_nosafe_nolocale_nogroup = (fast_int, False, False, False)
-def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats():
- assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034]
+# Each test has an "example" version for demonstrative purposes,
+# and a test that uses the hypothesis module.
-def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats():
- assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034]
+def test_py3_safe_does_nothing_if_no_numbers_example():
+ assert _py3_safe(['a', 'b', 'c'], False, isint) == ['a', 'b', 'c']
+ assert _py3_safe(['a'], False, isint) == ['a']
-def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats():
- assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0]
+def test_py3_safe_does_nothing_if_only_one_number_example():
+ assert _py3_safe(['a', 5], False, isint) == ['a', 5]
-def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats():
- assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
+def test_py3_safe_inserts_empty_string_between_two_numbers_example():
+ assert _py3_safe([5, 9], False, isint) == [5, '', 9]
-def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints():
- assert _number_extracter('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
+def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers_example():
+ assert _py3_safe([5, 9], True, isint) == [5, null_string, 9]
-def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints():
- assert _number_extracter('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1]
+@given([py23_str, int])
+def test_py3_safe_inserts_empty_string_between_two_numbers(x):
+ assume(bool(x))
+ assert _py3_safe(x, False, isint) == sep_inserter(x, (int, long), '')
-def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option():
- assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034]
+def test_path_splitter_splits_path_string_by_separator_example():
+ z = '/this/is/a/path'
+ assert _path_splitter(z) == list(pathlib.Path(z).parts)
-def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option():
- assert _number_extracter('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1]
+@given([sampled_from(string.ascii_letters)])
+def test_path_splitter_splits_path_string_by_separator(x):
+ assume(len(x) > 1)
+ assume(all(x))
+ z = py23_str(pathlib.Path(*x))
+ assert _path_splitter(z) == list(pathlib.Path(z).parts)
-def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent():
- assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034]
+def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example():
+ z = '/this/is/a/path/file.exe'
+ y = list(pathlib.Path(z).parts)
+ assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix]
-def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number():
- assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034]
+@given([sampled_from(string.ascii_letters)])
+def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
+ assume(len(x) > 2)
+ assume(all(x))
+ z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
+ y = list(pathlib.Path(z).parts)
+ assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix]
-def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe():
- assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034]
+def test_number_extracter_raises_TypeError_if_given_a_number_example():
+ with raises(TypeError):
+ assert _number_extracter(50.0, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float():
- assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *ffft) == ['aA', 5.0, 0.5034]
+@given(float)
+def test_number_extracter_raises_TypeError_if_given_a_number(x):
+ with raises(TypeError):
+ assert _number_extracter(x, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int():
- assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ifft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1]
+def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example():
+ assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 0.5034]
-def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale():
- locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
- if use_pyicu:
- from natsort.locale_help import get_pyicu_transform
- from locale import getlocale
- strxfrm = get_pyicu_transform(getlocale())
- else:
- from natsort.locale_help import strxfrm
- assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1]
- locale.setlocale(locale.LC_NUMERIC, str(''))
+@given([float, py23_str, int])
+def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, True, False, '')
-def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters():
- locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
- if use_pyicu:
- from natsort.locale_help import get_pyicu_transform
- from locale import getlocale
- strxfrm = get_pyicu_transform(getlocale())
- else:
- from natsort.locale_help import strxfrm
- assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1]
- locale.setlocale(locale.LC_NUMERIC, str(''))
+def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example():
+ assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034]
-def test_py3_safe_does_nothing_if_no_numbers():
- assert _py3_safe(['a', 'b', 'c'], False) == ['a', 'b', 'c']
- assert _py3_safe(['a'], False) == ['a']
+@given([float, py23_str, int])
+def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, True, False, '')
-def test_py3_safe_does_nothing_if_only_one_number():
- assert _py3_safe(['a', 5], False) == ['a', 5]
+def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example():
+ assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 5.034, 'e', -1.0]
-def test_py3_safe_inserts_empty_string_between_two_numbers():
- assert _py3_safe([5, 9], False) == [5, '', 9]
+@given([float, py23_str, int])
+def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, False, False, '')
-def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers():
- assert _py3_safe([5, 9], True) == [5, null_string, 9]
+def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example():
+ assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
-def test__natsort_key_with_float_splits_input_into_string_and_signed_float_with_exponent():
- assert ns.F == ns.FLOAT
- assert _natsort_key('a-5.034e2', None, ns.F) == ('a', -503.4)
+@given([float, py23_str, int])
+def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, False, False, '')
-def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_signed_float_without_exponent():
- assert _natsort_key('a-5.034e2', None, ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0)
- # Default is to split on floats.
- assert _natsort_key('a-5.034e2', None, ns.NOEXP) == ('a', -5.034, 'e', 2.0)
+def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example():
+ assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
-def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float():
- assert _natsort_key('a-5.034e2', None, ns.UNSIGNED) == ('a-', 503.4)
+@given([float, py23_str, int])
+@example([10000000000000000000000000000000000000000000000000000000000000000000000000,
+ 100000000000000000000000000000000000000000000000000000000000000000000000000,
+ 100000000000000000000000000000000000000000000000000000000000000000000000000])
+def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x):
+ assume(len(x) <= 10)
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, False, False, '')
-def test__natsort_key_with_float_and_unsigned_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent():
- assert _natsort_key('a-5.034e2', None, ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0)
+def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example():
+ assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, 5, '.', 34, 'e', -1]
-def test__natsort_key_with_int_splits_input_into_string_and_signed_int():
- assert _natsort_key('a-5.034e2', None, ns.INT) == ('a', -5, '.', 34, 'e', 2)
- # NOEXP is ignored for integers
- assert _natsort_key('a-5.034e2', None, ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2)
+@given([float, py23_str, int])
+def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x):
+ assume(len(x) <= 10)
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, True, False, '')
-def test__natsort_key_with_int_splits_and_unsigned_input_into_string_and_unsigned_int():
- assert _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2)
+def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option_example():
+ assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '', 0.5034]
-def test__natsort_key_with_version_or_digit_matches_usigned_int():
- assert _natsort_key('a-5.034e2', None, ns.VERSION) == _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED)
- assert _natsort_key('a-5.034e2', None, ns.DIGIT) == _natsort_key('a-5.034e2', None, ns.VERSION)
+@given([float, py23_str, int])
+def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _float_sign_exp_re, *float_safe_nolocale_nogroup) == float_splitter(s, True, True, True, '')
-def test__natsort_key_with_key_applies_key_function_before_splitting():
- assert _natsort_key('a-5.034e2', lambda x: x.upper(), ns.F) == ('A', -503.4)
+def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option_example():
+ assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_safe_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1]
-def test__natsort_key_with_tuple_input_returns_nested_tuples():
- # Iterables are parsed recursively so you can sort lists of lists.
- assert _natsort_key(('a1', 'a-5.034e2'), None, ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2))
+@given([float, py23_str, int])
+def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(x):
+ assume(len(x) <= 10)
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup) == int_splitter(s, True, True, '')
-def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element():
- # A key is applied before recursion, but not in the recursive calls.
- assert _natsort_key(('a1', 'a-5.034e2'), itemgetter(1), ns.F) == ('a', -503.4)
+def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent_example():
+ assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034]
-def test__natsort_key_with_input_containing_leading_numbers_returns_leading_empty_strings():
- # Strings that lead with a number get an empty string at the front of the tuple.
- # This is designed to get around the "unorderable types" issue.
- assert _natsort_key(('15a', '6'), None, ns.F) == (('', 15.0, 'a'), ('', 6.0))
+def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example():
+ assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, 0.5034]
-def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string():
- assert _natsort_key(10, None, ns.F) == ('', 10)
+def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe_exmple():
+ assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034]
-def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions():
- # Turn on PATH to split a file path into components
- assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
+def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example():
+ assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nosafe_nolocale_group) == ['aA', 5.0, 0.5034]
-def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions():
- assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
+@given([float, py23_str, int])
+def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ try:
+ low = py23_str.casefold
+ except AttributeError:
+ low = py23_str.lower
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ t = float_splitter(s, True, True, False, '')
+ t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t]
+ assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_group) == t
-def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions():
- assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
+def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example():
+ assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1]
-def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples():
- # Converts pathlib PurePath (and subclass) objects to string before sorting
- if has_pathlib:
- assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
+@given([float, py23_str, int])
+def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x):
+ assume(len(x) <= 10)
+ try:
+ low = py23_str.casefold
+ except AttributeError:
+ low = py23_str.lower
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ t = int_splitter(s, False, False, '')
+ t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t]
+ assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_group) == t
-def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
- # It gracefully handles as_path for numeric input by putting an extra tuple around it
- # so it will sort against the other as_path results.
- assert _natsort_key(10, None, ns.PATH) == (('', 10),)
+def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example():
+ load_locale('en_US')
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1]
+ locale.setlocale(locale.LC_NUMERIC, str(''))
-def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
- # PATH also handles recursion well.
- assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')')))
+@given([float, py23_str, int])
+def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x):
+ assume(len(x) <= 10)
+ load_locale('en_US')
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ t = int_splitter(s, False, False, null_string)
+ t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), False) for i, y in enumerate(t)]
+ assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_nogroup) == t
+ locale.setlocale(locale.LC_NUMERIC, str(''))
-def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers():
- # Turn on TYPESAFE to put a '' between adjacent numbers
- assert _natsort_key('43h7+3', None, ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0)
+def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters_example():
+ load_locale('en_US')
+ if use_pyicu:
+ from natsort.locale_help import get_pyicu_transform
+ from locale import getlocale
+ strxfrm = get_pyicu_transform(getlocale())
+ else:
+ from natsort.locale_help import strxfrm
+ assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_group) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1]
+ locale.setlocale(locale.LC_NUMERIC, str(''))
-def test__natsort_key_with_invalid_alg_input_raises_ValueError():
- # Invalid arguments give the correct response
- with raises(ValueError) as err:
- _natsort_key('a', None, '1')
- assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1"
+
+@given([float, py23_str, int])
+def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(x):
+ assume(len(x) <= 10)
+ load_locale('en_US')
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ t = int_splitter(s, False, False, null_string)
+ t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), True) for i, y in enumerate(t)]
+ assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_group) == t
+ locale.setlocale(locale.LC_NUMERIC, str(''))
+
+
+# The remaining tests provide no examples, just hypothesis tests.
+# They only confirm that _natsort_key uses the above building blocks.
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert ns.F == ns.FLOAT
+ assert ns.S == ns.SIGNED
+ assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert ns.R == ns.F | ns.S
+ assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_real_matches_signed_float(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S)
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert ns.N == ns.NOEXP
+ assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert ns.U == ns.UNSIGNED
+ assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup))
+ # Default is unsigned search
+ assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert ns.I == ns.INT
+ assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
+ # Default is int search
+ assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
+ # NOEXP is ignored for integers
+ assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup))
+ assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_version_or_digit_matches_usigned_int(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED)
+ assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION)
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_key_applies_key_function_before_splitting(x):
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nosafe_nolocale_nogroup))
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_tuple_input_returns_nested_tuples(x):
+    # Iterables are parsed recursively so you can sort lists of lists.
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(all(type(item) is not float or not isnan(item) for item in x))
+    numeric_types = (float, long, int)
+    text = ''.join(repr(item) if type(item) in numeric_types else item for item in x)
+    element_key = tuple(
+        _number_extracter(text, _int_nosign_re, *int_nosafe_nolocale_nogroup)
+    )
+    # A 2-tuple of the same string yields a 2-tuple of the same element key.
+    assert _natsort_key((text, text), None, ns.I) == (element_key, element_key)
-def test__natsort_key_without_string_modifiers_leaves_text_as_is():
- # Changing the sort order of strings
- assert _natsort_key('Apple56', None, ns.F) == ('Apple', 56.0)
+@given([float, py23_str, int])
+def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x):
+    # A key is applied before recursion, but not in the recursive calls.
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(all(type(item) is not float or not isnan(item) for item in x))
+    numeric_types = (float, long, int)
+    text = ''.join(repr(item) if type(item) in numeric_types else item for item in x)
+    element_key = tuple(
+        _number_extracter(text, _int_nosign_re, *int_nosafe_nolocale_nogroup)
+    )
+    # itemgetter(1) selects the second string, so a flat key is expected.
+    assert _natsort_key((text, text), itemgetter(1), ns.I) == element_key
-def test__natsort_key_with_IGNORECASE_lowercases_text():
- assert _natsort_key('Apple56', None, ns.IGNORECASE) == ('apple', 56.0)
+@given(float)
+def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x):
+    # NaN never compares equal to itself, so it cannot be checked with ==.
+    assume(not isnan(x))
+    # Whole-valued floats are exercised as ints; everything else stays a float.
+    number = int(x) if x.is_integer() else x
+    assert _natsort_key(number, None, ns.I) == ('', number)
-def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case():
- assert _natsort_key('Apple56', None, ns.LOWERCASEFIRST) == ('aPPLE', 56.0)
+@given([float, py23_str, int])
+def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(x):
+    # Turn on TYPESAFE to put a '' between adjacent numbers
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(all(type(item) is not float or not isnan(item) for item in x))
+    numeric_types = (float, long, int)
+    text = ''.join(repr(item) if type(item) in numeric_types else item for item in x)
+    expected = tuple(
+        _number_extracter(text, _int_sign_re, *int_safe_nolocale_nogroup)
+    )
+    assert _natsort_key(text, None, ns.TYPESAFE | ns.S) == expected
-def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first():
- assert _natsort_key('Apple56', None, ns.GROUPLETTERS) == ('aAppppllee', 56.0)
+def test__natsort_key_with_invalid_alg_input_raises_ValueError():
+    # Invalid arguments give the correct response
+    expected_message = "_natsort_key: 'alg' argument must be from the enum 'ns', got 1"
+    with raises(ValueError) as excinfo:
+        _natsort_key('a', None, '1')
+    # Checked after the block: statements following the raising call inside
+    # the 'with' body would never execute.
+    assert str(excinfo.value) == expected_message
-def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first():
- assert _natsort_key('Apple56', None, ns.G | ns.LF) == ('aapPpPlLeE', 56.0)
+@given([float, py23_str, int])
+def test__natsort_key_with_IGNORECASE_lowercases_text(x):
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(not any(type(y) == float and isnan(y) for y in x))
+    # Numbers contribute their repr(); strings are used unchanged.
+    s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+    # Only the casefold() lookup is guarded. Wrapping the whole assertion in
+    # the try would let an AttributeError raised by the code under test be
+    # caught and silently rerouted to the lower() comparison.
+    try:
+        lowered = s.casefold()
+    except AttributeError:
+        # String types without casefold() fall back to lower().
+        lowered = s.lower()
+    expected = tuple(_number_extracter(lowered, _int_nosign_re, *int_nosafe_nolocale_nogroup))
+    assert _natsort_key(s, None, ns.IGNORECASE) == expected
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x):
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(all(type(item) is not float or not isnan(item) for item in x))
+    # Numbers contribute their repr(); strings are used unchanged.
+    numeric_types = (float, long, int)
+    text = ''.join(repr(item) if type(item) in numeric_types else item for item in x)
+    # The expected key is the split of the case-swapped string.
+    expected = tuple(
+        _number_extracter(text.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)
+    )
+    assert _natsort_key(text, None, ns.LOWERCASEFIRST) == expected
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x):
+    # Use casefold where the string type provides it, otherwise lower.
+    try:
+        low = py23_str.casefold
+    except AttributeError:
+        low = py23_str.lower
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(not any(type(y) == float and isnan(y) for y in x))
+    # Numbers contribute their repr(); strings are used unchanged.
+    # (An earlier ichain-based assignment to 's' was removed: it was
+    # overwritten by this line before ever being read.)
+    s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+    t = _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)
+    # Each character of every string chunk is expected as its lowered form
+    # followed by the original character; numeric chunks are left untouched.
+    expected = tuple(
+        ''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y
+        for y in t
+    )
+    assert _natsort_key(s, None, ns.GROUPLETTERS) == expected
+
+
+@given([float, py23_str, int])
+def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x):
+    # Use casefold where the string type provides it, otherwise lower.
+    try:
+        low = py23_str.casefold
+    except AttributeError:
+        low = py23_str.lower
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(not any(type(y) == float and isnan(y) for y in x))
+    # Numbers contribute their repr(); strings are used unchanged.
+    # (An earlier ichain-based assignment to 's' was removed: it was
+    # overwritten by this line before ever being read.)
+    s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+    # Case is swapped first, then the string is split into chunks.
+    t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)
+    # Each character of every string chunk is expected as its lowered form
+    # followed by the (swapped) character; numeric chunks are left untouched.
+    expected = tuple(
+        ''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y
+        for y in t
+    )
+    assert _natsort_key(s, None, ns.G | ns.LF) == expected
def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple():
@@ -318,38 +579,38 @@ def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE
assert True
-def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings():
+@given([float, py23_str, int])
+def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x):
# Locale aware sorting
- locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
- if use_pyicu:
- from natsort.locale_help import get_pyicu_transform
- from locale import getlocale
- strxfrm = get_pyicu_transform(getlocale())
+ # Discard generated lists that are too long or that contain a float NaN.
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ # Numbers contribute their repr(); strings are used unchanged.
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ load_locale('en_US')
+ # When dumb_sort() is true the expected key is built from the case-swapped
+ # string with the "group" extractor settings; otherwise from the string
+ # as-is with the "nogroup" settings.
+ if dumb_sort():
+ assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group))
else:
- from natsort.locale_help import strxfrm
- assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5)
- assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0)
-
- locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8'))
- if use_pyicu:
- strxfrm = get_pyicu_transform(getlocale())
- assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5)
- assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5)
+ assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup))
+ # NOTE(review): the LC_NUMERIC reset below is not reached when an assert
+ # above fails; no try/finally is visible here -- confirm this is acceptable.
locale.setlocale(locale.LC_NUMERIC, str(''))
-def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter():
+@given([float, py23_str, int])
+def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x):
# Locale aware sorting
- locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8'))
- if use_pyicu:
- from natsort.locale_help import get_pyicu_transform
- from locale import getlocale
- strxfrm = get_pyicu_transform(getlocale())
+ assume(len(x) <= 10)
+ assume(not any(type(y) == float and isnan(y) for y in x))
+ s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
+ load_locale('en_US')
+ if dumb_sort():
+ t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group))
else:
- from natsort.locale_help import strxfrm
- assert _natsort_key('Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm(' Apple'), 56.5)
- assert _natsort_key('apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm('apple'), 56.5)
- assert _natsort_key('12Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (null_string, 12.0, strxfrm('Apple'), 56.5)
+ t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup))
+ if not t:
+ r = (t, t)
+ elif t[0] is null_string:
+ r = ((b'' if use_pyicu else '',), t)
+ else:
+ r = ((s[0],), t)
+ assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r
# The below are all aliases for UNGROUPLETTERS
assert ns.UNGROUPLETTERS == ns.UG
assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
@@ -357,5 +618,42 @@ def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_
locale.setlocale(locale.LC_NUMERIC, str(''))
-def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE():
- assert _natsort_key('Apple56.5', None, ns.UG | ns.I) == _natsort_key('Apple56.5', None, ns.I)
+@given([float, py23_str, int])
+def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x):
+    # Discard generated lists that are too long or that contain a float NaN.
+    assume(len(x) <= 10)
+    assume(all(type(item) is not float or not isnan(item) for item in x))
+    # Numbers contribute their repr(); strings are used unchanged.
+    numeric_types = (float, long, int)
+    text = ''.join(repr(item) if type(item) in numeric_types else item for item in x)
+    # Adding UG to I must not change the key when LOCALE is not set.
+    with_ungroup = _natsort_key(text, None, ns.UG | ns.I)
+    assert with_ungroup == _natsort_key(text, None, ns.I)
+
+
+# It is difficult to generate code that will create random filesystem paths,
+# so "example" based tests are given for the PATH option.
+
+
+def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions():
+    # Turn on PATH to split a file path into components
+    # One inner tuple per component: root, each directory, the file stem,
+    # then every extension.
+    expected = (
+        ('/',),
+        ('p',),
+        ('Folder (', 10.0, ')'),
+        ('file', 34.5, 'nm (', 2.0, ')'),
+        ('.tar',),
+        ('.gz',),
+    )
+    assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == expected
+
+
+def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions():
+    # The leading '..' is expected as a component of its own.
+    expected = (
+        ('..',),
+        ('Folder (', 10.0, ')'),
+        ('file (', 2.0, ')'),
+        ('.tar',),
+        ('.gz',),
+    )
+    assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == expected
+
+
+def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions():
+    # Only the trailing '.tar' and '.gz' are expected as separate extension
+    # components; the '.f34.5nm (2)' part stays inside the file-name component.
+    expected = (
+        ('Folder (', 10.0, ')'),
+        ('file.f', 34.5, 'nm (', 2.0, ')'),
+        ('.tar',),
+        ('.gz',),
+    )
+    assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == expected
+
+
+def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples():
+    # Converts pathlib PurePath (and subclass) objects to string before sorting
+    path = pathlib.Path('../Folder (10)/file (2).tar.gz')
+    expected = (
+        ('..',),
+        ('Folder (', 10.0, ')'),
+        ('file (', 2.0, ')'),
+        ('.tar',),
+        ('.gz',),
+    )
+    assert _natsort_key(path, None, ns.PATH | ns.F) == expected
+
+
+def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
+    # It gracefully handles as_path for numeric input by putting an extra tuple around it
+    # so it will sort against the other as_path results.
+    expected = (('', 10),)
+    assert _natsort_key(10, None, ns.PATH) == expected
+
+
+def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
+    # PATH also handles recursion well.
+    # Each input path gets its own tuple of per-component tuples.
+    first_path_key = (('/',), ('Folder',))
+    second_path_key = (('/',), ('Folder (', 1, ')'))
+    paths = ('/Folder', '/Folder (1)')
+    assert _natsort_key(paths, None, ns.PATH) == (first_path_key, second_path_key)