diff options
| author | Seth M Morton <seth.m.morton@gmail.com> | 2015-05-17 19:36:10 -0700 |
|---|---|---|
| committer | Seth M Morton <seth.m.morton@gmail.com> | 2015-05-17 19:36:10 -0700 |
| commit | 3cbedc17c805fb7a077e8b69fd3066aea7f3a38e (patch) | |
| tree | d8dc973b8bff99489f3481b5a283f3bb6706852b | |
| parent | 72867093bce4c2cabe2ea53415fabfb6238ae7ea (diff) | |
| parent | 7df020b5ddec957c86cdae3d6adede7a4dbb93a6 (diff) | |
| download | natsort-3cbedc17c805fb7a077e8b69fd3066aea7f3a38e.tar.gz | |
natsort release version 4.0.0.
- Made default behavior of 'natsort' search for unsigned ints,
rather than signed floats. This is a backwards-incompatible
change but in 99% of use cases it should not require any
end-user changes.
- Improved handling of locale-aware sorting on systems where the
underlying locale library is broken.
- Greatly improved all unit tests by adding the hypothesis library.
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | .hgignore | 3 | ||||
| -rw-r--r-- | .travis.yml | 7 | ||||
| -rw-r--r-- | README.rst | 125 | ||||
| -rw-r--r-- | docs/source/api.rst | 1 | ||||
| -rw-r--r-- | docs/source/changelog.rst | 11 | ||||
| -rw-r--r-- | docs/source/examples.rst | 112 | ||||
| -rw-r--r-- | docs/source/intro.rst | 71 | ||||
| -rw-r--r-- | docs/source/natsort_key.rst | 8 | ||||
| -rw-r--r-- | docs/source/shell.rst | 38 | ||||
| -rw-r--r-- | natsort/__main__.py | 37 | ||||
| -rw-r--r-- | natsort/_version.py | 2 | ||||
| -rw-r--r-- | natsort/fake_fastnumbers.py | 45 | ||||
| -rw-r--r-- | natsort/locale_help.py | 47 | ||||
| -rw-r--r-- | natsort/natsort.py | 439 | ||||
| -rw-r--r-- | natsort/ns_enum.py | 63 | ||||
| -rw-r--r-- | natsort/py23compat.py | 6 | ||||
| -rw-r--r-- | natsort/unicode_numbers.py | 183 | ||||
| -rw-r--r-- | natsort/utils.py | 176 | ||||
| -rw-r--r-- | setup.py | 11 | ||||
| -rw-r--r-- | test_natsort/slow_splitters.py | 156 | ||||
| -rw-r--r-- | test_natsort/test_fake_fastnumbers.py | 124 | ||||
| -rw-r--r-- | test_natsort/test_locale_help.py | 124 | ||||
| -rw-r--r-- | test_natsort/test_main.py | 184 | ||||
| -rw-r--r-- | test_natsort/test_natsort.py | 128 | ||||
| -rw-r--r-- | test_natsort/test_unicode_numbers.py | 47 | ||||
| -rw-r--r-- | test_natsort/test_utils.py | 686 |
27 files changed, 1887 insertions, 950 deletions
@@ -2,6 +2,7 @@ # Packages *.egg +*.eggs *.egg-info dist build @@ -20,9 +21,11 @@ MANIFEST pip-log.txt # Unit test / coverage reports +.hypothesis .coverage .tox .cache +.pytest #Translations *.mo @@ -4,6 +4,7 @@ syntax: glob # Packages *.egg +*.eggs *.egg-info dist build @@ -22,9 +23,11 @@ MANIFEST pip-log.txt # Unit test / coverage reports +.hypothesis .coverage .tox .cache +.pytest #Translations *.mo diff --git a/.travis.yml b/.travis.yml index 1d064bb..bebdb51 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,6 @@ language: python python: -- 2.6 - 2.7 -- 3.2 - 3.3 - 3.4 env: @@ -13,13 +11,14 @@ before_install: - sudo locale-gen de_DE.UTF-8 - sudo apt-get install bc install: +- pip install -U pip - if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi - if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi - if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi -- if [[ $WITH_OPTIONS == true && 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi +- if [[ 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi - if [[ $(echo "$TRAVIS_PYTHON_VERSION < 3.3" | bc -l) ]]; then pip install mock; fi -- pip install pytest-cov pytest-flakes pytest-pep8 +- pip install pytest-cov pytest-flakes pytest-pep8 hypothesis - pip install coveralls script: - python -m pytest --cov natsort --flakes --pep8 @@ -11,10 +11,10 @@ Natural sorting for python. - Source Code: https://github.com/SethMMorton/natsort - Downloads: https://pypi.python.org/pypi/natsort - - Documentation: http://pythonhosted.org/natsort/ + - Documentation: http://pythonhosted.org/natsort -Please see `Deprecation Notices`_ for an `important` backwards incompatibility notice -for ``natsort`` version 4.0.0. 
+Please see `Moving from older Natsort versions`_ to see if this update requires +you to modify your ``natsort`` calls in your code (99% of users will not). Quick Description ----------------- @@ -47,16 +47,16 @@ Using ``natsorted`` is simple: ``natsorted`` identifies real numbers anywhere in a string and sorts them naturally. -Sorting version numbers is just as easy with the ``versorted`` function: +Sorting versions is handled properly by default (as of ``natsort`` version >= 4.0.0): .. code-block:: python - >>> from natsort import versorted >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] - >>> versorted(a) + >>> natsorted(a) ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] - >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work - ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] + +If you need to sort release candidates, please see +`this useful hack <http://pythonhosted.org//natsort/examples.htm#rc-sorting>`_ . You can also perform locale-aware sorting (or "human sorting"), where the non-numeric characters are ordered based on their meaning, not on their @@ -81,6 +81,19 @@ and the `Optional Dependencies`_ section below before using the ``humansorted`` function, *especially* if you are on a BSD-based system (like Mac OS X). +You can sort signed floats (i.e. real numbers) using the ``realsorted``; this is +useful in scientific data analysis. This was the default behavior of ``natsorted`` +for ``natsort`` version < 4.0.0. :: + +.. code-block:: python + + >>> from natsort import realsorted + >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] + >>> natsorted(a) + ['num2', 'num5.3', 'num5.10', 'num-3'] + >>> realsorted(a) + ['num-3', 'num2', 'num5.10', 'num5.3'] + You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort: @@ -129,9 +142,9 @@ from the command line with ``python -m natsort``. 
Requirements ------------ -``natsort`` requires python version 2.6 or greater -(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the -`argparse <https://pypi.python.org/pypi/argparse>`_ module is required. +``natsort`` requires Python version 2.7 or greater or Python 3.3 or greater. +Python 2.6 and 3.2 are no longer officially supported (no unit tests are performed) +but it should work. .. _optional: @@ -153,35 +166,56 @@ PyICU ''''' On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library -can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use -`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed -on your computer; this will give more reliable cross-platform results. -``natsort`` will not require (or check) that -`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation -since in Linux-based systems and Windows systems ``locale`` should work just fine. -Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and -how to install on Mac OS X. +can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is +used for the ``ns.LOCALE`` option and ``humansorted`` function.. To remedy this, +one can + + 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" + encoding. These encodings do not suffer from as many problems as "UTF-8" + and thus should give expected results. + 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` + will use it under the hood if it is installed; this will give more + reliable cross-platform results in the long run. ``natsort`` will not + require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ + is installed at installation. Please visit + https://github.com/SethMMorton/natsort/issues/21 for more details and + how to install on Mac OS X. 
**Please note** that using + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to + guarantee correct results for all input on BSD-based systems, since + every other suggestion is a workaround. + 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured + to compensate for a broken ``locale`` library in terms of case-handling; + if you do not need to be able to properly handle non-ASCII characters + then this may be the best option for you. + +Note that the above solutions *should not* be required for Windows or +Linux since in Linux-based systems and Windows systems ``locale`` *should* work +just fine. .. _deprecate: -Deprecation Notices -------------------- - - - The default sorting algorithm for ``natsort`` will change in version 4.0.0 - from signed floats (with exponents) to unsigned integers. The motivation - for this change is that it will cause ``natsort`` to return results that - pass the "least astonishment" test for the most common use case, which is - sorting version numbers. If you currently rely on the default behavior - to be signed floats, it is recommend that you add ``alg=ns.F`` to your - ``natsort`` calls or switch to the new ``realsorted`` function which - behaves identically to the current ``natsorted`` with default values. - - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, - ``as_path``, and ``py3_safe`` options will be removed from the (documented) - API, in favor of the ``alg`` option and ``ns`` enum. They will remain as - keyword-only arguments after that (for the foreseeable future). - - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed - from the public API. All future development should use ``natsort_keygen`` - in preparation for this. +Moving from older Natsort versions +---------------------------------- + + - The default sorting algorithm for ``natsort`` has changed in version 4.0.0 + from signed floats (with exponents) to unsigned integers. 
The motivation + for this change is that it will cause ``natsort`` to return results that + pass the "least astonishment" test for the most common use case, which is + sorting version numbers. If you relied on the default behavior + to be signed floats, it is add ``alg=ns.F | ns.S`` to your + ``natsort`` calls or switch to the new ``realsorted`` function which + behaves identically to the current ``natsorted`` with default values. + For 99% of users this will have no effect... it is only expected that this + will effect users using ``natsort`` for science and engineering. What it + will do is make it so you no longer need ``ns.V`` or ``ns.I | ns.U`` to sort + version-like strings. + This will also affect the default behavior of the ``natsort`` shell script. + - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, + ``as_path``, and ``py3_safe`` options have be removed from the (documented) + API in favor of the ``alg`` option and ``ns`` enum. + - In ``natsort`` version 4.0.0, the ``natsort_key`` function has be removed + from the public API. Author ------ @@ -194,6 +228,17 @@ History These are the last three entries of the changelog. See the package documentation for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. +05-17-2015 v. 4.0.0 +''''''''''''''''''' + + - Made default behavior of 'natsort' search for unsigned ints, + rather than signed floats. This is a backwards-incompatible + change but in 99% of use cases it should not required any + end-user changes. + - Improved handling of locale-aware sorting on systems where the + underlying locale library is broken. + - Greatly improved all unit tests by adding the hypothesis library. + 04-06-2015 v. 3.5.6 ''''''''''''''''''' @@ -208,9 +253,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. - Added 'realsorted' and 'index_realsorted' functions for forward-compatibility with >= 4.0.0. 
- Made explanation of when to use "TYPESAFE" more clear in the docs. - -04-02-2015 v. 3.5.4 -''''''''''''''''''' - - - Fixed bug where a 'TypeError' was raised if a string containing a leading - number was sorted with alpha-only strings when 'LOCALE' is used. diff --git a/docs/source/api.rst b/docs/source/api.rst index 4084720..48728ee 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -10,7 +10,6 @@ natsort API :maxdepth: 2 natsort_keygen.rst - natsort_key.rst natsorted.rst versorted.rst humansorted.rst diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 2803377..834373a 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,17 @@ Changelog --------- +05-17-2015 v. 4.0.0 +''''''''''''''''''' + + - Made default behavior of 'natsort' search for unsigned ints, + rather than signed floats. This is a backwards-incompatible + change but in 99% of use cases it should not required any + end-user changes. + - Improved handling of locale-aware sorting on systems where the + underlying locale library is broken. + - Greatly improved all unit tests by adding the hypothesis library. + 04-06-2015 v. 3.5.6 ''''''''''''''''''' diff --git a/docs/source/examples.rst b/docs/source/examples.rst index a995bb4..02783f4 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -15,35 +15,21 @@ Basic Usage In the most basic use case, simply import :func:`~natsorted` and use it as you would :func:`sorted`:: - >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] >>> sorted(a) - ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] + ['a1', 'a10', 'a2', 'a4', 'a9'] >>> from natsort import natsorted, ns >>> natsorted(a) - ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] + ['a1', 'a2', 'a4', 'a9', 'a10'] Sort Version Numbers -------------------- -With default options, :func:`~natsorted` will not sort version numbers -well. 
Version numbers are best sorted by searching for valid unsigned int -literals, not floats. This can be achieved in three ways, as shown below:: - - >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] - >>> natsorted(a) # This gives incorrect results - ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1'] - >>> natsorted(a, alg=ns.INT | ns.UNSIGNED) - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - >>> natsorted(a, alg=ns.VERSION) - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - >>> from natsort import versorted - >>> versorted(a) - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - -You can see that ``alg=ns.VERSION`` is a shortcut for -``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for -``natsorted(alg=ns.VERSION)``. The recommend manner to sort version -numbers is to use :func:`~versorted`. +As of :mod:`natsort` version >= 4.0.0, :func:`~natsorted` will now properly +sort version numbers. The old function :func:`~versorted` exists for +backwards compatibility but new development should use :func:`~natsorted`. + +.. 
_rc_sorting: Sorting with Alpha, Beta, and Release Candidates ++++++++++++++++++++++++++++++++++++++++++++++++ @@ -52,19 +38,19 @@ By default, if you wish to sort versions with a non-strict versioning scheme, you may not get the results you expect:: >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] - >>> versorted(a) + >>> natsorted(a) ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] To make the '1.2' pre-releases come before '1.2.1', you need to use the following recipe:: - >>> versorted(a, key=lambda x: x.replace('.', '~')) + >>> natsorted(a, key=lambda x: x.replace('.', '~')) ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] If you also want '1.2' after all the alpha, beta, and rc candidates, you can modify the above recipe:: - >>> versorted(a, key=lambda x: x.replace('.', '~')+'z') + >>> natsorted(a, key=lambda x: x.replace('.', '~')+'z') ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to @@ -123,6 +109,32 @@ with the ``locale`` module from the standard library that are solved when using `PyICU <https://pypi.python.org/pypi/PyICU>`_; you can read about them here: http://bugs.python.org/issue23195. +If you have problems with ``ns.LOCALE`` (or :func:`~humansorted`), +especially on BSD-based systems, you can try the following: + + 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" + encoding. These encodings do not suffer from as many problems as "UTF-8" + and thus should give expected results. + 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` + will use it under the hood if it is installed; this will give more + reliable cross-platform results in the long run. 
``natsort`` will not + require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ + is installed at installation. Please visit + https://github.com/SethMMorton/natsort/issues/21 for more details and + how to install on Mac OS X. **Please note** that using + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to + guarantee correct results for all input on BSD-based systems, since + every other suggestion is a workaround. + 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured + to compensate for a broken ``locale`` library in terms of case-handling; + if you do not need to be able to properly handle non-ASCII characters + then this may be the best option for you. + +Note that the above solutions *should not* be required for Windows or +Linux since in Linux-based systems and Windows systems ``locale`` *should* work +just fine. + Controlling Case When Sorting ----------------------------- @@ -167,20 +179,32 @@ would expect to be "natural" sorting:: Customizing Float Definition ---------------------------- -By default :func:`~natsorted` searches for any float that would be +You can make :func:`~natsorted` search for any float that would be a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. -Perhaps you don't want to search for signed numbers, or you don't -want to search for exponential notation, the ``ns.UNSIGNED`` and -``ns.NOEXP`` options allow you to do this:: +using the ``ns.FLOAT`` key. You can disable the exponential component +of the number with ``ns.NOEXP``. 
:: >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] - >>> natsorted(a) - ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] - >>> natsorted(a, alg=ns.UNSIGNED) + >>> natsorted(a, alg=ns.FLOAT) ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] - >>> natsorted(a, alg=ns.NOEXP) + >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED) + ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] + >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED | ns.NOEXP) ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] +For convenience, the ``ns.REAL`` option is provided which is a shortcut +for ``ns.FLOAT | ns.SIGNED`` and can be used to sort on real numbers. +This can be easily accessed with the :func:`~realsorted` convenience +function. Please note that the behavior of the :func:`~realsorted` function +was the default behavior of :func:`~natsorted` for :mod:`natsort` +version < 4.0.0:: + + >>> natsorted(a, alg=ns.REAL) + ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] + >>> from natsort import realsorted + >>> realsorted(a) + ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] + Using a Custom Sorting Key -------------------------- @@ -209,15 +233,10 @@ need to pass a key to the :meth:`list.sort` method. The function >>> from natsort import natsort_keygen >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] - >>> natsort_key = natsort_keygen() + >>> natsort_key = natsort_keygen(alg=ns.FLOAT) >>> a.sort(key=natsort_key) >>> a ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] - >>> versort_key = natsort_keygen(alg=ns.VERSION) - >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] - >>> a.sort(key=versort_key) - >>> a - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] :func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the `reverse` option). 
@@ -227,8 +246,8 @@ Sorting Multiple Lists According to a Single List Sometimes you have multiple lists, and you want to sort one of those lists and reorder the other lists according to how the first was sorted. -To achieve this you would use the :func:`~index_natsorted` or -:func:`~index_versorted` in combination with the convenience function +To achieve this you could use the :func:`~index_natsorted` in combination +with the convenience function :func:`~order_by_index`:: >>> from natsort import index_natsorted, order_by_index @@ -297,3 +316,14 @@ If you need a codec different from ASCII or UTF-8, you can use >>> a = [b'a56', b'a5', b'a6', b'a40'] >>> natsorted(a, key=decoder('latin1')) == [b'a5', b'a6', b'a40', b'a56'] True + +Sorting a Pandas DataFrame +-------------------------- + +As of Pandas version 0.16.0, the sorting methods do not accept a ``key`` argument, +so you cannot simply pass :func:`natsort_keygen` to a Pandas DataFrame and sort. +This request has been made to the Pandas devs; see +`issue 3942 <https://github.com/pydata/pandas/issues/3942>`_ if you are interested. +If you need to sort a Pandas DataFrame, please check out +`this answer on StackOverflow <http://stackoverflow.com/a/29582718/1399279>`_ +for ways to do this without the ``key`` argument to ``sort``. diff --git a/docs/source/intro.rst b/docs/source/intro.rst index b79aec9..d454094 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -47,21 +47,23 @@ or as versions. Using :func:`~natsorted` is simple:: >>> natsorted(a) ['a1', 'a2', 'a4', 'a9', 'a10'] -:func:`~natsorted` identifies real numbers anywhere in a string and sorts them +:func:`~natsorted` identifies numbers anywhere in a string and sorts them naturally. -Sorting version numbers is just as easy with :func:`~versorted`:: +Sorting versions is handled properly by default (as of :mod:`natsort` version >= 4.0.0): + +.. 
code-block:: python - >>> from natsort import versorted >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] - >>> versorted(a) + >>> natsorted(a) ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] - >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work - ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] + +If you need to sort release candidates, please see :ref:`rc_sorting` for +a useful hack. You can also perform locale-aware sorting (or "human sorting"), where the non-numeric characters are ordered based on their meaning, not on their -ordinal value; this can be achieved with the ``humansorted`` function:: +ordinal value; this can be achieved with the :func:`~humansorted` function:: >>> a = ['Apple', 'Banana', 'apple', 'banana'] >>> natsorted(a) @@ -76,7 +78,20 @@ ordinal value; this can be achieved with the ``humansorted`` function:: You may find you need to explicitly set the locale to get this to work (as shown in the example). Please see :ref:`bug_note` and the Installation section -below before using the ``humansorted`` function. +below before using the :func:`~humansorted` function. + +You can sort signed floats (i.e. real numbers) using the :func:`~realsorted`; +this is useful in scientific data analysis. This was the default behavior of +:func:`~natsorted` for :mod:`natsort` version < 4.0.0. :: + +.. code-block:: python + + >>> from natsort import realsorted + >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] + >>> natsorted(a) + ['num2', 'num5.3', 'num5.10', 'num-3'] + >>> realsorted(a) + ['num-3', 'num2', 'num5.10', 'num5.3'] You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort:: @@ -143,9 +158,9 @@ If you want to build this documentation, enter:: python setup.py build_sphinx -:mod:`natsort` requires python version 2.6 or greater -(this includes python 3.x). 
To run version 2.6, 3.0, or 3.1 the -`argparse <https://pypi.python.org/pypi/argparse>`_ module is required. +:mod:`natsort` requires Python version 2.7 or greater or Python 3.3 or greater. +Python 2.6 and 3.2 are no longer officially supported (no unit tests are performed) +but it should work. The most efficient sorting can occur if you install the `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps @@ -155,14 +170,32 @@ recommended you include this as a dependency. ``natsort`` will not require (or check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library -can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use -`PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed -on your computer; this will give more reliable cross-platform results. -``natsort`` will not require (or check) that -`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation -since in Linux-based systems and Windows systems ``locale`` should work just fine. -Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and -how to install on Mac OS X. +can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is +used for the ``ns.LOCALE`` option and ``humansorted`` function.. To remedy this, +one can + + 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" + encoding. These encodings do not suffer from as many problems as "UTF-8" + and thus should give expected results. + 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` + will use it under the hood if it is installed; this will give more + reliable cross-platform results in the long run. ``natsort`` will not + require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ + is installed at installation. 
Please visit + https://github.com/SethMMorton/natsort/issues/21 for more details and + how to install on Mac OS X. **Please note** that using + `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to + guarantee correct results for all input on BSD-based systems, since + every other suggestion is a workaround. + 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured + to compensate for a broken ``locale`` library in terms of case-handling; + if you do not need to be able to properly handle non-ASCII characters + then this may be the best option for you. + +Note that the above solutions *should not* be required for Windows or +Linux since in Linux-based systems and Windows systems ``locale`` *should* work +just fine. :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called from the command line with ``python -m natsort``. The command line script is diff --git a/docs/source/natsort_key.rst b/docs/source/natsort_key.rst deleted file mode 100644 index 351b351..0000000 --- a/docs/source/natsort_key.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. default-domain:: py -.. currentmodule:: natsort - -:func:`~natsort.natsort_key` -============================ - -.. autofunction:: natsort_key - diff --git a/docs/source/shell.rst b/docs/source/shell.rst index 65cfc76..78dc3dc 100644 --- a/docs/source/shell.rst +++ b/docs/source/shell.rst @@ -48,20 +48,26 @@ Usage Used to exclude an entry that contains a specific number. -r, --reverse Returns in reversed order. - -t {digit,int,float,version,ver}, --number-type {digit,int,float,version,ver} + -t {digit,int,float,version,ver,real,f,i,r,d}, + --number-type {digit,int,float,version,ver,real,f,i,r,d}, + --number_type {digit,int,float,version,ver,real,f,i,r,d} Choose the type of number to search for. "float" will search for floating-point numbers. "int" will only search for integers. "digit", "version", and "ver" are - shortcuts for "int" with --nosign. 
+ synonyms for "int"."real" is a shortcut for "float" + with --sign. "i" and "d" are synonyms for "int", "f" + is a synonym for "float", and "r" is a synonym for + "real".The default is int. --nosign Do not consider "+" or "-" as part of a number, i.e. - do not take sign into consideration. + do not take sign into consideration. This is the + default. + -s, --sign Consider "+" or "-" as part of a number, i.e. take + sign into consideration. The default is unsigned. --noexp Do not consider an exponential as part of a number, i.e. 1e4, would be considered as 1, "e", and 4, not as 10000. This only effects the --number-type=float. - --locale, -l Causes natsort to use locale-aware sorting. On some - systems, the underlying C library is broken, so if you - get results that you do not expect please install - PyICU and try again. + -l, --locale Causes natsort to use locale-aware sorting. You will + get the best results if you install PyICU. Description ----------- @@ -84,18 +90,18 @@ to bad analysis. To remedy this, use ``natsort``:: mode943.54.out mode1000.35.out mode1243.34.out - $ natsort *.out | xargs your_program + $ natsort -t r *.out | xargs your_program -You can also place natsort in the middle of a pipe:: +``-t r`` is short for ``--number-type real``. You can also place natsort in +the middle of a pipe:: - $ find . -name "*.out" | natsort | xargs your_program + $ find . -name "*.out" | natsort -t r | xargs your_program -To sort version numbers, use the ``--number-type version`` option -(or ``-t ver`` for short):: +To sort version numbers, use the default ``--number-type``:: $ ls * prog-1.10.zip prog-1.9.zip prog-2.0.zip - $ natsort -t ver * + $ natsort * prog-1.9.zip prog-1.10.zip prog-2.0.zip @@ -106,13 +112,13 @@ options. 
These three options are used as follows:: $ ls *.out mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out - $ natsort *.out -f 900 1100 # Select only numbers between 900-1100 + $ natsort -t r *.out -f 900 1100 # Select only numbers between 900-1100 mode943.54.out mode1000.35.out - $ natsort *.out -F 900 1100 # Select only numbers NOT between 900-1100 + $ natsort -t r *.out -F 900 1100 # Select only numbers NOT between 900-1100 mode744.43.out mode1243.34.out - $ natsort *.out -e 1000.35 # Exclude 1000.35 from search + $ natsort -t r *.out -e 1000.35 # Exclude 1000.35 from search mode744.43.out mode943.54.out mode1243.34.out diff --git a/natsort/__main__.py b/natsort/__main__.py index 85edba3..e86097d 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -51,22 +51,31 @@ def main(): help='Returns in reversed order.') parser.add_argument( '-t', '--number-type', '--number_type', dest='number_type', - choices=('digit', 'int', 'float', 'version', 'ver'), default='float', + choices=('digit', 'int', 'float', 'version', 'ver', + 'real', 'f', 'i', 'r', 'd'), + default='int', help='Choose the type of number to search for. "float" will search ' 'for floating-point numbers. "int" will only search for ' - 'integers. "digit", "version", and "ver" are shortcuts for "int" ' - 'with --nosign.') + 'integers. "digit", "version", and "ver" are synonyms for "int".' + '"real" is a shortcut for "float" with --sign. ' + '"i" and "d" are synonyms for "int", "f" is a synonym for ' + '"float", and "r" is a synonym for "real".' + 'The default is %(default)s.') parser.add_argument( - '--nosign', default=True, action='store_false', dest='signed', + '--nosign', default=False, action='store_false', dest='signed', help='Do not consider "+" or "-" as part of a number, i.e. do not ' - 'take sign into consideration.') + 'take sign into consideration. 
This is the default.') + parser.add_argument( + '-s', '--sign', default=False, action='store_true', dest='signed', + help='Consider "+" or "-" as part of a number, i.e. ' + 'take sign into consideration. The default is unsigned.') parser.add_argument( '--noexp', default=True, action='store_false', dest='exp', help='Do not consider an exponential as part of a number, i.e. 1e4, ' 'would be considered as 1, "e", and 4, not as 10000. This only ' 'effects the --number-type=float.') parser.add_argument( - '--locale', '-l', action='store_true', default=False, + '-l', '--locale', action='store_true', default=False, help='Causes natsort to use locale-aware sorting. You will get the ' 'best results if you install PyICU.') parser.add_argument( @@ -143,14 +152,10 @@ def sort_and_print_entries(entries, args): """Sort the entries, applying the filters first if necessary.""" # Extract the proper number type. - num_type = {'digit': None, - 'version': None, - 'ver': None, - 'int': int, - 'float': float}[args.number_type] - unsigned = not args.signed or num_type is None - alg = (ns.INT * int(num_type in (int, None)) | - ns.UNSIGNED * unsigned | + is_float = args.number_type in ('float', 'real', 'f', 'r') + signed = args.signed or args.number_type in ('real', 'r') + alg = (ns.FLOAT * is_float | + ns.SIGNED * signed | ns.NOEXP * (not args.exp) | ns.PATH * args.paths | ns.LOCALE * args.locale) @@ -160,8 +165,8 @@ def sort_and_print_entries(entries, args): # as for sorting. do_filter = args.filter is not None or args.reverse_filter is not None if do_filter or args.exclude: - inp_options = (ns.INT * int(num_type in (int, None)) | - ns.UNSIGNED * unsigned | + inp_options = (ns.FLOAT * is_float | + ns.SIGNED * signed | ns.NOEXP * (not args.exp), '.' 
) diff --git a/natsort/_version.py b/natsort/_version.py index eea91d6..cc26564 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -2,4 +2,4 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -__version__ = '3.5.6' +__version__ = '4.0.0' diff --git a/natsort/fake_fastnumbers.py b/natsort/fake_fastnumbers.py index 116bab1..e934313 100644 --- a/natsort/fake_fastnumbers.py +++ b/natsort/fake_fastnumbers.py @@ -8,24 +8,49 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) # Std. lib imports. +import sys import re +import unicodedata +float_re = re.compile(r'[-+]?(\d*\.?\d+(?:[eE][-+]?\d+)?|inf(?:inity)?|nan)$') +if sys.version[0] == '2': + int_re = re.compile(r'[-+]?\d+[lL]?$') +else: + int_re = re.compile(r'[-+]?\d+$') + long = int + unicode = str -float_re = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$') -int_re = re.compile(r'[-+]?\d+$') - -def fast_float(x, regex_matcher=float_re.match): +def fast_float(x, regex_matcher=float_re.match, uni=unicodedata.numeric): """Convert a string to a float quickly""" - return float(x) if regex_matcher(x) else x + if type(x) in (int, long, float): + return float(x) + elif regex_matcher(x): + return float(x) + elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: + return uni(x) + else: + return x -def fast_int(x, regex_matcher=int_re.match): +def fast_int(x, regex_matcher=int_re.match, uni=unicodedata.digit): """\ Convert a string to a int quickly, return input as-is if not possible. 
""" - return int(x) if regex_matcher(x) else x + if type(x) in (int, long, float): + return int(x) + elif regex_matcher(x): + return int(x.rstrip('Ll')) + elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: + return uni(x) + else: + return x + + +def isfloat(x, num_only=False): + """Returns true if the input is a float, false otherwise.""" + return type(x) == float -def isreal(x, ntypes=set([int, float])): - """Returns true if the input is a real number, false otherwise.""" - return type(x) in ntypes +def isint(x, num_only=False): + """Returns true if the input is an int, false otherwise.""" + return type(x) in set([int, long]) diff --git a/natsort/locale_help.py b/natsort/locale_help.py index 9a5b656..789b50b 100644 --- a/natsort/locale_help.py +++ b/natsort/locale_help.py @@ -13,21 +13,14 @@ from itertools import chain from locale import localeconv # Local imports. -from natsort.py23compat import py23_zip - -# If the user has fastnumbers installed, they will get great speed -# benefits. If not, we simulate the functions here. -try: - from fastnumbers import isreal -except ImportError: - from natsort.fake_fastnumbers import isreal +from natsort.py23compat import PY_VERSION # We need cmp_to_key for Python2 because strxfrm is broken for unicode. -if sys.version[:3] == '2.7': +try: from functools import cmp_to_key # cmp_to_key was not created till 2.7. -elif sys.version[:3] == '2.6': - def cmp_to_key(mycmp): # pragma: no cover +except ImportError: # pragma: no cover + def cmp_to_key(mycmp): """Convert a cmp= function into a key= function""" class K(object): __slots__ = ['obj'] @@ -78,6 +71,9 @@ try: return _d[l] use_pyicu = True null_string = b'' + + def dumb_sort(): + return False except ImportError: if sys.version[0] == '2': from locale import strcoll @@ -88,10 +84,23 @@ except ImportError: null_string = '' use_pyicu = False + # On some systems, locale is broken and does not sort in the expected + # order. 
We will try to detect this and compensate. + def dumb_sort(): + return strxfrm('A') < strxfrm('a') + + +if PY_VERSION >= 3.3: + def _low(x): + return x.casefold() +else: + def _low(x): + return x.lower() + def groupletters(x): """Double all characters, making doubled letters lowercase.""" - return ''.join(chain(*py23_zip(x.lower(), x))) + return ''.join(chain.from_iterable([_low(y), y] for y in x)) def grouper(val, func): @@ -102,8 +111,8 @@ def grouper(val, func): """ # Return the number or transformed string. # If the input is identical to the output, then no conversion happened. - s = func(val) - return groupletters(s) if val is s else s + s = func[0](val) + return groupletters(s) if not func[1](s) else s def locale_convert(val, func, group): @@ -119,7 +128,7 @@ def locale_convert(val, func, group): s = val.replace(radix, '.') if radix != '.' else val # Perform the conversion - t = func(s) + t = func[0](s) # Return the number or transformed string. # If the input is identical to the output, then no conversion happened. 
@@ -129,12 +138,12 @@ def locale_convert(val, func, group): if group: if use_pyicu: xfrm = get_pyicu_transform(getlocale()) - return xfrm(groupletters(val)) if not isreal(t) else t + return xfrm(groupletters(val)) if not func[1](t) else t else: - return strxfrm(groupletters(val)) if not isreal(t) else t + return strxfrm(groupletters(val)) if not func[1](t) else t else: if use_pyicu: xfrm = get_pyicu_transform(getlocale()) - return xfrm(val) if not isreal(t) else t + return xfrm(val) if not func[1](t) else t else: - return strxfrm(val) if not isreal(t) else t + return strxfrm(val) if not func[1](t) else t diff --git a/natsort/natsort.py b/natsort/natsort.py index 8fb6754..78c0c24 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -122,135 +122,15 @@ def as_utf8(s): return _do_decoding(s, 'utf-8') -@u_format -def natsort_key(val, key=None, number_type=float, signed=None, exp=None, - as_path=None, py3_safe=None, alg=0): - """\ - Key to sort strings and numbers naturally. - - Key to sort strings and numbers naturally, not lexicographically. - It is designed for use in passing to the 'sorted' builtin or - 'sort' attribute of lists. - - .. note:: Deprecated since version 3.4.0. - This function remains in the publicly exposed API for - backwards-compatibility reasons, but future development - should use the newer `natsort_keygen` function. It is - planned to remove this from the public API in natsort - version 4.0.0. A DeprecationWarning will be raised - via the warnings module; set warnings.simplefilter("always") - to raise them to see if your code will work in version - 4.0.0. - - Parameters - ---------- - val : {{str, unicode}} - The value used by the sorting algorithm - - key : callable, optional - A key used to manipulate the input value before parsing for - numbers. It is **not** applied recursively. - It should accept a single argument and return a single value. 
- - number_type : {{None, float, int}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - signed : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - exp : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - py3_safe : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - alg : ns enum, optional - This option is used to control which algorithm `natsort` - uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.FLOAT`. - - Returns - ------- - out : tuple - The modified value with numbers extracted. - - See Also - -------- - natsort_keygen : Generates a properly wrapped `natsort_key`. 
- - Examples - -------- - Using natsort_key is just like any other sorting key in python:: - - >>> a = ['num3', 'num5', 'num2'] - >>> a.sort(key=natsort_key) - >>> a - [{u}'num2', {u}'num3', {u}'num5'] - - It works by separating out the numbers from the strings:: - - >>> natsort_key('num2') - ({u}'num', 2.0) - - If you need to call natsort_key with the number_type argument, or get a - special attribute or item of each element of the sequence, please use - the `natsort_keygen` function. Actually, please just use the - `natsort_keygen` function. - - Notes - ----- - Iterables are parsed recursively so you can sort lists of lists:: - - >>> natsort_key(('a1', 'a10')) - (({u}'a', 1.0), ({u}'a', 10.0)) - - Strings that lead with a number get an empty string at the front of the - tuple. This is designed to get around the "unorderable types" issue of - Python3:: - - >>> natsort_key('15a') - ({u}'', 15.0, {u}'a') - - You can give bare numbers, too:: - - >>> natsort_key(10) - ({u}'', 10) - - If you have a case where one of your string has two numbers in a row, - you can turn on the "py3_safe" option to try to add a "" between sets - of two numbers:: - - >>> natsort_key('43h7+3', py3_safe=True) - ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0) - - """ +def natsort_key(val, key=None, alg=0, **_kwargs): + """Undocumented, kept for backwards-compatibility.""" msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" warn(msg, DeprecationWarning) - alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg - return _natsort_key(val, key, alg) + return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg) @u_format -def natsort_keygen(key=None, number_type=float, signed=None, exp=None, - as_path=None, py3_safe=None, alg=0): +def natsort_keygen(key=None, alg=0, **_kwargs): """\ Generate a key to sort strings and numbers naturally. @@ -269,40 +149,10 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None, numbers. 
It is **not** applied recursively. It should accept a single argument and return a single value. - number_type : {{None, float, int}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - signed : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - exp : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - py3_safe : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.FLOAT`. + the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- @@ -311,6 +161,10 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None, suitable for passing as the `key` argument to functions such as `sorted`. 
+ See Also + -------- + natsorted + Examples -------- `natsort_keygen` is a convenient way to create a custom key @@ -318,32 +172,16 @@ def natsort_keygen(key=None, number_type=float, signed=None, exp=None, will return a plain `natsort_key` instance:: >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] - >>> b = a[:] - >>> a.sort(key=natsort_key) - >>> b.sort(key=natsort_keygen()) - >>> a == b - True - - The power of `natsort_keygen` is when you want to want to pass - arguments to the `natsort_key`. Consider the following - equivalent examples; which is more clear? :: - - >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] - >>> b = a[:] - >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(), - ... signed=False)) - >>> b.sort(key=natsort_keygen(key=lambda x: x.upper(), signed=False)) - >>> a == b - True + >>> a.sort(key=natsort_keygen(alg=ns.REAL)) + >>> a + [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] """ - alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg - return partial(_natsort_key, key=key, alg=alg) + return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg) @u_format -def natsorted(seq, key=None, number_type=float, signed=None, exp=None, - reverse=False, as_path=None, alg=0): +def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Sorts a sequence naturally. @@ -361,38 +199,14 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, It is **not** applied recursively. It should accept a single argument and return a single value. - number_type : {{None, float, int}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - signed : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. 
See :class:`ns` class documentation for - details. - - exp : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.FLOAT`. + the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- @@ -402,8 +216,7 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, See Also -------- natsort_keygen : Generates the key that makes natural sorting possible. - versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``. - realsorted : Identical to ``natsorted(seq)``; for forwards-compatibility. + realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``. humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. index_natsorted : Returns the sorted indexes from `natsorted`. 
@@ -416,10 +229,9 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, [{u}'num2', {u}'num3', {u}'num5'] """ - alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg + alg = _args_to_enum(**_kwargs) | alg try: - return sorted(seq, reverse=reverse, - key=natsort_keygen(key, alg=alg)) + return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg=alg)) except TypeError as e: # pragma: no cover # In the event of an unresolved "unorderable types" error # for string to number type comparisons (not str/bytes), @@ -435,58 +247,21 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, @u_format -def versorted(seq, key=None, reverse=False, as_path=None, alg=0): +def versorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ - Convenience function to sort version numbers. - - Convenience function to sort version numbers. This is a wrapper - around ``natsorted(seq, alg=ns.VERSION)``. - - Parameters - ---------- - seq : iterable - The sequence to sort. - - key : callable, optional - A key used to determine how to sort each element of the sequence. - It is **not** applied recursively. - It should accept a single argument and return a single value. - - reverse : {{True, False}}, optional - Return the list in reversed sorted order. The default is - `False`. - - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. + Identical to :func:`natsorted`. - alg : ns enum, optional - This option is used to control which algorithm `natsort` - uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.VERSION`. + This function exists for backwards compatibility with `natsort` + version < 4.0.0. Future development should use :func:`natsorted`. 
- Returns - ------- - out : list - The sorted sequence. + Please see the :func:`natsorted` documentation for use. See Also -------- - index_versorted : Returns the sorted indexes from `versorted`. - - Examples - -------- - Use `versorted` just like the builtin `sorted`:: - - >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] - >>> versorted(a) - [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] + natsorted """ - alg = _args_to_enum(float, None, None, as_path, None) | alg - return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) + return natsorted(seq, key, reverse, alg, **_kwargs) @u_format @@ -502,7 +277,8 @@ def humansorted(seq, key=None, reverse=False, alg=0): C library that Python's locale module uses is broken. On these systems it is recommended that you install `PyICU <https://pypi.python.org/pypi/PyICU>`_ - if you wish to use ``humansorted``. If you are on + if you wish to use ``humansorted``, especially if you need + to handle non-ASCII characters. If you are on one of systems and get unexpected results, please try using `PyICU <https://pypi.python.org/pypi/PyICU>`_ before filing a bug report to `natsort`. @@ -538,10 +314,11 @@ def humansorted(seq, key=None, reverse=False, alg=0): Notes ----- You may find that if you do not explicitly set - the locale your results may not be as you expect... I have found that - it depends on the system you are on. To do this is straightforward - (in the below example I use 'en_US.UTF-8', but you should use your - locale):: + the locale your results may not be as you expect, although + as of ``natsort`` version 4.0.0 the sorting algorithm has been + updated to account for a buggy ``locale`` installation. + In the below example 'en_US.UTF-8' is used, but you should use your + locale:: >>> import locale >>> # The 'str' call is only to get around a bug on Python 2.x @@ -552,7 +329,7 @@ def humansorted(seq, key=None, reverse=False, alg=0): It is preferred that you do this before importing `natsort`. 
If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning - above) then you should not need to do this. + above) then you should not need to explicitly set a locale. Examples -------- @@ -565,20 +342,21 @@ def humansorted(seq, key=None, reverse=False, alg=0): [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana'] """ - return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) + return natsorted(seq, key, reverse, alg | ns.LOCALE) @u_format def realsorted(seq, key=None, reverse=False, alg=0): """\ - Identical to :func:`natsorted`. + Convenience function to properly sort signed floats. + + Convenience function to properly sort signed floats within + strings (i.e. "a-5.7"). This is a wrapper around + ``natsorted(seq, alg=ns.REAL)``. - This is provided for forward-compatibility with :mod:`natsort` - version >= 4.0.0. If you are relying on the default sorting - behavior of :func:`natsorted` to sort by signed floats, - you should consider using this function as the default sorting - behavior of :func:`natsorted` will changed to unsigned - integers in :mod:`natsort` version >= 4.0.0. + The behavior of :func:`realsorted` for `natsort` version >= 4.0.0 + was the default behavior of :func:`natsorted` for `natsort` + version < 4.0.0. Parameters ---------- @@ -597,7 +375,7 @@ def realsorted(seq, key=None, reverse=False, alg=0): alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.FLOAT`. + the :class:`ns` class documentation. The default is `ns.REAL`. 
Returns ------- @@ -613,16 +391,17 @@ def realsorted(seq, key=None, reverse=False, alg=0): Use `realsorted` just like the builtin `sorted`:: >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] + >>> natsorted(a) + [{u}'num2', {u}'num5.3', {u}'num5.10', {u}'num-3'] >>> realsorted(a) [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] """ - return natsorted(seq, key=key, reverse=reverse, alg=alg) + return natsorted(seq, key, reverse, alg | ns.REAL) @u_format -def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, - reverse=False, as_path=None, alg=0): +def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ Return the list of the indexes used to sort the input sequence. @@ -641,38 +420,14 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, It is **not** applied recursively. It should accept a single argument and return a single value. - number_type : {{None, float, int}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - signed : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - - exp : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. - reverse : {{True, False}}, optional Return the list in reversed sorted order. The default is `False`. - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. 
See :class:`ns` class documentation for - details. - alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.FLOAT`. + the :class:`ns` class documentation. The default is `ns.INT`. Returns ------- @@ -702,7 +457,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, [{u}'baz', {u}'foo', {u}'bar'] """ - alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg + alg = _args_to_enum(**_kwargs) | alg if key is None: newkey = itemgetter(1) else: @@ -727,64 +482,22 @@ def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, @u_format -def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0): +def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs): """\ - Return the list of the indexes used to sort the input sequence - of version numbers. - - Sorts a sequence of version, but returns a list of sorted the - indexes and not the sorted list. This list of indexes can be - used to sort multiple lists by the sorted order of the given - sequence. - - This is a wrapper around ``index_natsorted(seq, number_type=None)``. - - Parameters - ---------- - seq: iterable - The sequence to sort. - - key: callable, optional - A key used to determine how to sort each element of the sequence. - It is **not** applied recursively. - It should accept a single argument and return a single value. - - reverse : {{True, False}}, optional - Return the list in reversed sorted order. The default is - `False`. + Identical to :func:`index_natsorted`. - as_path : {{True, False}}, optional - Deprecated as of version 3.5.0 and will become an undocumented - keyword-only argument in 4.0.0. Please use the `alg` argument - for all future development. See :class:`ns` class documentation for - details. 
+ This function exists for backwards compatibility with + ``natsort`` version < 4.0.0. Future development should use + :func:`index_natsorted`. - alg : ns enum, optional - This option is used to control which algorithm `natsort` - uses when sorting. For details into these options, please see - the :class:`ns` class documentation. The default is `ns.VERSION`. - - Returns - ------- - out : tuple - The ordered indexes of the sequence. + Please see the :func:`index_natsorted` documentation for use. See Also -------- - versorted - order_by_index - - Examples - -------- - Use `index_versorted` just like the builtin `sorted`:: - - >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] - >>> index_versorted(a) - [1, 2, 0] + index_natsorted """ - alg = _args_to_enum(float, None, None, as_path, None) | alg - return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) + return index_natsorted(seq, key, reverse, alg, **_kwargs) @u_format @@ -799,6 +512,8 @@ def index_humansorted(seq, key=None, reverse=False, alg=0): of the given sequence. This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. + Please see the ``humansorted`` documentation for caveats of + using ``index_humansorted``. Parameters ---------- @@ -832,10 +547,11 @@ def index_humansorted(seq, key=None, reverse=False, alg=0): Notes ----- You may find that if you do not explicitly set - the locale your results may not be as you expect... I have found that - it depends on the system you are on. To do this is straightforward - (in the below example I use 'en_US.UTF-8', but you should use your - locale):: + the locale your results may not be as you expect, although + as of ``natsort`` version 4.0.0 the sorting algorithm has been + updated to account for a buggy ``locale`` installation. 
+ In the below example 'en_US.UTF-8' is used, but you should use your + locale:: >>> import locale >>> # The 'str' call is only to get around a bug on Python 2.x @@ -846,7 +562,7 @@ def index_humansorted(seq, key=None, reverse=False, alg=0): It is preferred that you do this before importing `natsort`. If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning - above) then you should not need to do this. + above) then you should not need to explicitly set a locale. Examples -------- @@ -857,20 +573,25 @@ def index_humansorted(seq, key=None, reverse=False, alg=0): [2, 0, 3, 1] """ - return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) + return index_natsorted(seq, key, reverse, alg | ns.LOCALE) @u_format def index_realsorted(seq, key=None, reverse=False, alg=0): """\ - Identical to :func:`index_natsorted`. + Return the list of the indexes used to sort the input sequence + of signed floats. + + Sorts a sequence containing signed floats, but returns a list + of the sorted indexes and not the sorted list. This list of + indexes can be used to sort multiple lists by the sorted order + of the given sequence. + + This is a wrapper around ``index_natsorted(seq, alg=ns.REAL)``. - This is provided for forward-compatibility with :mod:`natsort` - version >= 4.0.0. If you are relying on the default sorting - behavior of :func:`index_natsorted` to sort by signed floats, - you should consider using this function as the default sorting - behavior of :func:`index_natsorted` will changed to unsigned - integers in :mod:`natsort` version >= 4.0.0. + The behavior of :func:`index_realsorted` in `natsort` version >= 4.0.0 + was the default behavior of :func:`index_natsorted` for `natsort` + version < 4.0.0. Parameters ---------- @@ -889,7 +610,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0): alg : ns enum, optional This option is used to control which algorithm `natsort` uses when sorting. 
For details into these options, please see - the :class:`ns` class documentation. + the :class:`ns` class documentation. The default is `ns.REAL`. Returns ------- @@ -910,7 +631,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0): [1, 3, 0, 2] """ - return index_natsorted(seq, key=key, reverse=reverse, alg=alg) + return index_natsorted(seq, key, reverse, alg | ns.REAL) @u_format diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index f568382..8b9d794 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -20,40 +20,51 @@ class ns(object): C library that Python's locale module uses is broken. On these systems it is recommended that you install `PyICU <https://pypi.python.org/pypi/PyICU>`_ - if you wish to use ``LOCALE``. If you are on one of + if you wish to use ``LOCALE``, especially if you need + to handle non-ASCII characters. If you are on one of systems and get unexpected results, please try using `PyICU <https://pypi.python.org/pypi/PyICU>`_ before filing a bug report to ``natsort``. Attributes ---------- + INT, I (default) + The default - parse numbers as integers. FLOAT, F - The default - parse numbers as floats. - INT, I - Tell `natsort` to parse numbers as ints. - UNSIGNED, U - Tell `natsort` to ignore any sign (i.e. "-" or "+") to the - immediate left of a number. It is the same as setting the old - `signed` option to `False`. + Tell `natsort` to parse numbers as floats. + UNSIGNED, U (default) + Tell `natsort` to ignore any sign (i.e. "-" or "+") to the immediate + left of a number. It is the same as setting the old `signed` option + to `False`. This is the default. + SIGNED, S + Tell `natsort` to take into account any sign (i.e. "-" or "+") + to the immediate left of a number. It is the same as setting + the old `signed` option to `True`. VERSION, V This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful when attempting to sort version numbers. It is the same as - setting the old `number_type` option to `None`. 
+ setting the old `number_type` option to `None`. Since + ``ns.INT | ns.UNSIGNED`` is the default, this is + unnecessary. DIGIT, D Same as `VERSION` above. + REAL, R + This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful + when attempting to sort real numbers. NOEXP, N Tell `natsort` to not search for exponents as part of the number. For example, with `NOEXP` the number "5.6E5" would be interpreted - as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` - option to `False`. + as `5.6`, `"E"`, and `5`. It is the same as setting the old + `exp` option to `False`. PATH, P Tell `natsort` to interpret strings as filesystem paths, so they will be split according to the filesystem separator (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like - ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted - properly; 'Folder/' will be placed at the end, not at the front. - It is the same as setting the old `as_path` option to `True`. + ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be + sorted properly; 'Folder/' will be placed at the end, not at the + front. It is the same as setting the old `as_path` option to + `True`. LOCALE, L Tell `natsort` to be locale-aware when sorting strings (everything that was not converted to a number). Your sorting results will vary @@ -72,7 +83,11 @@ class ns(object): ``['apple', 'banana', 'Apple', 'Banana']`` (the default order would be ``['Apple', 'Banana', 'apple', 'banana']`` which is the order from a purely ordinal sort). - Useless when used with `IGNORECASE`. + Useless when used with `IGNORECASE`. Please note that if used + with ``LOCALE``, this actually has the reverse effect and will + put uppercase first (this is because ``LOCALE`` already puts + lowercase first); you may use this to your advantage if you + need to modify the order returned with ``LOCALE``. 
GROUPLETTERS, G Tell `natsort` to group lowercase and uppercase letters together when sorting. For example, @@ -90,9 +105,8 @@ class ns(object): TYPESAFE, T Try hard to avoid "unorderable types" error on Python 3. It is the same as setting the old `py3_safe` option to `True`. - This is only needed if not using ``UNSIGNED`` or if - sorting by ``FLOAT``. - You shouldn't need to use this unless you are using + This is only needed if using ``SIGNED`` or if sorting by + ``FLOAT``. You shouldn't need to use this unless you are using ``natsort_keygen``. *NOTE:* It cannot resolve the ``TypeError`` from trying to compare `str` and `bytes`. @@ -120,11 +134,14 @@ class ns(object): # Sort algorithm "enum" values. -_ns = {'FLOAT': 0, 'F': 0, - 'INT': 1, 'I': 1, - 'UNSIGNED': 2, 'U': 2, - 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED - 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED +_ns = { + 'INT': 0, 'I': 0, + 'FLOAT': 1, 'F': 1, + 'UNSIGNED': 0, 'U': 0, + 'SIGNED': 2, 'S': 2, + 'VERSION': 0, 'V': 0, # Shortcut for INT | UNSIGNED + 'DIGIT': 0, 'D': 0, # Shortcut for INT | UNSIGNED + 'REAL': 3, 'R': 3, # Shortcut for FLOAT | SIGNED 'NOEXP': 4, 'N': 4, 'PATH': 8, 'P': 8, 'LOCALE': 16, 'L': 16, diff --git a/natsort/py23compat.py b/natsort/py23compat.py index 3f3fb92..3c9f88b 100644 --- a/natsort/py23compat.py +++ b/natsort/py23compat.py @@ -9,6 +9,9 @@ import sys # python2 and python3. This code is pretty much lifted from the iPython # project's py3compat.py file. Credit to the iPython devs. 
+# Numeric form of version +PY_VERSION = float(sys.version[:3]) + # Assume all strings are Unicode in Python 2 py23_str = str if sys.version[0] == '3' else unicode @@ -18,6 +21,9 @@ py23_range = range if sys.version[0] == '3' else xrange # Uniform base string type py23_basestring = str if sys.version[0] == '3' else basestring +# unichr function +py23_unichr = chr if sys.version[0] == '3' else unichr + # zip as an iterator if sys.version[0] == '3': py23_zip = zip diff --git a/natsort/unicode_numbers.py b/natsort/unicode_numbers.py new file mode 100644 index 0000000..a0e8359 --- /dev/null +++ b/natsort/unicode_numbers.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +""" +Contains all possible non-ASCII unicode numbers. + +""" + +from __future__ import (print_function, division, + unicode_literals, absolute_import) + +# Std. lib imports. +import unicodedata + +# Local imports. +from natsort.py23compat import py23_unichr + + +# Rather than determine this on the fly, which would incur a startup +# runtime penalty, the hex values of the Unicode numeric characters +# are hard-coded below. 
+numeric_hex = [ + 0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, 0X663, 0X664, + 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, 0X6F1, 0X6F2, 0X6F3, 0X6F4, + 0X6F5, 0X6F6, 0X6F7, 0X6F8, 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4, + 0X7C5, 0X7C6, 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A, + 0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, 0X9E9, 0X9EA, + 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, 0X9F5, 0X9F6, 0X9F7, 0X9F8, + 0X9F9, 0XA66, 0XA67, 0XA68, 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E, + 0XA6F, 0XAE6, 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE, + 0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, 0XB6D, 0XB6E, + 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, 0XB77, 0XBE6, 0XBE7, 0XBE8, + 0XBE9, 0XBEA, 0XBEB, 0XBEC, 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2, + 0XC66, 0XC67, 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F, + 0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, 0XCE7, 0XCE8, + 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, 0XCEF, 0XD66, 0XD67, 0XD68, + 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72, + 0XD73, 0XD74, 0XD75, 0XE50, 0XE51, 0XE52, 0XE53, 0XE54, 0XE55, 0XE56, + 0XE57, 0XE58, 0XE59, 0XED0, 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6, + 0XED7, 0XED8, 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26, + 0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, 0XF2F, 0XF30, + 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, 0X1043, 0X1044, 0X1045, + 0X1046, 0X1047, 0X1048, 0X1049, 0X1090, 0X1091, 0X1092, 0X1093, 0X1094, + 0X1095, 0X1096, 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C, + 0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, 0X1374, 0X1375, + 0X1376, 0X1377, 0X1378, 0X1379, 0X137A, 0X137B, 0X137C, 0X16EE, 0X16EF, + 0X16F0, 0X17E0, 0X17E1, 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7, + 0X17E8, 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, 0X17F6, + 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, 0X1813, 0X1814, 0X1815, + 0X1816, 0X1817, 
0X1818, 0X1819, 0X1946, 0X1947, 0X1948, 0X1949, 0X194A, + 0X194B, 0X194C, 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3, + 0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, 0X1A80, 0X1A81, + 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, 0X1A87, 0X1A88, 0X1A89, 0X1A90, + 0X1A91, 0X1A92, 0X1A93, 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99, + 0X1B50, 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, 0X1B58, + 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, 0X1BB5, 0X1BB6, 0X1BB7, + 0X1BB8, 0X1BB9, 0X1C40, 0X1C41, 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46, + 0X1C47, 0X1C48, 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55, + 0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, 0X2076, 0X2077, + 0X2078, 0X2079, 0X2080, 0X2081, 0X2082, 0X2083, 0X2084, 0X2085, 0X2086, + 0X2087, 0X2088, 0X2089, 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155, + 0X2156, 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, 0X215E, + 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, 0X2165, 0X2166, 0X2167, + 0X2168, 0X2169, 0X216A, 0X216B, 0X216C, 0X216D, 0X216E, 0X216F, 0X2170, + 0X2171, 0X2172, 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179, + 0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, 0X2181, 0X2182, + 0X2185, 0X2186, 0X2187, 0X2188, 0X2189, 0X2460, 0X2461, 0X2462, 0X2463, + 0X2464, 0X2465, 0X2466, 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C, + 0X246D, 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, 0X2475, + 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, 0X247C, 0X247D, 0X247E, + 0X247F, 0X2480, 0X2481, 0X2482, 0X2483, 0X2484, 0X2485, 0X2486, 0X2487, + 0X2488, 0X2489, 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490, + 0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, 0X2498, 0X2499, + 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, 0X24ED, 0X24EE, 0X24EF, 0X24F0, + 0X24F1, 0X24F2, 0X24F3, 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9, + 0X24FA, 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, 0X2778, + 0X2779, 0X277A, 
0X277B, 0X277C, 0X277D, 0X277E, 0X277F, 0X2780, 0X2781, + 0X2782, 0X2783, 0X2784, 0X2785, 0X2786, 0X2787, 0X2788, 0X2789, 0X278A, + 0X278B, 0X278C, 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793, + 0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, 0X3026, 0X3027, + 0X3028, 0X3029, 0X3038, 0X3039, 0X303A, 0X3192, 0X3193, 0X3194, 0X3195, + 0X3220, 0X3221, 0X3222, 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228, + 0X3229, 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, 0X324F, + 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, 0X3257, 0X3258, 0X3259, + 0X325A, 0X325B, 0X325C, 0X325D, 0X325E, 0X325F, 0X3280, 0X3281, 0X3282, + 0X3283, 0X3284, 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2, + 0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, 0X32BA, 0X32BB, + 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, 0X3483, 0X382A, 0X3B4D, 0X4E00, + 0X4E03, 0X4E07, 0X4E09, 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0, + 0X4EDF, 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, 0X516D, + 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, 0X53C2, 0X53C3, 0X53C4, + 0X56DB, 0X58F1, 0X58F9, 0X5E7A, 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E, + 0X5F10, 0X62FE, 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C, + 0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, 0X96F6, 0XA620, + 0XA621, 0XA622, 0XA623, 0XA624, 0XA625, 0XA626, 0XA627, 0XA628, 0XA629, + 0XA6E6, 0XA6E7, 0XA6E8, 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE, + 0XA6EF, 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, 0XA8D1, + 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, 0XA8D8, 0XA8D9, 0XA900, + 0XA901, 0XA902, 0XA903, 0XA904, 0XA905, 0XA906, 0XA907, 0XA908, 0XA909, + 0XA9D0, 0XA9D1, 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8, + 0XA9D9, 0XAA50, 0XAA51, 0XAA52, 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57, + 0XAA58, 0XAA59, 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6, + 0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, 0XF9D1, 0XF9D3, + 0XF9FD, 0XFF10, 
0XFF11, 0XFF12, 0XFF13, 0XFF14, 0XFF15, 0XFF16, 0XFF17, + 0XFF18, 0XFF19, 0X10107, 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C, + 0X1010D, 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, 0X10114, + 0X10115, 0X10116, 0X10117, 0X10118, 0X10119, 0X1011A, 0X1011B, 0X1011C, + 0X1011D, 0X1011E, 0X1011F, 0X10120, 0X10121, 0X10122, 0X10123, 0X10124, + 0X10125, 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, 0X1012C, + 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, 0X10132, 0X10133, 0X10140, + 0X10141, 0X10142, 0X10143, 0X10144, 0X10145, 0X10146, 0X10147, 0X10148, + 0X10149, 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, 0X10150, + 0X10151, 0X10152, 0X10153, 0X10154, 0X10155, 0X10156, 0X10157, 0X10158, + 0X10159, 0X1015A, 0X1015B, 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160, + 0X10161, 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, 0X10168, + 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, 0X1016E, 0X1016F, 0X10170, + 0X10171, 0X10172, 0X10173, 0X10174, 0X10175, 0X10176, 0X10177, 0X10178, + 0X1018A, 0X10320, 0X10321, 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1, + 0X103D2, 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, 0X104A3, + 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, 0X104A9, 0X10858, 0X10859, + 0X1085A, 0X1085B, 0X1085C, 0X1085D, 0X1085E, 0X1085F, 0X10916, 0X10917, + 0X10918, 0X10919, 0X1091A, 0X1091B, 0X10A40, 0X10A41, 0X10A42, 0X10A43, + 0X10A44, 0X10A45, 0X10A46, 0X10A47, 0X10A7D, 0X10A7E, 0X10B58, 0X10B59, + 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, 0X10B5F, 0X10B78, 0X10B79, + 0X10B7A, 0X10B7B, 0X10B7C, 0X10B7D, 0X10B7E, 0X10B7F, 0X10E60, 0X10E61, + 0X10E62, 0X10E63, 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69, + 0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, 0X10E70, 0X10E71, + 0X10E72, 0X10E73, 0X10E74, 0X10E75, 0X10E76, 0X10E77, 0X10E78, 0X10E79, + 0X10E7A, 0X10E7B, 0X10E7C, 0X10E7D, 0X10E7E, 0X11052, 0X11053, 0X11054, + 0X11055, 0X11056, 0X11057, 0X11058, 0X11059, 0X1105A, 0X1105B, 0X1105C, + 0X1105D, 0X1105E, 
0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064, + 0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, 0X1106B, 0X1106C, + 0X1106D, 0X1106E, 0X1106F, 0X110F0, 0X110F1, 0X110F2, 0X110F3, 0X110F4, + 0X110F5, 0X110F6, 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138, + 0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, 0X1113F, 0X111D0, + 0X111D1, 0X111D2, 0X111D3, 0X111D4, 0X111D5, 0X111D6, 0X111D7, 0X111D8, + 0X111D9, 0X116C0, 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6, + 0X116C7, 0X116C8, 0X116C9, 0X12400, 0X12401, 0X12402, 0X12403, 0X12404, + 0X12405, 0X12406, 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C, + 0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, 0X12413, 0X12414, + 0X12415, 0X12416, 0X12417, 0X12418, 0X12419, 0X1241A, 0X1241B, 0X1241C, + 0X1241D, 0X1241E, 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424, + 0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, 0X1242B, 0X1242C, + 0X1242D, 0X1242E, 0X1242F, 0X12430, 0X12431, 0X12432, 0X12433, 0X12434, + 0X12435, 0X12436, 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C, + 0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, 0X12443, 0X12444, + 0X12445, 0X12446, 0X12447, 0X12448, 0X12449, 0X1244A, 0X1244B, 0X1244C, + 0X1244D, 0X1244E, 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454, + 0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, 0X1245B, 0X1245C, + 0X1245D, 0X1245E, 0X1245F, 0X12460, 0X12461, 0X12462, 0X1D360, 0X1D361, + 0X1D362, 0X1D363, 0X1D364, 0X1D365, 0X1D366, 0X1D367, 0X1D368, 0X1D369, + 0X1D36A, 0X1D36B, 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371, + 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, 0X1D7D3, 0X1D7D4, 0X1D7D5, + 0X1D7D6, 0X1D7D7, 0X1D7D8, 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD, + 0X1D7DE, 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, 0X1D7E5, + 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, 0X1D7EB, 0X1D7EC, 0X1D7ED, + 0X1D7EE, 0X1D7EF, 0X1D7F0, 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5, + 0X1D7F6, 0X1D7F7, 
0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, + 0X1D7FE, 0X1D7FF, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, + 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X20001, 0X20064, 0X200E2, + 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, + 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890, +] + +# Convert each hex into the literal Unicode character. +# Stop if a ValueError is raised in case of a narrow Unicode build. +# The extra check with unicodedata is in case this Python version +# does not support some characters. +numeric_chars = [] +for a in numeric_hex: + try: + l = py23_unichr(a) + except ValueError: + break + if unicodedata.numeric(l, None) is None: + continue + numeric_chars.append(l) + +# The digit characters are a subset of the numerals. +digit_chars = [a for a in numeric_chars + if unicodedata.digit(a, None) is not None] + +# Create a single string with the above data. +digits = ''.join(digit_chars) +numeric = ''.join(numeric_chars) diff --git a/natsort/utils.py b/natsort/utils.py index 3e756b7..a272f29 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -17,66 +17,93 @@ from itertools import islice from locale import localeconv # Local imports. -from natsort.locale_help import locale_convert, grouper, null_string -from natsort.py23compat import py23_str, py23_zip +from natsort.locale_help import (locale_convert, grouper, + null_string, use_pyicu, dumb_sort) +from natsort.py23compat import py23_str, py23_zip, PY_VERSION from natsort.ns_enum import ns, _ns +from natsort.unicode_numbers import digits, numeric # If the user has fastnumbers installed, they will get great speed # benefits. If not, we simulate the functions here. try: - from fastnumbers import fast_float, fast_int, isreal + from fastnumbers import fast_float, fast_int, isint, isfloat + import fastnumbers + v = list(map(int, fastnumbers.__version__.split('.'))) + if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0. 
+ raise ImportError except ImportError: - from natsort.fake_fastnumbers import fast_float, fast_int, isreal + from natsort.fake_fastnumbers import fast_float, fast_int, isint, isfloat # If the user has pathlib installed, the ns.PATH option will convert # Path objects to str before sorting. try: from pathlib import PurePath # PurePath is the base object for Paths. -except ImportError: +except ImportError: # pragma: no cover PurePath = object # To avoid NameErrors. has_pathlib = False else: has_pathlib = True # Group algorithm types for easy extraction -_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP -_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | +_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.SIGNED | ns.NOEXP +_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.S | ns.N | ns.L | ns.IC | ns.LF | ns.G | ns.UG | ns.TYPESAFE) -# The regex that locates floats -_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) -_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) -_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) -_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) - -# Integer regexes -_int_nosign_re = re.compile(r'(\d+)', re.U) -_int_sign_re = re.compile(r'([-+]?\d+)', re.U) +# The regex that locates floats - include Unicode numerals. 
+_float_sign_exp_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])' +_float_sign_exp_re = _float_sign_exp_re.format(numeric) +_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) +_float_nosign_exp_re = r'([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])' +_float_nosign_exp_re = _float_nosign_exp_re.format(numeric) +_float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U) +_float_sign_noexp_re = r'([-+]?[0-9]*\.?[0-9]+|[{}])' +_float_sign_noexp_re = _float_sign_noexp_re.format(numeric) +_float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) +_float_nosign_noexp_re = r'([0-9]*\.?[0-9]+|[{}])' +_float_nosign_noexp_re = _float_nosign_noexp_re.format(numeric) +_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) +_float_sign_exp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?)|[{}]' +_float_sign_exp_re_c = _float_sign_exp_re_c.format(numeric) +_float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U) +_float_nosign_exp_re_c = r'([0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?|[{}])' +_float_nosign_exp_re_c = _float_nosign_exp_re_c.format(numeric) +_float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U) +_float_sign_noexp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+|[{}])' +_float_sign_noexp_re_c = _float_sign_noexp_re_c.format(numeric) +_float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U) +_float_nosign_noexp_re_c = r'([0-9]*[.,]?[0-9]+|[{}])' +_float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(numeric) +_float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U) + +# Integer regexes - include Unicode digits. +_int_nosign_re = r'([0-9]+|[{}])'.format(digits) +_int_nosign_re = re.compile(_int_nosign_re, flags=re.U) +_int_sign_re = r'([-+]?[0-9]+|[{}])'.format(digits) +_int_sign_re = re.compile(_int_sign_re, flags=re.U) # This dict will help select the correct regex and number conversion function. 
_regex_and_num_function_chooser = { - (ns.F, '.'): (_float_sign_exp_re, fast_float), - (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), + (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float), + (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float), (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), - (ns.I, '.'): (_int_sign_re, fast_int), - (ns.I | ns.N, '.'): (_int_sign_re, fast_int), + (ns.I | ns.S, '.'): (_int_sign_re, fast_int), + (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int), (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), - (ns.F, ','): (_float_sign_exp_re_c, fast_float), - (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), + (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float), + (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float), (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), - (ns.I, ','): (_int_sign_re, fast_int), - (ns.I | ns.N, ','): (_int_sign_re, fast_int), + (ns.I | ns.S, ','): (_int_sign_re, fast_int), + (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int), (ns.I | ns.U, ','): (_int_nosign_re, fast_int), (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), } +# Dict to select checker function from converter function +_conv_to_check = {fast_float: isfloat, fast_int: isint} + def _do_decoding(s, encoding): """A function to decode a bytes string, or return the object as-is.""" @@ -88,40 +115,46 @@ def _do_decoding(s, encoding): return s -def _args_to_enum(number_type, signed, exp, as_path, py3_safe): +def _args_to_enum(**kwargs): """A function to convert input booleans to an enum-type argument.""" alg = 0 - if number_type is not float: + keys = ('number_type', 'signed', 'exp', 'as_path', 'py3_safe') + if any(x not in keys for x in kwargs): + x = set(kwargs) - set(keys) + raise TypeError('Invalid argument(s): ' 
+ ', '.join(x)) + if 'number_type' in kwargs and kwargs['number_type'] is not int: msg = "The 'number_type' argument is deprecated as of 3.5.0, " msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" warn(msg, DeprecationWarning) - alg |= (_ns['INT'] * bool(number_type in (int, None))) - alg |= (_ns['UNSIGNED'] * (number_type is None)) - if signed is not None: + alg |= (_ns['FLOAT'] * bool(kwargs['number_type'] is float)) + alg |= (_ns['INT'] * bool(kwargs['number_type'] in (int, None))) + alg |= (_ns['SIGNED'] * (kwargs['number_type'] not in (float, None))) + if 'signed' in kwargs and kwargs['signed'] is not None: msg = "The 'signed' argument is deprecated as of 3.5.0, " - msg += "please use 'alg=ns.UNSIGNED'." + msg += "please use 'alg=ns.SIGNED'." warn(msg, DeprecationWarning) - alg |= (_ns['UNSIGNED'] * (not signed)) - if exp is not None: + alg |= (_ns['SIGNED'] * bool(kwargs['signed'])) + if 'exp' in kwargs and kwargs['exp'] is not None: msg = "The 'exp' argument is deprecated as of 3.5.0, " msg += "please use 'alg=ns.NOEXP'." warn(msg, DeprecationWarning) - alg |= (_ns['NOEXP'] * (not exp)) - if as_path is not None: + alg |= (_ns['NOEXP'] * (not kwargs['exp'])) + if 'as_path' in kwargs and kwargs['as_path'] is not None: msg = "The 'as_path' argument is deprecated as of 3.5.0, " msg += "please use 'alg=ns.PATH'." warn(msg, DeprecationWarning) - alg |= (_ns['PATH'] * as_path) - if py3_safe is not None: + alg |= (_ns['PATH'] * kwargs['as_path']) + if 'py3_safe' in kwargs and kwargs['py3_safe'] is not None: msg = "The 'py3_safe' argument is deprecated as of 3.5.0, " msg += "please use 'alg=ns.TYPESAFE'." 
warn(msg, DeprecationWarning) - alg |= (_ns['TYPESAFE'] * py3_safe) + alg |= (_ns['TYPESAFE'] * kwargs['py3_safe']) return alg def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters): """Helper to separate the string input into numbers and strings.""" + conv_check = (numconv, _conv_to_check[numconv]) # Split the input string by numbers. # If the input is not a string, TypeError is raised. @@ -131,24 +164,24 @@ def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters): # Take into account locale if needed, and group letters if needed. # Remove empty strings from the list. if use_locale: - s = [locale_convert(x, numconv, group_letters) for x in s if x] + s = [locale_convert(x, conv_check, group_letters) for x in s if x] elif group_letters: - s = [grouper(x, numconv) for x in s if x] + s = [grouper(x, conv_check) for x in s if x] else: s = [numconv(x) for x in s if x] # If the list begins with a number, lead with an empty string. # This is used to get around the "unorderable types" issue. - if not s: # Return empty tuple for empty results. - return () - elif isreal(s[0]): + if not s: # Return empty list for empty results. + return [] + elif conv_check[1](s[0], num_only=True): s = [null_string if use_locale else ''] + s # The _py3_safe function inserts "" between numbers in the list, # and is used to get around "unorderable types" in complex cases. # It is a separate function that needs to be requested specifically # because it is expensive to call. - return _py3_safe(s, use_locale) if py3_safe else s + return _py3_safe(s, use_locale, conv_check[1]) if py3_safe else s def _path_splitter(s, _d_match=re.compile(r'\.\d').match): @@ -158,7 +191,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match): # Convert a pathlib PurePath object to a string. 
if has_pathlib and isinstance(s, PurePath): path_location = str(s) - else: + else: # pragma: no cover path_location = s # Continue splitting the path from the back until we have reached @@ -199,7 +232,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match): return path_parts + base_parts -def _py3_safe(parsed_list, use_locale): +def _py3_safe(parsed_list, use_locale, check): """Insert '' between two numbers.""" length = len(parsed_list) if length < 2: @@ -209,7 +242,7 @@ def _py3_safe(parsed_list, use_locale): nl_append = new_list.append for before, after in py23_zip(islice(parsed_list, 0, length-1), islice(parsed_list, 1, None)): - if isreal(before) and isreal(after): + if check(before, num_only=True) and check(after, num_only=True): nl_append(null_string if use_locale else '') nl_append(after) return new_list @@ -275,24 +308,45 @@ def _natsort_key(val, key, alg): # Assume the input are strings, which is the most common case. # Apply the string modification if needed. + orig_val = val try: - if alg & _ns['LOWERCASEFIRST']: + lowfirst = alg & _ns['LOWERCASEFIRST'] + dumb = dumb_sort() if use_locale else False + if use_locale and dumb and not lowfirst: + val = val.swapcase() # Compensate for bad locale lib. + elif lowfirst and not (use_locale and dumb): val = val.swapcase() if alg & _ns['IGNORECASE']: - val = val.lower() - if use_locale and alg & _ns['UNGROUPLETTERS'] and val[0].isupper(): - val = ' ' + val - return tuple(_number_extracter(val, - regex, - num_function, - alg & _ns['TYPESAFE'], - use_locale, - alg & _ns['GROUPLETTERS'])) + val = val.casefold() if PY_VERSION >= 3.3 else val.lower() + gl = alg & _ns['GROUPLETTERS'] + ret = tuple(_number_extracter(val, + regex, + num_function, + alg & _ns['TYPESAFE'], + use_locale, + gl or (use_locale and dumb))) + # For UNGROUPLETTERS, so the high level grouping can occur + # based on the first letter of the string. + # Do no locale transformation of the characters. 
+ if use_locale and alg & _ns['UNGROUPLETTERS']: + if not ret: + return (ret, ret) + elif ret[0] == null_string: + return ((b'' if use_pyicu else '',), ret) + elif dumb: + if lowfirst: + return ((orig_val[0].swapcase(),), ret) + else: + return ((orig_val[0],), ret) + else: + return ((val[0],), ret) + else: + return ret except (TypeError, AttributeError): # Check if it is a bytes type, and if so return as a # one element tuple. if type(val) in (bytes,): - return (val,) + return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,) # If not strings, assume it is an iterable that must # be parsed recursively. Do not apply the key recursively. # If this string was split as a path, turn off 'PATH'. @@ -23,7 +23,11 @@ class PyTest(TestCommand): import pytest err1 = pytest.main(['--cov', 'natsort', '--cov-report', 'term-missing', - '--flakes', '--pep8']) + '--flakes', + '--pep8', + # '--failed', + # '-v', + ]) err2 = pytest.main(['--doctest-modules', 'natsort']) err3 = pytest.main(['README.rst', 'docs/source/intro.rst', @@ -56,9 +60,12 @@ except IOError: REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else '' # Testing needs pytest, and mock if less than python 3.3 -TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'] +TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', + 'pytest-cov', 'hypothesis'] if sys.version[0] == 2 or (sys.version[3] == '3' and int(sys.version[2]) < 3): TESTS_REQUIRE.append('mock') +if sys.version[0] == 2 or (sys.version[3] == '3' and int(sys.version[2]) < 4): + TESTS_REQUIRE.append('pathlib') # The setup parameters setup( diff --git a/test_natsort/slow_splitters.py b/test_natsort/slow_splitters.py new file mode 100644 index 0000000..6352dd7 --- /dev/null +++ b/test_natsort/slow_splitters.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +"""Alternate versions of the splitting functions for testing.""" +from __future__ import unicode_literals + +import unicodedata +from natsort.py23compat import PY_VERSION + 
+if PY_VERSION >= 3.0: + long = int + + +def int_splitter(x, signed, safe, sep): + """Alternate (slow) method to split a string into numbers.""" + if not x: + return [] + all_digits = set('0123456789') + full_list, strings, nums = [], [], [] + input_len = len(x) + for i, char in enumerate(x): + # If this character is a sign and the next is a number, + # start a new number. + if (i+1 < input_len and signed and + (char in '-+') and (x[i+1] in all_digits)): + # Reset any current string or number. + if strings: + full_list.append(''.join(strings)) + if nums: + full_list.append(int(''.join(nums))) + strings = [] + nums = [char] + # If this is a number, add to the number list. + elif char in all_digits: + nums.append(char) + # Reset any string. + if strings: + full_list.append(''.join(strings)) + strings = [] + # If this is a unicode digit, append directly to the full list. + elif char.isdigit(): + # Reset any string or number. + if strings: + full_list.append(''.join(strings)) + if nums: + full_list.append(int(''.join(nums))) + strings = [] + nums = [] + full_list.append(unicodedata.digit(char)) + # Otherwise add to the string. + else: + strings.append(char) + # Reset any number. + if nums: + full_list.append(int(''.join(nums))) + nums = [] + if nums: + full_list.append(int(''.join(nums))) + elif strings: + full_list.append(''.join(strings)) + if safe: + full_list = sep_inserter(full_list, (int, long), sep) + if type(full_list[0]) in (int, long): + return [sep] + full_list + else: + return full_list + + +def float_splitter(x, signed, exp, safe, sep): + """Alternate (slow) method to split a string into numbers.""" + if not x: + return [] + all_digits = set('0123456789') + full_list, strings, nums = [], [], [] + input_len = len(x) + for i, char in enumerate(x): + # If this character is a sign and the next is a number, + # start a new number. 
+ if (i+1 < input_len and + (signed or (i > 1 and exp and x[i-1] in 'eE' and + x[i-2] in all_digits)) and + (char in '-+') and (x[i+1] in all_digits)): + # Reset any current string or number. + if strings: + full_list.append(''.join(strings)) + if nums and i > 0 and x[i-1] not in 'eE': + full_list.append(float(''.join(nums))) + nums = [char] + else: + nums.append(char) + strings = [] + # If this is a number, add to the number list. + elif char in all_digits: + nums.append(char) + # Reset any string. + if strings: + full_list.append(''.join(strings)) + strings = [] + # If this is a decimal, add to the number list. + elif (i + 1 < input_len and char == '.' and x[i+1] in all_digits): + if nums and '.' in nums: + full_list.append(float(''.join(nums))) + nums = [] + nums.append(char) + if strings: + full_list.append(''.join(strings)) + strings = [] + # If this is an exponent, add to the number list. + elif (i > 0 and i + 1 < input_len and exp and char in 'eE' and + x[i-1] in all_digits and x[i+1] in all_digits | set('+-')): + if 'e' in nums or 'E' in nums: + strings = [char] + full_list.append(float(''.join(nums))) + nums = [] + else: + nums.append(char) + # If this is a unicode digit, append directly to the full list. + elif unicodedata.numeric(char, None) is not None: + # Reset any string or number. + if strings: + full_list.append(''.join(strings)) + if nums: + full_list.append(float(''.join(nums))) + strings = [] + nums = [] + full_list.append(unicodedata.numeric(char)) + # Otherwise add to the string. + else: + strings.append(char) + # Reset any number. + if nums: + full_list.append(float(''.join(nums))) + nums = [] + if nums: + full_list.append(float(''.join(nums))) + elif strings: + full_list.append(''.join(strings)) + # Fix a float that looks like a string. 
+ fstrings = ('inf', 'infinity', '-inf', '-infinity', + '+inf', '+infinity', 'nan') + full_list = [float(y) if type(y) != float and y.lower() in fstrings else y + for y in full_list] + if safe: + full_list = sep_inserter(full_list, (float,), sep) + if type(full_list[0]) == float: + return [sep] + full_list + else: + return full_list + + +def sep_inserter(x, t, sep): + # Simulates the py3_safe function. + ret = [x[0]] + for i, y in enumerate(x[1:]): + if type(y) in t and type(x[i]) in t: + ret.append(sep) + ret.append(y) + return ret diff --git a/test_natsort/test_fake_fastnumbers.py b/test_natsort/test_fake_fastnumbers.py index 5aedadb..ff7e42c 100644 --- a/test_natsort/test_fake_fastnumbers.py +++ b/test_natsort/test_fake_fastnumbers.py @@ -2,34 +2,136 @@ """\ Test the fake fastnumbers module. """ -from natsort.fake_fastnumbers import fast_float, fast_int, isreal +from __future__ import unicode_literals +import unicodedata +from math import isnan +from hypothesis import given, assume +from natsort.fake_fastnumbers import fast_float, fast_int, isfloat, isint +from natsort.py23compat import py23_str -def test_fast_float_converts_float_string_to_float(): + +def is_float(x): + try: + float(x) + except ValueError: + try: + unicodedata.numeric(x) + except (ValueError, TypeError): + return False + else: + return True + else: + return True + + +def is_int(x): + try: + int(x) + except ValueError: + try: + unicodedata.digit(x) + except (ValueError, TypeError): + return False + else: + return True + else: + return True + + +# Each test has an "example" version for demonstrative purposes, +# and a test that uses the hypothesis module. 
+ +def test_fast_float_converts_float_string_to_float_example(): assert fast_float('45.8') == 45.8 assert fast_float('-45') == -45.0 assert fast_float('45.8e-2') == 45.8e-2 + assert isnan(fast_float('nan')) + + +@given(float) +def test_fast_float_converts_float_string_to_float(x): + assume(not isnan(x)) # But inf is included + assert fast_float(repr(x)) == x -def test_fast_float_leaves_string_as_is(): +def test_fast_float_leaves_string_as_is_example(): assert fast_float('invalid') == 'invalid' -def test_fast_int_leaves_float_string_as_is(): +@given(py23_str) +def test_fast_float_leaves_string_as_is(x): + assume(not is_float(x)) + assert fast_float(x) == x + + +def test_fast_int_leaves_float_string_as_is_example(): assert fast_int('45.8') == '45.8' + assert fast_int('nan') == 'nan' + assert fast_int('inf') == 'inf' + + +@given(float) +def test_fast_int_leaves_float_string_as_is(x): + assume(not x.is_integer()) + assert fast_int(repr(x)) == repr(x) -def test_fast_int_converts_int_string_to_int(): +def test_fast_int_converts_int_string_to_int_example(): assert fast_int('-45') == -45 assert fast_int('+45') == 45 -def test_fast_int_leaves_string_as_is(): +@given(int) +def test_fast_int_converts_int_string_to_int(x): + assert fast_int(repr(x)) == x + + +def test_fast_int_leaves_string_as_is_example(): assert fast_int('invalid') == 'invalid' -def test_isreal_returns_True_for_real_numbers_False_for_strings(): - assert isreal(-45) - assert isreal(45.8e-2) - assert not isreal('45.8') - assert not isreal('invalid') +@given(py23_str) +def test_fast_int_leaves_string_as_is(x): + assume(not is_int(x)) + assert fast_int(x) == x + + +def test_isfloat_returns_True_for_real_numbers_example(): + assert isfloat(-45.0) + assert isfloat(45.8e-2) + + +@given(float) +def test_isfloat_returns_True_for_real_numbers(x): + assert isfloat(x) + + +def test_isfloat_returns_False_for_strings_example(): + assert not isfloat('45.8') + assert not isfloat('invalid') + + +@given(py23_str) +def 
test_isfloat_returns_False_for_strings(x): + assert not isfloat(x) + + +def test_isint_returns_True_for_real_numbers_example(): + assert isint(-45) + assert isint(45) + + +@given(int) +def test_isint_returns_True_for_real_numbers(x): + assert isint(x) + + +def test_isint_returns_False_for_strings_example(): + assert not isint('45') + assert not isint('invalid') + + +@given(py23_str) +def test_isint_returns_False_for_strings(x): + assert not isint(x) diff --git a/test_natsort/test_locale_help.py b/test_natsort/test_locale_help.py index 5d69408..95c3000 100644 --- a/test_natsort/test_locale_help.py +++ b/test_natsort/test_locale_help.py @@ -2,9 +2,15 @@ """\ Test the locale help module module. """ +from __future__ import unicode_literals + import locale -from natsort.fake_fastnumbers import fast_float +from math import isnan +from itertools import chain +from natsort.fake_fastnumbers import fast_float, isfloat, isint from natsort.locale_help import grouper, locale_convert, use_pyicu +from natsort.py23compat import py23_str +from hypothesis import given, assume, example if use_pyicu: from natsort.locale_help import get_pyicu_transform @@ -14,49 +20,125 @@ else: from natsort.locale_help import strxfrm -def test_grouper_returns_letters_with_lowercase_transform_of_letter(): - assert grouper('HELLO', fast_float) == 'hHeElLlLoO' - assert grouper('hello', fast_float) == 'hheelllloo' +def load_locale(x): + try: + locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x))) + except: + locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x))) + + +# Each test has an "example" version for demonstrative purposes, +# and a test that uses the hypothesis module. 
+ + +def test_grouper_returns_letters_with_lowercase_transform_of_letter_example(): + assert grouper('HELLO', (fast_float, isfloat)) == 'hHeElLlLoO' + assert grouper('hello', (fast_float, isfloat)) == 'hheelllloo' + + +@given(py23_str) +def test_grouper_returns_letters_with_lowercase_transform_of_letter(x): + assume(type(fast_float(x)) is not float) + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + assert grouper(x, (fast_float, isfloat)) == ''.join(chain.from_iterable([low(y), y] for y in x)) + + +def test_grouper_returns_float_string_as_float_example(): + assert grouper('45.8e-2', (fast_float, isfloat)) == 45.8e-2 -def test_grouper_returns_float_string_as_float(): - assert grouper('45.8e-2', fast_float) == 45.8e-2 +@given(float) +def test_grouper_returns_float_string_as_float(x): + assume(not isnan(x)) + assert grouper(repr(x), (fast_float, isfloat)) == x -def test_locale_convert_transforms_float_string_to_float(): - locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') - assert locale_convert('45.8', fast_float, False) == 45.8 +def test_locale_convert_transforms_float_string_to_float_example(): + load_locale('en_US') + assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8 locale.setlocale(locale.LC_NUMERIC, str('')) -def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(): - locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') +@given(float) +def test_locale_convert_transforms_float_string_to_float(x): + assume(not isnan(x)) + load_locale('en_US') + assert locale_convert(repr(x), (fast_float, isfloat), False) == x + locale.setlocale(locale.LC_NUMERIC, str('')) + + +def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string_example(): + load_locale('en_US') if use_pyicu: from natsort.locale_help import get_pyicu_transform from locale import getlocale strxfrm = get_pyicu_transform(getlocale()) else: from natsort.locale_help import strxfrm - assert locale_convert('45,8', fast_float, False) == 
strxfrm('45,8') - assert locale_convert('hello', fast_float, False) == strxfrm('hello') + assert locale_convert('45,8', (fast_float, isfloat), False) == strxfrm('45,8') + assert locale_convert('hello', (fast_float, isfloat), False) == strxfrm('hello') locale.setlocale(locale.LC_NUMERIC, str('')) -def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(): - locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') +@given(py23_str) +def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(x): + assume(type(fast_float(x)) is not float) + load_locale('en_US') if use_pyicu: from natsort.locale_help import get_pyicu_transform from locale import getlocale strxfrm = get_pyicu_transform(getlocale()) else: from natsort.locale_help import strxfrm - assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') - assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88') + assert locale_convert(x, (fast_float, isfloat), False) == strxfrm(x) + locale.setlocale(locale.LC_NUMERIC, str('')) + + +def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters_example(): + load_locale('en_US') + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert locale_convert('hello', (fast_float, isfloat), True) == strxfrm('hheelllloo') + assert locale_convert('45,8', (fast_float, isfloat), True) == strxfrm('4455,,88') + locale.setlocale(locale.LC_NUMERIC, str('')) + + +@given(py23_str) +def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(x): + assume(type(fast_float(x)) is not float) + load_locale('en_US') + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from 
natsort.locale_help import strxfrm + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + assert locale_convert(x, (fast_float, isfloat), True) == strxfrm(''.join(chain.from_iterable([low(y), y] for y in x))) + locale.setlocale(locale.LC_NUMERIC, str('')) + + +def test_locale_convert_transforms_float_string_to_float_with_de_locale_example(): + load_locale('de_DE') + assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8 + assert locale_convert('45,8', (fast_float, isfloat), False) == 45.8 locale.setlocale(locale.LC_NUMERIC, str('')) -def test_locale_convert_transforms_float_string_to_float_with_de_locale(): - locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8') - assert locale_convert('45.8', fast_float, False) == 45.8 - assert locale_convert('45,8', fast_float, False) == 45.8 +@given(float) +def test_locale_convert_transforms_float_string_to_float_with_de_locale(x): + assume(not isnan(x)) + load_locale('de_DE') + assert locale_convert(repr(x), (fast_float, isfloat), False) == x + assert locale_convert(repr(x).replace('.', ','), (fast_float, isfloat), False) == x locale.setlocale(locale.LC_NUMERIC, str('')) diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py index 0416c89..0735f6d 100644 --- a/test_natsort/test_main.py +++ b/test_natsort/test_main.py @@ -2,10 +2,12 @@ """\ Test the natsort command-line tool functions. 
""" -from __future__ import print_function +from __future__ import print_function, unicode_literals import re import sys from pytest import raises +from hypothesis import given, assume +from hypothesis.specifiers import integers_in_range, integers_from, sampled_from try: from unittest.mock import patch, call except ImportError: @@ -13,6 +15,7 @@ except ImportError: from natsort.__main__ import main, range_check, check_filter from natsort.__main__ import keep_entry_range, exclude_entry from natsort.__main__ import sort_and_print_entries +from natsort.py23compat import py23_str def test_main_passes_default_arguments_with_no_command_line_options(): @@ -25,8 +28,8 @@ def test_main_passes_default_arguments_with_no_command_line_options(): assert args.reverse_filter is None assert args.exclude is None assert not args.reverse - assert args.number_type == 'float' - assert args.signed + assert args.number_type == 'int' + assert not args.signed assert args.exp assert not args.locale @@ -36,8 +39,7 @@ def test_main_passes_arguments_with_all_command_line_options(): sys.argv[1:] = ['--paths', '--reverse', '--locale', '--filter', '4', '10', '--reverse-filter', '100', '110', - '--number-type', 'int', - '--nosign', '--noexp', + '--number-type', 'float', '--noexp', '--sign', '--exclude', '34', '--exclude', '35', 'num-2', 'num-6', 'num-1'] main() @@ -47,60 +49,12 @@ def test_main_passes_arguments_with_all_command_line_options(): assert args.reverse_filter == [(100.0, 110.0)] assert args.exclude == [34, 35] assert args.reverse - assert args.number_type == 'int' - assert not args.signed + assert args.number_type == 'float' + assert args.signed assert not args.exp assert args.locale -def test_range_check_returns_range_as_is_but_with_floats(): - assert range_check(10, 11) == (10.0, 11.0) - assert range_check(6.4, 30) == (6.4, 30.0) - - -def test_range_check_raises_ValueError_if_range_is_invalid(): - with raises(ValueError) as err: - range_check(7, 2) - assert str(err.value) == 'low >= 
high' - - -def test_check_filter_returns_None_if_filter_evaluates_to_False(): - assert check_filter(()) is None - assert check_filter(False) is None - assert check_filter(None) is None - - -def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(): - assert check_filter([(6, 7)]) == [(6.0, 7.0)] - assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)] - - -def test_check_filter_raises_ValueError_if_filter_is_invalid(): - with raises(ValueError) as err: - check_filter([(7, 2)]) - assert str(err.value) == 'Error in --filter: low >= high' - - -def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(): - assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+')) - - -def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(): - assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+')) - - -def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(): - assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+')) - - -def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(): - assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+')) - - -def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(): - assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+')) - - class Args: """A dummy class to simulate the argparse Namespace object""" def __init__(self, filter, reverse_filter, exclude, as_path, reverse): @@ -198,3 +152,123 @@ def test_sort_and_print_entries_reverses_order_with_reverse_option(): sort_and_print_entries(entries, Args(None, None, False, True, True)) e = [call(entries[i]) for i in reversed([2, 3, 1, 0, 5, 6, 4])] p.assert_has_calls(e) + + +# Each test has an "example" version for demonstrative purposes, +# and a test that uses the hypothesis module. 
+ +def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second_example(): + assert range_check(10, 11) == (10.0, 11.0) + assert range_check(6.4, 30) == (6.4, 30.0) + + +@given(x=int, y=int) +def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, y): + assume(x < y) + assert range_check(x, y) == (float(x), float(y)) + + +@given(x=float, y=float) +def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, y): + assume(x < y) + assert range_check(x, y) == (x, y) + + +def test_range_check_raises_ValueError_if_second_is_less_than_first_example(): + with raises(ValueError) as err: + range_check(7, 2) + assert str(err.value) == 'low >= high' + + +@given(x=float, y=float) +def test_range_check_raises_ValueError_if_second_is_less_than_first(x, y): + assume(x >= y) + with raises(ValueError) as err: + range_check(x, x) + assert str(err.value) == 'low >= high' + + +def test_check_filter_returns_None_if_filter_evaluates_to_False(): + assert check_filter(()) is None + assert check_filter(False) is None + assert check_filter(None) is None + + +def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid_example(): + assert check_filter([(6, 7)]) == [(6.0, 7.0)] + assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)] + + +@given(x=(int, int, float, float), y=(int, float, float, int)) +def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(x, y): + assume(all(i < j for i, j in zip(x, y))) + assert check_filter(list(zip(x, y))) == [(float(i), float(j)) for i, j in zip(x, y)] + + +def test_check_filter_raises_ValueError_if_filter_is_invalid_example(): + with raises(ValueError) as err: + check_filter([(7, 2)]) + assert str(err.value) == 'Error in --filter: low >= high' + + +@given(x=(int, int, float, float), y=(int, float, float, int)) +def test_check_filter_raises_ValueError_if_filter_is_invalid(x, y): + assume(any(i >= j for i, j in zip(x, y))) + 
with raises(ValueError) as err: + check_filter(list(zip(x, y))) + assert str(err.value) == 'Error in --filter: low >= high' + + +def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds_example(): + assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+')) + + +@given((py23_str, integers_in_range(1, 99), py23_str, integers_in_range(1, 99), py23_str)) +def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(x): + s = ''.join(map(py23_str, x)) + assume(any(0 < int(i) < 100 for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) + assert keep_entry_range(s, [0], [100], int, re.compile(r'\d+')) + + +def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds_example(): + assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+')) + + +@given((py23_str, integers_in_range(2, 89), py23_str, integers_in_range(2, 89), py23_str)) +def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(x): + s = ''.join(map(py23_str, x)) + assume(any((1 < int(i) < 20) or (88 < int(i) < 90) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) + assert keep_entry_range(s, [1, 88], [20, 90], int, re.compile(r'\d+')) + + +def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds_example(): + assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+')) + + +@given((py23_str, integers_from(21), py23_str, integers_from(21), py23_str)) +def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(x): + s = ''.join(map(py23_str, x)) + assume(all(not (1 <= int(i) <= 20) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) + assert not keep_entry_range(s, [1], [20], int, re.compile(r'\d+')) + + +def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example(): + assert exclude_entry('a56b23c89', [100, 45], int, 
re.compile(r'\d+')) + + +@given((py23_str, integers_from(0), py23_str, integers_from(0), py23_str)) +def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(x): + s = ''.join(map(py23_str, x)) + assume(not any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) + assert exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) + + +def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example(): + assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+')) + + +@given((py23_str, sampled_from([23, 45, 87]), py23_str, sampled_from([23, 45, 87]), py23_str)) +def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(x): + s = ''.join(map(py23_str, x)) + assume(any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) + assert not exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 78a3eaa..329d39f 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -15,6 +15,13 @@ from natsort import realsorted, index_realsorted, decoder, as_ascii, as_utf8 from natsort.utils import _natsort_key +def load_locale(x): + try: + locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x))) + except: + locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x))) + + def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is(): f = decoder('latin1') a = 'bytes' @@ -41,7 +48,7 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called(): # But it raises a deprecation warning with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F) + assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I) assert len(w) == 1 assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) # It is 
called for each element in a list when sorting @@ -54,8 +61,8 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called(): def test_natsort_keygen_returns_natsort_key_with_alg_option(): a = 'a-5.034e1' - assert natsort_keygen()(a) == _natsort_key(a, None, ns.F) - assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, None, ns.I | ns.U) + assert natsort_keygen()(a) == _natsort_key(a, None, ns.I) + assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S) def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key(): @@ -63,15 +70,15 @@ def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_ f1 = natsort_keygen(key=lambda x: x.upper()) def f2(x): - return _natsort_key(x, lambda y: y.upper(), ns.F) + return _natsort_key(x, lambda y: y.upper(), ns.I) assert f1(a) == f2(a) def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted(): a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] b = a[:] - a.sort(key=natsort_keygen(alg=ns.I)) - assert a == natsorted(b, alg=ns.I) + a.sort(key=natsort_keygen(alg=ns.F)) + assert a == natsorted(b, alg=ns.F) def test_natsorted_returns_strings_with_numbers_in_ascending_order(): @@ -80,42 +87,48 @@ def test_natsorted_returns_strings_with_numbers_in_ascending_order(): def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] -def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_without_exponents_with_NOEXP_option(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == 
['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] +def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, alg=ns.N | ns.F | ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] + # UNSIGNED is default + assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] -def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_INT_option(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] +def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] + # INT is default + assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] -def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_option(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] +def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] + assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] -def test_natsorted_returns_list_of_numbers_sorted_without_accounting_for_sign_with_UNSIGNED_option(): - a = ['a-5', 'a7', 'a+2'] - assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5'] +def 
test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] -def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_without_UNSIGNED_option(): +def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option(): a = ['a-5', 'a7', 'a+2'] - assert natsorted(a) == ['a-5', 'a+2', 'a7'] + assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7'] -def test_natsorted_returns_list_of_version_numbers_improperly_sorted_without_VERSION_option(): - a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] +def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option(): + a = ['a-5', 'a7', 'a+2'] + assert natsorted(a) == ['a7', 'a+2', 'a-5'] -def test_natsorted_returns_sorted_list_of_version_numbers_with_VERSION_option(): +def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] + assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] @@ -233,27 +246,48 @@ def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters(): a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] - locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + load_locale('en_US') assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] locale.setlocale(locale.LC_ALL, str('')) +def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped(): + a = ['Apple', 'corn', 'Corn', 'Banana', 
'apple', 'banana'] + load_locale('en_US') + assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] + locale.setlocale(locale.LC_ALL, str('')) + + +def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters(): + a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + load_locale('en_US') + assert natsorted(a, alg=ns.LOCALE | ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] + locale.setlocale(locale.LC_ALL, str('')) + + +def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped(): + a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] + load_locale('en_US') + assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST | ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] + locale.setlocale(locale.LC_ALL, str('')) + + def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language(): - locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + load_locale('en_US') a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] - assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] + assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] locale.setlocale(locale.LC_ALL, str('')) def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language(): - locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8')) + load_locale('de_DE') a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] - assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] + assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] locale.setlocale(locale.LC_ALL, str('')) def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error(): - locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + load_locale('en_US') a = ['0', 'Á', '2', 'Z'] assert natsorted(a) == ['0', '2', 'Z', 'Á'] a = ['2', 'ä', 'b', 1.5, 3] 
@@ -261,9 +295,15 @@ def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_er locale.setlocale(locale.LC_ALL, str('')) -def test_versorted_returns_results_identical_to_natsorted_with_VERSION(): +def test_versorted_returns_results_identical_to_natsorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert versorted(a) == natsorted(a, alg=ns.VERSION) + # versorted is retained for backwards compatibility + assert versorted(a) == natsorted(a) + + +def test_realsorted_returns_results_identical_to_natsorted_with_REAL(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert realsorted(a) == natsorted(a, alg=ns.REAL) def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE(): @@ -271,11 +311,6 @@ def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE(): assert humansorted(a) == natsorted(a, alg=ns.LOCALE) -def test_realsorted_returns_results_identical_to_natsorted(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert realsorted(a) == natsorted(a) - - def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list(): a = ['num3', 'num5', 'num2'] b = ['foo', 'bar', 'baz'] @@ -312,9 +347,15 @@ def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_wi assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0] -def test_index_versorted_returns_results_identical_to_index_natsorted_with_VERSION(): +def test_index_versorted_returns_results_identical_to_index_natsorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION) + # index_versorted is retained for backwards compatibility + assert index_versorted(a) == index_natsorted(a) + + +def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL(): + a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] + assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL) def 
test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE(): @@ -322,11 +363,6 @@ def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOC assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE) -def test_index_realsorted_returns_results_identical_to_index_natsorted(): - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert index_realsorted(a) == index_natsorted(a) - - def test_order_by_index_sorts_list_according_to_order_of_integer_list(): a = ['num3', 'num5', 'num2'] index = [2, 0, 1] diff --git a/test_natsort/test_unicode_numbers.py b/test_natsort/test_unicode_numbers.py new file mode 100644 index 0000000..f3e8de7 --- /dev/null +++ b/test_natsort/test_unicode_numbers.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +"""\ +Test the Unicode numbers module. +""" +from __future__ import unicode_literals +import unicodedata +from natsort.py23compat import py23_range, py23_unichr +from natsort.unicode_numbers import numeric_chars, numeric, digit_chars, digits + + +def test_numeric_chars_contains_only_valid_unicode_numeric_characters(): + for a in numeric_chars: + assert unicodedata.numeric(a, None) is not None + + +def test_digit_chars_contains_only_valid_unicode_digit_characters(): + for a in digit_chars: + assert unicodedata.digit(a, None) is not None + + +def test_numeric_chars_contains_all_valid_unicode_numeric_characters(): + for i in py23_range(0X10FFFF): + try: + a = py23_unichr(i) + except ValueError: + break + if a in set('0123456789'): + continue + if unicodedata.numeric(a, None) is not None: + assert a in numeric_chars + + +def test_digit_chars_contains_all_valid_unicode_digit_characters(): + for i in py23_range(0X10FFFF): + try: + a = py23_unichr(i) + except ValueError: + break + if a in set('0123456789'): + continue + if unicodedata.digit(a, None) is not None: + assert a in digit_chars + + +def test_combined_string_contains_all_characters_in_list(): + assert numeric == 
''.join(numeric_chars) + assert digits == ''.join(digit_chars) diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 01fe6a3..16cf411 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -4,26 +4,43 @@ from __future__ import unicode_literals import sys import locale +import pathlib +import string +from math import isnan from operator import itemgetter +from itertools import chain from pytest import raises +from hypothesis import given, assume, example +from hypothesis.specifiers import sampled_from from natsort.ns_enum import ns from natsort.utils import _number_extracter, _py3_safe, _natsort_key, _args_to_enum from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re, _do_decoding -from natsort.locale_help import use_pyicu, null_string +from natsort.utils import _path_splitter +from natsort.locale_help import use_pyicu, null_string, locale_convert, dumb_sort from natsort.py23compat import py23_str +from slow_splitters import int_splitter, float_splitter, sep_inserter try: - from fastnumbers import fast_float, fast_int + from fastnumbers import fast_float, fast_int, isint + import fastnumbers + v = list(map(int, fastnumbers.__version__.split('.'))) + if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0. 
+ raise ImportError except ImportError: - from natsort.fake_fastnumbers import fast_float, fast_int + from natsort.fake_fastnumbers import fast_float, fast_int, isint + +if sys.version[0] == '3': + long = int + +ichain = chain.from_iterable -try: - import pathlib -except ImportError: - has_pathlib = False -else: - has_pathlib = True + +def load_locale(x): + try: + locale.setlocale(locale.LC_ALL, str('{}.ISO8859-1'.format(x))) + except: + locale.setlocale(locale.LC_ALL, str('{}.UTF-8'.format(x))) def test_do_decoding_decodes_bytes_string_to_unicode(): @@ -32,280 +49,524 @@ def test_do_decoding_decodes_bytes_string_to_unicode(): assert _do_decoding(b'bytes', 'ascii') == b'bytes'.decode('ascii') +def test_args_to_enum_raises_TypeError_for_invalid_argument(): + with raises(TypeError): + _args_to_enum(**{'alf': 0}) + + def test_args_to_enum_converts_signed_exp_float_to_ns_F(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(float, True, True, False, False) == ns.F + assert _args_to_enum(**{'number_type': float, + 'signed': True, + 'exp': True}) == ns.F | ns.S def test_args_to_enum_converts_signed_noexp_float_to_ns_FN(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N + assert _args_to_enum(**{'number_type': float, + 'signed': True, + 'exp': False}) == ns.F | ns.N | ns.S def test_args_to_enum_converts_unsigned_exp_float_to_ns_FU(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U + assert _args_to_enum(**{'number_type': float, + 'signed': False, + 'exp': True}) == ns.F | ns.U + # unsigned is default + assert _args_to_enum(**{'number_type': float, + 'signed': False, + 'exp': True}) == ns.F def test_args_to_enum_converts_unsigned_unexp_float_to_ns_FNU(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N + assert 
_args_to_enum(**{'number_type': float, + 'signed': False, + 'exp': False}) == ns.F | ns.U | ns.N -def test_args_to_enum_converts_signed_exp_float_and_path_and_py3safe_to_ns_FPT(): +def test_args_to_enum_converts_float_and_path_and_py3safe_to_ns_FPT(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T + assert _args_to_enum(**{'number_type': float, + 'as_path': True, + 'py3_safe': True}) == ns.F | ns.P | ns.T -def test_args_to_enum_converts_singed_int_and_path_to_ns_IP(): +def test_args_to_enum_converts_int_and_path_to_ns_IP(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P + assert _args_to_enum(**{'number_type': int, 'as_path': True}) == ns.I | ns.P def test_args_to_enum_converts_unsigned_int_and_py3safe_to_ns_IUT(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T + assert _args_to_enum(**{'number_type': int, + 'signed': False, + 'py3_safe': True}) == ns.I | ns.U | ns.T def test_args_to_enum_converts_None_to_ns_IU(): # number_type, signed, exp, as_path, py3_safe - assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U - -# fttt = (fast_float, True, True, True) -# fttf = (fast_float, True, True, False) -ftft = (fast_float, True, False, True) -ftff = (fast_float, True, False, False) -# fftt = (fast_float, False, True, True) -ffft = (fast_float, False, False, True) -# fftf = (fast_float, False, True, False) -ffff = (fast_float, False, False, False) -ittt = (fast_int, True, True, True) -ittf = (fast_int, True, True, False) -itft = (fast_int, True, False, True) -itff = (fast_int, True, False, False) -# iftt = (fast_int, False, True, True) -ifft = (fast_int, False, False, True) -# iftf = (fast_int, False, True, False) -ifff = (fast_int, False, False, False) - - -def test_number_extracter_raises_TypeError_if_given_a_number(): - with 
raises(TypeError): - assert _number_extracter(50.0, _float_sign_exp_re, *ffff) + assert _args_to_enum(**{'number_type': None, + 'exp': True}) == ns.I | ns.U +float_nosafe_locale_group = (fast_float, False, True, True) +float_nosafe_locale_nogroup = (fast_float, False, True, False) +float_safe_nolocale_nogroup = (fast_float, True, False, False) +float_nosafe_nolocale_group = (fast_float, False, False, True) +float_nosafe_nolocale_nogroup = (fast_float, False, False, False) +int_safe_locale_group = (fast_int, True, True, True) +int_safe_locale_nogroup = (fast_int, True, True, False) +int_safe_nolocale_group = (fast_int, True, False, True) +int_safe_nolocale_nogroup = (fast_int, True, False, False) +int_nosafe_locale_group = (fast_int, False, True, True) +int_nosafe_locale_nogroup = (fast_int, False, True, False) +int_nosafe_nolocale_group = (fast_int, False, False, True) +int_nosafe_nolocale_nogroup = (fast_int, False, False, False) -def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(): - assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] +# Each test has an "example" version for demonstrative purposes, +# and a test that uses the hypothesis module. 
-def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(): - assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] +def test_py3_safe_does_nothing_if_no_numbers_example(): + assert _py3_safe(['a', 'b', 'c'], False, isint) == ['a', 'b', 'c'] + assert _py3_safe(['a'], False, isint) == ['a'] -def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(): - assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] +def test_py3_safe_does_nothing_if_only_one_number_example(): + assert _py3_safe(['a', 5], False, isint) == ['a', 5] -def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(): - assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] +def test_py3_safe_inserts_empty_string_between_two_numbers_example(): + assert _py3_safe([5, 9], False, isint) == [5, '', 9] -def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(): - assert _number_extracter('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] +def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers_example(): + assert _py3_safe([5, 9], True, isint) == [5, null_string, 9] -def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(): - assert _number_extracter('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] +@given([py23_str, int]) +def test_py3_safe_inserts_empty_string_between_two_numbers(x): + assume(bool(x)) + assert _py3_safe(x, False, isint) == sep_inserter(x, (int, long), '') -def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(): - assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] +def 
test_path_splitter_splits_path_string_by_separator_example(): + z = '/this/is/a/path' + assert _path_splitter(z) == list(pathlib.Path(z).parts) -def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(): - assert _number_extracter('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] +@given([sampled_from(string.ascii_letters)]) +def test_path_splitter_splits_path_string_by_separator(x): + assume(len(x) > 1) + assume(all(x)) + z = py23_str(pathlib.Path(*x)) + assert _path_splitter(z) == list(pathlib.Path(z).parts) -def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent(): - assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] +def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example(): + z = '/this/is/a/path/file.exe' + y = list(pathlib.Path(z).parts) + assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] -def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number(): - assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] +@given([sampled_from(string.ascii_letters)]) +def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x): + assume(len(x) > 2) + assume(all(x)) + z = py23_str(pathlib.Path(*x[:-2])) + '.' 
+ x[-1] + y = list(pathlib.Path(z).parts) + assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] -def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe(): - assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] +def test_number_extracter_raises_TypeError_if_given_a_number_example(): + with raises(TypeError): + assert _number_extracter(50.0, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(): - assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *ffft) == ['aA', 5.0, 0.5034] +@given(float) +def test_number_extracter_raises_TypeError_if_given_a_number(x): + with raises(TypeError): + assert _number_extracter(x, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(): - assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ifft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] +def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example(): + assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 0.5034] -def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(): - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) - else: - from natsort.locale_help import strxfrm - assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] - locale.setlocale(locale.LC_NUMERIC, str('')) +@given([float, py23_str, int]) +def 
test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, True, False, '') -def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(): - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) - else: - from natsort.locale_help import strxfrm - assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] - locale.setlocale(locale.LC_NUMERIC, str('')) +def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example(): + assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] -def test_py3_safe_does_nothing_if_no_numbers(): - assert _py3_safe(['a', 'b', 'c'], False) == ['a', 'b', 'c'] - assert _py3_safe(['a'], False) == ['a'] +@given([float, py23_str, int]) +def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, True, False, '') -def test_py3_safe_does_nothing_if_only_one_number(): - assert _py3_safe(['a', 5], False) == ['a', 5] +def 
test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example(): + assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 5.034, 'e', -1.0] -def test_py3_safe_inserts_empty_string_between_two_numbers(): - assert _py3_safe([5, 9], False) == [5, '', 9] +@given([float, py23_str, int]) +def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, False, False, '') -def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers(): - assert _py3_safe([5, 9], True) == [5, null_string, 9] +def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example(): + assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0] -def test__natsort_key_with_float_splits_input_into_string_and_signed_float_with_exponent(): - assert ns.F == ns.FLOAT - assert _natsort_key('a-5.034e2', None, ns.F) == ('a', -503.4) +@given([float, py23_str, int]) +def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, False, False, '') -def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_signed_float_without_exponent(): - assert _natsort_key('a-5.034e2', None, ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 
2.0) - # Default is to split on floats. - assert _natsort_key('a-5.034e2', None, ns.NOEXP) == ('a', -5.034, 'e', 2.0) +def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example(): + assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1] -def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(): - assert _natsort_key('a-5.034e2', None, ns.UNSIGNED) == ('a-', 503.4) +@given([float, py23_str, int]) +@example([10000000000000000000000000000000000000000000000000000000000000000000000000, + 100000000000000000000000000000000000000000000000000000000000000000000000000, + 100000000000000000000000000000000000000000000000000000000000000000000000000]) +def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x): + assume(len(x) <= 10) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, False, False, '') -def test__natsort_key_with_float_and_unsigned_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(): - assert _natsort_key('a-5.034e2', None, ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) +def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example(): + assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, 5, '.', 34, 'e', -1] -def test__natsort_key_with_int_splits_input_into_string_and_signed_int(): - assert _natsort_key('a-5.034e2', None, ns.INT) == ('a', -5, '.', 34, 'e', 2) - # NOEXP is ignored for integers - assert _natsort_key('a-5.034e2', None, ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) +@given([float, py23_str, int]) +def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x): + assume(len(x) <= 10) + s = ''.join(repr(y) if type(y) in (float, 
long, int) else y for y in x) + assert _number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, True, False, '') -def test__natsort_key_with_int_splits_and_unsigned_input_into_string_and_unsigned_int(): - assert _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) +def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option_example(): + assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '', 0.5034] -def test__natsort_key_with_version_or_digit_matches_usigned_int(): - assert _natsort_key('a-5.034e2', None, ns.VERSION) == _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED) - assert _natsort_key('a-5.034e2', None, ns.DIGIT) == _natsort_key('a-5.034e2', None, ns.VERSION) +@given([float, py23_str, int]) +def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _float_sign_exp_re, *float_safe_nolocale_nogroup) == float_splitter(s, True, True, True, '') -def test__natsort_key_with_key_applies_key_function_before_splitting(): - assert _natsort_key('a-5.034e2', lambda x: x.upper(), ns.F) == ('A', -503.4) +def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option_example(): + assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_safe_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1] -def test__natsort_key_with_tuple_input_returns_nested_tuples(): - # Iterables are parsed recursively so you can sort lists of lists. 
- assert _natsort_key(('a1', 'a-5.034e2'), None, ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) +@given([float, py23_str, int]) +def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(x): + assume(len(x) <= 10) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup) == int_splitter(s, True, True, '') -def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(): - # A key is applied before recursion, but not in the recursive calls. - assert _natsort_key(('a1', 'a-5.034e2'), itemgetter(1), ns.F) == ('a', -503.4) +def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent_example(): + assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] -def test__natsort_key_with_input_containing_leading_numbers_returns_leading_empty_strings(): - # Strings that lead with a number get an empty string at the front of the tuple. - # This is designed to get around the "unorderable types" issue. 
- assert _natsort_key(('15a', '6'), None, ns.F) == (('', 15.0, 'a'), ('', 6.0)) +def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example(): + assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, 0.5034] -def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(): - assert _natsort_key(10, None, ns.F) == ('', 10) +def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe_exmple(): + assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034] -def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): - # Turn on PATH to split a file path into components - assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) +def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example(): + assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nosafe_nolocale_group) == ['aA', 5.0, 0.5034] -def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): - assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) +@given([float, py23_str, int]) +def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y 
in x) + t = float_splitter(s, True, True, False, '') + t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t] + assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_group) == t -def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): - assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) +def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example(): + assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] -def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): - # Converts pathlib PurePath (and subclass) objects to string before sorting - if has_pathlib: - assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) +@given([float, py23_str, int]) +def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x): + assume(len(x) <= 10) + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = int_splitter(s, False, False, '') + t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t] + assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_group) == t -def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): - # It gracefully handles as_path for numeric input by putting an extra tuple around it - # so it will sort against the other as_path results. 
- assert _natsort_key(10, None, ns.PATH) == (('', 10),) +def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example(): + load_locale('en_US') + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] + locale.setlocale(locale.LC_NUMERIC, str('')) -def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): - # PATH also handles recursion well. - assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) +@given([float, py23_str, int]) +def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x): + assume(len(x) <= 10) + load_locale('en_US') + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = int_splitter(s, False, False, null_string) + t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), False) for i, y in enumerate(t)] + assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_nogroup) == t + locale.setlocale(locale.LC_NUMERIC, str('')) -def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(): - # Turn on TYPESAFE to put a '' between adjacent numbers - assert _natsort_key('43h7+3', None, ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) +def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters_example(): + load_locale('en_US') + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_group) == 
[strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] + locale.setlocale(locale.LC_NUMERIC, str('')) -def test__natsort_key_with_invalid_alg_input_raises_ValueError(): - # Invalid arguments give the correct response - with raises(ValueError) as err: - _natsort_key('a', None, '1') - assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" + +@given([float, py23_str, int]) +def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(x): + assume(len(x) <= 10) + load_locale('en_US') + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = int_splitter(s, False, False, null_string) + t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), True) for i, y in enumerate(t)] + assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_group) == t + locale.setlocale(locale.LC_NUMERIC, str('')) + + +# The remaining tests provide no examples, just hypothesis tests. +# They only confirm that _natsort_key uses the above building blocks. 
+ + +@given([float, py23_str, int]) +def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert ns.F == ns.FLOAT + assert ns.S == ns.SIGNED + assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert ns.R == ns.F | ns.S + assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_real_matches_signed_float(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S) + + +@given([float, py23_str, int]) +def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert ns.N == ns.NOEXP + assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) 
+ assert ns.U == ns.UNSIGNED + assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) + # Default is unsigned search + assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert ns.I == ns.INT + assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) + # Default is int search + assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) + # NOEXP is ignored for integers + assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) + assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) + + 
+@given([float, py23_str, int]) +def test__natsort_key_with_version_or_digit_matches_usigned_int(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED) + assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION) + + +@given([float, py23_str, int]) +def test__natsort_key_with_key_applies_key_function_before_splitting(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_tuple_input_returns_nested_tuples(x): + # Iterables are parsed recursively so you can sort lists of lists. + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) + assert _natsort_key((s, s), None, ns.I) == (t, t) -def test__natsort_key_without_string_modifiers_leaves_text_as_is(): - # Changing the sort order of strings - assert _natsort_key('Apple56', None, ns.F) == ('Apple', 56.0) +@given([float, py23_str, int]) +def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x): + # A key is applied before recursion, but not in the recursive calls. 
+ assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) + assert _natsort_key((s, s), itemgetter(1), ns.I) == t -def test__natsort_key_with_IGNORECASE_lowercases_text(): - assert _natsort_key('Apple56', None, ns.IGNORECASE) == ('apple', 56.0) +@given(float) +def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x): + assume(not isnan(x)) + if x.is_integer(): + x = int(x) + assert _natsort_key(x, None, ns.I) == ('', x) -def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(): - assert _natsort_key('Apple56', None, ns.LOWERCASEFIRST) == ('aPPLE', 56.0) +@given([float, py23_str, int]) +def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(x): + # Turn on TYPESAFE to put a '' between adjacent numbers + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.TYPESAFE | ns.S) == tuple(_number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup)) -def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(): - assert _natsort_key('Apple56', None, ns.GROUPLETTERS) == ('aAppppllee', 56.0) +def test__natsort_key_with_invalid_alg_input_raises_ValueError(): + # Invalid arguments give the correct response + with raises(ValueError) as err: + _natsort_key('a', None, '1') + assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" -def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(): - assert _natsort_key('Apple56', None, ns.G | ns.LF) == ('aapPpPlLeE', 56.0) +@given([float, py23_str, int]) +def test__natsort_key_with_IGNORECASE_lowercases_text(x): + assume(len(x) <= 10) + assume(not any(type(y) 
== float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + try: + assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) + except AttributeError: + assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x): + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) + + +@given([float, py23_str, int]) +def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x): + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) + assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) + + +@given([float, py23_str, int]) +def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x): + try: + low = py23_str.casefold + except AttributeError: + low = py23_str.lower + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + t = 
_number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup) + assert _natsort_key(s, None, ns.G | ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple(): @@ -318,38 +579,38 @@ def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE assert True -def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(): +@given([float, py23_str, int]) +def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x): # Locale aware sorting - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + load_locale('en_US') + if dumb_sort(): + assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) else: - from natsort.locale_help import strxfrm - assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) - assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0) - - locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) - if use_pyicu: - strxfrm = get_pyicu_transform(getlocale()) - assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) - assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) + assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) locale.setlocale(locale.LC_NUMERIC, str('')) -def 
test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(): +@given([float, py23_str, int]) +def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x): # Locale aware sorting - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) + assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + load_locale('en_US') + if dumb_sort(): + t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) else: - from natsort.locale_help import strxfrm - assert _natsort_key('Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm(' Apple'), 56.5) - assert _natsort_key('apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm('apple'), 56.5) - assert _natsort_key('12Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (null_string, 12.0, strxfrm('Apple'), 56.5) + t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) + if not t: + r = (t, t) + elif t[0] is null_string: + r = ((b'' if use_pyicu else '',), t) + else: + r = ((s[0],), t) + assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r # The below are all aliases for UNGROUPLETTERS assert ns.UNGROUPLETTERS == ns.UG assert ns.UNGROUPLETTERS == ns.CAPITALFIRST @@ -357,5 +618,42 @@ def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_ locale.setlocale(locale.LC_NUMERIC, str('')) -def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(): - assert _natsort_key('Apple56.5', None, ns.UG | ns.I) == _natsort_key('Apple56.5', None, ns.I) +@given([float, py23_str, int]) +def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x): + 
assume(len(x) <= 10) + assume(not any(type(y) == float and isnan(y) for y in x)) + s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) + assert _natsort_key(s, None, ns.UG | ns.I) == _natsort_key(s, None, ns.I) + + +# It is difficult to generate code that will create random filesystem paths, +# so "example" based tests are given for the PATH option. + + +def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): + # Turn on PATH to split a file path into components + assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + + +def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): + assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + + +def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): + assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + + +def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): + # Converts pathlib PurePath (and subclass) objects to string before sorting + assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + + +def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): + # It gracefully handles as_path for numeric input by putting an extra tuple around it + # so it will sort against the other as_path results. 
+ assert _natsort_key(10, None, ns.PATH) == (('', 10),) + + +def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): + # PATH also handles recursion well. + assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) |
