diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2014-09-25 21:13:58 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2014-09-25 21:13:58 -0700 |
commit | ce94d418c3849e891f0508b41f5bccc4ae1d4e4d (patch) | |
tree | 1408954f6bc4c844b56535599340051428a56f5f | |
parent | afc8e69ccbcf6304a10751d97149f2c8435fb09f (diff) | |
parent | 84f13d5fa3eb8a23eeae00b50eb062503f4ca290 (diff) | |
download | natsort-ce94d418c3849e891f0508b41f5bccc4ae1d4e4d.tar.gz |
natsort version 3.5.1 release.3.5.1
- Fixed bug that caused list/tuples to fail when using
'ns.LOWERCASEFIRST' or 'ns.IGNORECASE'.
- Refactored modules so that only the public API was in natsort.py
and ns_enum.py.
- Refactored all import statements to be absolute, not relative.
-rw-r--r-- | .coveragerc | 3 | ||||
-rw-r--r-- | README.rst | 42 | ||||
-rw-r--r-- | docs/source/changelog.rst | 10 | ||||
-rw-r--r-- | docs/source/shell.rst | 14 | ||||
-rw-r--r-- | natsort/__init__.py | 10 | ||||
-rw-r--r-- | natsort/__main__.py | 9 | ||||
-rw-r--r-- | natsort/_version.py | 2 | ||||
-rw-r--r-- | natsort/fake_fastnumbers.py | 1 | ||||
-rw-r--r-- | natsort/locale_help.py | 12 | ||||
-rw-r--r-- | natsort/natsort.py | 396 | ||||
-rw-r--r-- | natsort/ns_enum.py | 128 | ||||
-rw-r--r-- | natsort/utils.py | 277 | ||||
-rw-r--r-- | setup.cfg | 1 | ||||
-rw-r--r-- | setup.py | 4 | ||||
-rw-r--r-- | test_natsort/stress_natsort.py | 3 | ||||
-rw-r--r-- | test_natsort/test_natsort.py | 161 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 159 |
17 files changed, 633 insertions, 599 deletions
diff --git a/.coveragerc b/.coveragerc index 8622bd1..1bbfe9d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,9 +9,6 @@ exclude_lines = raise NotImplementedError raise$ - # Don't complain about alternate imports - except ImportError - # Don't complain if non-runnable code isn't run: if 0: if __name__ == .__main__.: @@ -94,9 +94,7 @@ Shell script ``natsort`` comes with a shell script called ``natsort``, or can also be called from the command line with ``python -m natsort``. The command line script is -only installed onto your ``PATH`` if you don't install via a wheel. There is -apparently a known bug with the wheel installation process that will not create -entry points. +only installed onto your ``PATH`` if you don't install via a wheel. Requirements ------------ @@ -157,6 +155,15 @@ History These are the last three entries of the changelog. See the package documentation for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. +09-25-2014 v. 3.5.1 +''''''''''''''''''' + + - Fixed bug that caused list/tuples to fail when using 'ns.LOWECASEFIRST' + or 'ns.IGNORECASE'. + - Refactored modules so that only the public API was in natsort.py and + ns_enum.py. + - Refactored all import statements to be absolute, not relative. + 09-02-2014 v. 3.5.0 ''''''''''''''''''' @@ -183,32 +190,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. enhancements. - Made documentation point to more 'natsort' resources, and also added a new example in the examples section. - -07-19-2014 v. 3.4.0 -''''''''''''''''''' - - - Fixed a bug that caused user's options to the 'natsort_key' to not be - passed on to recursive calls of 'natsort_key'. - - Added a 'natsort_keygen' function that will generate a wrapped version - of 'natsort_key' that is easier to call. 'natsort_key' is now set to - depreciate at natsort version 4.0.0. - - Added an 'as_path' option to 'natsorted' & co. that will try to treat - input strings as filepaths. 
This will help yield correct results for - OS-generated inputs like - ``['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']``. - - Massive performance enhancements for string input (1.8x-2.0x), at the expense - of reduction in speed for numeric input (~2.0x). - - - This is a good compromise because the most common input will be strings, - not numbers, and sorting numbers still only takes 0.6x the time of sorting - strings. If you are sorting only numbers, you would use 'sorted' anyway. - - - Added the 'order_by_index' function to help in using the output of - 'index_natsorted' and 'index_versorted'. - - Added the 'reverse' option to 'natsorted' & co. to make it's API more - similar to the builtin 'sorted'. - - Added more unit tests. - - Added auxiliary test code that helps in profiling and stress-testing. - - Reworked the documentation, moving most of it to PyPI's hosting platform. - - Added support for coveralls.io. - - Entire codebase is now PyFlakes and PEP8 compliant. diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 542f5ad..c30a9db 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,16 @@ Changelog --------- +09-25-2014 v. 3.5.1 +''''''''''''''''''' + + - Fixed bug that caused list/tuples to fail when using 'ns.LOWECASEFIRST' + or 'ns.IGNORECASE'. + - Refactored modules so that only the public API was in natsort.py and + ns_enum.py. + - Refactored all import statements to be absolute, not relative. + + 09-02-2014 v. 3.5.0 ''''''''''''''''''' diff --git a/docs/source/shell.rst b/docs/source/shell.rst index e29a6fe..5359250 100644 --- a/docs/source/shell.rst +++ b/docs/source/shell.rst @@ -7,8 +7,7 @@ Shell Script ============ The ``natsort`` shell script is automatically installed when you install -:mod:`natsort` from "zip" or "tar.gz" via ``pip`` or ``easy_install`` -(there is a known bug with wheels that will not install the shell script). +:mod:`natsort` with pip. 
Below is the usage and some usage examples for the ``natsort`` shell script. @@ -17,9 +16,9 @@ Usage :: - usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] - [-e EXCLUDE] [-r] [-t {digit,int,float,version,ver}] - [--nosign] [--noexp] + usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] [-e EXCLUDE] + [-r] [-t {digit,int,float,version,ver}] [--nosign] [--noexp] + [--locale] [entries [entries ...]] Performs a natural sort on entries given on the command-line. @@ -59,7 +58,10 @@ Usage --noexp Do not consider an exponential as part of a number, i.e. 1e4, would be considered as 1, "e", and 4, not as 10000. This only effects the --number-type=float. - + --locale, -l Causes natsort to use locale-aware sorting. On some + systems, the underlying C library is broken, so if you + get results that you do not expect please install + PyICU and try again. Description ----------- diff --git a/natsort/__init__.py b/natsort/__init__.py index 02f8d30..88df7ee 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -2,10 +2,12 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -from .natsort import (natsort_key, natsort_keygen, natsorted, humansorted, - index_natsorted, versorted, index_versorted, - index_humansorted, order_by_index, ns) -from ._version import __version__ +# Local imports. +from natsort.natsort import (natsort_key, natsort_keygen, ns, + natsorted, humansorted, versorted, + index_natsorted, index_versorted, + index_humansorted, order_by_index) +from natsort._version import __version__ __all__ = [ 'natsort_key', diff --git a/natsort/__main__.py b/natsort/__main__.py index f55c3fe..5368d12 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -2,11 +2,14 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) +# Std. lib imports. 
import sys -from .natsort import natsorted, _regex_and_num_function_chooser, ns -from ._version import __version__ -from .py23compat import py23_str +# Local imports. +from natsort.natsort import natsorted, ns +from natsort.utils import _regex_and_num_function_chooser +from natsort._version import __version__ +from natsort.py23compat import py23_str def main(): diff --git a/natsort/_version.py b/natsort/_version.py index b490955..2f3cf55 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -2,4 +2,4 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -__version__ = '3.5.0' +__version__ = '3.5.1' diff --git a/natsort/fake_fastnumbers.py b/natsort/fake_fastnumbers.py index 15d7e88..116bab1 100644 --- a/natsort/fake_fastnumbers.py +++ b/natsort/fake_fastnumbers.py @@ -7,6 +7,7 @@ installed. from __future__ import (print_function, division, unicode_literals, absolute_import) +# Std. lib imports. import re float_re = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$') diff --git a/natsort/locale_help.py b/natsort/locale_help.py index 748a7cb..32bc116 100644 --- a/natsort/locale_help.py +++ b/natsort/locale_help.py @@ -7,11 +7,13 @@ and Python3 differences. from __future__ import (print_function, division, unicode_literals, absolute_import) +# Std. lib imports. import sys from itertools import chain from locale import localeconv -from .py23compat import py23_zip +# Local imports. +from natsort.py23compat import py23_zip # We need cmp_to_key for Python2 because strxfrm is broken for unicode. if sys.version[:3] == '2.7': @@ -20,7 +22,7 @@ if sys.version[:3] == '2.7': elif sys.version[:3] == '2.6': def cmp_to_key(mycmp): """Convert a cmp= function into a key= function""" - class K(object): + class K(object): # pragma: no cover __slots__ = ['obj'] def __init__(self, obj): @@ -75,12 +77,8 @@ except ImportError: from locale import strxfrm use_pyicu = False -# Convenience functions. 
-lowercase = lambda x: x.lower() -swapcase = lambda x: x.swapcase() - # This little lambda doubles all characters, making letters lowercase. -groupletters = lambda x: ''.join(chain(*py23_zip(lowercase(x), x))) +groupletters = lambda x: ''.join(chain(*py23_zip(x.lower(), x))) def grouper(val, func): diff --git a/natsort/natsort.py b/natsort/natsort.py index 82b84df..d3d6f8a 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -15,400 +15,20 @@ See the README or the natsort homepage for more details. from __future__ import (print_function, division, unicode_literals, absolute_import) -import re -from os import curdir, pardir -from os.path import split, splitext +# Std lib. imports. from operator import itemgetter from functools import partial -from itertools import islice from warnings import warn -from locale import localeconv -# If the user has fastnumbers installed, they will get great speed -# benefits. If not, we simulate the functions here. -try: - from fastnumbers import fast_float, fast_int, isreal -except ImportError: - from .fake_fastnumbers import fast_float, fast_int, isreal -from .locale_help import locale_convert, grouper, lowercase, swapcase -from .py23compat import u_format, py23_str, py23_zip +# Local imports. +from natsort.utils import _natsort_key, _args_to_enum +from natsort.ns_enum import ns +from natsort.py23compat import u_format # Make sure the doctest works for either python2 or python3 __doc__ = u_format(__doc__) -class ns(object): - """ - Enum to control the `natsort` algorithm. - - This class acts like an enum to control the `natsort` algorithm. The - user may select several options simultaneously by or'ing the options - together. For example, to choose ``ns.INT``, `ns.PATH``, and - ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. - - Each option has a shortened 1- or 2-letter form. - - .. warning:: On some systems, the underlying C library that - Python's locale module uses is broken. 
On these - systems it is recommended that you install - `PyICU <https://pypi.python.org/pypi/PyICU>`_ - if you wish to use `LOCALE`. - Please validate that `LOCALE` works as - expected on your target system, and if not you - should add - `PyICU <https://pypi.python.org/pypi/PyICU>`_ - as a dependency. - - Attributes - ---------- - FLOAT, F - The default - parse numbers as floats. - INT, I - Tell `natsort` to parse numbers as ints. - UNSIGNED, U - Tell `natsort` to ignore any sign (i.e. "-" or "+") to the - immediate left of a number. It is the same as setting the old - `signed` option to `False`. - VERSION, V - This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful - when attempting to sort version numbers. It is the same as - setting the old `number_type` option to `None`. - DIGIT, D - Same as `VERSION` above. - NOEXP, N - Tell `natsort` to not search for exponents as part of the number. - For example, with `NOEXP` the number "5.6E5" would be interpreted - as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` - option to `False`. - PATH, P - Tell `natsort` to interpret strings as filesystem paths, so they - will be split according to the filesystem separator - (i.e. ‘/’ on UNIX, ‘\’ on Windows), as well as splitting on the - file extension, if any. Without this, lists of file paths like - ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted - properly; 'Folder/' will be placed at the end, not at the front. - It is the same as setting the old `as_path` option to `True`. - LOCALE, L - Tell `natsort` to be locale-aware when sorting strings (everything - that was not converted to a number). Your sorting results will vary - depending on your current locale. Generally, the `GROUPLETTERS` - option is needed with `LOCALE` because the `locale` library - groups the letters in the same manner (although you may still - need `GROUPLETTERS` if there are numbers in your strings). - IGNORECASE, IC - Tell `natsort` to ignore case when sorting. 
For example, - ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as - ``['apple', 'Apple', 'Banana', 'banana']``. - LOWERCASEFIRST, LF - Tell `natsort` to put lowercase letters before uppercase letters - when sorting. For example, - ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as - ``['apple', 'banana', 'Apple', 'Banana']`` (the default order - would be ``['Apple', 'Banana', 'apple', 'banana']`` which is - the order from a purely ordinal sort). - Useless when used with `IGNORECASE`. - GROUPLETTERS, G - Tell `natsort` to group lowercase and uppercase letters together - when sorting. For example, - ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as - ``['Apple', 'apple', 'Banana', 'banana']``. - Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` - to reverse the order of upper and lower case. - TYPESAFE, T - Try hard to avoid "unorderable types" error on Python 3. It - is the same as setting the old `py3_safe` option to `True`. - - Notes - ----- - If using `LOCALE`, you may find that if you do not explicitly set - the locale your results may not be as you expect... I have found that - it depends on the system you are on. To do this is straightforward - (in the below example I use 'en_US.UTF-8', but you should use your - locale):: - - >>> import locale - >>> # The 'str' call is only to get around a bug on Python 2.x - >>> # where 'setlocale' does not expect unicode strings (ironic, - >>> # right?) - >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) - 'en_US.UTF-8' - - It is preferred that you do this before importing `natsort`. - If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning - above) then you should not need to do this. - - """ - pass - - -# Sort algorithm "enum" values. 
-_nsdict = {'FLOAT': 0, 'F': 0, - 'INT': 1, 'I': 1, - 'UNSIGNED': 2, 'U': 2, - 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED - 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED - 'NOEXP': 4, 'N': 4, - 'PATH': 8, 'P': 8, - 'LOCALE': 16, 'L': 16, - 'IGNORECASE': 32, 'IC': 32, - 'LOWERCASEFIRST': 64, 'LF': 64, - 'GROUPLETTERS': 128, 'G': 128, - 'TYPESAFE': 1024, 'T': 1024, - } -# Populate the ns class with the _nsdict values. -for x, y in _nsdict.items(): - setattr(ns, x, y) - -# Group algorithm types for easy extraction -_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP -_CASE_ALGORITHMS = ns.IGNORECASE | ns.LOWERCASEFIRST | ns.GROUPLETTERS -_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | - ns.IC | ns.LF | ns.G | ns.TYPESAFE) - -# The regex that locates floats -_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) -_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) -_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) -_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) -_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) - -# Integer regexes -_int_nosign_re = re.compile(r'(\d+)', re.U) -_int_sign_re = re.compile(r'([-+]?\d+)', re.U) - -# This dict will help select the correct regex and number conversion function. 
-_regex_and_num_function_chooser = { - (ns.F, '.'): (_float_sign_exp_re, fast_float), - (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), - (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), - (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), - (ns.I, '.'): (_int_sign_re, fast_int), - (ns.I | ns.N, '.'): (_int_sign_re, fast_int), - (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), - (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), - (ns.F, ','): (_float_sign_exp_re_c, fast_float), - (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), - (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), - (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), - (ns.I, ','): (_int_sign_re, fast_int), - (ns.I | ns.N, ','): (_int_sign_re, fast_int), - (ns.I | ns.U, ','): (_int_nosign_re, fast_int), - (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), -} - - -def _args_to_enum(number_type, signed, exp, as_path, py3_safe): - """A function to convert input booleans to an enum-type argument.""" - alg = 0 - if number_type is not float: - msg = "The 'number_type' argument is depreciated as of 3.5.0, " - msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" - warn(msg, DeprecationWarning) - alg |= (_nsdict['INT'] * bool(number_type in (int, None))) - alg |= (_nsdict['UNSIGNED'] * (number_type is None)) - if signed is not None: - msg = "The 'signed' argument is depreciated as of 3.5.0, " - msg += "please use 'alg=ns.UNSIGNED'." - warn(msg, DeprecationWarning) - alg |= (_nsdict['UNSIGNED'] * (not signed)) - if exp is not None: - msg = "The 'exp' argument is depreciated as of 3.5.0, " - msg += "please use 'alg=ns.NOEXP'." - warn(msg, DeprecationWarning) - alg |= (_nsdict['NOEXP'] * (not exp)) - if as_path is not None: - msg = "The 'as_path' argument is depreciated as of 3.5.0, " - msg += "please use 'alg=ns.PATH'." 
- warn(msg, DeprecationWarning) - alg |= (_nsdict['PATH'] * as_path) - if py3_safe is not None: - msg = "The 'py3_safe' argument is depreciated as of 3.5.0, " - msg += "please use 'alg=ns.TYPESAFE'." - warn(msg, DeprecationWarning) - alg |= (_nsdict['TYPESAFE'] * py3_safe) - return alg - - -def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters): - """Helper to parse the string input into numbers and strings.""" - - # Split the input string by numbers. - # If the input is not a string, TypeError is raised. - s = regex.split(s) - - # Now convert the numbers to numbers, and leave strings as strings. - # Take into account locale if needed, and group letters if needed. - # Remove empty strings from the list. - if use_locale: - s = [locale_convert(x, numconv, group_letters) for x in s if x] - elif group_letters: - s = [grouper(x, numconv) for x in s if x] - else: - s = [numconv(x) for x in s if x] - - # If the list begins with a number, lead with an empty string. - # This is used to get around the "unorderable types" issue. - if not s: # Return empty tuple for empty results. - return () - elif isreal(s[0]): - s = [''] + s - - # The _py3_safe function inserts "" between numbers in the list, - # and is used to get around "unorderable types" in complex cases. - # It is a separate function that needs to be requested specifically - # because it is expensive to call. - return _py3_safe(s) if py3_safe else s - - -def _path_splitter(s, _d_match=re.compile(r'\.\d').match): - """Split a string into its path components. Assumes a string is a path.""" - path_parts = [] - p_append = path_parts.append - path_location = s - - # Continue splitting the path from the back until we have reached - # '..' or '.', or until there is nothing left to split. 
- while path_location != curdir and path_location != pardir: - parent_path = path_location - path_location, child_path = split(parent_path) - if path_location == parent_path: - break - p_append(child_path) - - # This last append is the base path. - # Only append if the string is non-empty. - if path_location: - p_append(path_location) - - # We created this list in reversed order, so we now correct the order. - path_parts.reverse() - - # Now, split off the file extensions using a similar method to above. - # Continue splitting off file extensions until we reach a decimal number - # or there are no more extensions. - base = path_parts.pop() - base_parts = [] - b_append = base_parts.append - while True: - front = base - base, ext = splitext(front) - if _d_match(ext) or not ext: - # Reset base to before the split if the split is invalid. - base = front - break - b_append(ext) - b_append(base) - base_parts.reverse() - - # Return the split parent paths and then the split basename. - return path_parts + base_parts - - -def _py3_safe(parsed_list): - """Insert '' between two numbers.""" - length = len(parsed_list) - if length < 2: - return parsed_list - else: - new_list = [parsed_list[0]] - nl_append = new_list.append - for before, after in py23_zip(islice(parsed_list, 0, length-1), - islice(parsed_list, 1, None)): - if isreal(before) and isreal(after): - nl_append("") - nl_append(after) - return new_list - - -def _natsort_key(val, key, alg): - """\ - Key to sort strings and numbers naturally. - - It works by separating out the numbers from the strings. This function for - internal use only. See the natsort_keygen documentation for details of each - parameter. - - Parameters - ---------- - val : {str, unicode} - key : callable - alg : ns enum - - Returns - ------- - out : tuple - The modified value with numbers extracted. 
- - """ - - # Convert the arguments to the proper input tuple - try: - use_locale = alg & _nsdict['LOCALE'] - inp_options = (alg & _NUMBER_ALGORITHMS, - localeconv()['decimal_point'] if use_locale else '.') - except TypeError: - msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" - raise ValueError(msg+', got {0}'.format(py23_str(alg))) - - # Get the proper regex and conversion function. - try: - regex, num_function = _regex_and_num_function_chooser[inp_options] - except KeyError: # pragma: no cover - if inp_options[1] not in ('.', ','): # pragma: no cover - raise ValueError("_natsort_key: currently natsort only supports " - "the decimal separators '.' and ','. " - "Please file a bug report.") - else: - raise - else: - # Apply key if needed. - if key is not None: - val = key(val) - - # If this is a path, convert it. - # An AttrubuteError is raised if not a string. - split_as_path = False - if alg & _nsdict['PATH']: - try: - val = _path_splitter(val) - except AttributeError: - pass - else: - # Record that this string was split as a path so that - # we don't set PATH in the recursive call. - split_as_path = True - - # Assume the input are strings, which is the most common case. - # Apply the string modification if needed. - try: - if alg & _nsdict['LOWERCASEFIRST']: - val = swapcase(val) - if alg & _nsdict['IGNORECASE']: - val = lowercase(val) - return tuple(_input_parser(val, - regex, - num_function, - alg & _nsdict['TYPESAFE'], - use_locale, - alg & _nsdict['GROUPLETTERS'])) - except TypeError: - # If not strings, assume it is an iterable that must - # be parsed recursively. Do not apply the key recursively. - # If this string was split as a path, turn off 'PATH'. - try: - was_path = alg & _nsdict['PATH'] - newalg = alg & _ALL_BUT_PATH - newalg |= (was_path * (not split_as_path)) - return tuple([_natsort_key(x, None, newalg) for x in val]) - # If there is still an error, it must be a number. - # Return as-is, with a leading empty string. 
- except TypeError: - return (('', val,),) if alg & _nsdict['PATH'] else ('', val,) - - @u_format def natsort_key(val, key=None, number_type=float, signed=None, exp=None, as_path=None, py3_safe=None, alg=0): @@ -689,7 +309,8 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, See Also -------- natsort_keygen : Generates the key that makes natural sorting possible. - versorted : A wrapper for ``natsorted(seq, number_type=None)``. + versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``. + humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. index_natsorted : Returns the sorted indexes from `natsorted`. Examples @@ -711,7 +332,7 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None, if 'unorderable types' in str(e): return sorted(seq, reverse=reverse, key=natsort_keygen(key, - alg=alg | _nsdict['TYPESAFE'])) + alg=alg | ns.TYPESAFE)) else: # Re-raise if the problem was not "unorderable types" raise @@ -1128,6 +749,7 @@ def order_by_index(seq, index, iter=False): -------- index_natsorted index_versorted + index_humansorted Examples -------- diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py new file mode 100644 index 0000000..d83ef16 --- /dev/null +++ b/natsort/ns_enum.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +"""This module defines the "ns" enum for natsort.""" + +from __future__ import (print_function, division, + unicode_literals, absolute_import) + + +class ns(object): + """ + Enum to control the `natsort` algorithm. + + This class acts like an enum to control the `natsort` algorithm. The + user may select several options simultaneously by or'ing the options + together. For example, to choose ``ns.INT``, `ns.PATH``, and + ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. + + Each option has a shortened 1- or 2-letter form. + + .. warning:: On some systems, the underlying C library that + Python's locale module uses is broken. 
On these + systems it is recommended that you install + `PyICU <https://pypi.python.org/pypi/PyICU>`_ + if you wish to use `LOCALE`. + Please validate that `LOCALE` works as + expected on your target system, and if not you + should add + `PyICU <https://pypi.python.org/pypi/PyICU>`_ + as a dependency. + + Attributes + ---------- + FLOAT, F + The default - parse numbers as floats. + INT, I + Tell `natsort` to parse numbers as ints. + UNSIGNED, U + Tell `natsort` to ignore any sign (i.e. "-" or "+") to the + immediate left of a number. It is the same as setting the old + `signed` option to `False`. + VERSION, V + This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful + when attempting to sort version numbers. It is the same as + setting the old `number_type` option to `None`. + DIGIT, D + Same as `VERSION` above. + NOEXP, N + Tell `natsort` to not search for exponents as part of the number. + For example, with `NOEXP` the number "5.6E5" would be interpreted + as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` + option to `False`. + PATH, P + Tell `natsort` to interpret strings as filesystem paths, so they + will be split according to the filesystem separator + (i.e. ‘/’ on UNIX, ‘\’ on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted + properly; 'Folder/' will be placed at the end, not at the front. + It is the same as setting the old `as_path` option to `True`. + LOCALE, L + Tell `natsort` to be locale-aware when sorting strings (everything + that was not converted to a number). Your sorting results will vary + depending on your current locale. Generally, the `GROUPLETTERS` + option is needed with `LOCALE` because the `locale` library + groups the letters in the same manner (although you may still + need `GROUPLETTERS` if there are numbers in your strings). + IGNORECASE, IC + Tell `natsort` to ignore case when sorting. 
For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['apple', 'Apple', 'Banana', 'banana']``. + LOWERCASEFIRST, LF + Tell `natsort` to put lowercase letters before uppercase letters + when sorting. For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['apple', 'banana', 'Apple', 'Banana']`` (the default order + would be ``['Apple', 'Banana', 'apple', 'banana']`` which is + the order from a purely ordinal sort). + Useless when used with `IGNORECASE`. + GROUPLETTERS, G + Tell `natsort` to group lowercase and uppercase letters together + when sorting. For example, + ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as + ``['Apple', 'apple', 'Banana', 'banana']``. + Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` + to reverse the order of upper and lower case. + TYPESAFE, T + Try hard to avoid "unorderable types" error on Python 3. It + is the same as setting the old `py3_safe` option to `True`. + + Notes + ----- + If using `LOCALE`, you may find that if you do not explicitly set + the locale your results may not be as you expect... I have found that + it depends on the system you are on. To do this is straightforward + (in the below example I use 'en_US.UTF-8', but you should use your + locale):: + + >>> import locale + >>> # The 'str' call is only to get around a bug on Python 2.x + >>> # where 'setlocale' does not expect unicode strings (ironic, + >>> # right?) + >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) + 'en_US.UTF-8' + + It is preferred that you do this before importing `natsort`. + If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning + above) then you should not need to do this. + + """ + pass + + +# Sort algorithm "enum" values. 
+_nsdict = {'FLOAT': 0, 'F': 0, + 'INT': 1, 'I': 1, + 'UNSIGNED': 2, 'U': 2, + 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED + 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED + 'NOEXP': 4, 'N': 4, + 'PATH': 8, 'P': 8, + 'LOCALE': 16, 'L': 16, + 'IGNORECASE': 32, 'IC': 32, + 'LOWERCASEFIRST': 64, 'LF': 64, + 'GROUPLETTERS': 128, 'G': 128, + 'TYPESAFE': 1024, 'T': 1024, + } +# Populate the ns class with the _nsdict values. +for x, y in _nsdict.items(): + setattr(ns, x, y) diff --git a/natsort/utils.py b/natsort/utils.py new file mode 100644 index 0000000..add749f --- /dev/null +++ b/natsort/utils.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +""" +Utilities and definitions for natsort, mostly all used to define +the _natsort_key function. + +""" + +from __future__ import (print_function, division, + unicode_literals, absolute_import) + +# Std. lib imports. +import re +from warnings import warn +from os import curdir, pardir +from os.path import split, splitext +from itertools import islice +from locale import localeconv + +# Local imports. +from natsort.locale_help import locale_convert, grouper +from natsort.py23compat import py23_str, py23_zip +from natsort.ns_enum import ns, _nsdict + +# If the user has fastnumbers installed, they will get great speed +# benefits. If not, we simulate the functions here. 
+try: + from fastnumbers import fast_float, fast_int, isreal +except ImportError: + from natsort.fake_fastnumbers import fast_float, fast_int, isreal + +# Group algorithm types for easy extraction +_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP +_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | + ns.IC | ns.LF | ns.G | ns.TYPESAFE) + +# The regex that locates floats +_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) +_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) +_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) +_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) +_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) + +# Integer regexes +_int_nosign_re = re.compile(r'(\d+)', re.U) +_int_sign_re = re.compile(r'([-+]?\d+)', re.U) + +# This dict will help select the correct regex and number conversion function. 
+_regex_and_num_function_chooser = { + (ns.F, '.'): (_float_sign_exp_re, fast_float), + (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), + (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), + (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), + (ns.I, '.'): (_int_sign_re, fast_int), + (ns.I | ns.N, '.'): (_int_sign_re, fast_int), + (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), + (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), + (ns.F, ','): (_float_sign_exp_re_c, fast_float), + (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), + (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), + (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), + (ns.I, ','): (_int_sign_re, fast_int), + (ns.I | ns.N, ','): (_int_sign_re, fast_int), + (ns.I | ns.U, ','): (_int_nosign_re, fast_int), + (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), +} + + +def _args_to_enum(number_type, signed, exp, as_path, py3_safe): + """A function to convert input booleans to an enum-type argument.""" + alg = 0 + if number_type is not float: + msg = "The 'number_type' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" + warn(msg, DeprecationWarning) + alg |= (_nsdict['INT'] * bool(number_type in (int, None))) + alg |= (_nsdict['UNSIGNED'] * (number_type is None)) + if signed is not None: + msg = "The 'signed' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.UNSIGNED'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['UNSIGNED'] * (not signed)) + if exp is not None: + msg = "The 'exp' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.NOEXP'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['NOEXP'] * (not exp)) + if as_path is not None: + msg = "The 'as_path' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.PATH'." 
+ warn(msg, DeprecationWarning) + alg |= (_nsdict['PATH'] * as_path) + if py3_safe is not None: + msg = "The 'py3_safe' argument is depreciated as of 3.5.0, " + msg += "please use 'alg=ns.TYPESAFE'." + warn(msg, DeprecationWarning) + alg |= (_nsdict['TYPESAFE'] * py3_safe) + return alg + + +def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters): + """Helper to parse the string input into numbers and strings.""" + + # Split the input string by numbers. + # If the input is not a string, TypeError is raised. + s = regex.split(s) + + # Now convert the numbers to numbers, and leave strings as strings. + # Take into account locale if needed, and group letters if needed. + # Remove empty strings from the list. + if use_locale: + s = [locale_convert(x, numconv, group_letters) for x in s if x] + elif group_letters: + s = [grouper(x, numconv) for x in s if x] + else: + s = [numconv(x) for x in s if x] + + # If the list begins with a number, lead with an empty string. + # This is used to get around the "unorderable types" issue. + if not s: # Return empty tuple for empty results. + return () + elif isreal(s[0]): + s = [''] + s + + # The _py3_safe function inserts "" between numbers in the list, + # and is used to get around "unorderable types" in complex cases. + # It is a separate function that needs to be requested specifically + # because it is expensive to call. + return _py3_safe(s) if py3_safe else s + + +def _path_splitter(s, _d_match=re.compile(r'\.\d').match): + """Split a string into its path components. Assumes a string is a path.""" + path_parts = [] + p_append = path_parts.append + path_location = s + + # Continue splitting the path from the back until we have reached + # '..' or '.', or until there is nothing left to split. 
+ while path_location != curdir and path_location != pardir: + parent_path = path_location + path_location, child_path = split(parent_path) + if path_location == parent_path: + break + p_append(child_path) + + # This last append is the base path. + # Only append if the string is non-empty. + if path_location: + p_append(path_location) + + # We created this list in reversed order, so we now correct the order. + path_parts.reverse() + + # Now, split off the file extensions using a similar method to above. + # Continue splitting off file extensions until we reach a decimal number + # or there are no more extensions. + base = path_parts.pop() + base_parts = [] + b_append = base_parts.append + while True: + front = base + base, ext = splitext(front) + if _d_match(ext) or not ext: + # Reset base to before the split if the split is invalid. + base = front + break + b_append(ext) + b_append(base) + base_parts.reverse() + + # Return the split parent paths and then the split basename. + return path_parts + base_parts + + +def _py3_safe(parsed_list): + """Insert '' between two numbers.""" + length = len(parsed_list) + if length < 2: + return parsed_list + else: + new_list = [parsed_list[0]] + nl_append = new_list.append + for before, after in py23_zip(islice(parsed_list, 0, length-1), + islice(parsed_list, 1, None)): + if isreal(before) and isreal(after): + nl_append("") + nl_append(after) + return new_list + + +def _natsort_key(val, key, alg): + """\ + Key to sort strings and numbers naturally. + + It works by separating out the numbers from the strings. This function for + internal use only. See the natsort_keygen documentation for details of each + parameter. + + Parameters + ---------- + val : {str, unicode} + key : callable + alg : ns enum + + Returns + ------- + out : tuple + The modified value with numbers extracted. 
+ + """ + + # Convert the arguments to the proper input tuple + try: + use_locale = alg & _nsdict['LOCALE'] + inp_options = (alg & _NUMBER_ALGORITHMS, + localeconv()['decimal_point'] if use_locale else '.') + except TypeError: + msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" + raise ValueError(msg+', got {0}'.format(py23_str(alg))) + + # Get the proper regex and conversion function. + try: + regex, num_function = _regex_and_num_function_chooser[inp_options] + except KeyError: # pragma: no cover + if inp_options[1] not in ('.', ','): # pragma: no cover + raise ValueError("_natsort_key: currently natsort only supports " + "the decimal separators '.' and ','. " + "Please file a bug report.") + else: + raise + else: + # Apply key if needed. + if key is not None: + val = key(val) + + # If this is a path, convert it. + # An AttrubuteError is raised if not a string. + split_as_path = False + if alg & _nsdict['PATH']: + try: + val = _path_splitter(val) + except AttributeError: + pass + else: + # Record that this string was split as a path so that + # we don't set PATH in the recursive call. + split_as_path = True + + # Assume the input are strings, which is the most common case. + # Apply the string modification if needed. + try: + if alg & _nsdict['LOWERCASEFIRST']: + val = val.swapcase() + if alg & _nsdict['IGNORECASE']: + val = val.lower() + return tuple(_input_parser(val, + regex, + num_function, + alg & _nsdict['TYPESAFE'], + use_locale, + alg & _nsdict['GROUPLETTERS'])) + except (TypeError, AttributeError): + # If not strings, assume it is an iterable that must + # be parsed recursively. Do not apply the key recursively. + # If this string was split as a path, turn off 'PATH'. + try: + was_path = alg & _nsdict['PATH'] + newalg = alg & _ALL_BUT_PATH + newalg |= (was_path * (not split_as_path)) + return tuple([_natsort_key(x, None, newalg) for x in val]) + # If there is still an error, it must be a number. + # Return as-is, with a leading empty string. 
+ except TypeError: + return (('', val,),) if alg & _nsdict['PATH'] else ('', val,) @@ -14,4 +14,5 @@ flakes-ignore = pep8ignore = test_natsort/test_natsort.py E501 E241 E221 + test_natsort/test_utils.py E501 E241 E221 docs/source/conf.py ALL @@ -21,7 +21,9 @@ class PyTest(TestCommand): def run_tests(self): # import here, cause outside the eggs aren't loaded import pytest - err1 = pytest.main(['--cov', 'natsort', '--flakes', '--pep8']) + err1 = pytest.main(['--cov', 'natsort', + '--cov-report', 'term-missing', + '--flakes', '--pep8']) err2 = pytest.main(['--doctest-modules', 'natsort']) err3 = pytest.main(['README.rst', 'docs/source/intro.rst', diff --git a/test_natsort/stress_natsort.py b/test_natsort/stress_natsort.py index 7237db3..604a33d 100644 --- a/test_natsort/stress_natsort.py +++ b/test_natsort/stress_natsort.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """\ -This file contains functions to stress-test natsort. +This file contains functions to stress-test natsort, looking +for cases that raise an unknown exception. 
""" from random import randint, sample, choice from string import printable diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index afe8662..670050f 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -9,160 +9,8 @@ import locale from operator import itemgetter from pytest import raises from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted -from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index -from natsort.natsort import _input_parser, _py3_safe, _natsort_key, _args_to_enum -from natsort.natsort import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re -from natsort.natsort import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re -from natsort.natsort import ns -from natsort.locale_help import use_pyicu - -try: - from fastnumbers import fast_float, fast_int -except ImportError: - from natsort.fake_fastnumbers import fast_float, fast_int - - -def test_args_to_enum(): - - assert _args_to_enum(float, True, True, False, False) == ns.F - assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N - assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U - assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N - assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T - assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P - assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T - assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U - - -def test_input_parser(): - - # fttt = (fast_float, True, True, True) - # fttf = (fast_float, True, True, False) - ftft = (fast_float, True, False, True) - ftff = (fast_float, True, False, False) - # fftt = (fast_float, False, True, True) - # ffft = (fast_float, False, False, True) - # fftf = (fast_float, False, True, False) - ffff = (fast_float, False, False, False) - ittt = (fast_int, True, True, 
True) - ittf = (fast_int, True, True, False) - itft = (fast_int, True, False, True) - itff = (fast_int, True, False, False) - # iftt = (fast_int, False, True, True) - # ifft = (fast_int, False, False, True) - # iftf = (fast_int, False, True, False) - ifff = (fast_int, False, False, False) - - assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] - assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] - assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] - assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _input_parser('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _input_parser('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] - - assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] - assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] - assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ftff) == ['a', 5.0, '', 5.034, 'e', -1.0] - assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ftff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _input_parser('a5+5.034e-1', _int_nosign_re, *itff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _input_parser('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] - - assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] - assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] - - assert _input_parser('A5+5.034E-1', _float_sign_exp_re, *ftft) == ['aA', 5.0, '', 0.5034] - assert _input_parser('A5+5.034E-1', _int_nosign_re, *itft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] - - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from 
locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) - else: - from natsort.locale_help import strxfrm - assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] - assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] - locale.setlocale(locale.LC_NUMERIC, str('')) - - -def test_py3_safe(): - - assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] - assert _py3_safe(['a']) == ['a'] - assert _py3_safe(['a', 5]) == ['a', 5] - assert _py3_safe([5, 9]) == [5, '', 9] - - -def test_natsort_key_private(): - - # The below illustrates how the key works, and how the different options affect sorting. - assert _natsort_key('a-5.034e2', key=None, alg=ns.F) == ('a', -503.4) - assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT) == ('a', -503.4) - assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0) - assert _natsort_key('a-5.034e2', key=None, alg=ns.NOEXP) == ('a', -5.034, 'e', 2.0) - assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED) == ('a-', 503.4) - assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) - assert _natsort_key('a-5.034e2', key=None, alg=ns.INT) == ('a', -5, '.', 34, 'e', 2) - assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) - assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) - assert _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) == _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) - assert _natsort_key('a-5.034e2', key=None, alg=ns.DIGIT) == _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) - assert _natsort_key('a-5.034e2', key=lambda x: x.upper(), alg=ns.F) == ('A', -503.4) - - # Iterables are parsed recursively so you can sort lists of lists. 
- assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.F) == (('a', 1.0), ('a', -503.4)) - assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) - # A key is applied before recursion, but not in the recursive calls. - assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1), alg=ns.F) == ('a', -503.4) - - # Strings that lead with a number get an empty string at the front of the tuple. - # This is designed to get around the "unorderable types" issue. - assert _natsort_key(('15a', '6'), key=None, alg=ns.F) == (('', 15.0, 'a'), ('', 6.0)) - assert _natsort_key(10, key=None, alg=ns.F) == ('', 10) - - # Turn on as_path to split a file path into components - assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - assert _natsort_key('../Folder (10)/file (2).tar.gz', key=None, alg=ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) - assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - - # It gracefully handles as_path for numeric input by putting an extra tuple around it - # so it will sort against the other as_path results. - assert _natsort_key(10, key=None, alg=ns.PATH) == (('', 10),) - # as_path also handles recursion well. 
- assert _natsort_key(('/Folder', '/Folder (1)'), key=None, alg=ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) - - # Turn on py3_safe to put a '' between adjacent numbers - assert _natsort_key('43h7+3', key=None, alg=ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) - - # Invalid arguments give the correct response - with raises(ValueError) as err: - _natsort_key('a', key=None, alg='1') - assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" - - # Changing the sort order of strings - assert _natsort_key('Apple56', key=None, alg=ns.F) == ('Apple', 56.0) - assert _natsort_key('Apple56', key=None, alg=ns.IGNORECASE) == ('apple', 56.0) - assert _natsort_key('Apple56', key=None, alg=ns.LOWERCASEFIRST) == ('aPPLE', 56.0) - assert _natsort_key('Apple56', key=None, alg=ns.GROUPLETTERS) == ('aAppppllee', 56.0) - assert _natsort_key('Apple56', key=None, alg=ns.G | ns.LF) == ('aapPpPlLeE', 56.0) - - # Locale aware sorting - locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) - if use_pyicu: - from natsort.locale_help import get_pyicu_transform - from locale import getlocale - strxfrm = get_pyicu_transform(getlocale()) - else: - from natsort.locale_help import strxfrm - assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) - assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0) - - locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) - if use_pyicu: - strxfrm = get_pyicu_transform(getlocale()) - assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) - assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) - locale.setlocale(locale.LC_NUMERIC, str('')) +from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index, ns +from natsort.utils import _natsort_key def test_natsort_key_public(): @@ -287,6 +135,11 @@ def test_natsorted(): assert natsorted(a, 
alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] assert natsorted(a, alg=ns.G | ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] + b = [('A5', 'a6'), ('a3', 'a1')] + assert natsorted(b) == [('A5', 'a6'), ('a3', 'a1')] + assert natsorted(b, alg=ns.LOWERCASEFIRST) == [('a3', 'a1'), ('A5', 'a6')] + assert natsorted(b, alg=ns.IGNORECASE) == [('a3', 'a1'), ('A5', 'a6')] + # You can also do locale-aware sorting locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py new file mode 100644 index 0000000..824e769 --- /dev/null +++ b/test_natsort/test_utils.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +"""These test the utils.py functions.""" + +import locale +from operator import itemgetter +from pytest import raises +from natsort.ns_enum import ns +from natsort.utils import _input_parser, _py3_safe, _natsort_key, _args_to_enum +from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re +from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re +from natsort.locale_help import use_pyicu + +try: + from fastnumbers import fast_float, fast_int +except ImportError: + from natsort.fake_fastnumbers import fast_float, fast_int + + +def test_args_to_enum(): + + assert _args_to_enum(float, True, True, False, False) == ns.F + assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N + assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U + assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N + assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T + assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P + assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T + assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U + + 
+def test_input_parser(): + + # fttt = (fast_float, True, True, True) + # fttf = (fast_float, True, True, False) + ftft = (fast_float, True, False, True) + ftff = (fast_float, True, False, False) + # fftt = (fast_float, False, True, True) + # ffft = (fast_float, False, False, True) + # fftf = (fast_float, False, True, False) + ffff = (fast_float, False, False, False) + ittt = (fast_int, True, True, True) + ittf = (fast_int, True, True, False) + itft = (fast_int, True, False, True) + itff = (fast_int, True, False, False) + # iftt = (fast_int, False, True, True) + # ifft = (fast_int, False, False, True) + # iftf = (fast_int, False, True, False) + ifff = (fast_int, False, False, False) + + assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] + assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] + assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] + assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _input_parser('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _input_parser('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] + + assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] + assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] + assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ftff) == ['a', 5.0, '', 5.034, 'e', -1.0] + assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ftff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _input_parser('a5+5.034e-1', _int_nosign_re, *itff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _input_parser('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] + + assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] + assert _input_parser('6a5+5.034e-1', 
_float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] + + assert _input_parser('A5+5.034E-1', _float_sign_exp_re, *ftft) == ['aA', 5.0, '', 0.5034] + assert _input_parser('A5+5.034E-1', _int_nosign_re, *itft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] + + locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] + assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] + locale.setlocale(locale.LC_NUMERIC, str('')) + + +def test_py3_safe(): + + assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] + assert _py3_safe(['a']) == ['a'] + assert _py3_safe(['a', 5]) == ['a', 5] + assert _py3_safe([5, 9]) == [5, '', 9] + + +def test_natsort_key_private(): + + # The below illustrates how the key works, and how the different options affect sorting. 
+ assert _natsort_key('a-5.034e2', key=None, alg=ns.F) == ('a', -503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT) == ('a', -503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0) + assert _natsort_key('a-5.034e2', key=None, alg=ns.NOEXP) == ('a', -5.034, 'e', 2.0) + assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED) == ('a-', 503.4) + assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) + assert _natsort_key('a-5.034e2', key=None, alg=ns.INT) == ('a', -5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) + assert _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) == _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) + assert _natsort_key('a-5.034e2', key=None, alg=ns.DIGIT) == _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) + assert _natsort_key('a-5.034e2', key=lambda x: x.upper(), alg=ns.F) == ('A', -503.4) + + # Iterables are parsed recursively so you can sort lists of lists. + assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.F) == (('a', 1.0), ('a', -503.4)) + assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) + # A key is applied before recursion, but not in the recursive calls. + assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1), alg=ns.F) == ('a', -503.4) + + # Strings that lead with a number get an empty string at the front of the tuple. + # This is designed to get around the "unorderable types" issue. 
+ assert _natsort_key(('15a', '6'), key=None, alg=ns.F) == (('', 15.0, 'a'), ('', 6.0)) + assert _natsort_key(10, key=None, alg=ns.F) == ('', 10) + + # Turn on as_path to split a file path into components + assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('../Folder (10)/file (2).tar.gz', key=None, alg=ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + + # It gracefully handles as_path for numeric input by putting an extra tuple around it + # so it will sort against the other as_path results. + assert _natsort_key(10, key=None, alg=ns.PATH) == (('', 10),) + # as_path also handles recursion well. + assert _natsort_key(('/Folder', '/Folder (1)'), key=None, alg=ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) + + # Turn on py3_safe to put a '' between adjacent numbers + assert _natsort_key('43h7+3', key=None, alg=ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) + + # Invalid arguments give the correct response + with raises(ValueError) as err: + _natsort_key('a', key=None, alg='1') + assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" + + # Changing the sort order of strings + assert _natsort_key('Apple56', key=None, alg=ns.F) == ('Apple', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.IGNORECASE) == ('apple', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.LOWERCASEFIRST) == ('aPPLE', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.GROUPLETTERS) == ('aAppppllee', 56.0) + assert _natsort_key('Apple56', key=None, alg=ns.G | ns.LF) == ('aapPpPlLeE', 56.0) + + # Locale aware sorting + locale.setlocale(locale.LC_NUMERIC, 
str('en_US.UTF-8')) + if use_pyicu: + from natsort.locale_help import get_pyicu_transform + from locale import getlocale + strxfrm = get_pyicu_transform(getlocale()) + else: + from natsort.locale_help import strxfrm + assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0) + + locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) + if use_pyicu: + strxfrm = get_pyicu_transform(getlocale()) + assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) + locale.setlocale(locale.LC_NUMERIC, str('')) |