1 files changed, 9 insertions, 387 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 82b84df..d3d6f8a 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -15,400 +15,20 @@ See the README or the natsort homepage for more details.
 from __future__ import (print_function, division,
                         unicode_literals, absolute_import)
 
-import re
-from os import curdir, pardir
-from os.path import split, splitext
+# Standard library imports.
 from operator import itemgetter
 from functools import partial
-from itertools import islice
 from warnings import warn
-from locale import localeconv
 
-# If the user has fastnumbers installed, they will get great speed
-# benefits.  If not, we simulate the functions here.
-try:
-    from fastnumbers import fast_float, fast_int, isreal
-except ImportError:
-    from .fake_fastnumbers import fast_float, fast_int, isreal
-from .locale_help import locale_convert, grouper, lowercase, swapcase
-from .py23compat import u_format, py23_str, py23_zip
+# Local imports.
+from natsort.utils import _natsort_key, _args_to_enum
+from natsort.ns_enum import ns
+from natsort.py23compat import u_format
 
 # Make sure the doctest works for either python2 or python3
 __doc__ = u_format(__doc__)
 
 
-class ns(object):
-    """
-    Enum to control the `natsort` algorithm.
-
-    This class acts like an enum to control the `natsort` algorithm. The
-    user may select several options simultaneously by or'ing the options
-    together.  For example, to choose ``ns.INT``, `ns.PATH``, and
-    ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``.
-
-    Each option has a shortened 1- or 2-letter form.
-
-    .. warning:: On some systems, the underlying C library that
-                 Python's locale module uses is broken. On these
-                 systems it is recommended that you install
-                 `PyICU <https://pypi.python.org/pypi/PyICU>`_
-                 if you wish to use `LOCALE`.
-                 Please validate that `LOCALE` works as
-                 expected on your target system, and if not you
-                 should add
-                 `PyICU <https://pypi.python.org/pypi/PyICU>`_
-                 as a dependency.
-
-    Attributes
-    ----------
-    FLOAT, F
-        The default - parse numbers as floats.
-    INT, I
-        Tell `natsort` to parse numbers as ints.
-    UNSIGNED, U
-        Tell `natsort` to ignore any sign (i.e. "-" or "+") to the
-        immediate left of a number.  It is the same as setting the old
-        `signed` option to `False`.
-    VERSION, V
-        This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful
-        when attempting to sort version numbers.  It is the same as
-        setting the old `number_type` option to `None`.
-    DIGIT, D
-        Same as `VERSION` above.
-    NOEXP, N
-        Tell `natsort` to not search for exponents as part of the number.
-        For example, with `NOEXP` the number "5.6E5" would be interpreted
-        as `5.6`, `"E"`, and `5`.  It is the same as setting the old `exp`
-        option to `False`.
-    PATH, P
-        Tell `natsort` to interpret strings as filesystem paths, so they
-        will be split according to the filesystem separator
-        (i.e. ‘/’ on UNIX, ‘\’ on Windows), as well as splitting on the
-        file extension, if any. Without this, lists of file paths like
-        ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted
-        properly; 'Folder/' will be placed at the end, not at the front.
-        It is the same as setting the old `as_path` option to `True`.
-    LOCALE, L
-        Tell `natsort` to be locale-aware when sorting strings (everything
-        that was not converted to a number).  Your sorting results will vary
-        depending on your current locale. Generally, the `GROUPLETTERS`
-        option is needed with `LOCALE` because the `locale` library
-        groups the letters in the same manner (although you may still
-        need `GROUPLETTERS` if there are numbers in your strings).
-    IGNORECASE, IC
-        Tell `natsort` to ignore case when sorting.  For example,
-        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
-        ``['apple', 'Apple', 'Banana', 'banana']``.
-    LOWERCASEFIRST, LF
-        Tell `natsort` to put lowercase letters before uppercase letters
-        when sorting.  For example,
-        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
-        ``['apple', 'banana', 'Apple', 'Banana']`` (the default order
-        would be ``['Apple', 'Banana', 'apple', 'banana']`` which is
-        the order from a purely ordinal sort).
-        Useless when used with `IGNORECASE`.
-    GROUPLETTERS, G
-        Tell `natsort` to group lowercase and uppercase letters together
-        when sorting.  For example,
-        ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
-        ``['Apple', 'apple', 'Banana', 'banana']``.
-        Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST`
-        to reverse the order of upper and lower case.
-    TYPESAFE, T
-        Try hard to avoid "unorderable types" error on Python 3. It
-        is the same as setting the old `py3_safe` option to `True`.
-
-    Notes
-    -----
-    If using `LOCALE`, you may find that if you do not explicitly set
-    the locale your results may not be as you expect... I have found that
-    it depends on the system you are on. To do this is straightforward
-    (in the below example I use 'en_US.UTF-8', but you should use your
-    locale)::
-
-        >>> import locale
-        >>> # The 'str' call is only to get around a bug on Python 2.x
-        >>> # where 'setlocale' does not expect unicode strings (ironic,
-        >>> # right?)
-        >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
-        'en_US.UTF-8'
-
-    It is preferred that you do this before importing `natsort`.
-    If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
-    above) then you should not need to do this.
-
-    """
-    pass
-
-
-# Sort algorithm "enum" values.
-_nsdict = {'FLOAT': 0,           'F': 0,
-           'INT': 1,             'I': 1,
-           'UNSIGNED': 2,        'U': 2,
-           'VERSION': 3,         'V': 3,  # Shortcut for INT | UNSIGNED
-           'DIGIT': 3,           'D': 3,  # Shortcut for INT | UNSIGNED
-           'NOEXP': 4,           'N': 4,
-           'PATH': 8,            'P': 8,
-           'LOCALE': 16,         'L': 16,
-           'IGNORECASE': 32,     'IC': 32,
-           'LOWERCASEFIRST': 64, 'LF': 64,
-           'GROUPLETTERS': 128,  'G': 128,
-           'TYPESAFE': 1024,     'T': 1024,
-           }
-# Populate the ns class with the _nsdict values.
-for x, y in _nsdict.items():
-    setattr(ns, x, y)
-
-# Group algorithm types for easy extraction
-_NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP
-_CASE_ALGORITHMS = ns.IGNORECASE | ns.LOWERCASEFIRST | ns.GROUPLETTERS
-_ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L |
-                 ns.IC | ns.LF | ns.G | ns.TYPESAFE)
-
-# The regex that locates floats
-_float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U)
-_float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U)
-_float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U)
-_float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U)
-_float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U)
-
-# Integer regexes
-_int_nosign_re = re.compile(r'(\d+)', re.U)
-_int_sign_re = re.compile(r'([-+]?\d+)', re.U)
-
-# This dict will help select the correct regex and number conversion function.
-_regex_and_num_function_chooser = {
-    (ns.F, '.'):               (_float_sign_exp_re,     fast_float),
-    (ns.F | ns.N, '.'):        (_float_sign_noexp_re,   fast_float),
-    (ns.F | ns.U, '.'):        (_float_nosign_exp_re,   fast_float),
-    (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float),
-    (ns.I, '.'):               (_int_sign_re,   fast_int),
-    (ns.I | ns.N, '.'):        (_int_sign_re,   fast_int),
-    (ns.I | ns.U, '.'):        (_int_nosign_re, fast_int),
-    (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int),
-    (ns.F, ','):               (_float_sign_exp_re_c,     fast_float),
-    (ns.F | ns.N, ','):        (_float_sign_noexp_re_c,   fast_float),
-    (ns.F | ns.U, ','):        (_float_nosign_exp_re_c,   fast_float),
-    (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float),
-    (ns.I, ','):               (_int_sign_re,   fast_int),
-    (ns.I | ns.N, ','):        (_int_sign_re,   fast_int),
-    (ns.I | ns.U, ','):        (_int_nosign_re, fast_int),
-    (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int),
-}
-
-
-def _args_to_enum(number_type, signed, exp, as_path, py3_safe):
-    """A function to convert input booleans to an enum-type argument."""
-    alg = 0
-    if number_type is not float:
-        msg = "The 'number_type' argument is depreciated as of 3.5.0, "
-        msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'"
-        warn(msg, DeprecationWarning)
-        alg |= (_nsdict['INT'] * bool(number_type in (int, None)))
-        alg |= (_nsdict['UNSIGNED'] * (number_type is None))
-    if signed is not None:
-        msg = "The 'signed' argument is depreciated as of 3.5.0, "
-        msg += "please use 'alg=ns.UNSIGNED'."
-        warn(msg, DeprecationWarning)
-        alg |= (_nsdict['UNSIGNED'] * (not signed))
-    if exp is not None:
-        msg = "The 'exp' argument is depreciated as of 3.5.0, "
-        msg += "please use 'alg=ns.NOEXP'."
-        warn(msg, DeprecationWarning)
-        alg |= (_nsdict['NOEXP'] * (not exp))
-    if as_path is not None:
-        msg = "The 'as_path' argument is depreciated as of 3.5.0, "
-        msg += "please use 'alg=ns.PATH'."
-        warn(msg, DeprecationWarning)
-        alg |= (_nsdict['PATH'] * as_path)
-    if py3_safe is not None:
-        msg = "The 'py3_safe' argument is depreciated as of 3.5.0, "
-        msg += "please use 'alg=ns.TYPESAFE'."
-        warn(msg, DeprecationWarning)
-        alg |= (_nsdict['TYPESAFE'] * py3_safe)
-    return alg
-
-
-def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters):
-    """Helper to parse the string input into numbers and strings."""
-
-    # Split the input string by numbers.
-    # If the input is not a string, TypeError is raised.
-    s = regex.split(s)
-
-    # Now convert the numbers to numbers, and leave strings as strings.
-    # Take into account locale if needed, and group letters if needed.
-    # Remove empty strings from the list.
-    if use_locale:
-        s = [locale_convert(x, numconv, group_letters) for x in s if x]
-    elif group_letters:
-        s = [grouper(x, numconv) for x in s if x]
-    else:
-        s = [numconv(x) for x in s if x]
-
-    # If the list begins with a number, lead with an empty string.
-    # This is used to get around the "unorderable types" issue.
-    if not s:  # Return empty tuple for empty results.
-        return ()
-    elif isreal(s[0]):
-        s = [''] + s
-
-    # The _py3_safe function inserts "" between numbers in the list,
-    # and is used to get around "unorderable types" in complex cases.
-    # It is a separate function that needs to be requested specifically
-    # because it is expensive to call.
-    return _py3_safe(s) if py3_safe else s
-
-
-def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
-    """Split a string into its path components. Assumes a string is a path."""
-    path_parts = []
-    p_append = path_parts.append
-    path_location = s
-
-    # Continue splitting the path from the back until we have reached
-    # '..' or '.', or until there is nothing left to split.
-    while path_location != curdir and path_location != pardir:
-        parent_path = path_location
-        path_location, child_path = split(parent_path)
-        if path_location == parent_path:
-            break
-        p_append(child_path)
-
-    # This last append is the base path.
-    # Only append if the string is non-empty.
-    if path_location:
-        p_append(path_location)
-
-    # We created this list in reversed order, so we now correct the order.
-    path_parts.reverse()
-
-    # Now, split off the file extensions using a similar method to above.
-    # Continue splitting off file extensions until we reach a decimal number
-    # or there are no more extensions.
-    base = path_parts.pop()
-    base_parts = []
-    b_append = base_parts.append
-    while True:
-        front = base
-        base, ext = splitext(front)
-        if _d_match(ext) or not ext:
-            # Reset base to before the split if the split is invalid.
-            base = front
-            break
-        b_append(ext)
-    b_append(base)
-    base_parts.reverse()
-
-    # Return the split parent paths and then the split basename.
-    return path_parts + base_parts
-
-
-def _py3_safe(parsed_list):
-    """Insert '' between two numbers."""
-    length = len(parsed_list)
-    if length < 2:
-        return parsed_list
-    else:
-        new_list = [parsed_list[0]]
-        nl_append = new_list.append
-        for before, after in py23_zip(islice(parsed_list, 0, length-1),
-                                      islice(parsed_list, 1, None)):
-            if isreal(before) and isreal(after):
-                nl_append("")
-            nl_append(after)
-        return new_list
-
-
-def _natsort_key(val, key, alg):
-    """\
-    Key to sort strings and numbers naturally.
-
-    It works by separating out the numbers from the strings. This function for
-    internal use only. See the natsort_keygen documentation for details of each
-    parameter.
-
-    Parameters
-    ----------
-    val : {str, unicode}
-    key : callable
-    alg : ns enum
-
-    Returns
-    -------
-    out : tuple
-        The modified value with numbers extracted.
-
-    """
-
-    # Convert the arguments to the proper input tuple
-    try:
-        use_locale = alg & _nsdict['LOCALE']
-        inp_options = (alg & _NUMBER_ALGORITHMS,
-                       localeconv()['decimal_point'] if use_locale else '.')
-    except TypeError:
-        msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
-        raise ValueError(msg+', got {0}'.format(py23_str(alg)))
-
-    # Get the proper regex and conversion function.
-    try:
-        regex, num_function = _regex_and_num_function_chooser[inp_options]
-    except KeyError:  # pragma: no cover
-        if inp_options[1] not in ('.', ','):  # pragma: no cover
-            raise ValueError("_natsort_key: currently natsort only supports "
-                             "the decimal separators '.' and ','. "
-                             "Please file a bug report.")
-        else:
-            raise
-    else:
-        # Apply key if needed.
-        if key is not None:
-            val = key(val)
-
-        # If this is a path, convert it.
-        # An AttrubuteError is raised if not a string.
-        split_as_path = False
-        if alg & _nsdict['PATH']:
-            try:
-                val = _path_splitter(val)
-            except AttributeError:
-                pass
-            else:
-                # Record that this string was split as a path so that
-                # we don't set PATH in the recursive call.
-                split_as_path = True
-
-        # Assume the input are strings, which is the most common case.
-        # Apply the string modification if needed.
-        try:
-            if alg & _nsdict['LOWERCASEFIRST']:
-                val = swapcase(val)
-            if alg & _nsdict['IGNORECASE']:
-                val = lowercase(val)
-            return tuple(_input_parser(val,
-                                       regex,
-                                       num_function,
-                                       alg & _nsdict['TYPESAFE'],
-                                       use_locale,
-                                       alg & _nsdict['GROUPLETTERS']))
-        except TypeError:
-            # If not strings, assume it is an iterable that must
-            # be parsed recursively. Do not apply the key recursively.
-            # If this string was split as a path, turn off 'PATH'.
-            try:
-                was_path = alg & _nsdict['PATH']
-                newalg = alg & _ALL_BUT_PATH
-                newalg |= (was_path * (not split_as_path))
-                return tuple([_natsort_key(x, None, newalg) for x in val])
-            # If there is still an error, it must be a number.
-            # Return as-is, with a leading empty string.
-            except TypeError:
-                return (('', val,),) if alg & _nsdict['PATH'] else ('', val,)
-
-
 @u_format
 def natsort_key(val, key=None, number_type=float, signed=None, exp=None,
                 as_path=None, py3_safe=None, alg=0):
@@ -689,7 +309,8 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
     See Also
     --------
     natsort_keygen : Generates the key that makes natural sorting possible.
-    versorted : A wrapper for ``natsorted(seq, number_type=None)``.
+    versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``.
+    humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``.
     index_natsorted : Returns the sorted indexes from `natsorted`.
 
     Examples
@@ -711,7 +332,7 @@ def natsorted(seq, key=None, number_type=float, signed=None, exp=None,
         if 'unorderable types' in str(e):
             return sorted(seq, reverse=reverse,
                           key=natsort_keygen(key,
-                                             alg=alg | _nsdict['TYPESAFE']))
+                                             alg=alg | ns.TYPESAFE))
         else:
             # Re-raise if the problem was not "unorderable types"
             raise
@@ -1128,6 +749,7 @@ def order_by_index(seq, index, iter=False):
     --------
     index_natsorted
     index_versorted
+    index_humansorted
 
     Examples
     --------