summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Seth M Morton <seth.m.morton@gmail.com> 2018-08-10 12:24:31 -0400
committer Seth M Morton <seth.m.morton@gmail.com> 2018-08-10 15:03:25 -0400
commit 9fe35c85624ebed1f1ad44694b618614576646dc (patch)
tree e47dd074eca61143e3e5404653f915e9ee59b4d4
parent c556296ff88902354da9f73a3555b5be3facaba8 (diff)
download natsort-9fe35c85624ebed1f1ad44694b618614576646dc.tar.gz
Improve docstrings for development.
Many of the utility functions had terse docstrings. All functions now have full docstrings that should help developers who want to contribute better understand what is going on. Some function names or arguments have been changed if it improved self-documentation.
-rw-r--r--natsort/__main__.py116
-rw-r--r--natsort/compat/fake_fastnumbers.py43
-rw-r--r--natsort/compat/fastnumbers.py4
-rw-r--r--natsort/compat/locale.py4
-rw-r--r--natsort/compat/py23.py6
-rw-r--r--natsort/natsort.py121
-rw-r--r--natsort/ns_enum.py5
-rw-r--r--natsort/unicode_numbers.py2
-rw-r--r--natsort/unicode_numeric_hex.py5
-rw-r--r--natsort/utils.py329
-rw-r--r--test_natsort/test_main.py38
11 files changed, 504 insertions, 169 deletions
diff --git a/natsort/__main__.py b/natsort/__main__.py
index 5cdc0b2..6224da0 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -11,10 +11,10 @@ from natsort.compat.py23 import py23_str
def main():
- """\
+ """
Performs a natural sort on entries given on the command-line.
- A natural sort sorts numerically then alphabetically, and will sort
- by numbers in the middle of an entry.
+
+ Arguments are read from sys.argv.
"""
from argparse import ArgumentParser, RawDescriptionHelpFormatter
@@ -134,8 +134,8 @@ def main():
args = parser.parse_args()
# Make sure the filter range is given properly. Does nothing if no filter
- args.filter = check_filter(args.filter)
- args.reverse_filter = check_filter(args.reverse_filter)
+ args.filter = check_filters(args.filter)
+ args.reverse_filter = check_filters(args.reverse_filter)
# Remove trailing whitespace from all the entries
entries = [e.strip() for e in args.entries]
@@ -145,10 +145,23 @@ def main():
def range_check(low, high):
- """\
- Verifies that that given range has a low lower than the high.
- If the condition is not met, a ValueError is raised.
- Otherwise the input is returned as-is.
+ """
+ Verify that the given range has a low lower than the high.
+
+ Parameters
+ ----------
+ low : {float, int}
+ high : {float, int}
+
+ Returns
+ -------
+ tuple : low, high
+
+ Raises
+ ------
+ ValueError
+ Low is greater than or equal to high.
+
"""
if low >= high:
raise ValueError("low >= high")
@@ -156,29 +169,58 @@ def range_check(low, high):
return low, high
-def check_filter(filt):
- """\
- Check that the low value of the filter is lower than the high.
- If there is to be no filter, return 'None'.
- If the condition is not met, a ValueError is raised.
- Otherwise, the values are returned as-is.
+def check_filters(filters):
+ """
+ Execute range_check for every element of an iterable.
+
+ Parameters
+ ----------
+ filters : iterable
+ The collection of filters to check. Each element
+ must be a two-element tuple of floats or ints.
+
+ Returns
+ -------
+ The input as-is, or None if it evaluates to False.
+
+ Raises
+ ------
+ ValueError
+ Low is greater than or equal to high for any element.
+
"""
- # Quick return if no filter.
- if not filt:
+ if not filters:
return None
try:
- return [range_check(f[0], f[1]) for f in filt]
+ return [range_check(f[0], f[1]) for f in filters]
except ValueError as err:
raise ValueError("Error in --filter: " + py23_str(err))
def keep_entry_range(entry, lows, highs, converter, regex):
- """\
- Boolean function to determine if an entry should be kept out
- based on if any numbers are in a given range.
+ """
+ Check if an entry falls into a desired range.
+
+ Every number in the entry will be extracted using *regex*,
+ if any are within a given low to high range the entry will
+ be kept.
+
+ Parameters
+ ----------
+ entry : str
+ lows : iterable
+ Collection of low values against which to compare the entry.
+ highs : iterable
+ Collection of high values against which to compare the entry.
+ converter : callable
+ Function to convert a string to a number.
+ regex : regex object
+ Regular expression to locate numbers in a string.
+
+ Returns
+ -------
+ True if the entry should be kept, False otherwise.
- Returns True if it should be kept (i.e. falls in the range),
- and False if it is not in the range and should not be kept.
"""
return any(
low <= converter(num) <= high
@@ -187,13 +229,27 @@ def keep_entry_range(entry, lows, highs, converter, regex):
)
-def exclude_entry(entry, values, converter, regex):
- """\
- Boolean function to determine if an entry should be kept out
- based on if it contains a specific number.
+def keep_entry_value(entry, values, converter, regex):
+ """
+ Check if an entry does not match a given value.
+
+ Every number in the entry will be extracted using *regex*,
+ if any match a given value the entry will not be kept.
+
+ Parameters
+ ----------
+ entry : str
+ values : iterable
+ Collection of values against which to compare the entry.
+ converter : callable
+ Function to convert a string to a number.
+ regex : regex object
+ Regular expression to locate numbers in a string.
+
+ Returns
+ -------
+ True if the entry should be kept, False otherwise.
- Returns True if it should be kept (i.e. does not match),
- and False if it matches and should not be kept.
"""
return not any(converter(num) in values for num in regex.findall(entry))
@@ -245,7 +301,7 @@ def sort_and_print_entries(entries, args):
entries = [
entry
for entry in entries
- if exclude_entry(entry, exclude, float, regex)
+ if keep_entry_value(entry, exclude, float, regex)
]
# Print off the sorted results
diff --git a/natsort/compat/fake_fastnumbers.py b/natsort/compat/fake_fastnumbers.py
index 68076a4..f1f8648 100644
--- a/natsort/compat/fake_fastnumbers.py
+++ b/natsort/compat/fake_fastnumbers.py
@@ -1,8 +1,7 @@
# -*- coding: utf-8 -*-
-"""\
+"""
This module is intended to replicate some of the functionality
-from the fastnumbers module in the event that module is not
-installed.
+from the fastnumbers module in the event that module is not installed.
"""
from __future__ import print_function, division, unicode_literals, absolute_import
@@ -38,6 +37,7 @@ NAN_INF = frozenset(NAN_INF)
ASCII_NUMS = "0123456789+-"
+# noinspection PyIncorrectDocstring
def fast_float(
x,
key=lambda x: x,
@@ -46,10 +46,25 @@ def fast_float(
nan_inf=NAN_INF,
_first_char=frozenset(decimal_chars + list(ASCII_NUMS + ".")),
):
- """\
+ """
Convert a string to a float quickly, return input as-is if not possible.
+
We don't need to accept all input that the real fast_int accepts because
- the input will be controlled by the splitting algorithm.
+ natsort is controlling what is passed to this function.
+
+ Parameters
+ ----------
+ x : str
+ String to attempt to convert to a float.
+ key : callable
+ Single-argument function to apply to *x* if conversion fails.
+ nan : object
+ Value to return instead of NaN if NaN would be returned.
+
+ Returns
+ -------
+ *str* or *float*
+
"""
if x[0] in _first_char or x.lstrip()[:3] in nan_inf:
try:
@@ -67,6 +82,7 @@ def fast_float(
return key(x)
+# noinspection PyIncorrectDocstring
def fast_int(
x,
key=lambda x: x,
@@ -74,10 +90,23 @@ def fast_int(
uni=unicodedata.digit,
_first_char=frozenset(decimal_chars + list(ASCII_NUMS)),
):
- """\
+ """
Convert a string to a int quickly, return input as-is if not possible.
+
We don't need to accept all input that the real fast_int accepts because
- the input will be controlled by the splitting algorithm.
+ natsort is controlling what is passed to this function.
+
+ Parameters
+ ----------
+ x : str
+ String to attempt to convert to an int.
+ key : callable
+ Single-argument function to apply to *x* if conversion fails.
+
+ Returns
+ -------
+ *str* or *int*
+
"""
del nan # explicitly indicate we are not using the nan argument
if x[0] in _first_char:
diff --git a/natsort/compat/fastnumbers.py b/natsort/compat/fastnumbers.py
index 3c15acc..3197527 100644
--- a/natsort/compat/fastnumbers.py
+++ b/natsort/compat/fastnumbers.py
@@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
+"""
+Interface for natsort to access fastnumbers functions without
+having to worry if it is actually installed.
+"""
from __future__ import print_function, division, unicode_literals, absolute_import
from distutils.version import StrictVersion
diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py
index a31a42c..45f5bc4 100644
--- a/natsort/compat/locale.py
+++ b/natsort/compat/locale.py
@@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
+"""
+Interface for natsort to access locale functionality without
+having to worry about if it is using PyICU or the built-in locale.
+"""
from __future__ import print_function, division, unicode_literals, absolute_import
# Std. lib imports.
diff --git a/natsort/compat/py23.py b/natsort/compat/py23.py
index 7d1bad2..bc70d65 100644
--- a/natsort/compat/py23.py
+++ b/natsort/compat/py23.py
@@ -1,4 +1,10 @@
# -*- coding: utf-8 -*-
+"""
+Compatibility layer for Python 2 and Python 3.
+
+Probably could have used six...
+This file will light up most linters...
+"""
from __future__ import print_function, division, unicode_literals, absolute_import
import functools
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 66c663e..d51630c 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -1,14 +1,9 @@
# -*- coding: utf-8 -*-
"""
-Natsort can sort strings with numbers in a natural order.
-It provides the natsorted function to sort strings with
-arbitrary numbers.
+Along with ns_enum.py, this module contains all of the
+natsort public API.
-You can mix types with natsorted. This can get around the new
-'unorderable types' issue with Python 3. Natsort will recursively
-descend into lists of lists so you can sort by the sublist contents.
-
-See the README or the natsort homepage for more details.
+The majority of the "work" is defined in utils.py.
"""
from __future__ import print_function, division, unicode_literals, absolute_import
@@ -37,9 +32,6 @@ from natsort.utils import (
_final_data_transform_factory,
)
-# Make sure the doctest works for either python2 or python3
-__doc__ = u_format(__doc__)
-
@u_format
def decoder(encoding):
@@ -48,12 +40,12 @@ def decoder(encoding):
Parameters
----------
- encoding: str
+ encoding : str
The codec to use for decoding. This must be a valid unicode codec.
Returns
-------
- decode_function:
+ decode_function
A function that takes a single argument and attempts to decode
it using the supplied codec. Any `UnicodeErrors` are raised.
If the argument was not of `bytes` type, it is simply returned
@@ -90,12 +82,11 @@ def as_ascii(s):
Parameters
----------
- s:
- Any object.
+ s : object
Returns
-------
- output:
+ output
If the input was of type `bytes`, the return value is a `str` decoded
with the ASCII codec. Otherwise, the return value is identically the
input.
@@ -115,12 +106,11 @@ def as_utf8(s):
Parameters
----------
- s:
- Any object.
+ s : object
Returns
-------
- output:
+ output
If the input was of type `bytes`, the return value is a `str` decoded
with the UTF-8 codec. Otherwise, the return value is identically the
input.
@@ -142,12 +132,11 @@ def natsort_key(val, key=None, alg=0, **_kwargs):
@u_format
def natsort_keygen(key=None, alg=0, **_kwargs):
- """\
+ """
Generate a key to sort strings and numbers naturally.
- Generate a key to sort strings and numbers naturally,
- not lexicographically. This key is designed for use as the
- `key` argument to functions such as the `sorted` builtin.
+ This key is designed for use as the `key` argument to
+ functions such as the `sorted` builtin.
The user may customize the generated function with the
arguments to `natsort_keygen`, including an optional
@@ -239,17 +228,13 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
@u_format
def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
- """\
+ """
Sorts an iterable naturally.
- Sorts an iterable naturally (alphabetically and numerically),
- not lexicographically. Returns a list containing a sorted copy
- of the iterable.
-
Parameters
----------
seq : iterable
- The iterable to sort.
+ The input to sort.
key : callable, optional
A key used to determine how to sort each element of the iterable.
@@ -268,7 +253,7 @@ def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
Returns
-------
out: list
- The sorted sequence.
+ The sorted input.
See Also
--------
@@ -286,13 +271,13 @@ def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
[{u}'num2', {u}'num3', {u}'num5']
"""
- natsort_key = natsort_keygen(key, alg, **_kwargs)
- return sorted(seq, reverse=reverse, key=natsort_key)
+ key = natsort_keygen(key, alg, **_kwargs)
+ return sorted(seq, reverse=reverse, key=key)
@u_format
def versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
- """\
+ """
Identical to :func:`natsorted`.
This function exists for backwards compatibility with `natsort`
@@ -308,17 +293,15 @@ def versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
@u_format
def humansorted(seq, key=None, reverse=False, alg=0):
- """\
+ """
Convenience function to properly sort non-numeric characters.
- Convenience function to properly sort non-numeric characters
- in a locale-aware fashion (a.k.a "human sorting"). This is a
- wrapper around ``natsorted(seq, alg=ns.LOCALE)``.
+ This is a wrapper around ``natsorted(seq, alg=ns.LOCALE)``.
Parameters
----------
seq : iterable
- The sequence to sort.
+ The input to sort.
key : callable, optional
A key used to determine how to sort each element of the sequence.
@@ -337,7 +320,7 @@ def humansorted(seq, key=None, reverse=False, alg=0):
Returns
-------
out : list
- The sorted sequence.
+ The sorted input.
See Also
--------
@@ -363,11 +346,10 @@ def humansorted(seq, key=None, reverse=False, alg=0):
@u_format
def realsorted(seq, key=None, reverse=False, alg=0):
- """\
+ """
Convenience function to properly sort signed floats.
- Convenience function to properly sort signed floats within
- strings (i.e. "a-5.7"). This is a wrapper around
+ A signed float in a string could be "a-5.7". This is a wrapper around
``natsorted(seq, alg=ns.REAL)``.
The behavior of :func:`realsorted` for `natsort` version >= 4.0.0
@@ -377,7 +359,7 @@ def realsorted(seq, key=None, reverse=False, alg=0):
Parameters
----------
seq : iterable
- The sequence to sort.
+ The input to sort.
key : callable, optional
A key used to determine how to sort each element of the sequence.
@@ -396,7 +378,7 @@ def realsorted(seq, key=None, reverse=False, alg=0):
Returns
-------
out : list
- The sorted sequence.
+ The sorted input.
See Also
--------
@@ -418,18 +400,18 @@ def realsorted(seq, key=None, reverse=False, alg=0):
@u_format
def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
- """\
- Return the list of the indexes used to sort the input sequence.
+ """
+ Determine the list of the indexes used to sort the input sequence.
Sorts a sequence naturally, but returns a list of sorted the
- indexes and not the sorted list. This list of indexes can be
- used to sort multiple lists by the sorted order of the given
- sequence.
+ indexes and not the sorted list itself. This list of indexes
+ can be used to sort multiple lists by the sorted order of the
+ given sequence.
Parameters
----------
seq : iterable
- The sequence to sort.
+ The input to sort.
key : callable, optional
A key used to determine how to sort each element of the sequence.
@@ -448,7 +430,7 @@ def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
Returns
-------
out : tuple
- The ordered indexes of the sequence.
+ The ordered indexes of the input.
See Also
--------
@@ -488,7 +470,7 @@ def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
@u_format
def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
- """\
+ """
Identical to :func:`index_natsorted`.
This function exists for backwards compatibility with
@@ -507,21 +489,13 @@ def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
@u_format
def index_humansorted(seq, key=None, reverse=False, alg=0):
- """\
- Return the list of the indexes used to sort the input sequence
- in a locale-aware manner.
-
- Sorts a sequence in a locale-aware manner, but returns a list
- of sorted the indexes and not the sorted list. This list of
- indexes can be used to sort multiple lists by the sorted order
- of the given sequence.
-
+ """
This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``.
Parameters
----------
seq: iterable
- The sequence to sort.
+ The input to sort.
key: callable, optional
A key used to determine how to sort each element of the sequence.
@@ -540,7 +514,7 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
Returns
-------
out : tuple
- The ordered indexes of the sequence.
+ The ordered indexes of the input.
See Also
--------
@@ -565,25 +539,13 @@ def index_humansorted(seq, key=None, reverse=False, alg=0):
@u_format
def index_realsorted(seq, key=None, reverse=False, alg=0):
- """\
- Return the list of the indexes used to sort the input sequence
- in a locale-aware manner.
-
- Sorts a sequence in a locale-aware manner, but returns a list
- of sorted the indexes and not the sorted list. This list of
- indexes can be used to sort multiple lists by the sorted order
- of the given sequence.
-
+ """
This is a wrapper around ``index_natsorted(seq, alg=ns.REAL)``.
- The behavior of :func:`index_realsorted` in `natsort` version >= 4.0.0
- was the default behavior of :func:`index_natsorted` for `natsort`
- version < 4.0.0.
-
Parameters
----------
seq: iterable
- The sequence to sort.
+ The input to sort.
key: callable, optional
A key used to determine how to sort each element of the sequence.
@@ -602,7 +564,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0):
Returns
-------
out : tuple
- The ordered indexes of the sequence.
+ The ordered indexes of the input.
See Also
--------
@@ -623,7 +585,7 @@ def index_realsorted(seq, key=None, reverse=False, alg=0):
@u_format
def order_by_index(seq, index, iter=False):
- """\
+ """
Order a given sequence by an index sequence.
The output of `index_natsorted` is a
@@ -683,6 +645,7 @@ def order_by_index(seq, index, iter=False):
if float(sys.version[:3]) < 3:
# pylint: disable=unused-variable
+ # noinspection PyUnresolvedReferences,PyPep8Naming
class natcmp(object):
"""
Compare two objects using a key and an algorithm.
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index 54ca133..d601219 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
-"""This module defines the "ns" enum for natsort."""
+"""
+ This module defines the "ns" enum for natsort, which is used to determine
+ what algorithm natsort uses.
+"""
from __future__ import print_function, division, unicode_literals, absolute_import
diff --git a/natsort/unicode_numbers.py b/natsort/unicode_numbers.py
index 86172df..a2de65e 100644
--- a/natsort/unicode_numbers.py
+++ b/natsort/unicode_numbers.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Contains all possible non-ASCII unicode numbers.
+Pre-determine the collection of unicode decimals, digits, and numerals.
"""
from __future__ import print_function, division, unicode_literals, absolute_import
diff --git a/natsort/unicode_numeric_hex.py b/natsort/unicode_numeric_hex.py
index 0a74144..56c69d6 100644
--- a/natsort/unicode_numeric_hex.py
+++ b/natsort/unicode_numeric_hex.py
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
"""
-Rather than determine what unicode characters are numeric on the fly which
-would incur a startup runtime penalty, the hex values are hard-coded below.
+Contains all possible non-ASCII unicode numbers.
"""
+# Rather than determine what unicode characters are numeric on the fly which
+# would incur a startup runtime penalty, the hex values are hard-coded below.
numeric_hex = (
0XB2,
0XB3,
diff --git a/natsort/utils.py b/natsort/utils.py
index 0da401a..176d75c 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -27,8 +27,8 @@ have signatures similar to the following
>>> def factory(parameter):
... val = 'yes' if parameter else 'no'
- ... def closure(x, val=val):
- ... return '{} {}'.format(val, x)
+ ... def closure(x, _val=val):
+ ... return '{} {}'.format(_val, x)
... return closure
...
@@ -56,7 +56,14 @@ from natsort.ns_enum import ns
from natsort.unicode_numbers import numeric_no_decimals, digits_no_decimals
from natsort.compat.pathlib import PurePath, has_pathlib
from natsort.compat.locale import get_strxfrm, get_thousands_sep, get_decimal_point
-from natsort.compat.py23 import py23_str, py23_map, py23_filter, PY_VERSION, NEWPY
+from natsort.compat.py23 import (
+ u_format,
+ py23_str,
+ py23_map,
+ py23_filter,
+ PY_VERSION,
+ NEWPY,
+)
from natsort.compat.fastnumbers import fast_float, fast_int
if PY_VERSION >= 3:
@@ -99,12 +106,26 @@ _regex_chooser = {
def _no_op(x):
- """A function that does nothing."""
+ """A function that does nothing and returns the input as-is."""
return x
def _normalize_input_factory(alg):
- """Create a function that will normalize unicode input data."""
+ """
+ Create a function that will normalize unicode input data.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Used to indicate how to normalize unicode.
+
+ Returns
+ -------
+ func : callable
+ A function that accepts string (unicode) input and returns
+ the input normalized with the desired normalization scheme.
+
+ """
normalization_form = "NFKD" if alg & ns.COMPATIBILITYNORMALIZE else "NFD"
if NEWPY:
@@ -122,25 +143,45 @@ def _normalize_input_factory(alg):
def _natsort_key(val, key, string_func, bytes_func, num_func):
- """\
+ """
Key to sort strings and numbers naturally.
- It works by separating out the numbers from the strings. This function for
- internal use only. See the natsort_keygen documentation for details of each
- parameter.
+ It works by splitting the string into components of strings and numbers,
+ and then converting the numbers into actual ints or floats.
Parameters
----------
val : str | unicode
key : callable | None
+ A key to apply to the *val* before any other operations are performed.
string_func : callable
+ If *val* (or the output of *key* if given) is of type *str*, this
+ function will be applied to it. The function must return
+ a tuple.
bytes_func : callable
+ If *val* (or the output of *key* if given) is of type *bytes*, this
+ function will be applied to it. The function must return
+ a tuple.
num_func : callable
+ If *val* (or the output of *key* if given) is not of type *bytes*,
+ *str*, nor is iterable, this function will be applied to it.
+ The function must return a tuple.
Returns
-------
out : tuple
- The modified value with numbers extracted.
+ The string split into its string and numeric components.
+ It *always* starts with a string, and then alternates
+ between numbers and strings (unless it was applied
+ recursively, in which case it will return tuples of tuples,
+ but the lowest-level tuples will then *always* start with
+ a string etc.).
+
+ See Also
+ --------
+ _parse_string_factory
+ _parse_bytes_factory
+ _parse_number_factory
"""
@@ -170,7 +211,26 @@ def _natsort_key(val, key, string_func, bytes_func, num_func):
def _parse_bytes_factory(alg):
- """Create a function that will format a bytes string in a tuple."""
+ """
+ Create a function that will format a *bytes* object into a tuple.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format the *bytes*.
+
+ Returns
+ -------
+ func : callable
+ A function that accepts *bytes* input and returns a tuple
+ with the formatted *bytes*. Intended to be used as the
+ *bytes_func* argument to *_natsort_key*.
+
+ See Also
+ --------
+ _natsort_key
+
+ """
# We don't worry about ns.UNGROUPLETTERS | ns.LOCALEALPHA because
# bytes cannot be compared to strings.
if alg & ns.PATH and alg & ns.IGNORECASE:
@@ -184,7 +244,34 @@ def _parse_bytes_factory(alg):
def _parse_number_factory(alg, sep, pre_sep):
- """Create a function that will properly format a number in a tuple."""
+ """
+ Create a function that will format a number into a tuple.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format the number.
+ sep : str
+ The string character to be inserted before the number
+ in the returned tuple.
+ pre_sep : str
+ In the event that *alg* contains ``UNGROUPLETTERS``, this
+ string will be placed in a single-element tuple at the front
+ of the returned nested tuple.
+
+ Returns
+ -------
+ func : callable
+ A function that accepts numeric input (e.g. *int* or *float*)
+ and returns a tuple containing the number with the leading string
+ *sep*. Intended to be used as the *num_func* argument to
+ *_natsort_key*.
+
+ See Also
+ --------
+ _natsort_key
+
+ """
nan_replace = float("+inf") if alg & ns.NANLAST else float("-inf")
def func(val, nan_replace=nan_replace, sep=sep):
@@ -205,7 +292,50 @@ def _parse_number_factory(alg, sep, pre_sep):
def _parse_string_factory(
alg, sep, splitter, input_transform, component_transform, final_transform
):
- """Create a function that will properly split and format a string."""
+ """
+ Create a function that will split and format a *str* into a tuple.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format and split the *str*.
+ sep : str
+ The string character to be inserted between adjacent numeric
+ objects in the returned tuple.
+ splitter : callable
+ A function that will accept a string and return an iterable
+ of strings where the numbers are separated from the non-numbers.
+ input_transform : callable
+ A function to apply to the string input *before* applying
+ the *splitter* function. Must return a string.
+ component_transform : callable
+ A function that is operated elementwise on the output of
+ *splitter*. It must accept a single string and return either
+ a string or a number.
+ final_transform : callable
+ A function to operate on the return value as a whole. It
+ must accept a tuple and a string argument - the tuple
+ should be the result of applying the above functions, and the
+ string is the original input value. It must return a tuple.
+
+ Returns
+ -------
+ func : callable
+ A function that accepts string input and returns a tuple
+ containing the string split into numeric and non-numeric
+ components, where the numeric components are converted into
+ numeric objects. The first element is *always* a string,
+ and then alternates number then string. Intended to be
+ used as the *string_func* argument to *_natsort_key*.
+
+ See Also
+ --------
+ _natsort_key
+ _input_string_transform_factory
+ _string_component_transform_factory
+ _final_data_transform_factory
+
+ """
# Sometimes we store the "original" input before transformation,
# sometimes after.
orig_after_xfrm = not (alg & ns._DUMB and alg & ns.LOCALEALPHA)
@@ -228,17 +358,53 @@ def _parse_string_factory(
def _parse_path_factory(str_split):
- """Create a function that will properly split and format a path."""
+ """
+ Create a function that will properly split and format a path.
+
+ Parameters
+ ----------
+ str_split : callable
+ The output of the *_parse_string_factory* function.
+
+ Returns
+ -------
+ func : callable
+ A function that accepts a string or path-like object
+ and splits it into its path components, then passes
+ each component to *str_split* and returns the result
+ as a nested tuple. Can be used as the *string_func*
+ argument to *_natsort_key*.
+
+ See Also
+ --------
+ _natsort_key
+ _parse_string_factory
+
+ """
return lambda x: tuple(py23_map(str_split, _path_splitter(x)))
def _sep_inserter(iterable, sep):
- """Insert '' between numbers."""
+ """
+ Insert '' between numbers in an iterable.
- # Get the first element. If StopIteration is raised, that's OK.
- # Since we are controlling the types of the input, 'type' is used
- # instead of 'isinstance' for the small speed advantage it offers.
+ Parameters
+ ----------
+ iterable
+ sep : str
+ The string character to be inserted between adjacent numeric objects.
+
+ Yields
+ ------
+ The values of *iterable* in order, with *sep* inserted where adjacent
+ elements are numeric. If the first element in the input is numeric
+ then *sep* will be the first value yielded.
+
+ """
try:
+ # Get the first element. A StopIteration indicates an empty iterable.
+ # Since we are controlling the types of the input, 'type' is used
+ # instead of 'isinstance' for the small speed advantage it offers.
types = (int, float, long)
first = next(iterable)
if type(first) in types:
@@ -265,8 +431,23 @@ def _sep_inserter(iterable, sep):
def _input_string_transform_factory(alg):
"""
- Given a set of natsort algorithms, return the function to operate
- on the pre-split input string according to the user's request.
+ Create a function to transform a string.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format the *str*.
+
+ Returns
+ -------
+ func : callable
+ A function to be used as the *input_transform* argument to
+ *_parse_string_factory*.
+
+ See Also
+ --------
+ _parse_string_factory
+
"""
# Shortcuts.
lowfirst = alg & ns.LOWERCASEFIRST
@@ -324,8 +505,23 @@ def _input_string_transform_factory(alg):
def _string_component_transform_factory(alg):
"""
- Given a set of natsort algorithms, return the function to operate
- on the post-split strings according to the user's request.
+ Create a function to either transform a string or convert to a number.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format the *str*.
+
+ Returns
+ -------
+ func : callable
+ A function to be used as the *component_transform* argument to
+ *_parse_string_factory*.
+
+ See Also
+ --------
+ _parse_string_factory
+
"""
# Shortcuts.
use_locale = alg & ns.LOCALEALPHA
@@ -351,8 +547,23 @@ def _string_component_transform_factory(alg):
def _final_data_transform_factory(alg, sep, pre_sep):
"""
- Given a set of natsort algorithms, return the function to operate
- on the post-parsed strings according to the user's request.
+ Create a function to transform a tuple.
+
+ Parameters
+ ----------
+ alg : ns enum
+ Indicate how to format the *str*.
+
+ Returns
+ -------
+ func : callable
+ A function to be used as the *final_transform* argument to
+ *_parse_string_factory*.
+
+ See Also
+ --------
+ _parse_string_factory
+
"""
if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST
@@ -378,8 +589,26 @@ def _final_data_transform_factory(alg, sep, pre_sep):
return lambda split_val, val: tuple(split_val)
+@u_format
def _groupletters(x, _low=methodcaller("casefold" if NEWPY else "lower")):
- """Double all characters, making doubled letters lowercase."""
+ """
+ Double all characters, making doubled letters lowercase.
+
+ Parameters
+ ----------
+ x : str
+
+ Returns
+ -------
+ str
+
+ Examples
+ --------
+
+ >>> _groupletters("Apple")
+ {u}'aAppppllee'
+
+ """
return "".join(ichain.from_iterable((_low(y), y) for y in x))
@@ -397,7 +626,8 @@ def chain_functions(functions):
Returns
-------
- A single argument function.
+ func : callable
+ A single argument function.
Examples
--------
@@ -420,7 +650,22 @@ def chain_functions(functions):
def _do_decoding(s, encoding):
- """A function to decode a bytes string, or return the object as-is."""
+ """
+ Helper to decode a *bytes* object, or return the object as-is.
+
+ Parameters
+ ----------
+ s : bytes | object
+ encoding : str
+ The encoding to use to decode *s*.
+
+ Returns
+ -------
+ decoded
+ *str* if *s* was *bytes* and the decoding was successful.
+ *s* if *s* was not *bytes*.
+
+ """
try:
return s.decode(encoding)
except UnicodeError:
@@ -429,9 +674,29 @@ def _do_decoding(s, encoding):
return s
+@u_format
def _path_splitter(s, _d_match=re.compile(r"\.\d").match):
- """Split a string into its path components. Assumes a string is a path."""
- # If a PathLib Object, use it's functionality to perform the split.
+ """
+ Split a string into its path components.
+
+ Assumes a string is a path or is path-like.
+
+ Parameters
+ ----------
+ s : str | pathlib.PurePath
+
+ Returns
+ -------
+ split : tuple
+ The path split by directory components and extensions.
+
+ Examples
+ --------
+
+ >>> tuple(_path_splitter("/this/thing.ext"))
+ ({u}'/', {u}'this', {u}'thing', {u}'.ext')
+
+ """
if has_pathlib and isinstance(s, PurePath):
s = py23_str(s)
path_parts = deque()
@@ -474,7 +739,11 @@ def _path_splitter(s, _d_match=re.compile(r"\.\d").match):
def _args_to_enum(**kwargs):
- """A function to convert input booleans to an enum-type argument."""
+ """
+ A function to convert input booleans to an enum-type argument.
+
+ For internal use only - will be deprecated in a future release.
+ """
alg = 0
keys = ("number_type", "signed", "exp", "as_path", "py3_safe")
if any(x not in keys for x in kwargs):
diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py
index 85b7151..5f1e252 100644
--- a/test_natsort/test_main.py
+++ b/test_natsort/test_main.py
@@ -12,9 +12,9 @@ from hypothesis.strategies import integers, floats, lists, data
from natsort.__main__ import (
main,
range_check,
- check_filter,
+ check_filters,
keep_entry_range,
- exclude_entry,
+ keep_entry_value,
sort_and_print_entries,
)
@@ -221,43 +221,43 @@ def test_range_check_raises_ValueError_if_second_is_less_than_first(x, data):
assert str(err.value) == "low >= high"
-def test_check_filter_returns_None_if_filter_evaluates_to_False():
- assert check_filter(()) is None
- assert check_filter(False) is None
- assert check_filter(None) is None
+def test_check_filters_returns_None_if_filter_evaluates_to_False():
+ assert check_filters(()) is None
+ assert check_filters(False) is None
+ assert check_filters(None) is None
-def test_check_filter_returns_input_as_is_if_filter_is_valid_example():
- assert check_filter([(6, 7)]) == [(6, 7)]
- assert check_filter([(6, 7), (2, 8)]) == [(6, 7), (2, 8)]
+def test_check_filters_returns_input_as_is_if_filter_is_valid_example():
+ assert check_filters([(6, 7)]) == [(6, 7)]
+ assert check_filters([(6, 7), (2, 8)]) == [(6, 7), (2, 8)]
@given(
x=lists(integers(), min_size=1), data=data()
) # Defer data selection for y till test is run.
-def test_check_filter_returns_input_as_is_if_filter_is_valid(x, data):
+def test_check_filters_returns_input_as_is_if_filter_is_valid(x, data):
y = [
data.draw(integers(min_value=val + 1)) for val in x
] # ensure y is element-wise greater than x
- assert check_filter(list(zip(x, y))) == [(i, j) for i, j in zip(x, y)]
+ assert check_filters(list(zip(x, y))) == [(i, j) for i, j in zip(x, y)]
-def test_check_filter_raises_ValueError_if_filter_is_invalid_example():
+def test_check_filters_raises_ValueError_if_filter_is_invalid_example():
with raises(ValueError) as err:
- check_filter([(7, 2)])
+ check_filters([(7, 2)])
assert str(err.value) == "Error in --filter: low >= high"
@given(
x=lists(integers(), min_size=1), data=data()
) # Defer data selection for y till test is run.
-def test_check_filter_raises_ValueError_if_filter_is_invalid(x, data):
+def test_check_filters_raises_ValueError_if_filter_is_invalid(x, data):
y = [
data.draw(integers(max_value=val)) for val in x
] # ensure y is element-wise less than or equal to x
with raises(ValueError) as err:
- check_filter(list(zip(x, y)))
+ check_filters(list(zip(x, y)))
assert str(err.value) == "Error in --filter: low >= high"
@@ -273,9 +273,9 @@ def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_ra
assert not keep_entry_range("a56b23c89", [1], [20], int, re.compile(r"\d+"))
-def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example():
- assert exclude_entry("a56b23c89", [100, 45], int, re.compile(r"\d+"))
+def test_keep_entry_value_returns_True_if_exlcude_parameters_are_not_in_input_example():
+ assert keep_entry_value("a56b23c89", [100, 45], int, re.compile(r"\d+"))
-def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example():
- assert not exclude_entry("a56b23c89", [23], int, re.compile(r"\d+"))
+def test_keep_entry_value_returns_False_if_exlcude_parameters_are_in_input_example():
+ assert not keep_entry_value("a56b23c89", [23], int, re.compile(r"\d+"))