Added an _ungroupletters function.

This function contains the logic that was previously inlined in the _natsort_key function. Extracting it into its own function makes the main _natsort_key logic easier to follow.
author: Seth M Morton <seth.m.morton@gmail.com> 2016-04-13 22:09:42 -0700
committer: Seth M Morton <seth.m.morton@gmail.com> 2016-04-13 22:13:58 -0700
commit: a7f9fbced3b6d2fce7d9333fe5e33fed64c7d307 (patch)
tree: 8054488c6edf5ae1dc2d700da2c545ddece76470
parent: 3eff7be98abf8c6359f3c16254cd503397d24295 (diff)
download: natsort-a7f9fbced3b6d2fce7d9333fe5e33fed64c7d307.tar.gz
2 files changed, 60 insertions, 15 deletions
diff --git a/natsort/utils.py b/natsort/utils.py
index 59f1f93..24eafe2 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -180,21 +180,9 @@ def _natsort_key(val, key, alg):
             # Handle NaN.
             if any(x != x for x in ret):
                 ret = _fix_nan(ret, alg)
-            # For UNGROUPLETTERS, so the high level grouping can occur
-            # based on the first letter of the string.
-            # Do no locale transformation of the characters.
             if use_locale and alg & ns.UNGROUPLETTERS:
-                if not ret:
-                    return (ret, ret)
-                elif ret[0] == null_string:
-                    return ((b'' if use_pyicu else '',), ret)
-                elif dumb:  # pragma: no cover
-                    if lowfirst:
-                        return ((orig_val[0].swapcase(),), ret)
-                    else:
-                        return ((orig_val[0],), ret)
-                else:
-                    return ((val[0],), ret)
+                val = orig_val if (alg & ns._DUMB) else val
+                return _ungroupletters(ret, val, alg)
             else:
                 return ret
         except (TypeError, AttributeError):
@@ -272,8 +260,11 @@ def _pre_split_function(alg):
     Given a set of natsort algorithms, return the function to operate
     on the pre-split input string according to the user's request.
     """
+    # Shortcuts.
     lowfirst = alg & ns.LOWERCASEFIRST
     dumb = alg & ns._DUMB
+
+    # Build the chain of functions to execute in order.
     function_chain = []
     if (dumb and not lowfirst) or (lowfirst and not dumb):
         function_chain.append(methodcaller('swapcase'))
@@ -282,9 +273,28 @@ def _pre_split_function(alg):
             function_chain.append(methodcaller('casefold'))
         else:
             function_chain.append(methodcaller('lower'))
+
+    # Return the chained functions.
     return _chain_functions(function_chain)
 
 
+def _ungroupletters(split_val, val, alg):
+    """
+    Return a tuple with the first character of the first element
+    of the return value as the first element, and the return value
+    as the second element. This will be used to perform gross sorting
+    by the first letter.
+    """
+    if not split_val:
+        return ((), ())
+    elif split_val[0] == null_string:
+        return ((b'' if use_pyicu else '',), split_val)
+    elif alg & ns._DUMB and alg & ns.LOWERCASEFIRST:
+        return ((val[0].swapcase(),), split_val)
+    else:
+        return ((val[0],), split_val)
+
+
 def _chain_functions(functions):
     """Chain a list of single-argument functions together and return"""
     def func(x, _functions=functions):
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 9614cf0..3c589c5 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -7,7 +7,7 @@ import locale
 import pathlib
 import pytest
 import string
-from math import isnan
+from math import isnan, isinf
 from operator import itemgetter, neg as op_neg
 from itertools import chain
 from pytest import raises
@@ -28,6 +28,7 @@ from natsort.utils import (
     _fix_nan,
     _chain_functions,
     _pre_split_function,
+    _ungroupletters,
 )
 from natsort.locale_help import locale_convert
 from natsort.compat.py23 import py23_str
@@ -246,6 +247,40 @@ def test_pre_split_function_performs_swapcase_and_casefold_both_LOWERCASEFIRST_A
         assert _pre_split_function(ns.IGNORECASE | ns.LOWERCASEFIRST)(x) == x.swapcase().lower()
 
 
+def test_ungroupletters_with_empty_tuple_returns_double_empty_tuple():
+    assert _ungroupletters((), '', 0) == ((), ())
+
+
+def test_ungroupletters_with_null_string_first_element_adds_empty_string_on_first_tuple_element():
+    assert _ungroupletters((null_string, 60), '', 0) == ((b'',) if use_pyicu else ('',), (null_string, 60))
+
+
+def test_ungroupletters_returns_first_element_in_first_tuple_element_example():
+    assert _ungroupletters(('this', 60), 'this60', 0) == (('t',), ('this', 60))
+
+
+@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
+@given(x=text(), y=floats() | integers())
+def test_ungroupletters_returns_first_element_in_first_tuple_element(x, y):
+    assume(x)
+    assume(not isnan(y))
+    assume(not isinf(y))
+    assert _ungroupletters((x, y), ''.join(map(str, [x, y])), 0) == ((x[0],), (x, y))
+
+
+def test_ungroupletters_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST_example():
+    assert _ungroupletters(('this', 60), 'this60', ns._DUMB | ns.LOWERCASEFIRST) == (('T',), ('this', 60))
+
+
+@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
+@given(x=text(), y=floats() | integers())
+def test_ungroupletters_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST(x, y):
+    assume(x)
+    assume(not isnan(y))
+    assume(not isinf(y))
+    assert _ungroupletters((x, y), ''.join(map(str, [x, y])), ns._DUMB | ns.LOWERCASEFIRST) == ((x[0].swapcase(),), (x, y))
+
+
 # Each test has an "example" version for demonstrative purposes,
 # and a test that uses the hypothesis module.
author	Seth M Morton <seth.m.morton@gmail.com>	2016-04-13 22:09:42 -0700
committer	Seth M Morton <seth.m.morton@gmail.com>	2016-04-13 22:13:58 -0700
commit	a7f9fbced3b6d2fce7d9333fe5e33fed64c7d307 (patch)
tree	8054488c6edf5ae1dc2d700da2c545ddece76470
parent	3eff7be98abf8c6359f3c16254cd503397d24295 (diff)
download	natsort-a7f9fbced3b6d2fce7d9333fe5e33fed64c7d307.tar.gz