summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Seth M Morton <seth.m.morton@gmail.com> 2018-02-08 23:10:30 -0800
committer Seth M Morton <seth.m.morton@gmail.com> 2018-02-10 13:25:08 -0800
commit d432715ef68470a45561e14c9e3e194bef97e525 (patch)
tree 9574eb725ec6a689207d34b330eebbfdfaf27f2b
parent 48b50476044860764f2e4cd1f76e0e152940abdc (diff)
download natsort-d432715ef68470a45561e14c9e3e194bef97e525.tar.gz
Add code to support placing numbers after non-numbers.
All infrastructure has been added, but no tests have yet been inserted to verify that the code is working properly. That will come in the next commit.
-rw-r--r-- natsort/compat/locale.py 30
-rw-r--r-- natsort/natsort.py 18
-rw-r--r-- natsort/ns_enum.py 4
-rw-r--r-- natsort/utils.py 10
-rw-r--r-- test_natsort/test_final_data_transform_factory.py 20
-rw-r--r-- test_natsort/test_natsort_key.py 10
-rw-r--r-- test_natsort/test_parse_number_function.py 20
-rw-r--r-- test_natsort/test_utils.py 1
8 files changed, 76 insertions, 37 deletions
diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py
index cbed495..a1cfa5a 100644
--- a/natsort/compat/locale.py
+++ b/natsort/compat/locale.py
@@ -6,8 +6,22 @@ from __future__ import (
absolute_import
)
+# Std. lib imports.
+import sys
+
# Local imports.
-from natsort.compat.py23 import PY_VERSION, cmp_to_key
+from natsort.compat.py23 import (
+ PY_VERSION,
+ cmp_to_key,
+ py23_unichr,
+ py23_cmp,
+)
+
+# This string should be sorted after any other byte string because
+# it contains the max unicode character repeated 20 times.
+# You would need some odd data to come after that.
+null_string = ''
+null_string_max = py23_unichr(sys.maxunicode) * 20
# Make the strxfrm function from strcoll on Python2
# It can be buggy (especially on BSD-based systems),
@@ -18,6 +32,11 @@ try:
null_string_locale = b''
+ # This string should in theory be sorted after any other byte
+ # string because it contains the max byte char repeated many times.
+ # You would need some odd data to come after that.
+ null_string_locale_max = b'x7f' * 50
+
def dumb_sort():
return False
@@ -49,8 +68,15 @@ except ImportError:
null_string_locale = ''
+ # This string should be sorted after any other byte string because
+ # it contains the max unicode character repeated 20 times.
+ # You would need some odd data to come after that.
+ null_string_locale_max = py23_unichr(sys.maxunicode) * 20
+
if PY_VERSION < 3:
- null_string_locale = cmp_to_key(cmp)(null_string_locale)
+ null_string_locale = cmp_to_key(py23_cmp)(null_string_locale)
+ null_string_locale_max = cmp_to_key(py23_cmp)(null_string_locale_max)
+
# On some systems, locale is broken and does not sort in the expected
# order. We will try to detect this and compensate.
def dumb_sort():
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 8703498..557e07b 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -207,16 +207,24 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
alg |= ns._DUMB
# Set some variables that will be passed to the factory functions
- if alg & ns.LOCALEALPHA:
- sep = natsort.compat.locale.null_string_locale
+ if alg & ns.NUMAFTER:
+ if alg & ns.LOCALEALPHA:
+ sep = natsort.compat.locale.null_string_locale_max
+ else:
+ sep = natsort.compat.locale.null_string_max
+ pre_sep = natsort.compat.locale.null_string_max
else:
- sep = ''
+ if alg & ns.LOCALEALPHA:
+ sep = natsort.compat.locale.null_string_locale
+ else:
+ sep = natsort.compat.locale.null_string
+ pre_sep = natsort.compat.locale.null_string
regex = _regex_chooser[alg & ns._NUMERIC_ONLY]
# Create the functions that will be used to split strings.
input_transform = _input_string_transform_factory(alg)
component_transform = _string_component_transform_factory(alg)
- final_transform = _final_data_transform_factory(alg, sep)
+ final_transform = _final_data_transform_factory(alg, sep, pre_sep)
# Create the high-level parsing functions for strings, bytes, and numbers.
string_func = _parse_string_factory(
@@ -226,7 +234,7 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
if alg & ns.PATH:
string_func = _parse_path_factory(string_func)
bytes_func = _parse_bytes_factory(alg)
- num_func = _parse_number_factory(alg, sep)
+ num_func = _parse_number_factory(alg, sep, pre_sep)
# Return the natsort key with the parsing path pre-chosen.
return partial(
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index 37a00de..4216ddd 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -42,6 +42,9 @@ class ns(object):
Tell `natsort` to not search for exponents as part of a float number.
For example, with `NOEXP` the number "5.6E5" would be interpreted
as `5.6`, `"E"`, and `5` instead of `560000`.
+ NUMAFTER, NA
+ Tell `natsort` to sort numbers after non-numbers. By default
+ numbers will be ordered before non-numbers.
PATH, P
Tell `natsort` to interpret strings as filesystem paths, so they
will be split according to the filesystem separator
@@ -151,6 +154,7 @@ class ns(object):
CAPITALFIRST = C = UNGROUPLETTERS
NANLAST = NL = 1 << 10
COMPATIBILITYNORMALIZE = CN = 1 << 11
+ NUMAFTER = NA = 1 << 12
# The below are private options for internal use only.
_NUMERIC_ONLY = REAL | NOEXP
diff --git a/natsort/utils.py b/natsort/utils.py
index c33de1d..b6484b0 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -197,7 +197,7 @@ def _parse_bytes_factory(alg):
return lambda x: (x,)
-def _parse_number_factory(alg, sep):
+def _parse_number_factory(alg, sep, pre_sep):
"""Create a function that will properly format a number in a tuple."""
nan_replace = float('+inf') if alg & ns.NANLAST else float('-inf')
@@ -207,9 +207,9 @@ def _parse_number_factory(alg, sep):
# Return the function, possibly wrapping in tuple if PATH is selected.
if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
- return lambda x: ((('',), func(x)),)
+ return lambda x: (((pre_sep,), func(x)),)
elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
- return lambda x: (('',), func(x))
+ return lambda x: ((pre_sep,), func(x))
elif alg & ns.PATH:
return lambda x: (func(x),)
else:
@@ -363,7 +363,7 @@ def _string_component_transform_factory(alg):
return partial(fast_int, **kwargs)
-def _final_data_transform_factory(alg, sep):
+def _final_data_transform_factory(alg, sep, pre_sep):
"""
Given a set of natsort algorithms, return the function to operate
on the post-parsed strings according to the user's request.
@@ -383,7 +383,7 @@ def _final_data_transform_factory(alg, sep):
if not split_val:
return (), ()
elif split_val[0] == sep:
- return ('',), split_val
+ return (pre_sep,), split_val
else:
return (transform(val[0]),), split_val
return func
diff --git a/test_natsort/test_final_data_transform_factory.py b/test_natsort/test_final_data_transform_factory.py
index bfd3dfd..f0207e6 100644
--- a/test_natsort/test_final_data_transform_factory.py
+++ b/test_natsort/test_final_data_transform_factory.py
@@ -20,38 +20,38 @@ from hypothesis.strategies import (
def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options_example():
- assert _final_data_transform_factory(0, '')(iter([7]), '') == (7,)
+ assert _final_data_transform_factory(0, '', '')(iter([7]), '') == (7,)
@given(text())
def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options(x):
- assert _final_data_transform_factory(0, '')(iter([x]), '') == (x,)
+ assert _final_data_transform_factory(0, '', '')(iter([x]), '') == (x,)
# UNGROUPLETTERS without LOCALE does nothing, as does LOCALE without UNGROUPLETTERS
- assert _final_data_transform_factory(ns.UNGROUPLETTERS, '')(iter([x]), '') == _final_data_transform_factory(0, '')(iter([x]), '')
- assert _final_data_transform_factory(ns.LOCALE, '')(iter([x]), '') == _final_data_transform_factory(0, '')(iter([x]), '')
+ assert _final_data_transform_factory(ns.UNGROUPLETTERS, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '')
+ assert _final_data_transform_factory(ns.LOCALE, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '')
def test_final_data_transform_factory_with_empty_tuple_returns_double_empty_tuple():
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')((), '') == ((), ())
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')((), '') == ((), ())
def test_final_data_transform_factory_with_null_string_first_element_adds_empty_string_on_first_tuple_element():
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')(('', 60), '') == (('',), ('', 60))
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', 'xx')(('', 60), '') == (('xx',), ('', 60))
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_example():
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')(('this', 60), 'this60') == (('t',), ('this', 60))
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')(('this', 60), 'this60') == (('t',), ('this', 60))
@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers())
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element(x, y):
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0],), (x, y))
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0],), (x, y))
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST_example():
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '')(('this', 60), 'this60') == (('T',), ('this', 60))
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '', '')(('this', 60), 'this60') == (('T',), ('this', 60))
@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers())
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST(x, y):
- assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0].swapcase(),), (x, y))
+ assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0].swapcase(),), (x, y))
diff --git a/test_natsort/test_natsort_key.py b/test_natsort/test_natsort_key.py
index a675cb2..9aabd11 100644
--- a/test_natsort/test_natsort_key.py
+++ b/test_natsort/test_natsort_key.py
@@ -34,10 +34,10 @@ if PY_VERSION >= 3:
regex = _regex_chooser[ns.INT]
pre = _input_string_transform_factory(ns.INT)
post = _string_component_transform_factory(ns.INT)
-after = _final_data_transform_factory(ns.INT, '')
+after = _final_data_transform_factory(ns.INT, '', '')
string_func = _parse_string_factory(ns.INT, '', regex.split, pre, post, after)
bytes_func = _parse_bytes_factory(ns.INT)
-num_func = _parse_number_factory(ns.INT, '')
+num_func = _parse_number_factory(ns.INT, '', '')
def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
@@ -45,7 +45,7 @@ def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple
# so it will sort against the other as_path results.
sfunc = _parse_path_factory(string_func)
bytes_func = _parse_bytes_factory(ns.PATH)
- num_func = _parse_number_factory(ns.PATH, '')
+ num_func = _parse_number_factory(ns.PATH, '', '')
assert _natsort_key(10, None, sfunc, bytes_func, num_func) == (('', 10),)
@@ -55,7 +55,7 @@ def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple()
# so it will sort against the other as_path results.
sfunc = _parse_path_factory(string_func)
bytes_func = _parse_bytes_factory(ns.PATH)
- num_func = _parse_number_factory(ns.PATH, '')
+ num_func = _parse_number_factory(ns.PATH, '', '')
assert _natsort_key(b'/hello/world', None, sfunc, bytes_func, num_func) == ((b'/hello/world',),)
@@ -63,7 +63,7 @@ def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple()
# PATH also handles recursion well.
sfunc = _parse_path_factory(string_func)
bytes_func = _parse_bytes_factory(ns.PATH)
- num_func = _parse_number_factory(ns.PATH, '')
+ num_func = _parse_number_factory(ns.PATH, '', '')
assert _natsort_key(('/Folder', '/Folder (1)'), None, sfunc, bytes_func, num_func) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')')))
diff --git a/test_natsort/test_parse_number_function.py b/test_natsort/test_parse_number_function.py
index 163d066..2e7a9fe 100644
--- a/test_natsort/test_parse_number_function.py
+++ b/test_natsort/test_parse_number_function.py
@@ -18,38 +18,38 @@ from hypothesis.strategies import (
def test_parse_number_factory_makes_function_that_returns_tuple_example():
- assert _parse_number_factory(0, '')(57) == ('', 57)
- assert _parse_number_factory(0, '')(float('nan')) == ('', float('-inf'))
- assert _parse_number_factory(ns.NANLAST, '')(float('nan')) == ('', float('+inf'))
+ assert _parse_number_factory(0, '', '')(57) == ('', 57)
+ assert _parse_number_factory(0, '', '')(float('nan')) == ('', float('-inf'))
+ assert _parse_number_factory(ns.NANLAST, '', '')(float('nan')) == ('', float('+inf'))
@given(floats(allow_nan=False) | integers())
def test_parse_number_factory_makes_function_that_returns_tuple(x):
- assert _parse_number_factory(0, '')(x) == ('', x)
+ assert _parse_number_factory(0, '', '')(x) == ('', x)
def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple_example():
- assert _parse_number_factory(ns.PATH, '')(57) == (('', 57),)
+ assert _parse_number_factory(ns.PATH, '', '')(57) == (('', 57),)
@given(floats(allow_nan=False) | integers())
def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple(x):
- assert _parse_number_factory(ns.PATH, '')(x) == (('', x),)
+ assert _parse_number_factory(ns.PATH, '', '')(x) == (('', x),)
def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example():
- assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '')(57) == (('',), ('', 57))
+ assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == (('xx',), ('', 57))
@given(floats(allow_nan=False) | integers())
def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x):
- assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '')(x) == (('',), ('', x))
+ assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == (('xx',), ('', x))
def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example():
- assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '')(57) == ((('',), ('', 57)),)
+ assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == ((('xx',), ('', 57)),)
@given(floats(allow_nan=False) | integers())
def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x):
- assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '')(x) == ((('',), ('', x)),)
+ assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == ((('xx',), ('', x)),)
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 3a367cb..10ad0fa 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -149,6 +149,7 @@ def test_ns_enum_values_have_are_as_expected():
assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
assert ns.NANLAST == ns.NL
assert ns.COMPATIBILITYNORMALIZE == ns.CN
+ assert ns.NUMAFTER == ns.NA
# Convenience
assert ns.LOCALE == ns.LOCALEALPHA | ns.LOCALENUM