summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth Morton <seth.m.morton@gmail.com>2020-11-16 22:05:01 -0800
committerSeth Morton <seth.m.morton@gmail.com>2020-11-17 22:36:23 -0800
commitd69a5414f111b9584f28c17382af1821c5e291fb (patch)
tree9ba0806d803d5376ee6f9a101a7b6876d041dafa
parent0c31c245fc461641024ae9eed98ea97ca2ef79a7 (diff)
downloadnatsort-d69a5414f111b9584f28c17382af1821c5e291fb.tar.gz
Fully implement OS sorting on all platforms
ICU is used to sort on UNIX-based platforms. If ICU is not installed, the results are OK, but not exactly what you would expect.
-rw-r--r--natsort/natsort.py90
-rw-r--r--tests/test_os_sorted.py194
2 files changed, 261 insertions, 23 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py
index d52c048..dc9f098 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -608,6 +608,12 @@ def numeric_regex_chooser(alg):
return utils.regex_chooser(alg).pattern[1:-1]
+def _split_apply(v, key=None):
+ if key is not None:
+ v = key(v)
+ return utils.path_splitter(str(v))
+
+
# Choose the implementation based on the host OS
if platform.system() == "Windows":
@@ -620,36 +626,45 @@ if platform.system() == "Windows":
_winsort_key = cmp_to_key(_windows_sort_cmp)
def os_sort_keygen(key=None):
- if key is not None:
- return lambda x: _winsort_key(str(key(x)))
- else:
- return lambda x: _winsort_key(str(x))
+ return lambda x: tuple(map(_winsort_key, _split_apply(x, key)))
-else:
-
- def os_sort_keygen(key=None):
- return natsort_keygen(key=key, alg=ns.PATH)
+else:
-os_sort_keygen.__doc__ = """
-Generate a sorting key to replicate your file browser's sort order
+ # For UNIX-based platforms, ICU performs MUCH better than locale
+ # at replicating the file explorer's sort order. We will use
+ # ICU's ability to do basic natural sorting, as it also replicates
+ # that order better than what natsort does by default.
+ #
+ # However, if the user does not have ICU installed then fall back
+ # on natsort's default handling for paths with locale turned on
+ # which will give good results in most cases (e.g. when there aren't
+ # a bunch of special characters).
+ try:
+ import icu
-.. warning::
+ except ImportError:
+ # No ICU installed
+ def os_sort_keygen(key=None):
+ return natsort_keygen(
+ key=key, alg=ns.LOCALE | ns.PATH | ns.IGNORECASE
+ )
- The resulting function will generate results that will be
- differnt depending on your platform. This is intentional.
+ else:
+ # ICU installed
+ def os_sort_keygen(key=None):
+ loc = natsort.compat.locale.get_icu_locale()
+ collator = icu.Collator.createInstance(loc)
+ collator.setAttribute(
+ icu.UCollAttribute.NUMERIC_COLLATION, icu.UCollAttributeValue.ON
+ )
+ return lambda x: tuple(map(collator.getSortKey, _split_apply(x, key)))
-On Windows, this will sort with the same order as Windows Explorer.
-It does *not* take into account if a path is a directory or a file
-when sorting.
+os_sort_keygen.__doc__ = """
+Generate a sorting key to replicate your file browser's sort order
-Parameters
-----------
-key: callable, optional
- A key used to determine how to sort each element of the sequence.
- It is **not** applied recursively.
- It should accept a single argument and return a single value.
+See :func:`os_sorted` for description and caveats.
Returns
-------
@@ -688,7 +703,30 @@ def os_sorted(seq, key=None, reverse=False):
"""
Sort elements in the same order as your operating system's file browser
- Only available on Windows.
+ .. warning::
+
+ The resulting function will generate results that will be
+ different depending on your platform. This is intentional.
+
+ On Windows, this will sort with the same order as Windows Explorer.
+
+ On MacOS/Linux, you will get different results depending on whether
+ or not you have :mod:`pyicu` installed.
+
+ - If you have :mod:`pyicu` installed, you will get results that are
+ in the same order as (or very close to) that of your operating
+ system's file browser.
+ - If you do not have :mod:`pyicu` installed, then this will give
+ the same results as if you used ``ns.LOCALE``, ``ns.PATH``,
+ and ``ns.IGNORECASE`` with :func:`natsorted`. If you do not have
+ special characters this will give correct results, but once
+ special characters are added you should lower your expectations.
+
+ It is *strongly* recommended to have :mod:`pyicu` installed on
+ MacOS/Linux if you want correct sort results.
+
+ It does *not* take into account if a path is a directory or a file
+ when sorting.
Parameters
----------
@@ -711,6 +749,12 @@ def os_sorted(seq, key=None, reverse=False):
See Also
--------
natsorted
+ os_sort_keygen
+
+ Notes
+ -----
+ On Windows, this will implicitly coerce all inputs to str before
+ collating.
"""
return sorted(seq, key=os_sort_keygen(key), reverse=reverse)
diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py
new file mode 100644
index 0000000..aaaffac
--- /dev/null
+++ b/tests/test_os_sorted.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+"""
+Testing for the OS sorting
+"""
+import platform
+
+import natsort
+import pytest
+
+try:
+ import icu # noqa: F401
+except ImportError:
+ has_icu = False
+else:
+ has_icu = True
+
+
+def test_os_sorted_compound():
+ given = [
+ "/p/Folder (10)/file.tar.gz",
+ "/p/Folder (1)/file (1).tar.gz",
+ "/p/Folder/file.x1.9.tar.gz",
+ "/p/Folder (2)/file.tar.gz",
+ "/p/Folder (1)/file.tar.gz",
+ "/p/Folder/file.x1.10.tar.gz",
+ ]
+ expected = [
+ "/p/Folder/file.x1.9.tar.gz",
+ "/p/Folder/file.x1.10.tar.gz",
+ "/p/Folder (1)/file.tar.gz",
+ "/p/Folder (1)/file (1).tar.gz",
+ "/p/Folder (2)/file.tar.gz",
+ "/p/Folder (10)/file.tar.gz",
+ ]
+ result = natsort.os_sorted(given)
+ assert result == expected
+
+
+def test_os_sorted_misc_no_fail():
+ natsort.os_sorted([9, 4.3, None, float("nan")])
+
+
+# The following is a master list of things that might give trouble
+# when sorting like the file explorer.
+given = [
+ "11111",
+ "!",
+ "#",
+ "$",
+ "%",
+ "&",
+ "'",
+ "(",
+ ")",
+ "+",
+ "+11111",
+ "+aaaaa",
+ ",",
+ "-",
+ ";",
+ "=",
+ "@",
+ "[",
+ "]",
+ "^",
+ "_",
+ "`",
+ "aaaaa",
+ "foo0",
+ "foo_0",
+ "{",
+ "}",
+ "~",
+ "§",
+ "°",
+ "´",
+ "µ",
+ "€",
+ "foo1",
+ "foo2",
+ "foo4",
+ "foo10",
+ "Foo3",
+]
+
+# The expected values change based on the environment
+if platform.system() == "Windows":
+ expected = [
+ "'",
+ "-",
+ "!",
+ "#",
+ "$",
+ "%",
+ "&",
+ "(",
+ ")",
+ ",",
+ ";",
+ "@",
+ "[",
+ "]",
+ "^",
+ "_",
+ "`",
+ "{",
+ "}",
+ "~",
+ "´",
+ "€",
+ "+",
+ "+11111",
+ "+aaaaa",
+ "=",
+ "§",
+ "°",
+ "µ",
+ "11111",
+ "aaaaa",
+ "foo_0",
+ "foo0",
+ "foo1",
+ "foo2",
+ "Foo3",
+ "foo4",
+ "foo10",
+ ]
+
+elif has_icu:
+ expected = [
+ "_",
+ "-",
+ ",",
+ ";",
+ "!",
+ "'",
+ "(",
+ ")",
+ "[",
+ "]",
+ "{",
+ "}",
+ "§",
+ "@",
+ "&",
+ "#",
+ "%",
+ "`",
+ "´",
+ "^",
+ "°",
+ "+",
+ "+11111",
+ "+aaaaa",
+ "=",
+ "~",
+ "$",
+ "€",
+ "11111",
+ "aaaaa",
+ "foo_0",
+ "foo0",
+ "foo1",
+ "foo2",
+ "Foo3",
+ "foo4",
+ "foo10",
+ "µ",
+ ]
+else:
+ # For non-ICU UNIX, the order is all over the place
+ # from platform to platform, distribution to distribution.
+ # It's not really possible to predict the order across all
+ # the different OS. To work around this, we will exclude
+ # the special characters from the sort.
+ given = given[0:1] + given[22:25] + given[33:]
+ expected = [
+ "11111",
+ "aaaaa",
+ "foo0",
+ "foo1",
+ "foo2",
+ "Foo3",
+ "foo4",
+ "foo10",
+ "foo_0",
+ ]
+
+
+@pytest.mark.usefixtures("with_locale_en_us")
+def test_os_sorted_corpus():
+ result = natsort.os_sorted(given)
+ print(result)
+ assert result == expected