diff options
author | Seth Morton <seth.m.morton@gmail.com> | 2020-11-16 22:05:01 -0800 |
---|---|---|
committer | Seth Morton <seth.m.morton@gmail.com> | 2020-11-17 22:36:23 -0800 |
commit | d69a5414f111b9584f28c17382af1821c5e291fb (patch) | |
tree | 9ba0806d803d5376ee6f9a101a7b6876d041dafa | |
parent | 0c31c245fc461641024ae9eed98ea97ca2ef79a7 (diff) | |
download | natsort-d69a5414f111b9584f28c17382af1821c5e291fb.tar.gz |
Fully implement OS sorting on all platforms
ICU is used to sort on UNIX-based platforms. If ICU is not installed, the
results are OK, but not exactly what you would expect.
-rw-r--r-- | natsort/natsort.py | 90 | ||||
-rw-r--r-- | tests/test_os_sorted.py | 194 |
2 files changed, 261 insertions, 23 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py index d52c048..dc9f098 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -608,6 +608,12 @@ def numeric_regex_chooser(alg): return utils.regex_chooser(alg).pattern[1:-1] +def _split_apply(v, key=None): + if key is not None: + v = key(v) + return utils.path_splitter(str(v)) + + # Choose the implementation based on the host OS if platform.system() == "Windows": @@ -620,36 +626,45 @@ if platform.system() == "Windows": _winsort_key = cmp_to_key(_windows_sort_cmp) def os_sort_keygen(key=None): - if key is not None: - return lambda x: _winsort_key(str(key(x))) - else: - return lambda x: _winsort_key(str(x)) + return lambda x: tuple(map(_winsort_key, _split_apply(x, key))) -else: - - def os_sort_keygen(key=None): - return natsort_keygen(key=key, alg=ns.PATH) +else: -os_sort_keygen.__doc__ = """ -Generate a sorting key to replicate your file browser's sort order + # For UNIX-based platforms, ICU performs MUCH better than locale + # at replicating the file explorer's sort order. We will use + # ICU's ability to do basic natural sorting as it also better + # replicates than what natsort does by default. + # + # However, if the user does not have ICU installed then fall back + # on natsort's default handling for paths with locale turned on + # which will give good results in most cases (e.g. when there aren't + # a bunch of special characters). + try: + import icu -.. warning:: + except ImportError: + # No ICU installed + def os_sort_keygen(key=None): + return natsort_keygen( + key=key, alg=ns.LOCALE | ns.PATH | ns.IGNORECASE + ) - The resulting function will generate results that will be - differnt depending on your platform. This is intentional. 
+ else: + # ICU installed + def os_sort_keygen(key=None): + loc = natsort.compat.locale.get_icu_locale() + collator = icu.Collator.createInstance(loc) + collator.setAttribute( + icu.UCollAttribute.NUMERIC_COLLATION, icu.UCollAttributeValue.ON + ) + return lambda x: tuple(map(collator.getSortKey, _split_apply(x, key))) -On Windows, this will sort with the same order as Windows Explorer. -It does *not* take into account if a path is a directory or a file -when sorting. +os_sort_keygen.__doc__ = """ +Generate a sorting key to replicate your file browser's sort order -Parameters ----------- -key: callable, optional - A key used to determine how to sort each element of the sequence. - It is **not** applied recursively. - It should accept a single argument and return a single value. +See :func:`os_sorted` for description and caveats. Returns ------- @@ -688,7 +703,30 @@ def os_sorted(seq, key=None, reverse=False): """ Sort elements in the same order as your operating system's file browser - Only available on Windows. + .. warning:: + + The resulting function will generate results that will be + different depending on your platform. This is intentional. + + On Windows, this will sort with the same order as Windows Explorer. + + On MacOS/Linux, you will get different results depending on whether + or not you have :mod:`pyicu` installed. + + - If you have :mod:`pyicu` installed, you will get results that are + the same as (or very close to) the same order as your operating + system's file browser. + - If you do not have :mod:`pyicu` installed, then this will give + the same results as if you used ``ns.LOCALE``, ``ns.PATH``, + and ``ns.IGNORECASE`` with :func:`natsorted`. If you do not have + special characters this will give correct results, but once + special characters are added you should lower your expectations. + + It is *strongly* recommended to have :mod:`pyicu` installed on + MacOS/Linux if you want correct sort results. 
+ + It does *not* take into account if a path is a directory or a file + when sorting. Parameters ---------- @@ -711,6 +749,12 @@ def os_sorted(seq, key=None, reverse=False): See Also -------- natsorted + os_sort_keygen + + Notes + ----- + On Windows, this will implicitly coerce all inputs to str before + collating. """ return sorted(seq, key=os_sort_keygen(key), reverse=reverse) diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py new file mode 100644 index 0000000..aaaffac --- /dev/null +++ b/tests/test_os_sorted.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +""" +Testing for the OS sorting +""" +import platform + +import natsort +import pytest + +try: + import icu # noqa: F401 +except ImportError: + has_icu = False +else: + has_icu = True + + +def test_os_sorted_compound(): + given = [ + "/p/Folder (10)/file.tar.gz", + "/p/Folder (1)/file (1).tar.gz", + "/p/Folder/file.x1.9.tar.gz", + "/p/Folder (2)/file.tar.gz", + "/p/Folder (1)/file.tar.gz", + "/p/Folder/file.x1.10.tar.gz", + ] + expected = [ + "/p/Folder/file.x1.9.tar.gz", + "/p/Folder/file.x1.10.tar.gz", + "/p/Folder (1)/file.tar.gz", + "/p/Folder (1)/file (1).tar.gz", + "/p/Folder (2)/file.tar.gz", + "/p/Folder (10)/file.tar.gz", + ] + result = natsort.os_sorted(given) + assert result == expected + + +def test_os_sorted_misc_no_fail(): + natsort.os_sorted([9, 4.3, None, float("nan")]) + + +# The following is a master list of things that might give trouble +# when sorting like the file explorer. 
+given = [ + "11111", + "!", + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "+", + "+11111", + "+aaaaa", + ",", + "-", + ";", + "=", + "@", + "[", + "]", + "^", + "_", + "`", + "aaaaa", + "foo0", + "foo_0", + "{", + "}", + "~", + "§", + "°", + "´", + "µ", + "€", + "foo1", + "foo2", + "foo4", + "foo10", + "Foo3", +] + +# The expected values change based on the environment +if platform.system() == "Windows": + expected = [ + "'", + "-", + "!", + "#", + "$", + "%", + "&", + "(", + ")", + ",", + ";", + "@", + "[", + "]", + "^", + "_", + "`", + "{", + "}", + "~", + "´", + "€", + "+", + "+11111", + "+aaaaa", + "=", + "§", + "°", + "µ", + "11111", + "aaaaa", + "foo_0", + "foo0", + "foo1", + "foo2", + "Foo3", + "foo4", + "foo10", + ] + +elif has_icu: + expected = [ + "_", + "-", + ",", + ";", + "!", + "'", + "(", + ")", + "[", + "]", + "{", + "}", + "§", + "@", + "&", + "#", + "%", + "`", + "´", + "^", + "°", + "+", + "+11111", + "+aaaaa", + "=", + "~", + "$", + "€", + "11111", + "aaaaa", + "foo_0", + "foo0", + "foo1", + "foo2", + "Foo3", + "foo4", + "foo10", + "µ", + ] +else: + # For non-ICU UNIX, the order is all over the place + # from platform to platform, distribution to distribution. + # It's not really possible to predict the order across all + # the different OS. To work around this, we will exclude + # the special characters from the sort. + given = given[0:1] + given[22:25] + given[33:] + expected = [ + "11111", + "aaaaa", + "foo0", + "foo1", + "foo2", + "Foo3", + "foo4", + "foo10", + "foo_0", + ] + + +@pytest.mark.usefixtures("with_locale_en_us") +def test_os_sorted_corpus(): + result = natsort.os_sorted(given) + print(result) + assert result == expected |