diff options
-rw-r--r-- | README.rst | 86 | ||||
-rw-r--r-- | natsort/_version.py | 2 | ||||
-rw-r--r-- | natsort/natsort.py | 58 |
3 files changed, 127 insertions, 19 deletions
@@ -64,6 +64,35 @@ is a float:: ``natsort`` is not necessarily optimized for speed, but it is designed to be as flexible as possible. +A Note About Sorting Version Numbers +'''''''''''''''''''''''''''''''''''' + +The algorithm that ``natsort`` uses is optimized to find negative numbers and +floating point numbers (including those with exponentials). Because of this, you +might not get results you expect when sorting version numbers. For example:: + + >>> available_versions = ['1.8.1-r26', '1.8.1-r30', '2.0-r2', '2.0-r7', '2.0-r11'] + >>> natsorted(available_versions) + ['1.8.1-r26', '1.8.1-r30', '2.0-r2', '2.0-r7', '2.0-r11'] + +The above works fine, but adding a prefix can mess things up if you are not careful:: + + >>> natsorted(['my-package-{0}'.format(v) for v in available_versions]) + ['my-package-2.0-r2', 'my-package-2.0-r7', 'my-package-2.0-r11', 'my-package-1.8.1-r26', 'my-package-1.8.1-r30'] + +This is not in the order you might expect. ``natsort`` sees the '-' +before the number and starts to look for a float. It sees ones in ``-2.0`` +and ``-1.8``, and then sorts them in increasing order. Obviously, this +is not what you want for version numbers (but would be good for floats). +This can be fixed by not using a dash as a separator:: + + >>> natsorted(['my-package{0}'.format(v) for v in available_versions]) + ['my-package1.8.1-r26', 'my-package1.8.1-r30', 'my-package2.0-r2', 'my-package2.0-r7', 'my-package2.0-r11'] + +If you find that you need to be able to sort version numbers more reliably, I +recommend taking a look at the `naturalsort <https://pypi.python.org/pypi/naturalsort>`_ +package which will give you what you expect for version numbers. + API --- @@ -81,6 +110,26 @@ Using ``natsort_key`` is just like any other sorting key in python:: >>> a ['num2', 'num3', 'num5'] +Of course, you can chain ``natsort_key`` with other functions to sort by some attribute +of a class (for example). The easiest way is to make a ``lambda`` expression +that calls ``natsort_key``:: + + >>> class Foo: + ... def __init__(self, bar): + ... self.bar = bar + ... def __repr__(self): + ... return "Foo('{0}')".format(self.bar) + >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> b.sort(key=lambda x: natsort_key(x.bar)) # Get attribute explicitly + >>> b + [Foo('num2'), Foo('num3'), Foo('num5')] + >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> from operator import attrgetter + >>> f = attrgetter('bar') # Using the operator module gives more flexibility + >>> c.sort(key=lambda x: natsort_key(f(x))) + >>> c + [Foo('num2'), Foo('num3'), Foo('num5')] + natsorted ''''''''' @@ -91,6 +140,13 @@ natsorted >>> natsorted(a) ['num2', 'num3', 'num5'] +``natsorted`` also supports a ``key`` argument just like the ``sorted`` function. +Using our ``Foo`` class from above:: + + >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> natsorted(b, key=attrgetter('bar')) + [Foo('num2'), Foo('num3'), Foo('num5')] + index_natsorted ''''''''''''''' @@ -101,26 +157,32 @@ one list:: >>> a = ['num3', 'num5', 'num2'] >>> b = ['foo', 'bar', 'baz'] >>> index = index_natsorted(a) + >>> index + [2, 0, 1] >>> # Sort both lists by the sort order of a - >>> a = [a[i] for i in index] - >>> b = [b[i] for i in index] - >>> a + >>> [a[i] for i in index] ['num2', 'num3', 'num5'] - >>> b + >>> [b[i] for i in index] ['baz', 'foo', 'bar'] +Again, ``index_natsorted`` accepts a ``key`` argument:: + + >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> index_natsorted(c, key=attrgetter('bar')) + [2, 0, 1] + Shell Script ------------ -For your convenience, there is a natsort shell script supplied to you that -allows you to call natsort from the command-line. ``natsort`` was written to -aid in computational chemistry researh so that it would be easy to analyze +For your convenience, there is a ``natsort`` shell script supplied to you that +allows you to call ``natsort`` from the command-line. ``natsort`` was written to +aid in computational chemistry research so that it would be easy to analyze large sets of output files named after the parameter used:: $ ls *.out mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out -(Obvously, in reality there would be more files, but you get the idea.) Notice +(Obviously, in reality there would be more files, but you get the idea.) Notice that the shell sorts in ASCII order. This is the behavior of programs like ``find`` as well as ``ls``. The problem is, when passing these files to an analysis program causes them not to appear in numerical order, which can lead @@ -163,6 +225,14 @@ Seth M. Morton History ------- +6-25-2013 v. 2.2.0 +'''''''''''''''''' + + - Added ``key`` attribute to ``natsorted`` and ``index_natsorted`` so that + it mimics the functionality of the built-in ``sorted`` + - Added tests to reflect the new functionality, as well as tests demonstrating + how to get similar functionality using ``natsort_key``. + 12-5-2012 v. 2.1.0 '''''''''''''''''' diff --git a/natsort/_version.py b/natsort/_version.py index a33997d..04188a1 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -1 +1 @@ -__version__ = '2.1.0' +__version__ = '2.2.0' diff --git a/natsort/natsort.py b/natsort/natsort.py index 2e65e09..390d2d4 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -53,6 +53,21 @@ def natsort_key(s): >>> a.sort(key=natsort_key) >>> a ['num2', 'num3', 'num5'] + >>> class Foo: + ... def __init__(self, bar): + ... self.bar = bar + ... def __repr__(self): + ... return "Foo('{0}')".format(self.bar) + >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> b.sort(key=lambda x: natsort_key(x.bar)) + >>> b + [Foo('num2'), Foo('num3'), Foo('num5')] + >>> from operator import attrgetter + >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> f = attrgetter('bar') + >>> c.sort(key=lambda x: natsort_key(f(x))) + >>> c + [Foo('num2'), Foo('num3'), Foo('num5')] """ @@ -65,7 +80,7 @@ def natsort_key(s): if len(s) == 1: return tuple(s) - # Remove all the None elements and exponentoals from the list. + # Remove all the None elements and exponentials from the list. # This results from the way split works when there are parenthesis # in the regular expression ix = [i for i, x in enumerate(s) if x is None or exp_re.match(x)] @@ -100,7 +115,7 @@ def natsort_key(s): # Now, convert this list to a tuple return tuple(s) -def natsorted(seq): +def natsorted(seq, key=None): """\ Sorts a sequence naturally (alphabetically and numerically), not by ASCII. @@ -108,15 +123,27 @@ def natsorted(seq): >>> a = ['num3', 'num5', 'num2'] >>> natsorted(a) ['num2', 'num3', 'num5'] + >>> class Foo: + ... def __init__(self, bar): + ... self.bar = bar + ... def __repr__(self): + ... return "Foo('{0}')".format(self.bar) + >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> from operator import attrgetter + >>> natsorted(b, key=attrgetter('bar')) + [Foo('num2'), Foo('num3'), Foo('num5')] :argument seq: The sequence to be sorted. :type seq: sequence-like :rtype: list """ - return sorted(seq, key=natsort_key) + if key: + return sorted(seq, key=lambda x: natsort_key(key(x))) + else: + return sorted(seq, key=natsort_key) -def index_natsorted(seq): +def index_natsorted(seq, key=lambda x: x): """\ Sorts a sequence naturally, but returns a list of sorted the indeces and not the sorted list. @@ -124,22 +151,33 @@ def index_natsorted(seq): >>> a = ['num3', 'num5', 'num2'] >>> b = ['foo', 'bar', 'baz'] >>> index = index_natsorted(a) + >>> index + [2, 0, 1] >>> # Sort both lists by the sort order of a - >>> a = [a[i] for i in index] - >>> b = [b[i] for i in index] - >>> a + >>> [a[i] for i in index] ['num2', 'num3', 'num5'] - >>> b + >>> [b[i] for i in index] ['baz', 'foo', 'bar'] + >>> class Foo: + ... def __init__(self, bar): + ... self.bar = bar + ... def __repr__(self): + ... return "Foo('{0}')".format(self.bar) + >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> from operator import attrgetter + >>> index_natsorted(c, key=attrgetter('bar')) + [2, 0, 1] :argument seq: The sequence that you want the sorted index of. :type seq: sequence-like :rtype: list """ + from operator import itemgetter + item1 = itemgetter(1) # Pair the index and sequence together, then sort by - index_seq_pair = [[x, y] for x, y in zip(xrange(len(seq)), seq)] - index_seq_pair.sort(key=lambda x: natsort_key(x[1])) + index_seq_pair = [[x, key(y)] for x, y in zip(xrange(len(seq)), seq)] + index_seq_pair.sort(key=lambda x: natsort_key(item1(x))) return [x[0] for x in index_seq_pair] def test(): |