summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.rst86
-rw-r--r--natsort/_version.py2
-rw-r--r--natsort/natsort.py58
3 files changed, 127 insertions, 19 deletions
diff --git a/README.rst b/README.rst
index 03a58c9..3063a19 100644
--- a/README.rst
+++ b/README.rst
@@ -64,6 +64,35 @@ is a float::
``natsort`` is not necessarily optimized for speed, but it is designed to be as
flexible as possible.
+A Note About Sorting Version Numbers
+''''''''''''''''''''''''''''''''''''
+
+The algorithm that ``natsort`` uses is optimized to find negative numbers and
+floating point numbers (including those with exponentials). Because of this, you
+might not get results you expect when sorting version numbers. For example::
+
+ >>> available_versions = ['1.8.1-r26', '1.8.1-r30', '2.0-r2', '2.0-r7', '2.0-r11']
+ >>> natsorted(available_versions)
+ ['1.8.1-r26', '1.8.1-r30', '2.0-r2', '2.0-r7', '2.0-r11']
+
+The above works fine, but adding a prefix can mess things up if you are not careful::
+
+ >>> natsorted(['my-package-{0}'.format(v) for v in available_versions])
+ ['my-package-2.0-r2', 'my-package-2.0-r7', 'my-package-2.0-r11', 'my-package-1.8.1-r26', 'my-package-1.8.1-r30']
+
+This is not in the order you might expect. ``natsort`` sees the '-'
+before the number and treats it as a minus sign. It therefore reads the floats ``-2.0``
+and ``-1.8``, and then sorts them in increasing order (``-2.0`` first). Obviously, this
+is not what you want for version numbers (but would be good for floats).
+This can be fixed by not using a dash as a separator::
+
+ >>> natsorted(['my-package{0}'.format(v) for v in available_versions])
+ ['my-package1.8.1-r26', 'my-package1.8.1-r30', 'my-package2.0-r2', 'my-package2.0-r7', 'my-package2.0-r11']
+
+If you find that you need to be able to sort version numbers more reliably, I
+recommend taking a look at the `naturalsort <https://pypi.python.org/pypi/naturalsort>`_
+package which will give you what you expect for version numbers.
+
API
---
@@ -81,6 +110,26 @@ Using ``natsort_key`` is just like any other sorting key in python::
>>> a
['num2', 'num3', 'num5']
+Of course, you can chain ``natsort_key`` with other functions to sort by some attribute
+of a class (for example). The easiest way is to make a ``lambda`` expression
+that calls ``natsort_key``::
+
+ >>> class Foo:
+ ... def __init__(self, bar):
+ ... self.bar = bar
+ ... def __repr__(self):
+ ... return "Foo('{0}')".format(self.bar)
+ >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> b.sort(key=lambda x: natsort_key(x.bar)) # Get attribute explicitly
+ >>> b
+ [Foo('num2'), Foo('num3'), Foo('num5')]
+ >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> from operator import attrgetter
+ >>> f = attrgetter('bar') # Using the operator module gives more flexibility
+ >>> c.sort(key=lambda x: natsort_key(f(x)))
+ >>> c
+ [Foo('num2'), Foo('num3'), Foo('num5')]
+
natsorted
'''''''''
@@ -91,6 +140,13 @@ natsorted
>>> natsorted(a)
['num2', 'num3', 'num5']
+``natsorted`` also supports a ``key`` argument just like the ``sorted`` function.
+Using our ``Foo`` class from above::
+
+ >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> natsorted(b, key=attrgetter('bar'))
+ [Foo('num2'), Foo('num3'), Foo('num5')]
+
index_natsorted
'''''''''''''''
@@ -101,26 +157,32 @@ one list::
>>> a = ['num3', 'num5', 'num2']
>>> b = ['foo', 'bar', 'baz']
>>> index = index_natsorted(a)
+ >>> index
+ [2, 0, 1]
>>> # Sort both lists by the sort order of a
- >>> a = [a[i] for i in index]
- >>> b = [b[i] for i in index]
- >>> a
+ >>> [a[i] for i in index]
['num2', 'num3', 'num5']
- >>> b
+ >>> [b[i] for i in index]
['baz', 'foo', 'bar']
+Again, ``index_natsorted`` accepts a ``key`` argument::
+
+ >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> index_natsorted(c, key=attrgetter('bar'))
+ [2, 0, 1]
+
Shell Script
------------
-For your convenience, there is a natsort shell script supplied to you that
-allows you to call natsort from the command-line. ``natsort`` was written to
-aid in computational chemistry researh so that it would be easy to analyze
+For your convenience, there is a ``natsort`` shell script supplied to you that
+allows you to call ``natsort`` from the command-line. ``natsort`` was written to
+aid in computational chemistry research so that it would be easy to analyze
large sets of output files named after the parameter used::
$ ls *.out
mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out
-(Obvously, in reality there would be more files, but you get the idea.) Notice
+(Obviously, in reality there would be more files, but you get the idea.) Notice
that the shell sorts in ASCII order. This is the behavior of programs like
``find`` as well as ``ls``. The problem is, when passing these files to an
analysis program causes them not to appear in numerical order, which can lead
@@ -163,6 +225,14 @@ Seth M. Morton
History
-------
+6-25-2013 v. 2.2.0
+''''''''''''''''''
+
+ - Added ``key`` argument to ``natsorted`` and ``index_natsorted`` so that
+ they mimic the functionality of the built-in ``sorted``
+ - Added tests to reflect the new functionality, as well as tests demonstrating
+ how to get similar functionality using ``natsort_key``.
+
12-5-2012 v. 2.1.0
''''''''''''''''''
diff --git a/natsort/_version.py b/natsort/_version.py
index a33997d..04188a1 100644
--- a/natsort/_version.py
+++ b/natsort/_version.py
@@ -1 +1 @@
-__version__ = '2.1.0'
+__version__ = '2.2.0'
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 2e65e09..390d2d4 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -53,6 +53,21 @@ def natsort_key(s):
>>> a.sort(key=natsort_key)
>>> a
['num2', 'num3', 'num5']
+ >>> class Foo:
+ ... def __init__(self, bar):
+ ... self.bar = bar
+ ... def __repr__(self):
+ ... return "Foo('{0}')".format(self.bar)
+ >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> b.sort(key=lambda x: natsort_key(x.bar))
+ >>> b
+ [Foo('num2'), Foo('num3'), Foo('num5')]
+ >>> from operator import attrgetter
+ >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> f = attrgetter('bar')
+ >>> c.sort(key=lambda x: natsort_key(f(x)))
+ >>> c
+ [Foo('num2'), Foo('num3'), Foo('num5')]
"""
@@ -65,7 +80,7 @@ def natsort_key(s):
if len(s) == 1:
return tuple(s)
- # Remove all the None elements and exponentoals from the list.
+ # Remove all the None elements and exponentials from the list.
# This results from the way split works when there are parenthesis
# in the regular expression
ix = [i for i, x in enumerate(s) if x is None or exp_re.match(x)]
@@ -100,7 +115,7 @@ def natsort_key(s):
# Now, convert this list to a tuple
return tuple(s)
-def natsorted(seq):
+def natsorted(seq, key=None):
"""\
Sorts a sequence naturally (alphabetically and numerically),
not by ASCII.
@@ -108,15 +123,27 @@ def natsorted(seq):
>>> a = ['num3', 'num5', 'num2']
>>> natsorted(a)
['num2', 'num3', 'num5']
+ >>> class Foo:
+ ... def __init__(self, bar):
+ ... self.bar = bar
+ ... def __repr__(self):
+ ... return "Foo('{0}')".format(self.bar)
+ >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> from operator import attrgetter
+ >>> natsorted(b, key=attrgetter('bar'))
+ [Foo('num2'), Foo('num3'), Foo('num5')]
:argument seq:
The sequence to be sorted.
:type seq: sequence-like
:rtype: list
"""
- return sorted(seq, key=natsort_key)
+ if key:
+ return sorted(seq, key=lambda x: natsort_key(key(x)))
+ else:
+ return sorted(seq, key=natsort_key)
-def index_natsorted(seq):
+def index_natsorted(seq, key=lambda x: x):
"""\
Sorts a sequence naturally, but returns a list of sorted the
indeces and not the sorted list.
@@ -124,22 +151,33 @@ def index_natsorted(seq):
>>> a = ['num3', 'num5', 'num2']
>>> b = ['foo', 'bar', 'baz']
>>> index = index_natsorted(a)
+ >>> index
+ [2, 0, 1]
>>> # Sort both lists by the sort order of a
- >>> a = [a[i] for i in index]
- >>> b = [b[i] for i in index]
- >>> a
+ >>> [a[i] for i in index]
['num2', 'num3', 'num5']
- >>> b
+ >>> [b[i] for i in index]
['baz', 'foo', 'bar']
+ >>> class Foo:
+ ... def __init__(self, bar):
+ ... self.bar = bar
+ ... def __repr__(self):
+ ... return "Foo('{0}')".format(self.bar)
+ >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
+ >>> from operator import attrgetter
+ >>> index_natsorted(c, key=attrgetter('bar'))
+ [2, 0, 1]
:argument seq:
The sequence that you want the sorted index of.
:type seq: sequence-like
:rtype: list
"""
+ from operator import itemgetter
+ item1 = itemgetter(1)
# Pair the index and sequence together, then sort by
- index_seq_pair = [[x, y] for x, y in zip(xrange(len(seq)), seq)]
- index_seq_pair.sort(key=lambda x: natsort_key(x[1]))
+ index_seq_pair = [[x, key(y)] for x, y in zip(xrange(len(seq)), seq)]
+ index_seq_pair.sort(key=lambda x: natsort_key(item1(x)))
return [x[0] for x in index_seq_pair]
def test():