summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth M Morton <seth.m.morton@gmail.com>2014-01-20 23:22:44 -0800
committerSeth M Morton <seth.m.morton@gmail.com>2014-01-20 23:22:44 -0800
commit626ceed146a658c7b588b922cbca8fc266555274 (patch)
tree9d287aef72e98b31f0acd24095f02a11412cc530
parent7a0326722a45953ec886031367384874cfabcea4 (diff)
parent8dd3579c3ed480a397cc9c39dfcf368302a30d36 (diff)
downloadnatsort-626ceed146a658c7b588b922cbca8fc266555274.tar.gz
Merge branch 'release/3.1.0'3.1.0
-rw-r--r--LICENSE2
-rw-r--r--MANIFEST.in1
-rw-r--r--README.rst161
-rw-r--r--natsort/__init__.py2
-rw-r--r--natsort/__main__.py303
-rw-r--r--natsort/_version.py4
-rw-r--r--natsort/natsort.py300
-rw-r--r--natsort/py23compat.py65
-rw-r--r--setup.py1
9 files changed, 565 insertions, 274 deletions
diff --git a/LICENSE b/LICENSE
index 207d774..86a72b3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012 Seth M. Morton
+Copyright (c) 2012-2014 Seth M. Morton
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
diff --git a/MANIFEST.in b/MANIFEST.in
index 63f2e17..06ccbf8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,4 +4,5 @@ include natsort/natsort.py
include natsort/_version.py
include natsort/__main__.py
include natsort/__init__.py
+include natsort/py23compat.py
include setup.py
diff --git a/README.rst b/README.rst
index 78cfb87..24234d9 100644
--- a/README.rst
+++ b/README.rst
@@ -2,36 +2,36 @@ natsort
=======
Natural sorting for python. ``natsort`` requires python version 2.6 or greater
-(this includes python 3.x). To run version 2.6, the argparse module is
-required.
+(this includes python 3.x). To run version 2.6, 3.1, or 3.2 the
+`argparse <https://pypi.python.org/pypi/argparse>`_ module is required.
-``natsort`` comes with a shell script that is desecribed below. You can
+``natsort`` comes with a shell script that is described below. You can
also execute ``natsort`` from the command line with ``python -m natsort``.
There exists another natural sorting package for python called
-`naturalsort <https://pypi.python.org/pypi/naturalsort>`_. This package
-does not take into account floats and negatives (which is the default behavior
-of ``natsort``) and so may be preferred if you wish to only sort version numbers.
+`naturalsort <https://pypi.python.org/pypi/naturalsort>`_. You may prefer
+this package if you wish to only sort version numbers.
Problem Statement
-----------------
When you try to sort a list of strings that contain numbers, the normal python
-sort algorithm sorts by ASCII, so you might not get the results that you
+sort algorithm sorts lexicographically, so you might not get the results that you
expect::
- >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6']
+ >>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
>>> sorted(a)
- ['a1', 'a10', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9']
+ ['a1', 'a10', 'a2', 'a4', 'a9']
-Notice that it has the order ('1', '10', '2')? This is because the list is
-being sorted in ASCII order, which sorts numbers like you would letters (i.e.
-'a', 'at', 'b'). It would be better if you had a sorting algorithm that
-recognized numbers as numbers and treated them like numbers, not letters.
+Notice that it has the order ('1', '10', '2') - this is because the list is
+being sorted in lexicographical order, which sorts numbers like you would
+letters (i.e. 'a', 'at', 'b'). It would be better if you had a sorting
+algorithm that recognized numbers as numbers and treated them like numbers,
+not letters.
-This is where ``natsort`` comes it: it provides a key that helps sorts lists
+This is where ``natsort`` comes in: it provides a key that helps sort lists
"naturally". It provides support for ints and floats (including negatives and
-exponental notation) or you can turn this off to support sort version numbers.
+exponential notation) that you can turn off to support sorting version numbers.
Synopsis
--------
@@ -39,22 +39,23 @@ Synopsis
Using ``natsort`` is simple::
>>> from natsort import natsorted
- >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6']
+ >>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
>>> natsorted(a)
- ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10']
+ ['a1', 'a2', 'a4', 'a9', 'a10']
-``natsort`` identifies the numbers and sorts them separately from letters.
+``natsort`` identifies the numbers and sorts them separately from strings.
-You can also mix and match ``int``, ``float``, ``str``, and ``unicode`` types
+You can also mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
when you sort::
- >>> a = ['4.5', 6, 2.3, u'5']
- >>> sorted(a)
- [2.3, 6, '4.5', u'5']
+ >>> a = ['4.5', 6, 2.3, '5']
>>> natsorted(a)
- [2.3, '4.5', u'5', 6]
+ [2.3, '4.5', '5', 6]
+ >>> # On Python 2, sorted(a) would return [2.3, 6, '4.5', '5']
+ >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError
+
-The sorting algorithms
+The Sorting Algorithms
''''''''''''''''''''''
Sometimes you want to sort by floats, sometimes by ints, and sometimes simply
@@ -76,9 +77,6 @@ signs and decimal points when determining a number::
>>> natsorted(a) # Float is the default behavior
['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
-To achieve this, selecting this number type causes ``natsort`` to parse
-the string 'b-40.2' into ['b', -40.2].
-
Sort by ints
++++++++++++
@@ -95,16 +93,14 @@ to sort by ints, not floats::
>>> natsorted(a, number_type=int)
['ver1.9.9a', 'ver1.9.9b', 'ver1.10.1', 'ver1.11', 'ver1.11.4']
-To achieve this, selecting this number type causes ``natsort`` to parse
-the string 'b-40.2' into ['b', -40, '.', 2].
-
-Sort by digits
-++++++++++++++
+Sort by digits (best for version numbers)
++++++++++++++++++++++++++++++++++++++++++
The only difference between sorting by ints and sorting by digits is that
sorting by ints may take into account a negative sign, and sorting by digits
will not. This may be an issue if you used a '-' as your separator before the
-version numbers::
+version numbers. Essentially this is a shortcut for a number type of ``int``
+and the ``signed`` option of ``False``::
>>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1']
>>> natsorted(a, number_type=int)
@@ -112,13 +108,10 @@ version numbers::
>>> natsorted(a, number_type=None)
['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b']
-To achieve this, selecting this number type causes ``natsort`` to parse
-the string 'b-40.2' into ['b-', 40, '.', 2].
-
Using a sorting key
'''''''''''''''''''
-Like the builtin ``sorted`` function, ``natsorted`` can accept a key so that
+Like the built-in ``sorted`` function, ``natsorted`` can accept a key so that
you can sort based on a particular item of a list or by an attribute of a class::
>>> from operator import attrgetter, itemgetter
@@ -143,7 +136,7 @@ The ``natsort`` package provides three functions: ``natsort_key``,
natsorted
'''''''''
-``natsort.natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``)
+``natsort.natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``)
sequence (*iterable*)
The sequence to sort.
@@ -152,9 +145,21 @@ natsorted
A key used to determine how to sort each element of the sequence.
number_type (``None``, ``float``, ``int``)
- The types of number to sort on: ``float`` searches for floating point numbers,
+ The types of number to sort by: ``float`` searches for floating point numbers,
``int`` searches for integers, and ``None`` searches for digits (like integers
- but does not take into account negative sign).
+ but does not take into account negative sign). ``None`` is a shortcut for
+ ``number_type = int`` and ``signed = False``.
+
+ signed (``True``, ``False``)
+ By default a '+' or '-' before a number is taken to be the sign of the number.
+ If ``signed`` is ``False``, any '+' or '-' will not be considered to be part
+ of the number, but as part of the string.
+
+ exp (``True``, ``False``)
+ This option only applies to ``number_type = float``. If ``exp = True``, a string
+ like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part
+ is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is
+ interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``.
returns
The sorted sequence.
@@ -169,7 +174,7 @@ Use ``natsorted`` just like the builtin ``sorted``::
natsort_key
'''''''''''
-``natsort.natsort_key`` (value, *number_type* = ``float``)
+``natsort.natsort_key`` (value, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``)
value
The value used by the sorting algorithm
@@ -177,7 +182,19 @@ natsort_key
number_type (``None``, ``float``, ``int``)
The types of number to sort on: ``float`` searches for floating point numbers,
``int`` searches for integers, and ``None`` searches for digits (like integers
- but does not take into account negative sign).
+ but does not take into account negative sign). ``None`` is a shortcut for
+ ``number_type = int`` and ``signed = False``.
+
+ signed (``True``, ``False``)
+ By default a '+' or '-' before a number is taken to be the sign of the number.
+ If ``signed`` is ``False``, any '+' or '-' will not be considered to be part
+ of the number, but as part of the string.
+
+ exp (``True``, ``False``)
+ This option only applies to ``number_type = float``. If ``exp = True``, a string
+ like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part
+ is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is
+ interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``.
returns
The modified value with numbers extracted.
@@ -204,7 +221,7 @@ attribute or item of each element of the sequence, the easiest way is to make a
index_natsorted
'''''''''''''''
-``natsort.index_natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``)
+``natsort.index_natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``)
sequence (*iterable*)
The sequence to sort.
@@ -215,12 +232,24 @@ index_natsorted
number_type (``None``, ``float``, ``int``)
The types of number to sort on: ``float`` searches for floating point numbers,
``int`` searches for integers, and ``None`` searches for digits (like integers
- but does not take into account negative sign).
+ but does not take into account negative sign). ``None`` is a shortcut for
+ ``number_type = int`` and ``signed = False``.
+
+ signed (``True``, ``False``)
+ By default a '+' or '-' before a number is taken to be the sign of the number.
+ If ``signed`` is ``False``, any '+' or '-' will not be considered to be part
+ of the number, but as part of the string.
+
+ exp (``True``, ``False``)
+ This option only applies to ``number_type = float``. If ``exp = True``, a string
+ like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part
+ is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is
+ interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``.
returns
The ordered indexes of the sequence.
-Use ``index_natsorted`` if you want to sort multiple lists by the sorting of
+Use ``index_natsorted`` if you want to sort multiple lists by the sort order of
one list::
>>> from natsort import index_natsorted
@@ -247,9 +276,9 @@ large sets of output files named after the parameter used::
mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out
(Obviously, in reality there would be more files, but you get the idea.) Notice
-that the shell sorts in ASCII order. This is the behavior of programs like
-``find`` as well as ``ls``. The problem is, when passing these files to an
-analysis program causes them not to appear in numerical order, which can lead
+that the shell sorts in lexicographical order. This is the behavior of programs like
+``find`` as well as ``ls``. The problem is that passing these files to an
+analysis program causes them not to appear in numerical order, which can lead
to bad analysis. To remedy this, use ``natsort``::
# This won't get you what you want
@@ -275,11 +304,19 @@ If needed, you can exclude specific numbers::
mode943.54.out
mode1243.34.out
-For other options, use ``natsort --help``.
+For other options, use ``natsort --help``. In general, the other options mirror
+the ``natsorted`` API.
+
+It is also helpful to note that ``natsort`` accepts pipes.
-It is also helpful to note that ``natsort`` accepts pipes, and also will sort
-each directory in a PATH independently of each other. Files in the current
-directory are listed before files in subdirectories.
+Note to users of the ``natsort`` shell script from < v. 3.1.0
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The ``natsort`` shell script options and implementation for version 3.1.0 have
+changed slightly. Options relating to interpreting input as file or directory
+paths have been removed, and internally the input is no longer treated as file
+paths. In most situations, this should not give different results, but in
+some unique cases it may. Feel free to contact me if this ruins your work flow.
Author
------
@@ -289,6 +326,26 @@ Seth M. Morton
History
-------
+01-20-2014 v. 3.1.0
+'''''''''''''''''''
+
+ - Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting
+ - Entire codebase now works for both Python 2 and Python 3 without needing to run
+ ``2to3``.
+ - Updated all doctests.
+ - Further simplified the ``natsort`` base code by removing unneeded functions.
+ - Simplified documentation where possible.
+ - Improved the shell script code
+
+ - Made the documentation less "path"-centric to make it clear it is not just
+ for sorting file paths.
+ - Removed the filesystem-based options because these can be achieved better
+ through a pipeline.
+ - Added doctests.
+ - Added new options that correspond to ``signed`` and ``exp``.
+ - The user can now specify multiple numbers to exclude or multiple ranges
+ to filter by.
+
10-01-2013 v. 3.0.2
'''''''''''''''''''
diff --git a/natsort/__init__.py b/natsort/__init__.py
index 9add2a8..47e2ceb 100644
--- a/natsort/__init__.py
+++ b/natsort/__init__.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
from .natsort import natsort_key, natsorted, index_natsorted
from ._version import __version__
diff --git a/natsort/__main__.py b/natsort/__main__.py
index 7547047..e3242c6 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -1,15 +1,70 @@
-from __future__ import print_function, division
+# -*- coding: utf-8 -*-
+from __future__ import print_function, division, unicode_literals
import sys
import os
import re
-from natsort import natsort_key, natsorted
-from _version import __version__
+from .natsort import natsort_key, natsorted, int_nosign_re, int_sign_re
+from .natsort import float_sign_exp_re, float_nosign_exp_re
+from .natsort import float_sign_noexp_re, float_nosign_noexp_re
+from .natsort import regex_and_num_function_chooser
+from ._version import __version__
+from .py23compat import py23_str
+
def main():
"""\
- Performs a natural sort on pathnames given on the command-line.
+ Performs a natural sort on entries given on the command-line.
A natural sort sorts numerically then alphabetically, and will sort
- by numbers in the middle of a pathname.
+ by numbers in the middle of an entry.
+
+ >>> import sys
+ >>> sys.argv[1:] = ['num-2', 'num-6', 'num-1']
+ >>> main()
+ num-6
+ num-2
+ num-1
+ >>> sys.argv[1:] = ['-r', 'num-2', 'num-6', 'num-1']
+ >>> main()
+ num-1
+ num-2
+ num-6
+ >>> sys.argv[1:] = ['--nosign', 'num-2', 'num-6', 'num-1']
+ >>> main()
+ num-1
+ num-2
+ num-6
+ >>> sys.argv[1:] = ['-t', 'digit', 'num-2', 'num-6', 'num-1']
+ >>> main()
+ num-1
+ num-2
+ num-6
+ >>> sys.argv[1:] = ['-t', 'int', '-e', '-1', '-e', '6',
+ ... 'num-2', 'num-6', 'num-1']
+ >>> main()
+ num-6
+ num-2
+ >>> sys.argv[1:] = ['-t', 'digit', '-e', '1', '-e', '6',
+ ... 'num-2', 'num-6', 'num-1']
+ >>> main()
+ num-2
+ >>> sys.argv[1:] = ['a1.0e3', 'a5.3', 'a453.6']
+ >>> main()
+ a5.3
+ a453.6
+ a1.0e3
+ >>> sys.argv[1:] = ['-f', '1', '10', 'a1.0e3', 'a5.3', 'a453.6']
+ >>> main()
+ a5.3
+ >>> sys.argv[1:] = ['-f', '1', '10', '-f', '400', '500', 'a1.0e3', 'a5.3', 'a453.6']
+ >>> main()
+ a5.3
+ a453.6
+ >>> sys.argv[1:] = ['--noexp', 'a1.0e3', 'a5.3', 'a453.6']
+ >>> main()
+ a1.0e3
+ a5.3
+ a453.6
+
"""
from argparse import ArgumentParser, RawDescriptionHelpFormatter
@@ -18,111 +73,211 @@ def main():
formatter_class=RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
- parser.add_argument('-F', '--onlyfiles', help='Only files that '
- 'are readable and non-empty are read in. '
- 'This will exculude folders from being read in.',
- action='store_true', default=False)
parser.add_argument('-f', '--filter', help='Used for '
- 'filtering out only the files that have a number '
+ 'keeping only the entries that have a number '
'falling in the given range.', nargs=2, type=float,
- metavar=('LOW', 'HIGH'))
- parser.add_argument('-e', '--exclude', help='Used to exclude a specific '
- 'number.')
+ metavar=('LOW', 'HIGH'), action='append')
+ parser.add_argument('-e', '--exclude', type=float, action='append',
+ help='Used to exclude an entry '
+ 'that contains a specific number.')
parser.add_argument('-r', '--reverse', help='Returns in reversed order.',
action='store_true', default=False)
- parser.add_argument('-R', '--recursive', help='Recursively decend the '
- 'directory tree.', action='store_true', default=False)
parser.add_argument('-t', '--number_type', choices=('digit', 'int', 'float'),
default='float', help='Choose the type of number '
- 'to search for.')
- parser.add_argument('paths', help='The paths to sort.', nargs='*',
+ 'to search for. "float" will search for floating-point '
+ 'numbers. "int" will only search for integers. '
+ '"digit" is a shortcut for "int" with --nosign.')
+ parser.add_argument('--nosign', default=True, action='store_false',
+ dest='signed', help='Do not consider "+" or "-" as part '
+ 'of a number, i.e. do not take sign into consideration.')
+ parser.add_argument('--noexp', default=True, action='store_false',
+ dest='exp', help='Do not consider an exponential as part '
+ 'of a number, i.e. 1e4, would be considered as 1, "e", '
+ 'and 4, not as 10000. This only effects the '
+ '--number_type=float.')
+ parser.add_argument('entries', help='The entries to sort. Taken from stdin '
+ 'if nothing is given on the command line.', nargs='*',
default=sys.stdin)
args = parser.parse_args()
# Make sure the filter range is given properly. Does nothing if no filter
- filterdata = check_filter(args.filter)
+ args.filter = check_filter(args.filter)
- # Recursively collect paths, if necessary.
- if args.recursive:
- jn = os.path.join
- paths = [jn(p, fn) for p, d, f in os.walk(os.curdir) for fn in f]
- # Collect paths either from a pipe or the command-line arguments.
- else:
- paths = [f.strip() for f in args.paths]
-
- # Split into directory path and filenames
- paths = split_paths(paths, args.onlyfiles)
+ # Remove trailing whitespace from all the entries
+ entries = [e.strip() for e in args.entries]
# Sort by directory then by file within directory and print.
- sort_and_print_paths(paths, filterdata, args.exclude, args.reverse, args.number_type)
+ sort_and_print_entries(entries, args)
def range_check(low, high):
"""\
Verifies that that given range has a low lower than the high.
+
+ >>> range_check(10, 11)
+ (10.0, 11.0)
+ >>> range_check(6.4, 30)
+ (6.4, 30.0)
+ >>> try:
+ ... range_check(7, 2)
+ ... except ValueError as e:
+ ... print(e)
+ low >= high
+
"""
low, high = float(low), float(high)
if low >= high:
- raise ValueError ('low >= high')
+ raise ValueError('low >= high')
else:
return low, high
+
def check_filter(filt):
- """Check that the low value of the filter is lower than the high."""
+ """\
+ Check that the low value of the filter is lower than the high.
+ If there is to be no filter, return 'None'.
+
+ >>> check_filter(())
+ >>> check_filter(False)
+ >>> check_filter(None)
+ >>> check_filter([(6, 7)])
+ [(6.0, 7.0)]
+ >>> check_filter([(6, 7), (2, 8)])
+ [(6.0, 7.0), (2.0, 8.0)]
+ >>> try:
+ ... check_filter([(7, 2)])
+ ... except ValueError as e:
+ ... print(e)
+ Error in --filter: low >= high
+
+ """
# Quick return if no filter.
if not filt:
return None
try:
- low, high = range_check(filt[0], filt[1])
+ return [range_check(f[0], f[1]) for f in filt]
except ValueError as a:
- raise ValueError ('Error in --filter: '+str(a))
- return low, high, re.compile(r'[+-]?\d+\.?\d*')
+ raise ValueError('Error in --filter: '+py23_str(a))
+
+
+def keep_entry_range(entry, lows, highs, converter, regex):
+ """\
+ Boolean function to determine if an entry should be kept out
+ based on if any numbers are in a given range.
+
+ >>> import re
+ >>> regex = re.compile(r'\d+')
+ >>> keep_entry_range('a56b23c89', [0], [100], int, regex)
+ True
+ >>> keep_entry_range('a56b23c89', [1, 88], [20, 90], int, regex)
+ True
+ >>> keep_entry_range('a56b23c89', [1], [20], int, regex)
+ False
-def split_paths(paths, a):
- """For each file, separate into directory and filename. Store all files
- in a dir into a dict where the dir is the key and filename is the value.
"""
- dirs = {}
- for path in paths:
- if a:
- try:
- with open(path) as fl:
- pass
- except IOError:
- continue
- dir, file = os.path.split(path)
- try:
- dirs[dir].append(file)
- except KeyError:
- dirs[dir] = []
- dirs[dir].append(file)
- return dirs
-
-def sort_and_print_paths(dirs, filterdata, exclude, reverse, number_type):
- """Sort the paths by directoy then by file within that directory.
- Print off the results.
+ return any(low <= converter(num) <= high
+ for num in regex.findall(entry)
+ for low, high in zip(lows, highs))
+
+
+def exclude_entry(entry, values, converter, regex):
+ """\
+ Boolean function to determine if an entry should be kept out
+ based on if it contains a specific number.
+
+ >>> import re
+ >>> regex = re.compile(r'\d+')
+ >>> exclude_entry('a56b23c89', [100], int, regex)
+ True
+ >>> exclude_entry('a56b23c89', [23], int, regex)
+ False
+
"""
- number_type = {'digit': None, 'int': int, 'float': float}[number_type]
- for dir in natsorted(dirs.keys(), number_type=number_type):
- dirs[dir].sort(key=lambda x: natsort_key(x, number_type=number_type))
- if reverse:
- dirs[dir] = reversed(dirs[dir])
- for file in dirs[dir]:
- if filterdata is not None:
- # Find all the numbers in the filename.
- nums = filterdata[2].findall(file)
- # If any numbers are between the range, print.
- # Otherwise, move to next file.
- for num in nums:
- if filterdata[0] <= float(num) <= filterdata[1]: break
- else:
- continue
- if exclude and exclude in file: continue
- print(os.path.join(dir, file))
+ return not any(converter(num) in values for num in regex.findall(entry))
+
+
+def sort_and_print_entries(entries, args):
+ """\
+ Sort the entries, applying the filters first if necessary.
+
+ >>> class Args:
+ ... def __init__(self, filter, exclude, reverse):
+ ... self.filter = filter
+ ... self.exclude = exclude
+ ... self.reverse = reverse
+ ... self.number_type = 'float'
+ ... self.signed = True
+ ... self.exp = True
+ >>> entries = ['tmp/a57/path2',
+ ... 'tmp/a23/path1',
+ ... 'tmp/a1/path1',
+ ... 'tmp/a130/path1',
+ ... 'tmp/a64/path1',
+ ... 'tmp/a64/path2']
+ >>> sort_and_print_entries(entries, Args(None, False, False))
+ tmp/a1/path1
+ tmp/a23/path1
+ tmp/a57/path2
+ tmp/a64/path1
+ tmp/a64/path2
+ tmp/a130/path1
+ >>> sort_and_print_entries(entries, Args([(20, 100)], False, False))
+ tmp/a23/path1
+ tmp/a57/path2
+ tmp/a64/path1
+ tmp/a64/path2
+ >>> sort_and_print_entries(entries, Args(None, [23, 130], False))
+ tmp/a1/path1
+ tmp/a57/path2
+ tmp/a64/path1
+ tmp/a64/path2
+ >>> sort_and_print_entries(entries, Args(None, [2], False))
+ tmp/a1/path1
+ tmp/a23/path1
+ tmp/a64/path1
+ tmp/a130/path1
+ >>> sort_and_print_entries(entries, Args(None, False, True))
+ tmp/a130/path1
+ tmp/a64/path2
+ tmp/a64/path1
+ tmp/a57/path2
+ tmp/a23/path1
+ tmp/a1/path1
+
+ """
+
+ # Extract the proper number type.
+ kwargs = {'number_type': {'digit': None, 'int': int, 'float': float}[args.number_type],
+ 'signed': args.signed,
+ 'exp': args.exp}
+
+ # Pre-remove entries that don't pass the filtering criteria
+ # Make sure we use the same searching algorithm for filtering as for sorting.
+ if args.filter is not None or args.exclude:
+ inp_options = (kwargs['number_type'], args.signed, args.exp)
+ regex, num_function = regex_and_num_function_chooser[inp_options]
+ if args.filter is not None:
+ lows, highs = [f[0] for f in args.filter], [f[1] for f in args.filter]
+ entries = [entry for entry in entries
+ if keep_entry_range(entry, lows, highs, num_function, regex)]
+ if args.exclude:
+ exclude = set(args.exclude)
+ entries = [entry for entry in entries
+ if exclude_entry(entry, exclude, num_function, regex)]
+
+ # Print off the sorted results
+ entries.sort(key=lambda x: natsort_key(x, **kwargs), reverse=args.reverse)
+ for entry in entries:
+ print(entry)
+
if __name__ == '__main__':
try:
main()
except ValueError as a:
- sys.exit(str(a))
+ sys.exit(py23_str(a))
except KeyboardInterrupt:
sys.exit(1)
+ # import doctest
+ # ret = doctest.testmod()
+ # if ret[0] == 0:
+ # print('natsort: All {0[1]} tests successful!'.format(ret))
diff --git a/natsort/_version.py b/natsort/_version.py
index da4039b..d763f0a 100644
--- a/natsort/_version.py
+++ b/natsort/_version.py
@@ -1 +1,3 @@
-__version__ = '3.0.2'
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+__version__ = '3.1.0'
diff --git a/natsort/natsort.py b/natsort/natsort.py
index dfb73b0..ef494c5 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -1,71 +1,110 @@
+# -*- coding: utf-8 -*-
"""
Here are a collection of examples of how this module can be used.
See the README or the natsort homepage for more details.
- >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6']
+ >>> a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
>>> sorted(a)
- ['a1', 'a10', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9']
+ [{u}'a1', {u}'a10', {u}'a2', {u}'a4', {u}'a5', {u}'a6', {u}'a9']
>>> natsorted(a)
- ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10']
+ [{u}'a1', {u}'a2', {u}'a4', {u}'a5', {u}'a6', {u}'a9', {u}'a10']
- >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300']
- >>> sorted(a)
- ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.']
- >>> natsorted(a)
- ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.']
- >>> natsorted(a, number_type=None)
- ['a5.034e1', 'a50', 'a50.4', 'a50.300', 'a51.']
+Here is an example demonstrating how different options sort the same list.
- >>> a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
+ >>> a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
>>> sorted(a)
- ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b']
+ [{u}'a5.034e1', {u}'a50', {u}'a50.300', {u}'a50.31', {u}'a50.4', {u}'a51.']
>>> natsorted(a)
- ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b']
+ [{u}'a50', {u}'a50.300', {u}'a50.31', {u}'a5.034e1', {u}'a50.4', {u}'a51.']
+ >>> natsorted(a, number_type=float, exp=False)
+ [{u}'a5.034e1', {u}'a50', {u}'a50.300', {u}'a50.31', {u}'a50.4', {u}'a51.']
+ >>> natsorted(a, number_type=int)
+ [{u}'a5.034e1', {u}'a50', {u}'a50.4', {u}'a50.31', {u}'a50.300', {u}'a51.']
>>> natsorted(a, number_type=None)
- ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
+ [{u}'a5.034e1', {u}'a50', {u}'a50.4', {u}'a50.31', {u}'a50.300', {u}'a51.']
- >>> a = ['name.1', 'name.101', 'name.01', 'name.200', 'name.21']
+This demonstrates the signed option. It can account for negative and positive signs.
+Turning it off treats the '+' or '-' as part of the string.
+
+ >>> a = ['a-5', 'a7', 'a+2']
>>> sorted(a)
- ['name.01', 'name.1', 'name.101', 'name.200', 'name.21']
- >>> natsorted(a)
- ['name.01', 'name.1', 'name.101', 'name.200', 'name.21']
- >>> natsorted(a, number_type=None)
- ['name.1', 'name.01', 'name.21', 'name.101', 'name.200']
+ [{u}'a+2', {u}'a-5', {u}'a7']
+ >>> natsorted(a) # signed=True is default, -5 comes first on the number line
+ [{u}'a-5', {u}'a+2', {u}'a7']
+ >>> natsorted(a, signed=False) # 'a' comes before 'a+', which is before 'a-'
+ [{u}'a7', {u}'a+2', {u}'a-5']
- >>> a = ['version-2', 'version-20', 'version-4', 'version-1']
+Sorting version numbers is best with 'number_type=None'. That is a shortcut
+for 'number_type=int, signed=False'
+
+ >>> a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
>>> sorted(a)
- ['version-1', 'version-2', 'version-20', 'version-4']
+ [{u}'1.10.1', {u}'1.11', {u}'1.11.4', {u}'1.9.9a', {u}'1.9.9b']
>>> natsorted(a)
- ['version-20', 'version-4', 'version-2', 'version-1']
- >>> natsorted(a, number_type=int)
- ['version-20', 'version-4', 'version-2', 'version-1']
+ [{u}'1.10.1', {u}'1.11', {u}'1.11.4', {u}'1.9.9a', {u}'1.9.9b']
>>> natsorted(a, number_type=None)
- ['version-1', 'version-2', 'version-4', 'version-20']
+ [{u}'1.9.9a', {u}'1.9.9b', {u}'1.10.1', {u}'1.11', {u}'1.11.4']
+
+You can mix types with natsorted. This can get around the new
+'unorderable types' issue with Python 3.
+ >>> import sys
>>> a = [6, 4.5, '7', u'2.5']
- >>> sorted(a)
- [4.5, 6, u'2.5', '7']
+ >>> if sys.version[0] == '3': # Python 3
+ ... try:
+ ... sorted(a)
+ ... except TypeError as e:
+ ... print(e)
+ ... else: # Python 2
+ ... # This will get the doctest to work properly while illustrating the point
+ ... if sorted(a) == [4.5, 6, u'2.5', '7']:
+ ... print('unorderable types: str() < float()')
+ ...
+ unorderable types: str() < float()
>>> natsorted(a)
- [u'2.5', 4.5, 6, '7']
+ [{u}'2.5', 4.5, 6, {u}'7']
"""
+from __future__ import unicode_literals
+from .py23compat import u_format, py23_basestring, py23_range, py23_str, py23_zip
import re
-# The regex that locates floats
-float_re = re.compile(r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)')
-# A basic digit splitter
-digit_re = re.compile(r'(\d+)')
-# Integer regex
-int_re = re.compile(r'([-+]?[0-9]+)')
+import sys
+__doc__ = u_format(__doc__) # Make sure the doctest works for either python2 or python3
+# The regex that locates floats
+float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)')
+float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)')
+float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)')
+float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)')
+# Integer regexes
+int_nosign_re = re.compile(r'(\d+)')
+int_sign_re = re.compile(r'([-+]?\d+)')
+# This dict will help select the correct regex and number conversion function.
+regex_and_num_function_chooser = {
+ (float, True, True) : (float_sign_exp_re, float),
+ (float, True, False) : (float_sign_noexp_re, float),
+ (float, False, True) : (float_nosign_exp_re, float),
+ (float, False, False) : (float_nosign_noexp_re, float),
+ (int, True, True) : (int_sign_re, int),
+ (int, True, False) : (int_sign_re, int),
+ (int, False, True) : (int_nosign_re, int),
+ (int, False, False) : (int_nosign_re, int),
+ (None, True, True) : (int_nosign_re, int),
+ (None, True, False) : (int_nosign_re, int),
+ (None, False, True) : (int_nosign_re, int),
+ (None, False, False) : (int_nosign_re, int),
+}
+
+@u_format
def remove_empty(s):
"""\
Remove empty strings from a list.
- >>> a = ['a', 2, '', 'b']
+ >>> a = ['a', 2, '', 'b', '']
>>> remove_empty(a)
- ['a', 2, 'b']
+ [{u}'a', 2, {u}'b']
"""
while True:
@@ -86,7 +125,7 @@ def _number_finder(s, regex, numconv):
# Now convert the numbers to numbers, and leave strings as strings
s = remove_empty(s)
- for i in xrange(len(s)):
+ for i in py23_range(len(s)):
try:
s[i] = numconv(s[i])
except ValueError:
@@ -95,122 +134,99 @@ def _number_finder(s, regex, numconv):
return s
-def find_floats(s):
- """\
- Locate all the floats in a string, and return a tuple of
- strings and floats.
-
- >>> find_floats('name3.5')
- ['name', 3.5]
- >>> find_floats('a5.034e1')
- ['a', 50.34]
- >>> find_floats('b-40.2')
- ['b', -40.2]
-
- """
- return _number_finder(s, float_re, float)
-
-
-def find_ints(s):
- """\
- Locate all the ints in a string, and return a tuple of
- strings and ints.
-
- >>> find_ints('name3.5')
- ['name', 3, '.', 5]
- >>> find_ints('a5.034e1')
- ['a', 5, '.', 34, 'e', 1]
- >>> find_ints('b-40.2')
- ['b', -40, '.', 2]
-
- """
- return _number_finder(s, int_re, int)
-
-
-def find_digits(s):
+@u_format
+def natsort_key(s, number_type=float, signed=True, exp=True):
     """\
-    Locate all the digits in a string, and return a tuple of
-    strings and ints.
-
-        >>> find_digits('name3.5')
-        ['name', 3, '.', 5]
-        >>> find_digits('a5.034e1')
-        ['a', 5, '.', 34, 'e', 1]
-        >>> find_digits('b-40.2')
-        ['b-', 40, '.', 2]
-
-    """
-    return _number_finder(s, digit_re, int)
-
-
-def natsort_key(s, number_type=float):
-    """\
-    Key to sort strings and numbers naturally, not by ASCII.
+    Key to sort strings and numbers naturally, not lexicographically.
     It also has basic support for version numbers.
     For use in passing to the :py:func:`sorted` builtin or
     :py:meth:`sort` attribute of lists.
+    Use natsort_key just like any other sorting key.
+
+    number_type selects how numbers are recognised: float (the default),
+    int, or None (unsigned integers only).  signed decides whether a
+    leading + or - belongs to the number, and exp whether an exponent
+    such as e1 belongs to a float.  exp has no effect for int, and
+    neither signed nor exp has any effect when number_type is None.
+
+    A string is returned as a tuple of its text and number parts; any
+    non-string input is returned unchanged inside a 1-tuple.  A
+    ValueError naming the offending option is raised when number_type,
+    signed or exp has an unsupported value.
+
         >>> a = ['num3', 'num5', 'num2']
         >>> a.sort(key=natsort_key)
         >>> a
-        ['num2', 'num3', 'num5']
-        >>> class Foo:
-        ...    def __init__(self, bar):
-        ...        self.bar = bar
-        ...    def __repr__(self):
-        ...        return "Foo('{0}')".format(self.bar)
-        >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
-        >>> b.sort(key=lambda x: natsort_key(x.bar))
-        >>> b
-        [Foo('num2'), Foo('num3'), Foo('num5')]
-        >>> from operator import attrgetter
-        >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
-        >>> f = attrgetter('bar')
-        >>> c.sort(key=lambda x: natsort_key(f(x)))
-        >>> c
-        [Foo('num2'), Foo('num3'), Foo('num5')]
+        [{u}'num2', {u}'num3', {u}'num5']
+
+    Below illustrates how the key works, and how the different options affect sorting.
+
+        >>> natsort_key('a-5.034e1')
+        ({u}'a', -50.34)
+        >>> natsort_key('a-5.034e1', number_type=float, signed=True, exp=True)
+        ({u}'a', -50.34)
+        >>> natsort_key('a-5.034e1', number_type=float, signed=True, exp=False)
+        ({u}'a', -5.034, {u}'e', 1.0)
+        >>> natsort_key('a-5.034e1', number_type=float, signed=False, exp=True)
+        ({u}'a-', 50.34)
+        >>> natsort_key('a-5.034e1', number_type=float, signed=False, exp=False)
+        ({u}'a-', 5.034, {u}'e', 1.0)
+        >>> natsort_key('a-5.034e1', number_type=int)
+        ({u}'a', -5, {u}'.', 34, {u}'e', 1)
+        >>> natsort_key('a-5.034e1', number_type=int, signed=True)
+        ({u}'a', -5, {u}'.', 34, {u}'e', 1)
+        >>> natsort_key('a-5.034e1', number_type=int, signed=False)
+        ({u}'a-', 5, {u}'.', 34, {u}'e', 1)
+        >>> natsort_key('a-5.034e1', number_type=int, exp=False)
+        ({u}'a', -5, {u}'.', 34, {u}'e', 1)
+        >>> natsort_key('a-5.034e1', number_type=None)
+        ({u}'a-', 5, {u}'.', 34, {u}'e', 1)
+
+    This is a demonstration of how number_type=None works.
+
+        >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=None, signed=False)
+        True
+        >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=None, exp=False)
+        True
+        >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False)
+        True
     """
     # If we are dealing with non-strings, return now
-    if not isinstance(s, basestring):
+    if not isinstance(s, py23_basestring):
         return (s,)
     # Convert to the proper tuple and return
-    find_method = {float: find_floats, int: find_ints, None: find_digits}
+    inp_options = (number_type, signed, exp)
     try:
-        return tuple(find_method[number_type](s))
+        # The table lookup is the statement that raises KeyError for an
+        # unsupported option, so it must sit inside the try block.
+        regex, num_function = regex_and_num_function_chooser[inp_options]
     except KeyError:
-        raise ValueError("natsort_key: 'search' parameter {0} invalid".format(str(number_type)))
-
-
-def natsorted(seq, key=lambda x: x, number_type=float):
+        # Report errors properly: name the first option that is invalid
+        if number_type not in (float, int, None):
+            raise ValueError("natsort_key: 'number_type' "
+                             "parameter '{0}' invalid".format(py23_str(number_type)))
+        elif signed not in (True, False):
+            raise ValueError("natsort_key: 'signed' "
+                             "parameter '{0}' invalid".format(py23_str(signed)))
+        elif exp not in (True, False):
+            raise ValueError("natsort_key: 'exp' "
+                             "parameter '{0}' invalid".format(py23_str(exp)))
+        # Not explained by the checks above: let the KeyError propagate
+        raise
+    else:
+        return tuple(_number_finder(s, regex, num_function))
+
+
+# Convenience wrapper around the sorted() builtin: each element is passed
+# through key, and the result is turned into a sort key by natsort_key
+# with the given number_type, signed and exp options.
+@u_format
+def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True):
"""\
Sorts a sequence naturally (alphabetically and numerically),
- not by ASCII.
+ not lexicographically.
>>> a = ['num3', 'num5', 'num2']
>>> natsorted(a)
- ['num2', 'num3', 'num5']
- >>> class Foo:
- ... def __init__(self, bar):
- ... self.bar = bar
- ... def __repr__(self):
- ... return "Foo('{0}')".format(self.bar)
- >>> b = [Foo('num3'), Foo('num5'), Foo('num2')]
- >>> from operator import attrgetter
- >>> natsorted(b, key=attrgetter('bar'))
- [Foo('num2'), Foo('num3'), Foo('num5')]
-
- :argument seq:
- The sequence to be sorted.
- :type seq: sequence-like
- :rtype: list
+ [{u}'num2', {u}'num3', {u}'num5']
+ >>> b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
+ >>> from operator import itemgetter
+ >>> natsorted(b, key=itemgetter(1))
+ [({u}'c', {u}'num2'), ({u}'a', {u}'num3'), ({u}'b', {u}'num5')]
+
"""
- return sorted(seq, key=lambda x: natsort_key(key(x), number_type=number_type))
+ return sorted(seq, key=lambda x: natsort_key(key(x),
+ number_type=number_type,
+ signed=signed, exp=exp))
-def index_natsorted(seq, key=lambda x: x, number_type=float):
+@u_format
+def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True):
"""\
Sorts a sequence naturally, but returns a list of the sorted
indices and not the sorted list.
@@ -222,29 +238,22 @@ def index_natsorted(seq, key=lambda x: x, number_type=float):
[2, 0, 1]
>>> # Sort both lists by the sort order of a
>>> [a[i] for i in index]
- ['num2', 'num3', 'num5']
+ [{u}'num2', {u}'num3', {u}'num5']
>>> [b[i] for i in index]
- ['baz', 'foo', 'bar']
- >>> class Foo:
- ... def __init__(self, bar):
- ... self.bar = bar
- ... def __repr__(self):
- ... return "Foo('{0}')".format(self.bar)
- >>> c = [Foo('num3'), Foo('num5'), Foo('num2')]
- >>> from operator import attrgetter
- >>> index_natsorted(c, key=attrgetter('bar'))
+ [{u}'baz', {u}'foo', {u}'bar']
+ >>> c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
+ >>> from operator import itemgetter
+ >>> index_natsorted(c, key=itemgetter(1))
[2, 0, 1]
- :argument seq:
- The sequence that you want the sorted index of.
- :type seq: sequence-like
- :rtype: list
"""
from operator import itemgetter
item1 = itemgetter(1)
# Pair the index and sequence together, then sort by
- index_seq_pair = [[x, key(y)] for x, y in zip(xrange(len(seq)), seq)]
- index_seq_pair.sort(key=lambda x: natsort_key(item1(x), number_type=number_type))
+ index_seq_pair = [[x, key(y)] for x, y in py23_zip(py23_range(len(seq)), seq)]
+ index_seq_pair.sort(key=lambda x: natsort_key(item1(x),
+ number_type=number_type,
+ signed=signed, exp=exp))
return [x[0] for x in index_seq_pair]
@@ -252,6 +261,7 @@ def test():
from doctest import DocTestSuite
return DocTestSuite()
+
# Test this module
if __name__ == '__main__':
import doctest
diff --git a/natsort/py23compat.py b/natsort/py23compat.py
new file mode 100644
index 0000000..f5af384
--- /dev/null
+++ b/natsort/py23compat.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import functools
+import sys
+
+# These functions are used to make the doctests compatible between
+# python2 and python3. This code is pretty much lifted from the iPython
+# project's py3compat.py file. Credit to the iPython devs.
+
+# Decide the major version once, from sys.version_info (the structured
+# version tuple) rather than from the first character of the
+# human-readable sys.version string.
+_PY3 = sys.version_info[0] >= 3
+
+# Assume all strings are Unicode in Python 2
+py23_str = str if _PY3 else unicode
+
+# Use the range iterator always
+py23_range = range if _PY3 else xrange
+
+# Uniform base string type
+py23_basestring = str if _PY3 else basestring
+
+# zip as an iterator
+if _PY3:
+    py23_zip = zip
+else:
+    import itertools
+    py23_zip = itertools.izip
+
+
+# This function is intended to decorate other functions that will modify
+# either a string directly, or a function's docstring.
+def _modify_str_or_docstring(str_change_func):
+    """Lift a str -> str function so it also works as a decorator.
+
+    The returned wrapper accepts either a string, which is passed
+    through str_change_func and returned, or any other object, whose
+    __doc__ is rewritten in place with str_change_func before the
+    object itself is returned.
+    """
+    @functools.wraps(str_change_func)
+    def wrapper(func_or_str):
+        if func_or_str is None:
+            # e.g. a module __doc__ stripped by "python -OO":
+            # there is no text to modify
+            return None
+        if isinstance(func_or_str, py23_basestring):
+            return str_change_func(func_or_str)
+
+        func = func_or_str
+        # A docstring is None when it is absent or was stripped by
+        # "python -OO"; leave it alone instead of failing on None.
+        if func.__doc__ is not None:
+            func.__doc__ = str_change_func(func.__doc__)
+        return func
+    return wrapper
+
+
+# Properly modify a docstring to either have the unicode literal or not.
+# The prefix is the only thing that differs between Python 2 and 3, so it
+# is chosen once here instead of defining the function twice.
+_U_PREFIX = '' if sys.version_info[0] >= 3 else 'u'
+
+
+# Abstract u'abc' syntax:
+@_modify_str_or_docstring
+def u_format(s):
+    """"{u}'abc'" --> "'abc'" (Python 3) or "u'abc'" (Python 2)
+
+    Accepts a string or a function, so it can be used as a decorator.
+    The text is expanded with str.format, so any other literal brace
+    in it has to be doubled."""
+    return s.format(u=_U_PREFIX)
+
diff --git a/setup.py b/setup.py
index 876829b..1b68410 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,6 @@ setup(name='natsort',
packages=find_packages(),
entry_points={'console_scripts':['natsort = natsort.__main__:main']},
test_suite='natsort.natsort.test',
- use_2to3=True,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
classifiers=(