diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2014-01-20 23:22:44 -0800 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2014-01-20 23:22:44 -0800 |
commit | 626ceed146a658c7b588b922cbca8fc266555274 (patch) | |
tree | 9d287aef72e98b31f0acd24095f02a11412cc530 | |
parent | 7a0326722a45953ec886031367384874cfabcea4 (diff) | |
parent | 8dd3579c3ed480a397cc9c39dfcf368302a30d36 (diff) | |
download | natsort-626ceed146a658c7b588b922cbca8fc266555274.tar.gz |
Merge branch 'release/3.1.0' 3.1.0
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | MANIFEST.in | 1 | ||||
-rw-r--r-- | README.rst | 161 | ||||
-rw-r--r-- | natsort/__init__.py | 2 | ||||
-rw-r--r-- | natsort/__main__.py | 303 | ||||
-rw-r--r-- | natsort/_version.py | 4 | ||||
-rw-r--r-- | natsort/natsort.py | 300 | ||||
-rw-r--r-- | natsort/py23compat.py | 65 | ||||
-rw-r--r-- | setup.py | 1 |
9 files changed, 565 insertions, 274 deletions
@@ -1,4 +1,4 @@ -Copyright (c) 2012 Seth M. Morton +Copyright (c) 2012-2014 Seth M. Morton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/MANIFEST.in b/MANIFEST.in index 63f2e17..06ccbf8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,5 @@ include natsort/natsort.py include natsort/_version.py include natsort/__main__.py include natsort/__init__.py +include natsort/py23compat.py include setup.py @@ -2,36 +2,36 @@ natsort ======= Natural sorting for python. ``natsort`` requires python version 2.6 or greater -(this includes python 3.x). To run version 2.6, the argparse module is -required. +(this includes python 3.x). To run version 2.6, 3.1, or 3.2 the +`argparse <https://pypi.python.org/pypi/argparse>`_ module is required. -``natsort`` comes with a shell script that is desecribed below. You can +``natsort`` comes with a shell script that is described below. You can also execute ``natsort`` from the command line with ``python -m natsort``. There exists another natural sorting package for python called -`naturalsort <https://pypi.python.org/pypi/naturalsort>`_. This package -does not take into account floats and negatives (which is the default behavior -of ``natsort``) and so may be preferred if you wish to only sort version numbers. +`naturalsort <https://pypi.python.org/pypi/naturalsort>`_. You may prefer +this package if you wish to only sort version numbers. 
Problem Statement ----------------- When you try to sort a list of strings that contain numbers, the normal python -sort algorithm sorts by ASCII, so you might not get the results that you +sort algorithm sorts lexicographically, so you might not get the results that you expect:: - >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6'] + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] >>> sorted(a) - ['a1', 'a10', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9'] + ['a1', 'a10', 'a2', 'a4', 'a9'] -Notice that it has the order ('1', '10', '2')? This is because the list is -being sorted in ASCII order, which sorts numbers like you would letters (i.e. -'a', 'at', 'b'). It would be better if you had a sorting algorithm that -recognized numbers as numbers and treated them like numbers, not letters. +Notice that it has the order ('1', '10', '2') - this is because the list is +being sorted in lexicographical order, which sorts numbers like you would +letters (i.e. 'a', 'at', 'b'). It would be better if you had a sorting +algorithm that recognized numbers as numbers and treated them like numbers, +not letters. -This is where ``natsort`` comes it: it provides a key that helps sorts lists +This is where ``natsort`` comes in: it provides a key that helps sort lists "naturally". It provides support for ints and floats (including negatives and -exponental notation) or you can turn this off to support sort version numbers. +exponential notation) that you can turn off to support sorting version numbers. Synopsis -------- @@ -39,22 +39,23 @@ Synopsis Using ``natsort`` is simple:: >>> from natsort import natsorted - >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6'] + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] >>> natsorted(a) - ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10'] + ['a1', 'a2', 'a4', 'a9', 'a10'] -``natsort`` identifies the numbers and sorts them separately from letters. 
+``natsort`` identifies the numbers and sorts them separately from strings. -You can also mix and match ``int``, ``float``, ``str``, and ``unicode`` types +You can also mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort:: - >>> a = ['4.5', 6, 2.3, u'5'] - >>> sorted(a) - [2.3, 6, '4.5', u'5'] + >>> a = ['4.5', 6, 2.3, '5'] >>> natsorted(a) - [2.3, '4.5', u'5', 6] + [2.3, '4.5', '5', 6] + >>> # On Python 2, sorted(a) would return [2.3, 6, '4.5', '5'] + >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError + -The sorting algorithms +The Sorting Algorithms '''''''''''''''''''''' Sometimes you want to sort by floats, sometimes by ints, and sometimes simply @@ -76,9 +77,6 @@ signs and decimal points when determining a number:: >>> natsorted(a) # Float is the default behavior ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] -To achieve this, selecting this number type causes ``natsort`` to parse -the string 'b-40.2' into ['b', -40.2]. - Sort by ints ++++++++++++ @@ -95,16 +93,14 @@ to sort by ints, not floats:: >>> natsorted(a, number_type=int) ['ver1.9.9a', 'ver1.9.9b', 'ver1.10.1', 'ver1.11', 'ver1.11.4'] -To achieve this, selecting this number type causes ``natsort`` to parse -the string 'b-40.2' into ['b', -40, '.', 2]. - -Sort by digits -++++++++++++++ +Sort by digits (best for version numbers) ++++++++++++++++++++++++++++++++++++++++++ The only difference between sorting by ints and sorting by digits is that sorting by ints may take into account a negative sign, and sorting by digits will not. This may be an issue if you used a '-' as your separator before the -version numbers:: +version numbers. 
Essentially this is a shortcut for a number type of ``int`` +and the ``signed`` option of ``False``:: >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] >>> natsorted(a, number_type=int) @@ -112,13 +108,10 @@ version numbers:: >>> natsorted(a, number_type=None) ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] -To achieve this, selecting this number type causes ``natsort`` to parse -the string 'b-40.2' into ['b-', 40, '.', 2]. - Using a sorting key ''''''''''''''''''' -Like the builtin ``sorted`` function, ``natsorted`` can accept a key so that +Like the built-in ``sorted`` function, ``natsorted`` can accept a key so that you can sort based on a particular item of a list or by an attribute of a class:: >>> from operator import attrgetter, itemgetter @@ -143,7 +136,7 @@ The ``natsort`` package provides three functions: ``natsort_key``, natsorted ''''''''' -``natsort.natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``) +``natsort.natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``) sequence (*iterable*) The sequence to sort. @@ -152,9 +145,21 @@ natsorted A key used to determine how to sort each element of the sequence. number_type (``None``, ``float``, ``int``) - The types of number to sort on: ``float`` searches for floating point numbers, + The types of number to sort by: ``float`` searches for floating point numbers, ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). + but does not take into account negative sign). ``None`` is a shortcut for + ``number_type = int`` and ``signed = False``. + + signed (``True``, ``False``) + By default a '+' or '-' before a number is taken to be the sign of the number. + If ``signed`` is ``False``, any '+' or '-' will not be considered to be part + of the number, but as part of the string. 
+ + exp (``True``, ``False``) + This option only applies to ``number_type = float``. If ``exp = True``, a string + like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part + is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is + interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. returns The sorted sequence. @@ -169,7 +174,7 @@ Use ``natsorted`` just like the builtin ``sorted``:: natsort_key ''''''''''' -``natsort.natsort_key`` (value, *number_type* = ``float``) +``natsort.natsort_key`` (value, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``) value The value used by the sorting algorithm @@ -177,7 +182,19 @@ natsort_key number_type (``None``, ``float``, ``int``) The types of number to sort on: ``float`` searches for floating point numbers, ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). + but does not take into account negative sign). ``None`` is a shortcut for + ``number_type = int`` and ``signed = False``. + + signed (``True``, ``False``) + By default a '+' or '-' before a number is taken to be the sign of the number. + If ``signed`` is ``False``, any '+' or '-' will not be considered to be part + of the number, but as part of the string. + + exp (``True``, ``False``) + This option only applies to ``number_type = float``. If ``exp = True``, a string + like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part + is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is + interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. returns The modified value with numbers extracted. 
@@ -204,7 +221,7 @@ attribute or item of each element of the sequence, the easiest way is to make a index_natsorted ''''''''''''''' -``natsort.index_natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``) +``natsort.index_natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``) sequence (*iterable*) The sequence to sort. @@ -215,12 +232,24 @@ index_natsorted number_type (``None``, ``float``, ``int``) The types of number to sort on: ``float`` searches for floating point numbers, ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). + but does not take into account negative sign). ``None`` is a shortcut for + ``number_type = int`` and ``signed = False``. + + signed (``True``, ``False``) + By default a '+' or '-' before a number is taken to be the sign of the number. + If ``signed`` is ``False``, any '+' or '-' will not be considered to be part + of the number, but as part of the string. + + exp (``True``, ``False``) + This option only applies to ``number_type = float``. If ``exp = True``, a string + like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part + is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is + interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. returns The ordered indexes of the sequence. -Use ``index_natsorted`` if you want to sort multiple lists by the sorting of +Use ``index_natsorted`` if you want to sort multiple lists by the sort order of one list:: >>> from natsort import index_natsorted @@ -247,9 +276,9 @@ large sets of output files named after the parameter used:: mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out (Obviously, in reality there would be more files, but you get the idea.) Notice -that the shell sorts in ASCII order. This is the behavior of programs like -``find`` as well as ``ls``. 
The problem is, when passing these files to an -analysis program causes them not to appear in numerical order, which can lead +that the shell sorts in lexicographical order. This is the behavior of programs like +``find`` as well as ``ls``. The problem is in passing these files to an +analysis program that causes them not to appear in numerical order, which can lead to bad analysis. To remedy this, use ``natsort``:: # This won't get you what you want @@ -275,11 +304,19 @@ If needed, you can exclude specific numbers:: mode943.54.out mode1243.34.out -For other options, use ``natsort --help``. +For other options, use ``natsort --help``. In general, the other options mirror +the ``natsorted`` API. + +It is also helpful to note that ``natsort`` accepts pipes. -It is also helpful to note that ``natsort`` accepts pipes, and also will sort -each directory in a PATH independently of each other. Files in the current -directory are listed before files in subdirectories. +Note to users of the ``natsort`` shell script from < v. 3.1.0 +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``natsort`` shell script options and implementation for version 3.1.0 has +changed slightly. Options relating to interpreting input as file or directory +paths have been removed, and internally the input is no longer treated as file +paths. In most situations, this should not give different results, but in +some unique cases it may. Feel free to contact me if this ruins your work flow. Author ------ @@ -289,6 +326,26 @@ Seth M. Morton History ------- +01-20-2014 v. 3.1.0 +''''''''''''''''''' + + - Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting + - Entire codebase now works for both Python 2 and Python 3 without needing to run + ``2to3``. + - Updated all doctests. + - Further simplified the ``natsort`` base code by removing unneeded functions. + - Simplified documentation where possible. 
+ - Improved the shell script code + + - Made the documentation less "path"-centric to make it clear it is not just + for sorting file paths. + - Removed the filesystem-based options because these can be achieved better + though a pipeline. + - Added doctests. + - Added new options that correspond to ``signed`` and ``exp``. + - The user can now specify multiple numbers to exclude or multiple ranges + to filter by. + 10-01-2013 v. 3.0.2 ''''''''''''''''''' diff --git a/natsort/__init__.py b/natsort/__init__.py index 9add2a8..47e2ceb 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals from .natsort import natsort_key, natsorted, index_natsorted from ._version import __version__ diff --git a/natsort/__main__.py b/natsort/__main__.py index 7547047..e3242c6 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -1,15 +1,70 @@ -from __future__ import print_function, division +# -*- coding: utf-8 -*- +from __future__ import print_function, division, unicode_literals import sys import os import re -from natsort import natsort_key, natsorted -from _version import __version__ +from .natsort import natsort_key, natsorted, int_nosign_re, int_sign_re +from .natsort import float_sign_exp_re, float_nosign_exp_re +from .natsort import float_sign_noexp_re, float_nosign_noexp_re +from .natsort import regex_and_num_function_chooser +from ._version import __version__ +from .py23compat import py23_str + def main(): """\ - Performs a natural sort on pathnames given on the command-line. + Performs a natural sort on entries given on the command-line. A natural sort sorts numerically then alphabetically, and will sort - by numbers in the middle of a pathname. + by numbers in the middle of an entry. 
+ + >>> import sys + >>> sys.argv[1:] = ['num-2', 'num-6', 'num-1'] + >>> main() + num-6 + num-2 + num-1 + >>> sys.argv[1:] = ['-r', 'num-2', 'num-6', 'num-1'] + >>> main() + num-1 + num-2 + num-6 + >>> sys.argv[1:] = ['--nosign', 'num-2', 'num-6', 'num-1'] + >>> main() + num-1 + num-2 + num-6 + >>> sys.argv[1:] = ['-t', 'digit', 'num-2', 'num-6', 'num-1'] + >>> main() + num-1 + num-2 + num-6 + >>> sys.argv[1:] = ['-t', 'int', '-e', '-1', '-e', '6', + ... 'num-2', 'num-6', 'num-1'] + >>> main() + num-6 + num-2 + >>> sys.argv[1:] = ['-t', 'digit', '-e', '1', '-e', '6', + ... 'num-2', 'num-6', 'num-1'] + >>> main() + num-2 + >>> sys.argv[1:] = ['a1.0e3', 'a5.3', 'a453.6'] + >>> main() + a5.3 + a453.6 + a1.0e3 + >>> sys.argv[1:] = ['-f', '1', '10', 'a1.0e3', 'a5.3', 'a453.6'] + >>> main() + a5.3 + >>> sys.argv[1:] = ['-f', '1', '10', '-f', '400', '500', 'a1.0e3', 'a5.3', 'a453.6'] + >>> main() + a5.3 + a453.6 + >>> sys.argv[1:] = ['--noexp', 'a1.0e3', 'a5.3', 'a453.6'] + >>> main() + a1.0e3 + a5.3 + a453.6 + """ from argparse import ArgumentParser, RawDescriptionHelpFormatter @@ -18,111 +73,211 @@ def main(): formatter_class=RawDescriptionHelpFormatter) parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(__version__)) - parser.add_argument('-F', '--onlyfiles', help='Only files that ' - 'are readable and non-empty are read in. 
' - 'This will exculude folders from being read in.', - action='store_true', default=False) parser.add_argument('-f', '--filter', help='Used for ' - 'filtering out only the files that have a number ' + 'keeping only the entries that have a number ' 'falling in the given range.', nargs=2, type=float, - metavar=('LOW', 'HIGH')) - parser.add_argument('-e', '--exclude', help='Used to exclude a specific ' - 'number.') + metavar=('LOW', 'HIGH'), action='append') + parser.add_argument('-e', '--exclude', type=float, action='append', + help='Used to exclude an entry ' + 'that contains a specific number.') parser.add_argument('-r', '--reverse', help='Returns in reversed order.', action='store_true', default=False) - parser.add_argument('-R', '--recursive', help='Recursively decend the ' - 'directory tree.', action='store_true', default=False) parser.add_argument('-t', '--number_type', choices=('digit', 'int', 'float'), default='float', help='Choose the type of number ' - 'to search for.') - parser.add_argument('paths', help='The paths to sort.', nargs='*', + 'to search for. "float" will search for floating-point ' + 'numbers. "int" will only search for integers. ' + '"digit" is a shortcut for "int" with --nosign.') + parser.add_argument('--nosign', default=True, action='store_false', + dest='signed', help='Do not consider "+" or "-" as part ' + 'of a number, i.e. do not take sign into consideration.') + parser.add_argument('--noexp', default=True, action='store_false', + dest='exp', help='Do not consider an exponential as part ' + 'of a number, i.e. 1e4, would be considered as 1, "e", ' + 'and 4, not as 10000. This only effects the ' + '--number_type=float.') + parser.add_argument('entries', help='The entries to sort. Taken from stdin ' + 'if nothing is given on the command line.', nargs='*', default=sys.stdin) args = parser.parse_args() # Make sure the filter range is given properly. 
Does nothing if no filter - filterdata = check_filter(args.filter) + args.filter = check_filter(args.filter) - # Recursively collect paths, if necessary. - if args.recursive: - jn = os.path.join - paths = [jn(p, fn) for p, d, f in os.walk(os.curdir) for fn in f] - # Collect paths either from a pipe or the command-line arguments. - else: - paths = [f.strip() for f in args.paths] - - # Split into directory path and filenames - paths = split_paths(paths, args.onlyfiles) + # Remove trailing whitespace from all the entries + entries = [e.strip() for e in args.entries] # Sort by directory then by file within directory and print. - sort_and_print_paths(paths, filterdata, args.exclude, args.reverse, args.number_type) + sort_and_print_entries(entries, args) def range_check(low, high): """\ Verifies that that given range has a low lower than the high. + + >>> range_check(10, 11) + (10.0, 11.0) + >>> range_check(6.4, 30) + (6.4, 30.0) + >>> try: + ... range_check(7, 2) + ... except ValueError as e: + ... print(e) + low >= high + """ low, high = float(low), float(high) if low >= high: - raise ValueError ('low >= high') + raise ValueError('low >= high') else: return low, high + def check_filter(filt): - """Check that the low value of the filter is lower than the high.""" + """\ + Check that the low value of the filter is lower than the high. + If there is to be no filter, return 'None'. + + >>> check_filter(()) + >>> check_filter(False) + >>> check_filter(None) + >>> check_filter([(6, 7)]) + [(6.0, 7.0)] + >>> check_filter([(6, 7), (2, 8)]) + [(6.0, 7.0), (2.0, 8.0)] + >>> try: + ... check_filter([(7, 2)]) + ... except ValueError as e: + ... print(e) + Error in --filter: low >= high + + """ # Quick return if no filter. 
if not filt: return None try: - low, high = range_check(filt[0], filt[1]) + return [range_check(f[0], f[1]) for f in filt] except ValueError as a: - raise ValueError ('Error in --filter: '+str(a)) - return low, high, re.compile(r'[+-]?\d+\.?\d*') + raise ValueError('Error in --filter: '+py23_str(a)) + + +def keep_entry_range(entry, lows, highs, converter, regex): + """\ + Boolean function to determine if an entry should be kept out + based on if any numbers are in a given range. + + >>> import re + >>> regex = re.compile(r'\d+') + >>> keep_entry_range('a56b23c89', [0], [100], int, regex) + True + >>> keep_entry_range('a56b23c89', [1, 88], [20, 90], int, regex) + True + >>> keep_entry_range('a56b23c89', [1], [20], int, regex) + False -def split_paths(paths, a): - """For each file, separate into directory and filename. Store all files - in a dir into a dict where the dir is the key and filename is the value. """ - dirs = {} - for path in paths: - if a: - try: - with open(path) as fl: - pass - except IOError: - continue - dir, file = os.path.split(path) - try: - dirs[dir].append(file) - except KeyError: - dirs[dir] = [] - dirs[dir].append(file) - return dirs - -def sort_and_print_paths(dirs, filterdata, exclude, reverse, number_type): - """Sort the paths by directoy then by file within that directory. - Print off the results. + return any(low <= converter(num) <= high + for num in regex.findall(entry) + for low, high in zip(lows, highs)) + + +def exclude_entry(entry, values, converter, regex): + """\ + Boolean function to determine if an entry should be kept out + based on if it contains a specific number. 
+ + >>> import re + >>> regex = re.compile(r'\d+') + >>> exclude_entry('a56b23c89', [100], int, regex) + True + >>> exclude_entry('a56b23c89', [23], int, regex) + False + """ - number_type = {'digit': None, 'int': int, 'float': float}[number_type] - for dir in natsorted(dirs.keys(), number_type=number_type): - dirs[dir].sort(key=lambda x: natsort_key(x, number_type=number_type)) - if reverse: - dirs[dir] = reversed(dirs[dir]) - for file in dirs[dir]: - if filterdata is not None: - # Find all the numbers in the filename. - nums = filterdata[2].findall(file) - # If any numbers are between the range, print. - # Otherwise, move to next file. - for num in nums: - if filterdata[0] <= float(num) <= filterdata[1]: break - else: - continue - if exclude and exclude in file: continue - print(os.path.join(dir, file)) + return not any(converter(num) in values for num in regex.findall(entry)) + + +def sort_and_print_entries(entries, args): + """\ + Sort the entries, applying the filters first if necessary. + + >>> class Args: + ... def __init__(self, filter, exclude, reverse): + ... self.filter = filter + ... self.exclude = exclude + ... self.reverse = reverse + ... self.number_type = 'float' + ... self.signed = True + ... self.exp = True + >>> entries = ['tmp/a57/path2', + ... 'tmp/a23/path1', + ... 'tmp/a1/path1', + ... 'tmp/a130/path1', + ... 'tmp/a64/path1', + ... 
'tmp/a64/path2'] + >>> sort_and_print_entries(entries, Args(None, False, False)) + tmp/a1/path1 + tmp/a23/path1 + tmp/a57/path2 + tmp/a64/path1 + tmp/a64/path2 + tmp/a130/path1 + >>> sort_and_print_entries(entries, Args([(20, 100)], False, False)) + tmp/a23/path1 + tmp/a57/path2 + tmp/a64/path1 + tmp/a64/path2 + >>> sort_and_print_entries(entries, Args(None, [23, 130], False)) + tmp/a1/path1 + tmp/a57/path2 + tmp/a64/path1 + tmp/a64/path2 + >>> sort_and_print_entries(entries, Args(None, [2], False)) + tmp/a1/path1 + tmp/a23/path1 + tmp/a64/path1 + tmp/a130/path1 + >>> sort_and_print_entries(entries, Args(None, False, True)) + tmp/a130/path1 + tmp/a64/path2 + tmp/a64/path1 + tmp/a57/path2 + tmp/a23/path1 + tmp/a1/path1 + + """ + + # Extract the proper number type. + kwargs = {'number_type': {'digit': None, 'int': int, 'float': float}[args.number_type], + 'signed': args.signed, + 'exp': args.exp} + + # Pre-remove entries that don't pass the filtering criteria + # Make sure we use the same searching algorithm for filtering as for sorting. 
+ if args.filter is not None or args.exclude: + inp_options = (kwargs['number_type'], args.signed, args.exp) + regex, num_function = regex_and_num_function_chooser[inp_options] + if args.filter is not None: + lows, highs = [f[0] for f in args.filter], [f[1] for f in args.filter] + entries = [entry for entry in entries + if keep_entry_range(entry, lows, highs, num_function, regex)] + if args.exclude: + exclude = set(args.exclude) + entries = [entry for entry in entries + if exclude_entry(entry, exclude, num_function, regex)] + + # Print off the sorted results + entries.sort(key=lambda x: natsort_key(x, **kwargs), reverse=args.reverse) + for entry in entries: + print(entry) + if __name__ == '__main__': try: main() except ValueError as a: - sys.exit(str(a)) + sys.exit(py23_str(a)) except KeyboardInterrupt: sys.exit(1) + # import doctest + # ret = doctest.testmod() + # if ret[0] == 0: + # print('natsort: All {0[1]} tests successful!'.format(ret)) diff --git a/natsort/_version.py b/natsort/_version.py index da4039b..d763f0a 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -1 +1,3 @@ -__version__ = '3.0.2' +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +__version__ = '3.1.0' diff --git a/natsort/natsort.py b/natsort/natsort.py index dfb73b0..ef494c5 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -1,71 +1,110 @@ +# -*- coding: utf-8 -*- """ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. 
- >>> a = ['a2', 'a8', 'a7', 'a5', 'a9', 'a1', 'a4', 'a10', 'a3', 'a6'] + >>> a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] >>> sorted(a) - ['a1', 'a10', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9'] + [{u}'a1', {u}'a10', {u}'a2', {u}'a4', {u}'a5', {u}'a6', {u}'a9'] >>> natsorted(a) - ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10'] + [{u}'a1', {u}'a2', {u}'a4', {u}'a5', {u}'a6', {u}'a9', {u}'a10'] - >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] - >>> sorted(a) - ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] - >>> natsorted(a) - ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] - >>> natsorted(a, number_type=None) - ['a5.034e1', 'a50', 'a50.4', 'a50.300', 'a51.'] +Here is an example demonstrating how different options sort the same list. - >>> a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] + >>> a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] >>> sorted(a) - ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] + [{u}'a5.034e1', {u}'a50', {u}'a50.300', {u}'a50.31', {u}'a50.4', {u}'a51.'] >>> natsorted(a) - ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] + [{u}'a50', {u}'a50.300', {u}'a50.31', {u}'a5.034e1', {u}'a50.4', {u}'a51.'] + >>> natsorted(a, number_type=float, exp=False) + [{u}'a5.034e1', {u}'a50', {u}'a50.300', {u}'a50.31', {u}'a50.4', {u}'a51.'] + >>> natsorted(a, number_type=int) + [{u}'a5.034e1', {u}'a50', {u}'a50.4', {u}'a50.31', {u}'a50.300', {u}'a51.'] >>> natsorted(a, number_type=None) - ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] + [{u}'a5.034e1', {u}'a50', {u}'a50.4', {u}'a50.31', {u}'a50.300', {u}'a51.'] - >>> a = ['name.1', 'name.101', 'name.01', 'name.200', 'name.21'] +This demonstrates the signed option. It can account for negative and positive signs. +Turning it off treats the '+' or '-' as part of the string. 
+ + >>> a = ['a-5', 'a7', 'a+2'] >>> sorted(a) - ['name.01', 'name.1', 'name.101', 'name.200', 'name.21'] - >>> natsorted(a) - ['name.01', 'name.1', 'name.101', 'name.200', 'name.21'] - >>> natsorted(a, number_type=None) - ['name.1', 'name.01', 'name.21', 'name.101', 'name.200'] + [{u}'a+2', {u}'a-5', {u}'a7'] + >>> natsorted(a) # signed=True is default, -5 comes first on the number line + [{u}'a-5', {u}'a+2', {u}'a7'] + >>> natsorted(a, signed=False) # 'a' comes before 'a+', which is before 'a-' + [{u}'a7', {u}'a+2', {u}'a-5'] - >>> a = ['version-2', 'version-20', 'version-4', 'version-1'] +Sorting version numbers is best with 'number_type=None'. That is a shortcut +for 'number_type=int, signed=False' + + >>> a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] >>> sorted(a) - ['version-1', 'version-2', 'version-20', 'version-4'] + [{u}'1.10.1', {u}'1.11', {u}'1.11.4', {u}'1.9.9a', {u}'1.9.9b'] >>> natsorted(a) - ['version-20', 'version-4', 'version-2', 'version-1'] - >>> natsorted(a, number_type=int) - ['version-20', 'version-4', 'version-2', 'version-1'] + [{u}'1.10.1', {u}'1.11', {u}'1.11.4', {u}'1.9.9a', {u}'1.9.9b'] >>> natsorted(a, number_type=None) - ['version-1', 'version-2', 'version-4', 'version-20'] + [{u}'1.9.9a', {u}'1.9.9b', {u}'1.10.1', {u}'1.11', {u}'1.11.4'] + +You can mix types with natsorted. This can get around the new +'unorderable types' issue with Python 3. + >>> import sys >>> a = [6, 4.5, '7', u'2.5'] - >>> sorted(a) - [4.5, 6, u'2.5', '7'] + >>> if sys.version[0] == '3': # Python 3 + ... try: + ... sorted(a) + ... except TypeError as e: + ... print(e) + ... else: # Python 2 + ... # This will get the doctest to work properly while illustrating the point + ... if sorted(a) == [4.5, 6, u'2.5', '7']: + ... print('unorderable types: str() < float()') + ... 
+ unorderable types: str() < float() >>> natsorted(a) - [u'2.5', 4.5, 6, '7'] + [{u}'2.5', 4.5, 6, {u}'7'] """ +from __future__ import unicode_literals +from .py23compat import u_format, py23_basestring, py23_range, py23_str, py23_zip import re -# The regex that locates floats -float_re = re.compile(r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)') -# A basic digit splitter -digit_re = re.compile(r'(\d+)') -# Integer regex -int_re = re.compile(r'([-+]?[0-9]+)') +import sys +__doc__ = u_format(__doc__) # Make sure the doctest works for either python2 or python3 +# The regex that locates floats +float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') +float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)') +float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)') +float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)') +# Integer regexes +int_nosign_re = re.compile(r'(\d+)') +int_sign_re = re.compile(r'([-+]?\d+)') +# This dict will help select the correct regex and number conversion function. +regex_and_num_function_chooser = { + (float, True, True) : (float_sign_exp_re, float), + (float, True, False) : (float_sign_noexp_re, float), + (float, False, True) : (float_nosign_exp_re, float), + (float, False, False) : (float_nosign_noexp_re, float), + (int, True, True) : (int_sign_re, int), + (int, True, False) : (int_sign_re, int), + (int, False, True) : (int_nosign_re, int), + (int, False, False) : (int_nosign_re, int), + (None, True, True) : (int_nosign_re, int), + (None, True, False) : (int_nosign_re, int), + (None, False, True) : (int_nosign_re, int), + (None, False, False) : (int_nosign_re, int), +} + +@u_format def remove_empty(s): """\ Remove empty strings from a list. 
- >>> a = ['a', 2, '', 'b'] + >>> a = ['a', 2, '', 'b', ''] >>> remove_empty(a) - ['a', 2, 'b'] + [{u}'a', 2, {u}'b'] """ while True: @@ -86,7 +125,7 @@ def _number_finder(s, regex, numconv): # Now convert the numbers to numbers, and leave strings as strings s = remove_empty(s) - for i in xrange(len(s)): + for i in py23_range(len(s)): try: s[i] = numconv(s[i]) except ValueError: @@ -95,122 +134,99 @@ def _number_finder(s, regex, numconv): return s -def find_floats(s): - """\ - Locate all the floats in a string, and return a tuple of - strings and floats. - - >>> find_floats('name3.5') - ['name', 3.5] - >>> find_floats('a5.034e1') - ['a', 50.34] - >>> find_floats('b-40.2') - ['b', -40.2] - - """ - return _number_finder(s, float_re, float) - - -def find_ints(s): - """\ - Locate all the ints in a string, and return a tuple of - strings and ints. - - >>> find_ints('name3.5') - ['name', 3, '.', 5] - >>> find_ints('a5.034e1') - ['a', 5, '.', 34, 'e', 1] - >>> find_ints('b-40.2') - ['b', -40, '.', 2] - - """ - return _number_finder(s, int_re, int) - - -def find_digits(s): +@u_format +def natsort_key(s, number_type=float, signed=True, exp=True): """\ - Locate all the digits in a string, and return a tuple of - strings and ints. - - >>> find_digits('name3.5') - ['name', 3, '.', 5] - >>> find_digits('a5.034e1') - ['a', 5, '.', 34, 'e', 1] - >>> find_digits('b-40.2') - ['b-', 40, '.', 2] - - """ - return _number_finder(s, digit_re, int) - - -def natsort_key(s, number_type=float): - """\ - Key to sort strings and numbers naturally, not by ASCII. + Key to sort strings and numbers naturally, not lexicographically. It also has basic support for version numbers. For use in passing to the :py:func:`sorted` builtin or :py:meth:`sort` attribute of lists. + Use natsort_key just like any other sorting key. + >>> a = ['num3', 'num5', 'num2'] >>> a.sort(key=natsort_key) >>> a - ['num2', 'num3', 'num5'] - >>> class Foo: - ... def __init__(self, bar): - ... self.bar = bar - ... 
def __repr__(self): - ... return "Foo('{0}')".format(self.bar) - >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] - >>> b.sort(key=lambda x: natsort_key(x.bar)) - >>> b - [Foo('num2'), Foo('num3'), Foo('num5')] - >>> from operator import attrgetter - >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] - >>> f = attrgetter('bar') - >>> c.sort(key=lambda x: natsort_key(f(x))) - >>> c - [Foo('num2'), Foo('num3'), Foo('num5')] + [{u}'num2', {u}'num3', {u}'num5'] + + Below illustrates how the key works, and how the different options affect sorting. + + >>> natsort_key('a-5.034e1') + ({u}'a', -50.34) + >>> natsort_key('a-5.034e1', number_type=float, signed=True, exp=True) + ({u}'a', -50.34) + >>> natsort_key('a-5.034e1', number_type=float, signed=True, exp=False) + ({u}'a', -5.034, {u}'e', 1.0) + >>> natsort_key('a-5.034e1', number_type=float, signed=False, exp=True) + ({u}'a-', 50.34) + >>> natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) + ({u}'a-', 5.034, {u}'e', 1.0) + >>> natsort_key('a-5.034e1', number_type=int) + ({u}'a', -5, {u}'.', 34, {u}'e', 1) + >>> natsort_key('a-5.034e1', number_type=int, signed=True) + ({u}'a', -5, {u}'.', 34, {u}'e', 1) + >>> natsort_key('a-5.034e1', number_type=int, signed=False) + ({u}'a-', 5, {u}'.', 34, {u}'e', 1) + >>> natsort_key('a-5.034e1', number_type=int, exp=False) + ({u}'a', -5, {u}'.', 34, {u}'e', 1) + >>> natsort_key('a-5.034e1', number_type=None) + ({u}'a-', 5, {u}'.', 34, {u}'e', 1) + + This is a demonstration of what number_type=None works. 
+ + >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=None, signed=False) + True + >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=None, exp=False) + True + >>> natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False) + True """ # If we are dealing with non-strings, return now - if not isinstance(s, basestring): + if not isinstance(s, py23_basestring): return (s,) # Convert to the proper tuple and return - find_method = {float: find_floats, int: find_ints, None: find_digits} + inp_options = (number_type, signed, exp) + args = (s,) + regex_and_num_function_chooser[inp_options] try: - return tuple(find_method[number_type](s)) + return tuple(_number_finder(*args)) except KeyError: - raise ValueError("natsort_key: 'search' parameter {0} invalid".format(str(number_type))) - - -def natsorted(seq, key=lambda x: x, number_type=float): + # Report errors properly + if number_type not in (float, int) or number_type is not None: + raise ValueError("natsort_key: 'number_type' " + "parameter '{0}'' invalid".format(py23_str(number_type))) + elif signed not in (True, False): + raise ValueError("natsort_key: 'signed' " + "parameter '{0}'' invalid".format(py23_str(signed))) + elif exp not in (True, False): + raise ValueError("natsort_key: 'exp' " + "parameter '{0}'' invalid".format(py23_str(exp))) + + +@u_format +def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """\ Sorts a sequence naturally (alphabetically and numerically), - not by ASCII. + not lexicographically. >>> a = ['num3', 'num5', 'num2'] >>> natsorted(a) - ['num2', 'num3', 'num5'] - >>> class Foo: - ... def __init__(self, bar): - ... self.bar = bar - ... def __repr__(self): - ... 
return "Foo('{0}')".format(self.bar) - >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] - >>> from operator import attrgetter - >>> natsorted(b, key=attrgetter('bar')) - [Foo('num2'), Foo('num3'), Foo('num5')] - - :argument seq: - The sequence to be sorted. - :type seq: sequence-like - :rtype: list + [{u}'num2', {u}'num3', {u}'num5'] + >>> b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] + >>> from operator import itemgetter + >>> natsorted(b, key=itemgetter(1)) + [({u}'c', {u}'num2'), ({u}'a', {u}'num3'), ({u}'b', {u}'num5')] + """ - return sorted(seq, key=lambda x: natsort_key(key(x), number_type=number_type)) + return sorted(seq, key=lambda x: natsort_key(key(x), + number_type=number_type, + signed=signed, exp=exp)) -def index_natsorted(seq, key=lambda x: x, number_type=float): +@u_format +def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """\ Sorts a sequence naturally, but returns a list of sorted the indeces and not the sorted list. @@ -222,29 +238,22 @@ def index_natsorted(seq, key=lambda x: x, number_type=float): [2, 0, 1] >>> # Sort both lists by the sort order of a >>> [a[i] for i in index] - ['num2', 'num3', 'num5'] + [{u}'num2', {u}'num3', {u}'num5'] >>> [b[i] for i in index] - ['baz', 'foo', 'bar'] - >>> class Foo: - ... def __init__(self, bar): - ... self.bar = bar - ... def __repr__(self): - ... return "Foo('{0}')".format(self.bar) - >>> c = [Foo('num3'), Foo('num5'), Foo('num2')] - >>> from operator import attrgetter - >>> index_natsorted(c, key=attrgetter('bar')) + [{u}'baz', {u}'foo', {u}'bar'] + >>> c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] + >>> from operator import itemgetter + >>> index_natsorted(c, key=itemgetter(1)) [2, 0, 1] - :argument seq: - The sequence that you want the sorted index of. 
- :type seq: sequence-like - :rtype: list """ from operator import itemgetter item1 = itemgetter(1) # Pair the index and sequence together, then sort by - index_seq_pair = [[x, key(y)] for x, y in zip(xrange(len(seq)), seq)] - index_seq_pair.sort(key=lambda x: natsort_key(item1(x), number_type=number_type)) + index_seq_pair = [[x, key(y)] for x, y in py23_zip(py23_range(len(seq)), seq)] + index_seq_pair.sort(key=lambda x: natsort_key(item1(x), + number_type=number_type, + signed=signed, exp=exp)) return [x[0] for x in index_seq_pair] @@ -252,6 +261,7 @@ def test(): from doctest import DocTestSuite return DocTestSuite() + # Test this module if __name__ == '__main__': import doctest diff --git a/natsort/py23compat.py b/natsort/py23compat.py new file mode 100644 index 0000000..f5af384 --- /dev/null +++ b/natsort/py23compat.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +import functools +import sys + +# These functions are used to make the doctests compatible between +# python2 and python3. This code is pretty much lifted from the iPython +# project's py3compat.py file. Credit to the iPython devs. + +# Assume all strings are Unicode in Python 2 +py23_str = str if sys.version[0] == '3' else unicode + +# Use the range iterator always +py23_range = range if sys.version[0] == '3' else xrange + +# Uniform base string type +py23_basestring = str if sys.version[0] == '3' else basestring + +# zip as an iterator +if sys.version[0] == '3': + py23_zip = zip +else: + import itertools + py23_zip = itertools.izip + + +# This function is intended to decorate other functions that will modify +# either a string directly, or a function's docstring. 
+def _modify_str_or_docstring(str_change_func): + @functools.wraps(str_change_func) + def wrapper(func_or_str): + if isinstance(func_or_str, py23_basestring): + func = None + doc = func_or_str + else: + func = func_or_str + doc = func.__doc__ + + doc = str_change_func(doc) + + if func: + func.__doc__ = doc + return func + return doc + return wrapper + + +# Properly modify a doctstring to either have the unicode literal or not. +if sys.version[0] == '3': + # Abstract u'abc' syntax: + @_modify_str_or_docstring + def u_format(s): + """"{u}'abc'" --> "'abc'" (Python 3) + + Accepts a string or a function, so it can be used as a decorator.""" + return s.format(u='') +else: + # Abstract u'abc' syntax: + @_modify_str_or_docstring + def u_format(s): + """"{u}'abc'" --> "u'abc'" (Python 2) + + Accepts a string or a function, so it can be used as a decorator.""" + return s.format(u='u') + @@ -40,7 +40,6 @@ setup(name='natsort', packages=find_packages(), entry_points={'console_scripts':['natsort = natsort.__main__:main']}, test_suite='natsort.natsort.test', - use_2to3=True, description=DESCRIPTION, long_description=LONG_DESCRIPTION, classifiers=( |