Diffstat (limited to 'Lib/collections.py')
-rw-r--r-- | Lib/collections.py | 449
1 file changed, 334 insertions, 115 deletions
diff --git a/Lib/collections.py b/Lib/collections.py
index b2a590bd77..eb2024352d 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -13,6 +13,7 @@ import sys as _sys
 import heapq as _heapq
 from weakref import proxy as _proxy
 from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from reprlib import recursive_repr as _recursive_repr
 
 ################################################################################
 ### OrderedDict
@@ -26,59 +27,57 @@ class OrderedDict(dict):
     # An inherited dict maps keys to values.
     # The inherited dict provides __getitem__, __len__, __contains__, and get.
     # The remaining methods are order-aware.
-    # Big-O running times for all methods are the same as for regular dictionaries.
+    # Big-O running times for all methods are the same as regular dictionaries.
 
-    # The internal self.__map dictionary maps keys to links in a doubly linked list.
+    # The internal self.__map dict maps keys to links in a doubly linked list.
     # The circular doubly linked list starts and ends with a sentinel element.
     # The sentinel element never gets deleted (this simplifies the algorithm).
-    # The prev/next links are weakref proxies (to prevent circular references).
+    # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
+    # The prev links are weakref proxies (to prevent circular references).
     # Individual links are kept alive by the hard reference in self.__map.
     # Those hard references disappear when a key is deleted from an OrderedDict.
 
     def __init__(self, *args, **kwds):
-        '''Initialize an ordered dictionary.  Signature is the same as for
-        regular dictionaries, but keyword arguments are not recommended
-        because their insertion order is arbitrary.
+        '''Initialize an ordered dictionary.  The signature is the same as
+        regular dictionaries, but keyword arguments are not recommended because
+        their insertion order is arbitrary.
 
         '''
         if len(args) > 1:
             raise TypeError('expected at most 1 arguments, got %d' % len(args))
-        self.__in_repr = False                      # detects recursive repr
         try:
             self.__root
         except AttributeError:
-            self.__root = root = _Link()            # sentinel node for the doubly linked list
+            self.__hardroot = _Link()
+            self.__root = root = _proxy(self.__hardroot)
             root.prev = root.next = root
             self.__map = {}
         self.__update(*args, **kwds)
 
-    def clear(self):
-        'od.clear() -> None.  Remove all items from od.'
-        root = self.__root
-        root.prev = root.next = root
-        self.__map.clear()
-        dict.clear(self)
-
-    def __setitem__(self, key, value):
+    def __setitem__(self, key, value,
+                    dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
         'od.__setitem__(i, y) <==> od[i]=y'
-        # Setting a new item creates a new link which goes at the end of the linked
-        # list, and the inherited dictionary is updated with the new key/value pair.
+        # Setting a new item creates a new link at the end of the linked list,
+        # and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
-            self.__map[key] = link = _Link()
+            self.__map[key] = link = Link()
             root = self.__root
             last = root.prev
             link.prev, link.next, link.key = last, root, key
-            last.next = root.prev = _proxy(link)
-        dict.__setitem__(self, key, value)
+            last.next = link
+            root.prev = proxy(link)
+        dict_setitem(self, key, value)
 
-    def __delitem__(self, key):
+    def __delitem__(self, key, dict_delitem=dict.__delitem__):
         'od.__delitem__(y) <==> del od[y]'
-        # Deleting an existing item uses self.__map to find the link which is
-        # then removed by updating the links in the predecessor and successor nodes.
-        dict.__delitem__(self, key)
+        # Deleting an existing item uses self.__map to find the link which gets
+        # removed by updating the links in the predecessor and successor nodes.
+        dict_delitem(self, key)
         link = self.__map.pop(key)
-        link.prev.next = link.next
-        link.next.prev = link.prev
+        link_prev = link.prev
+        link_next = link.next
+        link_prev.next = link_next
+        link_next.prev = link_prev
 
     def __iter__(self):
         'od.__iter__() <==> iter(od)'
@@ -98,6 +97,69 @@ class OrderedDict(dict):
             yield curr.key
             curr = curr.prev
 
+    def clear(self):
+        'od.clear() -> None.  Remove all items from od.'
+        root = self.__root
+        root.prev = root.next = root
+        self.__map.clear()
+        dict.clear(self)
+
+    def popitem(self, last=True):
+        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+        Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+        '''
+        if not self:
+            raise KeyError('dictionary is empty')
+        root = self.__root
+        if last:
+            link = root.prev
+            link_prev = link.prev
+            link_prev.next = root
+            root.prev = link_prev
+        else:
+            link = root.next
+            link_next = link.next
+            root.next = link_next
+            link_next.prev = root
+        key = link.key
+        del self.__map[key]
+        value = dict.pop(self, key)
+        return key, value
+
+    def move_to_end(self, key, last=True):
+        '''Move an existing element to the end (or beginning if last==False).
+
+        Raises KeyError if the element does not exist.
+        When last=True, acts like a fast version of self[key]=self.pop(key).
+
+        '''
+        link = self.__map[key]
+        link_prev = link.prev
+        link_next = link.next
+        link_prev.next = link_next
+        link_next.prev = link_prev
+        root = self.__root
+        if last:
+            last = root.prev
+            link.prev = last
+            link.next = root
+            last.next = root.prev = link
+        else:
+            first = root.next
+            link.prev = root
+            link.next = first
+            root.next = first.prev = link
+
+    def __sizeof__(self):
+        sizeof = _sys.getsizeof
+        n = len(self) + 1                       # number of links including root
+        size = sizeof(self.__dict__)            # instance dictionary
+        size += sizeof(self.__map) * 2          # internal dict and inherited dict
+        size += sizeof(self.__hardroot) * n     # link objects
+        size += sizeof(self.__root) * n         # proxy objects
+        return size
+
     update = __update = MutableMapping.update
     keys = MutableMapping.keys
     values = MutableMapping.values
@@ -107,6 +169,11 @@ class OrderedDict(dict):
     __marker = object()
 
     def pop(self, key, default=__marker):
+        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
+        value.  If key is not found, d is returned if given, otherwise KeyError
+        is raised.
+
+        '''
         if key in self:
             result = self[key]
             del self[key]
@@ -122,29 +189,12 @@ class OrderedDict(dict):
             self[key] = default
         return default
 
-    def popitem(self, last=True):
-        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
-        Pairs are returned in LIFO order if last is true or FIFO order if false.
-
-        '''
-        if not self:
-            raise KeyError('dictionary is empty')
-        key = next(reversed(self) if last else iter(self))
-        value = self.pop(key)
-        return key, value
-
+    @_recursive_repr()
     def __repr__(self):
         'od.__repr__() <==> repr(od)'
-        if self.__in_repr:
-            return '...'
         if not self:
             return '%s()' % (self.__class__.__name__,)
-        self.__in_repr = True
-        try:
-            result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
-        finally:
-            self.__in_repr = False
-        return result
+        return '%s(%r)' % (self.__class__.__name__, list(self.items()))
 
     def __reduce__(self):
         'Return state information for pickling'
@@ -162,14 +212,14 @@ class OrderedDict(dict):
 
     @classmethod
     def fromkeys(cls, iterable, value=None):
-        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
-        and values equal to v (which defaults to None).
+        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
+        If not specified, the value defaults to None.
 
         '''
-        d = cls()
-        for key in iterable:
-            d[key] = value
-        return d
+        self = cls()
+        for key in iterable:
+            self[key] = value
+        return self
 
     def __eq__(self, other):
         '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
@@ -181,23 +231,69 @@ class OrderedDict(dict):
                all(p==q for p, q in zip(self.items(), other.items()))
         return dict.__eq__(self, other)
 
-    def __ne__(self, other):
-        '''od.__ne__(y) <==> od!=y.  Comparison to another OD is order-sensitive
-        while comparison to a regular mapping is order-insensitive.
-
-        '''
-        return not self == other
-
-
 ################################################################################
 ### namedtuple
 ################################################################################
 
+_class_template = '''\
+from builtins import property as _property, tuple as _tuple
+from operator import itemgetter as _itemgetter
+from collections import OrderedDict
+
+class {typename}(tuple):
+    '{typename}({arg_list})'
+
+    __slots__ = ()
+
+    _fields = {field_names!r}
+
+    def __new__(_cls, {arg_list}):
+        'Create new instance of {typename}({arg_list})'
+        return _tuple.__new__(_cls, ({arg_list}))
+
+    @classmethod
+    def _make(cls, iterable, new=tuple.__new__, len=len):
+        'Make a new {typename} object from a sequence or iterable'
+        result = new(cls, iterable)
+        if len(result) != {num_fields:d}:
+            raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
+        return result
+
+    def __repr__(self):
+        'Return a nicely formatted representation string'
+        return self.__class__.__name__ + '({repr_fmt})' % self
+
+    def _asdict(self):
+        'Return a new OrderedDict which maps field names to their values'
+        return OrderedDict(zip(self._fields, self))
+
+    __dict__ = property(_asdict)
+
+    def _replace(_self, **kwds):
+        'Return a new {typename} object replacing specified fields with new values'
+        result = _self._make(map(kwds.pop, {field_names!r}, _self))
+        if kwds:
+            raise ValueError('Got unexpected field names: %r' % list(kwds))
+        return result
+
+    def __getnewargs__(self):
+        'Return self as a plain tuple.  Used by copy and pickle.'
+        return tuple(self)
+
+{field_defs}
+'''
+
+_repr_template = '{name}=%r'
+
+_field_template = '''\
+    {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
+'''
+
 def namedtuple(typename, field_names, verbose=False, rename=False):
     """Returns a new subclass of tuple with named fields.
 
-    >>> Point = namedtuple('Point', 'x y')
+    >>> Point = namedtuple('Point', ['x', 'y'])
     >>> Point.__doc__                   # docstring for the new class
     'Point(x, y)'
     >>> p = Point(11, y=22)             # instantiate with positional args or keywords
@@ -222,76 +318,54 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
     # generating informative error messages and preventing template injection attacks.
     if isinstance(field_names, str):
         field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
-    field_names = tuple(map(str, field_names))
+    field_names = list(map(str, field_names))
     if rename:
-        names = list(field_names)
         seen = set()
-        for i, name in enumerate(names):
-            if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
-                or not name or name[0].isdigit() or name.startswith('_')
+        for index, name in enumerate(field_names):
+            if (not all(c.isalnum() or c=='_' for c in name)
+                or _iskeyword(name)
+                or not name
+                or name[0].isdigit()
+                or name.startswith('_')
                 or name in seen):
-                names[i] = '_%d' % i
+                field_names[index] = '_%d' % index
             seen.add(name)
-        field_names = tuple(names)
-    for name in (typename,) + field_names:
+    for name in [typename] + field_names:
         if not all(c.isalnum() or c=='_' for c in name):
             raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
         if _iskeyword(name):
             raise ValueError('Type names and field names cannot be a keyword: %r' % name)
         if name[0].isdigit():
             raise ValueError('Type names and field names cannot start with a number: %r' % name)
-    seen_names = set()
+    seen = set()
     for name in field_names:
         if name.startswith('_') and not rename:
             raise ValueError('Field names cannot start with an underscore: %r' % name)
-        if name in seen_names:
+        if name in seen:
             raise ValueError('Encountered duplicate field name: %r' % name)
-        seen_names.add(name)
-
-    # Create and fill-in the class template
-    numfields = len(field_names)
-    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
-    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
-    template = '''class %(typename)s(tuple):
-        '%(typename)s(%(argtxt)s)' \n
-        __slots__ = () \n
-        _fields = %(field_names)r \n
-        def __new__(_cls, %(argtxt)s):
-            return _tuple.__new__(_cls, (%(argtxt)s)) \n
-        @classmethod
-        def _make(cls, iterable, new=tuple.__new__, len=len):
-            'Make a new %(typename)s object from a sequence or iterable'
-            result = new(cls, iterable)
-            if len(result) != %(numfields)d:
-                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
-            return result \n
-        def __repr__(self):
-            return '%(typename)s(%(reprtxt)s)' %% self \n
-        def _asdict(self):
-            'Return a new OrderedDict which maps field names to their values'
-            return OrderedDict(zip(self._fields, self)) \n
-        def _replace(_self, **kwds):
-            'Return a new %(typename)s object replacing specified fields with new values'
-            result = _self._make(map(kwds.pop, %(field_names)r, _self))
-            if kwds:
-                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
-            return result \n
-        def __getnewargs__(self):
-            return tuple(self) \n\n''' % locals()
-    for i, name in enumerate(field_names):
-        template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
-    if verbose:
-        print(template)
+        seen.add(name)
+
+    # Fill-in the class template
+    class_definition = _class_template.format(
+        typename = typename,
+        field_names = tuple(field_names),
+        num_fields = len(field_names),
+        arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
+        repr_fmt = ', '.join(_repr_template.format(name=name) for name in field_names),
+        field_defs = '\n'.join(_field_template.format(index=index, name=name)
+                               for index, name in enumerate(field_names))
+    )
 
     # Execute the template string in a temporary namespace and
     # support tracing utilities by setting a value for frame.f_globals['__name__']
-    namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
-                     OrderedDict=OrderedDict, _property=property, _tuple=tuple)
+    namespace = dict(__name__='namedtuple_%s' % typename)
     try:
-        exec(template, namespace)
+        exec(class_definition, namespace)
     except SyntaxError as e:
-        raise SyntaxError(e.msg + ':\n' + template) from e
+        raise SyntaxError(e.msg + ':\n\n' + class_definition)
     result = namespace[typename]
+    if verbose:
+        print(class_definition)
 
     # For pickling to work, the __module__ variable needs to be set to the frame
     # where the named tuple is created.  Bypass this step in environments where
@@ -309,6 +383,17 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
 ### Counter
 ########################################################################
 
+def _count_elements(mapping, iterable):
+    'Tally elements from the iterable.'
+    mapping_get = mapping.get
+    for elem in iterable:
+        mapping[elem] = mapping_get(elem, 0) + 1
+
+try:                                    # Load C helper function if available
+    from _collections import _count_elements
+except ImportError:
+    pass
+
 class Counter(dict):
     '''Dict subclass for counting hashable items.  Sometimes called a bag
     or multiset.  Elements are stored as dictionary keys and their counts
@@ -452,12 +537,37 @@ class Counter(dict):
                 else:
                     super().update(iterable) # fast path when counter is empty
             else:
-                self_get = self.get
-                for elem in iterable:
-                    self[elem] = 1 + self_get(elem, 0)
+                _count_elements(self, iterable)
         if kwds:
             self.update(kwds)
 
+    def subtract(self, iterable=None, **kwds):
+        '''Like dict.update() but subtracts counts instead of replacing them.
+        Counts can be reduced below zero.  Both the inputs and outputs are
+        allowed to contain zero and negative counts.
+
+        Source can be an iterable, a dictionary, or another Counter instance.
+
+        >>> c = Counter('which')
+        >>> c.subtract('witch')              # subtract elements from another iterable
+        >>> c.subtract(Counter('watch'))     # subtract elements from another counter
+        >>> c['h']                           # 2 in which, minus 1 in witch, minus 1 in watch
+        0
+        >>> c['w']                           # 1 in which, minus 1 in witch, minus 1 in watch
+        -1
+
+        '''
+        if iterable is not None:
+            self_get = self.get
+            if isinstance(iterable, Mapping):
+                for elem, count in iterable.items():
+                    self[elem] = self_get(elem, 0) - count
+            else:
+                for elem in iterable:
+                    self[elem] = self_get(elem, 0) - 1
+        if kwds:
+            self.subtract(kwds)
+
     def copy(self):
         'Return a shallow copy.'
         return self.__class__(self)
@@ -473,8 +583,12 @@ class Counter(dict):
     def __repr__(self):
         if not self:
             return '%s()' % self.__class__.__name__
-        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
-        return '%s({%s})' % (self.__class__.__name__, items)
+        try:
+            items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+            return '%s({%s})' % (self.__class__.__name__, items)
+        except TypeError:
+            # handle case where values are not orderable
+            return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
 
     # Multiset-style mathematical operations discussed in:
     #       Knuth TAOCP Volume II section 4.6.3 exercise 19
@@ -561,6 +675,109 @@ class Counter(dict):
         return result
 
 
+########################################################################
+### ChainMap (helper for configparser)
+########################################################################
+
+class _ChainMap(MutableMapping):
+    ''' A ChainMap groups multiple dicts (or other mappings) together
+    to create a single, updateable view.
+
+    The underlying mappings are stored in a list.  That list is public and can
+    be accessed or updated using the *maps* attribute.  There is no other state.
+
+    Lookups search the underlying mappings successively until a key is found.
+    In contrast, writes, updates, and deletions only operate on the first
+    mapping.
+
+    '''
+
+    def __init__(self, *maps):
+        '''Initialize a ChainMap by setting *maps* to the given mappings.
+        If no mappings are provided, a single empty dictionary is used.
+
+        '''
+        self.maps = list(maps) or [{}]          # always at least one map
+
+    def __missing__(self, key):
+        raise KeyError(key)
+
+    def __getitem__(self, key):
+        for mapping in self.maps:
+            try:
+                return mapping[key]             # can't use 'key in mapping' with defaultdict
+            except KeyError:
+                pass
+        return self.__missing__(key)            # support subclasses that define __missing__
+
+    def get(self, key, default=None):
+        return self[key] if key in self else default
+
+    def __len__(self):
+        return len(set().union(*self.maps))     # reuses stored hash values if possible
+
+    def __iter__(self):
+        return iter(set().union(*self.maps))
+
+    def __contains__(self, key):
+        return any(key in m for m in self.maps)
+
+    def __bool__(self):
+        return any(self.maps)
+
+    @_recursive_repr()
+    def __repr__(self):
+        return '{0.__class__.__name__}({1})'.format(
+            self, ', '.join(map(repr, self.maps)))
+
+    @classmethod
+    def fromkeys(cls, iterable, *args):
+        'Create a ChainMap with a single dict created from the iterable.'
+        return cls(dict.fromkeys(iterable, *args))
+
+    def copy(self):
+        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+        return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+    __copy__ = copy
+
+    def new_child(self):                        # like Django's Context.push()
+        'New ChainMap with a new dict followed by all previous maps.'
+        return self.__class__({}, *self.maps)
+
+    @property
+    def parents(self):                          # like Django's Context.pop()
+        'New ChainMap from maps[1:].'
+        return self.__class__(*self.maps[1:])
+
+    def __setitem__(self, key, value):
+        self.maps[0][key] = value
+
+    def __delitem__(self, key):
+        try:
+            del self.maps[0][key]
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def popitem(self):
+        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
+        try:
+            return self.maps[0].popitem()
+        except KeyError:
+            raise KeyError('No keys found in the first mapping.')
+
+    def pop(self, key, *args):
+        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+        try:
+            return self.maps[0].pop(key, *args)
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def clear(self):
+        'Clear maps[0], leaving maps[1:] intact.'
+        self.maps[0].clear()
+
+
 ################################################################################
 ### UserDict
 ################################################################################
@@ -802,6 +1019,8 @@ class UserString(Sequence):
             new = new.data
         return self.__class__(self.data.replace(old, new, maxsplit))
     def rfind(self, sub, start=0, end=_sys.maxsize):
+        if isinstance(sub, UserString):
+            sub = sub.data
        return self.data.rfind(sub, start, end)
    def rindex(self, sub, start=0, end=_sys.maxsize):
        return self.data.rindex(sub, start, end)
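The OrderedDict hunks above add move_to_end(), a link-based popitem(last=...), and a __sizeof__() that accounts for the link and proxy objects. A minimal usage sketch of the new reordering methods, based on the docstrings shown above (the variable names are illustrative only):

from collections import OrderedDict

d = OrderedDict.fromkeys('abcde')             # keys in insertion order: a b c d e
d.move_to_end('b')                            # 'b' moves to the last position
assert ''.join(d) == 'acdeb'
d.move_to_end('b', last=False)                # 'b' moves to the first position
assert ''.join(d) == 'bacde'
assert d.popitem() == ('e', None)             # LIFO pop from the right end
assert d.popitem(last=False) == ('b', None)   # FIFO pop from the left end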
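The new __setitem__ and __delitem__ signatures bind dict.__setitem__, dict.__delitem__, and the Link/proxy constructors as keyword-argument defaults. That is a common CPython micro-optimization: the callables are captured once, when the method is defined, and become fast local lookups on every call. A tiny illustrative sketch of the same idea (fast_floor is a made-up name, not part of the patch):

import math

def fast_floor(x, floor=math.floor):
    # 'floor' is resolved once, at function definition time,
    # instead of via a module-global lookup on every call.
    return floor(x)

assert fast_floor(3.7) == 3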
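The namedtuple() rewrite replaces the inline %-formatted template with a module-level _class_template that is filled in via str.format() and executed in a throwaway namespace. The following is a rough sketch of that generate-source-then-exec technique, not the stdlib code itself; the template, function, and class names here are illustrative only:

# Build a tiny class from a source template, then exec it in a private namespace.
_template = '''\
class {typename}(tuple):
    "Simplified stand-in for a generated named tuple."
    def __new__(cls, {arg_list}):
        return tuple.__new__(cls, ({arg_list},))
'''

def make_point_class():
    source = _template.format(typename='Point', arg_list='x, y')
    namespace = {'__name__': 'namedtuple_Point'}   # helps tracing tools identify the frame
    exec(source, namespace)
    return namespace['Point']

Point = make_point_class()
assert Point(11, 22) == (11, 22)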
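Counter.update() now funnels its inner loop through _count_elements(), and the try/except import that follows swaps in a C implementation from _collections when one is available. A small sketch of that pure-Python-fallback-plus-optional-accelerator pattern, under an assumed module name (_fastcount below is hypothetical, purely for illustration):

def _tally(mapping, iterable):
    'Pure-Python fallback: add 1 to mapping[elem] for each element of iterable.'
    mapping_get = mapping.get
    for elem in iterable:
        mapping[elem] = mapping_get(elem, 0) + 1

try:
    # Prefer an accelerated version when the extension module exists.
    from _fastcount import _tally          # hypothetical C extension
except ImportError:
    pass                                   # keep the pure-Python fallback

counts = {}
_tally(counts, 'abracadabra')
assert counts['a'] == 5 and counts['b'] == 2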
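The _ChainMap class added at the end is a private helper for configparser in this version of the module (essentially the same class was later published as collections.ChainMap). Its docstring describes the key behaviour: lookups fall through the list of maps in order, while writes and deletions touch only maps[0]. A short behavioural sketch, assuming direct use of the private _ChainMap name from this version of the module:

from collections import _ChainMap          # private name in this version of collections

defaults = {'color': 'red', 'user': 'guest'}
overrides = {'user': 'admin'}
cm = _ChainMap(overrides, defaults)

assert cm['user'] == 'admin'               # found in the first mapping
assert cm['color'] == 'red'                # falls through to the second mapping

cm['color'] = 'blue'                       # writes go to maps[0] only
assert overrides['color'] == 'blue'
assert defaults['color'] == 'red'          # the underlying defaults are untouched

child = cm.new_child()                     # push a fresh empty dict in front
child['user'] = 'temporary'
assert cm['user'] == 'admin'               # the parent chain is unaffected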