Diffstat (limited to 'Lib/collections.py')
-rw-r--r--  Lib/collections.py  449
1 file changed, 334 insertions, 115 deletions
diff --git a/Lib/collections.py b/Lib/collections.py
index b2a590bd77..eb2024352d 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -13,6 +13,7 @@ import sys as _sys
import heapq as _heapq
from weakref import proxy as _proxy
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from reprlib import recursive_repr as _recursive_repr
################################################################################
### OrderedDict
################################################################################
@@ -26,59 +27,57 @@ class OrderedDict(dict):
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
- # Big-O running times for all methods are the same as for regular dictionaries.
+ # Big-O running times for all methods are the same as regular dictionaries.
- # The internal self.__map dictionary maps keys to links in a doubly linked list.
+ # The internal self.__map dict maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
- # The prev/next links are weakref proxies (to prevent circular references).
+ # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
+ # The prev links are weakref proxies (to prevent circular references).
# Individual links are kept alive by the hard reference in self.__map.
# Those hard references disappear when a key is deleted from an OrderedDict.
def __init__(self, *args, **kwds):
- '''Initialize an ordered dictionary. Signature is the same as for
- regular dictionaries, but keyword arguments are not recommended
- because their insertion order is arbitrary.
+ '''Initialize an ordered dictionary. The signature is the same as
+ regular dictionaries, but keyword arguments are not recommended because
+ their insertion order is arbitrary.
'''
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
- self.__in_repr = False # detects recursive repr
try:
self.__root
except AttributeError:
- self.__root = root = _Link() # sentinel node for the doubly linked list
+ self.__hardroot = _Link()
+ self.__root = root = _proxy(self.__hardroot)
root.prev = root.next = root
self.__map = {}
self.__update(*args, **kwds)
- def clear(self):
- 'od.clear() -> None. Remove all items from od.'
- root = self.__root
- root.prev = root.next = root
- self.__map.clear()
- dict.clear(self)
-
- def __setitem__(self, key, value):
+ def __setitem__(self, key, value,
+ dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
'od.__setitem__(i, y) <==> od[i]=y'
- # Setting a new item creates a new link which goes at the end of the linked
- # list, and the inherited dictionary is updated with the new key/value pair.
+ # Setting a new item creates a new link at the end of the linked list,
+ # and the inherited dictionary is updated with the new key/value pair.
if key not in self:
- self.__map[key] = link = _Link()
+ self.__map[key] = link = Link()
root = self.__root
last = root.prev
link.prev, link.next, link.key = last, root, key
- last.next = root.prev = _proxy(link)
- dict.__setitem__(self, key, value)
+ last.next = link
+ root.prev = proxy(link)
+ dict_setitem(self, key, value)
- def __delitem__(self, key):
+ def __delitem__(self, key, dict_delitem=dict.__delitem__):
'od.__delitem__(y) <==> del od[y]'
- # Deleting an existing item uses self.__map to find the link which is
- # then removed by updating the links in the predecessor and successor nodes.
- dict.__delitem__(self, key)
+ # Deleting an existing item uses self.__map to find the link which gets
+ # removed by updating the links in the predecessor and successor nodes.
+ dict_delitem(self, key)
link = self.__map.pop(key)
- link.prev.next = link.next
- link.next.prev = link.prev
+ link_prev = link.prev
+ link_next = link.next
+ link_prev.next = link_next
+ link_next.prev = link_prev
def __iter__(self):
'od.__iter__() <==> iter(od)'
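To make the linked-list bookkeeping above concrete, a minimal sketch of the resulting behaviour, assuming nothing beyond the patched collections.OrderedDict:

    from collections import OrderedDict

    od = OrderedDict()
    od['a'] = 1        # new key: a link is appended at the end of the list
    od['b'] = 2
    od['a'] = 10       # existing key: only the value changes, position is kept
    del od['b']        # the link is unhooked from its predecessor and successor
    od['b'] = 3        # reinserting creates a fresh link at the end
    print(list(od))    # ['a', 'b']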
@@ -98,6 +97,69 @@ class OrderedDict(dict):
yield curr.key
curr = curr.prev
+ def clear(self):
+ 'od.clear() -> None. Remove all items from od.'
+ root = self.__root
+ root.prev = root.next = root
+ self.__map.clear()
+ dict.clear(self)
+
+ def popitem(self, last=True):
+ '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+ Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+ '''
+ if not self:
+ raise KeyError('dictionary is empty')
+ root = self.__root
+ if last:
+ link = root.prev
+ link_prev = link.prev
+ link_prev.next = root
+ root.prev = link_prev
+ else:
+ link = root.next
+ link_next = link.next
+ root.next = link_next
+ link_next.prev = root
+ key = link.key
+ del self.__map[key]
+ value = dict.pop(self, key)
+ return key, value
+
+ def move_to_end(self, key, last=True):
+ '''Move an existing element to the end (or beginning if last==False).
+
+ Raises KeyError if the element does not exist.
+ When last=True, acts like a fast version of self[key]=self.pop(key).
+
+ '''
+ link = self.__map[key]
+ link_prev = link.prev
+ link_next = link.next
+ link_prev.next = link_next
+ link_next.prev = link_prev
+ root = self.__root
+ if last:
+ last = root.prev
+ link.prev = last
+ link.next = root
+ last.next = root.prev = link
+ else:
+ first = root.next
+ link.prev = root
+ link.next = first
+ root.next = first.prev = link
+
+ def __sizeof__(self):
+ sizeof = _sys.getsizeof
+ n = len(self) + 1 # number of links including root
+ size = sizeof(self.__dict__) # instance dictionary
+ size += sizeof(self.__map) * 2 # internal dict and inherited dict
+ size += sizeof(self.__hardroot) * n # link objects
+ size += sizeof(self.__root) * n # proxy objects
+ return size
+
update = __update = MutableMapping.update
keys = MutableMapping.keys
values = MutableMapping.values
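A short sketch of the popitem(last=...) and move_to_end() methods added in this hunk (the values are illustrative; the API is exactly as defined above):

    from collections import OrderedDict

    od = OrderedDict.fromkeys('abcde')
    od.move_to_end('b')                # 'b' moves to the last position
    print(''.join(od))                 # acdeb
    od.move_to_end('b', last=False)    # 'b' moves to the first position
    print(''.join(od))                 # bacde
    print(od.popitem(last=True))       # ('e', None)  -- LIFO
    print(od.popitem(last=False))      # ('b', None)  -- FIFO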
@@ -107,6 +169,11 @@ class OrderedDict(dict):
__marker = object()
def pop(self, key, default=__marker):
+ '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
+ value. If key is not found, d is returned if given, otherwise KeyError
+ is raised.
+
+ '''
if key in self:
result = self[key]
del self[key]
@@ -122,29 +189,12 @@ class OrderedDict(dict):
self[key] = default
return default
- def popitem(self, last=True):
- '''od.popitem() -> (k, v), return and remove a (key, value) pair.
- Pairs are returned in LIFO order if last is true or FIFO order if false.
-
- '''
- if not self:
- raise KeyError('dictionary is empty')
- key = next(reversed(self) if last else iter(self))
- value = self.pop(key)
- return key, value
-
+ @_recursive_repr()
def __repr__(self):
'od.__repr__() <==> repr(od)'
- if self.__in_repr:
- return '...'
if not self:
return '%s()' % (self.__class__.__name__,)
- self.__in_repr = True
- try:
- result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
- finally:
- self.__in_repr = False
- return result
+ return '%s(%r)' % (self.__class__.__name__, list(self.items()))
def __reduce__(self):
'Return state information for pickling'
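The switch to reprlib.recursive_repr means a self-referential OrderedDict no longer needs the __in_repr flag; a minimal illustration:

    from collections import OrderedDict

    od = OrderedDict()
    od['self'] = od      # the value refers back to its container
    print(repr(od))      # OrderedDict([('self', ...)])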
@@ -162,14 +212,14 @@ class OrderedDict(dict):
@classmethod
def fromkeys(cls, iterable, value=None):
- '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
- and values equal to v (which defaults to None).
+ '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
+ If not specified, the value defaults to None.
'''
- d = cls()
+ self = cls()
for key in iterable:
- d[key] = value
- return d
+ self[key] = value
+ return self
def __eq__(self, other):
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
@@ -181,23 +231,69 @@ class OrderedDict(dict):
all(p==q for p, q in zip(self.items(), other.items()))
return dict.__eq__(self, other)
- def __ne__(self, other):
- '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
- while comparison to a regular mapping is order-insensitive.
-
- '''
- return not self == other
-
-
################################################################################
### namedtuple
################################################################################
+_class_template = '''\
+from builtins import property as _property, tuple as _tuple
+from operator import itemgetter as _itemgetter
+from collections import OrderedDict
+
+class {typename}(tuple):
+ '{typename}({arg_list})'
+
+ __slots__ = ()
+
+ _fields = {field_names!r}
+
+ def __new__(_cls, {arg_list}):
+ 'Create new instance of {typename}({arg_list})'
+ return _tuple.__new__(_cls, ({arg_list}))
+
+ @classmethod
+ def _make(cls, iterable, new=tuple.__new__, len=len):
+ 'Make a new {typename} object from a sequence or iterable'
+ result = new(cls, iterable)
+ if len(result) != {num_fields:d}:
+ raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
+ return result
+
+ def __repr__(self):
+ 'Return a nicely formatted representation string'
+ return self.__class__.__name__ + '({repr_fmt})' % self
+
+ def _asdict(self):
+ 'Return a new OrderedDict which maps field names to their values'
+ return OrderedDict(zip(self._fields, self))
+
+ __dict__ = property(_asdict)
+
+ def _replace(_self, **kwds):
+ 'Return a new {typename} object replacing specified fields with new values'
+ result = _self._make(map(kwds.pop, {field_names!r}, _self))
+ if kwds:
+ raise ValueError('Got unexpected field names: %r' % list(kwds))
+ return result
+
+ def __getnewargs__(self):
+ 'Return self as a plain tuple. Used by copy and pickle.'
+ return tuple(self)
+
+{field_defs}
+'''
+
+_repr_template = '{name}=%r'
+
+_field_template = '''\
+ {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
+'''
+
def namedtuple(typename, field_names, verbose=False, rename=False):
"""Returns a new subclass of tuple with named fields.
- >>> Point = namedtuple('Point', 'x y')
+ >>> Point = namedtuple('Point', ['x', 'y'])
>>> Point.__doc__ # docstring for the new class
'Point(x, y)'
>>> p = Point(11, y=22) # instantiate with positional args or keywords
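The rename=True handling reworked in the next hunk substitutes positional names for invalid or duplicate fields; a small sketch of the intended behaviour:

    from collections import namedtuple

    # 'class' is a keyword and 'x' is duplicated, so both are renamed
    Row = namedtuple('Row', ['x', 'class', 'x'], rename=True)
    print(Row._fields)     # ('x', '_1', '_2')
    print(Row(1, 2, 3))    # Row(x=1, _1=2, _2=3)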
@@ -222,76 +318,54 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
# generating informative error messages and preventing template injection attacks.
if isinstance(field_names, str):
field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
- field_names = tuple(map(str, field_names))
+ field_names = list(map(str, field_names))
if rename:
- names = list(field_names)
seen = set()
- for i, name in enumerate(names):
- if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
- or not name or name[0].isdigit() or name.startswith('_')
+ for index, name in enumerate(field_names):
+ if (not all(c.isalnum() or c=='_' for c in name)
+ or _iskeyword(name)
+ or not name
+ or name[0].isdigit()
+ or name.startswith('_')
or name in seen):
- names[i] = '_%d' % i
+ field_names[index] = '_%d' % index
seen.add(name)
- field_names = tuple(names)
- for name in (typename,) + field_names:
+ for name in [typename] + field_names:
if not all(c.isalnum() or c=='_' for c in name):
raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
raise ValueError('Type names and field names cannot be a keyword: %r' % name)
if name[0].isdigit():
raise ValueError('Type names and field names cannot start with a number: %r' % name)
- seen_names = set()
+ seen = set()
for name in field_names:
if name.startswith('_') and not rename:
raise ValueError('Field names cannot start with an underscore: %r' % name)
- if name in seen_names:
+ if name in seen:
raise ValueError('Encountered duplicate field name: %r' % name)
- seen_names.add(name)
-
- # Create and fill-in the class template
- numfields = len(field_names)
- argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
- reprtxt = ', '.join('%s=%%r' % name for name in field_names)
- template = '''class %(typename)s(tuple):
- '%(typename)s(%(argtxt)s)' \n
- __slots__ = () \n
- _fields = %(field_names)r \n
- def __new__(_cls, %(argtxt)s):
- return _tuple.__new__(_cls, (%(argtxt)s)) \n
- @classmethod
- def _make(cls, iterable, new=tuple.__new__, len=len):
- 'Make a new %(typename)s object from a sequence or iterable'
- result = new(cls, iterable)
- if len(result) != %(numfields)d:
- raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
- return result \n
- def __repr__(self):
- return '%(typename)s(%(reprtxt)s)' %% self \n
- def _asdict(self):
- 'Return a new OrderedDict which maps field names to their values'
- return OrderedDict(zip(self._fields, self)) \n
- def _replace(_self, **kwds):
- 'Return a new %(typename)s object replacing specified fields with new values'
- result = _self._make(map(kwds.pop, %(field_names)r, _self))
- if kwds:
- raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
- return result \n
- def __getnewargs__(self):
- return tuple(self) \n\n''' % locals()
- for i, name in enumerate(field_names):
- template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
- if verbose:
- print(template)
+ seen.add(name)
+
+ # Fill-in the class template
+ class_definition = _class_template.format(
+ typename = typename,
+ field_names = tuple(field_names),
+ num_fields = len(field_names),
+ arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
+ repr_fmt = ', '.join(_repr_template.format(name=name) for name in field_names),
+ field_defs = '\n'.join(_field_template.format(index=index, name=name)
+ for index, name in enumerate(field_names))
+ )
# Execute the template string in a temporary namespace and
# support tracing utilities by setting a value for frame.f_globals['__name__']
- namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
- OrderedDict=OrderedDict, _property=property, _tuple=tuple)
+ namespace = dict(__name__='namedtuple_%s' % typename)
try:
- exec(template, namespace)
+ exec(class_definition, namespace)
except SyntaxError as e:
- raise SyntaxError(e.msg + ':\n' + template) from e
+ raise SyntaxError(e.msg + ':\n\n' + class_definition)
result = namespace[typename]
+ if verbose:
+ print(class_definition)
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in environments where
@@ -309,6 +383,17 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
########################################################################
### Counter
########################################################################
+def _count_elements(mapping, iterable):
+ 'Tally elements from the iterable.'
+ mapping_get = mapping.get
+ for elem in iterable:
+ mapping[elem] = mapping_get(elem, 0) + 1
+
+try: # Load C helper function if available
+ from _collections import _count_elements
+except ImportError:
+ pass
+
class Counter(dict):
'''Dict subclass for counting hashable items. Sometimes called a bag
or multiset. Elements are stored as dictionary keys and their counts
@@ -452,12 +537,37 @@ class Counter(dict):
else:
super().update(iterable) # fast path when counter is empty
else:
- self_get = self.get
- for elem in iterable:
- self[elem] = 1 + self_get(elem, 0)
+ _count_elements(self, iterable)
if kwds:
self.update(kwds)
+ def subtract(self, iterable=None, **kwds):
+ '''Like dict.update() but subtracts counts instead of replacing them.
+ Counts can be reduced below zero. Both the inputs and outputs are
+ allowed to contain zero and negative counts.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.subtract('witch') # subtract elements from another iterable
+ >>> c.subtract(Counter('watch')) # subtract elements from another counter
+ >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
+ 0
+ >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
+ -1
+
+ '''
+ if iterable is not None:
+ self_get = self.get
+ if isinstance(iterable, Mapping):
+ for elem, count in iterable.items():
+ self[elem] = self_get(elem, 0) - count
+ else:
+ for elem in iterable:
+ self[elem] = self_get(elem, 0) - 1
+ if kwds:
+ self.subtract(kwds)
+
def copy(self):
'Return a shallow copy.'
return self.__class__(self)
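Counter.update() now routes plain iterables through _count_elements (the C version when _collections provides one); the counting semantics are unchanged, and the new subtract() lets counts go negative. A quick sketch:

    from collections import Counter

    c = Counter('abracadabra')    # plain iterable: tallied via _count_elements
    print(c.most_common(1))       # [('a', 5)]
    c.subtract('aaaaaaa')         # subtract() may drive counts below zero
    print(c['a'])                 # -2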
@@ -473,8 +583,12 @@ class Counter(dict):
def __repr__(self):
if not self:
return '%s()' % self.__class__.__name__
- items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
- return '%s({%s})' % (self.__class__.__name__, items)
+ try:
+ items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+ return '%s({%s})' % (self.__class__.__name__, items)
+ except TypeError:
+ # handle case where values are not orderable
+ return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
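The try/except added to __repr__ matters when the stored counts are not mutually orderable, since most_common() has to sort them; a hedged illustration (dict key order may vary):

    from collections import Counter

    c = Counter(spam=3)
    c['eggs'] = 'n/a'    # a non-numeric 'count' cannot be sorted against 3
    print(repr(c))       # e.g. Counter({'spam': 3, 'eggs': 'n/a'})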
@@ -561,6 +675,109 @@ class Counter(dict):
return result
+########################################################################
+### ChainMap (helper for configparser)
+########################################################################
+
+class _ChainMap(MutableMapping):
+ ''' A ChainMap groups multiple dicts (or other mappings) together
+ to create a single, updateable view.
+
+ The underlying mappings are stored in a list. That list is public and can
+    be accessed or updated using the *maps* attribute. There is no other state.
+
+ Lookups search the underlying mappings successively until a key is found.
+ In contrast, writes, updates, and deletions only operate on the first
+ mapping.
+
+ '''
+
+ def __init__(self, *maps):
+ '''Initialize a ChainMap by setting *maps* to the given mappings.
+ If no mappings are provided, a single empty dictionary is used.
+
+ '''
+ self.maps = list(maps) or [{}] # always at least one map
+
+ def __missing__(self, key):
+ raise KeyError(key)
+
+ def __getitem__(self, key):
+ for mapping in self.maps:
+ try:
+ return mapping[key] # can't use 'key in mapping' with defaultdict
+ except KeyError:
+ pass
+ return self.__missing__(key) # support subclasses that define __missing__
+
+ def get(self, key, default=None):
+ return self[key] if key in self else default
+
+ def __len__(self):
+ return len(set().union(*self.maps)) # reuses stored hash values if possible
+
+ def __iter__(self):
+ return iter(set().union(*self.maps))
+
+ def __contains__(self, key):
+ return any(key in m for m in self.maps)
+
+ def __bool__(self):
+ return any(self.maps)
+
+ @_recursive_repr()
+ def __repr__(self):
+ return '{0.__class__.__name__}({1})'.format(
+ self, ', '.join(map(repr, self.maps)))
+
+ @classmethod
+ def fromkeys(cls, iterable, *args):
+ 'Create a ChainMap with a single dict created from the iterable.'
+ return cls(dict.fromkeys(iterable, *args))
+
+ def copy(self):
+ 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+ return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+ __copy__ = copy
+
+ def new_child(self): # like Django's Context.push()
+ 'New ChainMap with a new dict followed by all previous maps.'
+ return self.__class__({}, *self.maps)
+
+ @property
+ def parents(self): # like Django's Context.pop()
+ 'New ChainMap from maps[1:].'
+ return self.__class__(*self.maps[1:])
+
+ def __setitem__(self, key, value):
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ try:
+ del self.maps[0][key]
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def popitem(self):
+        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
+ try:
+ return self.maps[0].popitem()
+ except KeyError:
+ raise KeyError('No keys found in the first mapping.')
+
+ def pop(self, key, *args):
+ 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+ try:
+ return self.maps[0].pop(key, *args)
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def clear(self):
+ 'Clear maps[0], leaving maps[1:] intact.'
+ self.maps[0].clear()
+
+
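_ChainMap is added here as a private helper for configparser (it later became the public collections.ChainMap); a short sketch of the lookup/write split, assuming it is imported under that private name:

    from collections import _ChainMap   # private in this version of the module

    defaults = {'color': 'red', 'user': 'guest'}
    session = {'user': 'admin'}
    cm = _ChainMap(session, defaults)   # lookups search maps left to right
    print(cm['user'], cm['color'])      # admin red
    cm['color'] = 'blue'                # writes always land in maps[0]
    print(defaults['color'])            # red -- later maps are never written
    child = cm.new_child()              # new empty dict in front, like a scope push
    print(child.maps[1] is session)     # True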
################################################################################
### UserDict
################################################################################
@@ -802,6 +1019,8 @@ class UserString(Sequence):
new = new.data
return self.__class__(self.data.replace(old, new, maxsplit))
def rfind(self, sub, start=0, end=_sys.maxsize):
+ if isinstance(sub, UserString):
+ sub = sub.data
return self.data.rfind(sub, start, end)
def rindex(self, sub, start=0, end=_sys.maxsize):
return self.data.rindex(sub, start, end)
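The rfind() change accepts a UserString argument as the substring (str.rfind itself only takes str); a minimal check:

    from collections import UserString

    s = UserString('abracadabra')
    sub = UserString('bra')
    print(s.rfind(sub))    # 8, same as 'abracadabra'.rfind('bra')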