diff options
Diffstat (limited to 'lib/sqlalchemy/topological.py')
| -rw-r--r-- | lib/sqlalchemy/topological.py | 146 |
1 files changed, 95 insertions, 51 deletions
diff --git a/lib/sqlalchemy/topological.py b/lib/sqlalchemy/topological.py index a1b03b89a..d9f68ac01 100644 --- a/lib/sqlalchemy/topological.py +++ b/lib/sqlalchemy/topological.py @@ -4,57 +4,78 @@ # This module is part of SQLAlchemy and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -"""topological sorting algorithms. the key to the unit of work is to assemble a list -of dependencies amongst all the different mappers that have been defined for classes. -Related tables with foreign key constraints have a definite insert order, deletion order, -objects need dependent properties from parent objects set up before saved, etc. -These are all encoded as dependencies, in the form "mapper X is dependent on mapper Y", -meaning mapper Y's objects must be saved before those of mapper X, and mapper X's objects -must be deleted before those of mapper Y. - -The topological sort is an algorithm that receives this list of dependencies as a "partial -ordering", that is a list of pairs which might say, "X is dependent on Y", "Q is dependent -on Z", but does not necessarily tell you anything about Q being dependent on X. Therefore, -its not a straight sort where every element can be compared to another...only some of the -elements have any sorting preference, and then only towards just some of the other elements. -For a particular partial ordering, there can be many possible sorts that satisfy the +"""Topological sorting algorithms. + +The key to the unit of work is to assemble a list of dependencies +amongst all the different mappers that have been defined for classes. + +Related tables with foreign key constraints have a definite insert +order, deletion order, objects need dependent properties from parent +objects set up before saved, etc. + +These are all encoded as dependencies, in the form *mapper X is +dependent on mapper Y*, meaning mapper Y's objects must be saved +before those of mapper X, and mapper X's objects must be deleted +before those of mapper Y. + +The topological sort is an algorithm that receives this list of +dependencies as a *partial ordering*, that is a list of pairs which +might say, *X is dependent on Y*, *Q is dependent on Z*, but does not +necessarily tell you anything about Q being dependent on X. Therefore, +its not a straight sort where every element can be compared to +another... only some of the elements have any sorting preference, and +then only towards just some of the other elements. For a particular +partial ordering, there can be many possible sorts that satisfy the conditions. -An intrinsic "gotcha" to this algorithm is that since there are many possible outcomes -to sorting a partial ordering, the algorithm can return any number of different results for the -same input; just running it on a different machine architecture, or just random differences -in the ordering of dictionaries, can change the result that is returned. While this result -is guaranteed to be true to the incoming partial ordering, if the partial ordering itself -does not properly represent the dependencies, code that works fine will suddenly break, then -work again, then break, etc. Most of the bugs I've chased down while developing the "unit of work" -have been of this nature - very tricky to reproduce and track down, particularly before I -realized this characteristic of the algorithm. +An intrinsic *gotcha* to this algorithm is that since there are many +possible outcomes to sorting a partial ordering, the algorithm can +return any number of different results for the same input; just +running it on a different machine architecture, or just random +differences in the ordering of dictionaries, can change the result +that is returned. While this result is guaranteed to be true to the +incoming partial ordering, if the partial ordering itself does not +properly represent the dependencies, code that works fine will +suddenly break, then work again, then break, etc. Most of the bugs +I've chased down while developing the *unit of work* have been of this +nature - very tricky to reproduce and track down, particularly before +I realized this characteristic of the algorithm. """ + import string, StringIO from sqlalchemy import util from sqlalchemy.exceptions import * class _Node(object): - """represents each item in the sort. While the topological sort - produces a straight ordered list of items, _Node ultimately stores a tree-structure - of those items which are organized so that non-dependent nodes are siblings.""" + """Represent each item in the sort. + + While the topological sort produces a straight ordered list of + items, ``_Node`` ultimately stores a tree-structure of those items + which are organized so that non-dependent nodes are siblings. + """ + def __init__(self, item): self.item = item self.dependencies = util.Set() self.children = [] self.cycles = None + def __str__(self): return self.safestr() + def safestr(self, indent=0): return (' ' * indent * 2) + \ str(self.item) + \ (self.cycles is not None and (" (cycles: " + repr([x for x in self.cycles]) + ")") or "") + \ "\n" + \ string.join([n.safestr(indent + 1) for n in self.children], '') + def __repr__(self): return "%s" % (str(self.item)) + def all_deps(self): - """Returns a set of dependencies for this node and all its cycles""" + """Return a set of dependencies for this node and all its cycles.""" + deps = util.Set(self.dependencies) if self.cycles is not None: for c in self.cycles: @@ -62,12 +83,15 @@ class _Node(object): return deps class _EdgeCollection(object): - """a collection of directed edges.""" + """A collection of directed edges.""" + def __init__(self): self.parent_to_children = {} self.child_to_parents = {} + def add(self, edge): - """add an edge to this collection.""" + """Add an edge to this collection.""" + (parentnode, childnode) = edge if not self.parent_to_children.has_key(parentnode): self.parent_to_children[parentnode] = util.Set() @@ -76,8 +100,13 @@ class _EdgeCollection(object): self.child_to_parents[childnode] = util.Set() self.child_to_parents[childnode].add(parentnode) parentnode.dependencies.add(childnode) + def remove(self, edge): - """remove an edge from this collection. return the childnode if it has no other parents""" + """Remove an edge from this collection. + + Return the childnode if it has no other parents. + """ + (parentnode, childnode) = edge self.parent_to_children[parentnode].remove(childnode) self.child_to_parents[childnode].remove(parentnode) @@ -85,52 +114,65 @@ class _EdgeCollection(object): return childnode else: return None + def has_parents(self, node): return self.child_to_parents.has_key(node) and len(self.child_to_parents[node]) > 0 + def edges_by_parent(self, node): if self.parent_to_children.has_key(node): return [(node, child) for child in self.parent_to_children[node]] else: return [] + def get_parents(self): return self.parent_to_children.keys() + def pop_node(self, node): - """remove all edges where the given node is a parent. - - returns the collection of all nodes which were children of the given node, and have - no further parents.""" + """Remove all edges where the given node is a parent. + + Return the collection of all nodes which were children of the + given node, and have no further parents. + """ + children = self.parent_to_children.pop(node, None) if children is not None: for child in children: self.child_to_parents[child].remove(node) if not len(self.child_to_parents[child]): yield child + def __len__(self): return sum([len(x) for x in self.parent_to_children.values()]) + def __iter__(self): for parent, children in self.parent_to_children.iteritems(): for child in children: yield (parent, child) + def __str__(self): return repr(list(self)) + def __repr__(self): return repr(list(self)) - + class QueueDependencySorter(object): - """topological sort adapted from wikipedia's article on the subject. it creates a straight-line - list of elements, then a second pass groups non-dependent actions together to build - more of a tree structure with siblings.""" - + """Topological sort adapted from wikipedia's article on the subject. + + It creates a straight-line list of elements, then a second pass + groups non-dependent actions together to build more of a tree + structure with siblings. + """ + def __init__(self, tuples, allitems): self.tuples = tuples self.allitems = allitems def sort(self, allow_self_cycles=True, allow_all_cycles=False): (tuples, allitems) = (self.tuples, self.allitems) - #print "\n---------------------------------\n" + #print "\n---------------------------------\n" #print repr([t for t in tuples]) #print repr([a for a in allitems]) - #print "\n---------------------------------\n" + #print "\n---------------------------------\n" nodes = {} edges = _EdgeCollection() @@ -138,7 +180,7 @@ class QueueDependencySorter(object): if not nodes.has_key(item): node = _Node(item) nodes[item] = node - + for t in tuples: if t[0] is t[1]: if allow_self_cycles: @@ -188,16 +230,19 @@ class QueueDependencySorter(object): for childnode in edges.pop_node(node): queue.append(childnode) return self._create_batched_tree(output) - + def _create_batched_tree(self, nodes): - """given a list of nodes from a topological sort, organizes the nodes into a tree structure, - with as many non-dependent nodes set as silbings to each other as possible.""" + """Given a list of nodes from a topological sort, organize the + nodes into a tree structure, with as many non-dependent nodes + set as siblings to each other as possible. + """ + if not len(nodes): return None # a list of all currently independent subtrees as a tuple of # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree) - # order of the list has no semantics for the algorithmic + # order of the list has no semantics for the algorithmic independents = [] # in reverse topological order for node in reversed(nodes): @@ -212,7 +257,7 @@ class QueueDependencySorter(object): if nodealldeps: # iterate over independent node indexes in reverse order so we can efficiently remove them for index in xrange(len(independents)-1,-1,-1): - child, childsubtree, childcycles = independents[index] + child, childsubtree, childcycles = independents[index] # if there is a dependency between this node and an independent node if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)): # prepend child to nodes children @@ -231,7 +276,7 @@ class QueueDependencySorter(object): # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation head.children[0:0] = [i[0] for i in independents] return head - + def _find_cycles(self, edges): involved_in_cycles = util.Set() cycles = {} @@ -241,7 +286,7 @@ class QueueDependencySorter(object): cycle = [] elif node is goal: return True - + for (n, key) in edges.edges_by_parent(node): if key in cycle: continue @@ -259,7 +304,7 @@ class QueueDependencySorter(object): else: cycles[x] = cycset cycle.pop() - + for parent in edges.get_parents(): traverse(parent) @@ -269,4 +314,3 @@ class QueueDependencySorter(object): if edge[0] in cycle and edge[1] in cycle: edgecollection.append(edge) yield edgecollection - |
