summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Collins <robertc@robertcollins.net>2010-02-19 17:38:27 +1100
committerRobert Collins <robertc@robertcollins.net>2010-02-19 17:38:27 +1100
commitd05ed270f04b294533b4c80f070edf148d1c7a6c (patch)
treeaffabe30a0dd90516a79754bcf026358f7a0bf68
parentcd86ed1616f62488a71e472cbdf3d0dc03a6d65d (diff)
parentd62d20ad8c83f4032d7e0af10efb207217612094 (diff)
downloadtestresources-d05ed270f04b294533b4c80f070edf148d1c7a6c.tar.gz
Merge sortTests performance bugfix.
-rw-r--r--NEWS8
-rw-r--r--lib/testresources/__init__.py276
-rw-r--r--lib/testresources/tests/__init__.py2
-rw-r--r--lib/testresources/tests/test_optimising_test_suite.py54
-rw-r--r--lib/testresources/tests/test_resource_graph.py141
5 files changed, 447 insertions, 34 deletions
diff --git a/NEWS b/NEWS
index 971a6d1..15c59f0 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,14 @@ IMPROVEMENTS
* Rename TestResource to TestResourceManager leaving TestResource as an alias.
Fixes bug #520769.
+* Test sorting no longer performs N! work on tests that can never benefit from
+ order optimisations (when resources are not shared an arbitrary order is
+ sufficient). Partial fix for bug #522338.
+
+* Test sorting now uses a heuristic on each partition to get a sort that is
+ no worse than twice the optimal number of setup/teardown operations and is
+ fast to calculate. Fixes bug #522338
+
0.2.3
-----
diff --git a/lib/testresources/__init__.py b/lib/testresources/__init__.py
index d8988e1..d06c820 100644
--- a/lib/testresources/__init__.py
+++ b/lib/testresources/__init__.py
@@ -19,7 +19,9 @@
"""TestResources: declarative management of external resources for tests."""
+import heapq
import inspect
+import operator
import unittest
@@ -28,6 +30,105 @@ def test_suite():
return testresources.tests.test_suite()
+def _digraph_to_graph(digraph, prime_node_mapping):
+ """Convert digraph to a graph.
+
+ :param digraph: A directed graph in the form
+ {from:{to:value}}.
+ :param prime_node_mapping: A mapping from every
+ node in digraph to a new unique and not in digraph node.
+ :return: A symmetric graph in the form {from:to:value}} created by
+ creating edges in the result between every N to M-prime with the
+ original N-M value and from every N to N-prime with a cost of 0.
+ No other edges are created.
+ """
+ result = {}
+ for from_node, from_prime_node in prime_node_mapping.iteritems():
+ result[from_node] = {from_prime_node:0}
+ result[from_prime_node] = {from_node:0}
+ for from_node, to_nodes in digraph.iteritems():
+ from_prime = prime_node_mapping[from_node]
+ for to_node, value in to_nodes.iteritems():
+ to_prime = prime_node_mapping[to_node]
+ result[from_prime][to_node] = value
+ result[to_node][from_prime] = value
+ return result
+
+
+def _kruskals_graph_MST(graph):
+ """Find the minimal spanning tree in graph using Kruskals algorithm.
+
+ See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
+ :param graph: A graph in {from:{to:value}} form. Every node present in
+ graph must be in the outer dict (because graph is not a directed graph.
+ :return: A graph with all nodes and those vertices that are part of the MST
+ for graph. If graph is not connected, then the result will also be a
+ forest.
+ """
+ # forest contains all the nodes -> graph that node is in.
+ forest = {}
+ # graphs is the count of graphs we have yet to combine.
+ for node in graph:
+ forest[node] = {node:{}}
+ graphs = len(forest)
+ # collect edges: every edge is present twice (due to the graph
+ # representation), so normalise.
+ edges = set()
+ for from_node, to_nodes in graph.iteritems():
+ for to_node, value in to_nodes.iteritems():
+ edge = (value,) + tuple(sorted([from_node, to_node]))
+ edges.add(edge)
+ edges = list(edges)
+ heapq.heapify(edges)
+ while edges and graphs > 1:
+ # more edges to go and we haven't gotten a spanning tree yet.
+ edge = heapq.heappop(edges)
+ g1 = forest[edge[1]]
+ g2 = forest[edge[2]]
+ if g1 is g2:
+ continue # already joined
+ # combine g1 and g2 into g1
+ graphs -= 1
+ for from_node, to_nodes in g2.iteritems():
+ #remember its symmetric, don't need to do 'to'.
+ forest[from_node] = g1
+ g1.setdefault(from_node, {}).update(to_nodes)
+ # add edge
+ g1[edge[1]][edge[2]] = edge[0]
+ g1[edge[2]][edge[1]] = edge[0]
+ # union the remaining graphs
+ _, result = forest.popitem()
+ for _, g2 in forest.iteritems():
+ if g2 is result: # common case
+ continue
+ for from_node, to_nodes in g2.iteritems():
+ result.setdefault(from_node, {}).update(to_nodes)
+ return result
+
+
+def _resource_graph(resource_sets):
+ """Convert an iterable of resource_sets into a graph.
+
+ Each resource_set in the iterable is treated as a node, and each resource
+ in that resource_set is used as an edge to other nodes.
+ """
+ nodes = {}
+ edges = {}
+ for resource_set in resource_sets:
+ # put node in nodes
+ node = frozenset(resource_set)
+ nodes[node] = set()
+ # put its contents in as edges
+ for resource in resource_set:
+ edges.setdefault(resource, []).append(node)
+ # populate the adjacent members of nodes
+ for node, connected in nodes.iteritems():
+ for resource in node:
+ connected.update(edges[resource])
+ connected.discard(node)
+ return nodes
+
+
def split_by_resources(tests):
"""Split a list of tests by the resources that the tests use.
@@ -47,6 +148,30 @@ def split_by_resources(tests):
return resource_set_tests
+def _strongly_connected_components(graph, no_resources):
+ """Find the strongly connected components in graph.
+
+ This is essentially a nonrecursive flatterning of Tarjan's method. It
+ may be worth profiling against an actual Tarjan's implementation at some
+ point, but sets are often faster than python calls.
+
+ graph gets consumed, but that could be changed easily enough.
+ """
+ partitions = []
+ while graph:
+ node, pending = graph.popitem()
+ current_partition = set([node])
+ while pending:
+ # add all the nodes connected to a connected node to pending.
+ node = pending.pop()
+ current_partition.add(node)
+ pending.update(graph.pop(node, []))
+ # don't try to process things we've allready processed.
+ pending.difference_update(current_partition)
+ partitions.append(current_partition)
+ return partitions
+
+
class OptimisingTestSuite(unittest.TestSuite):
"""A resource creation optimising TestSuite."""
@@ -126,54 +251,149 @@ class OptimisingTestSuite(unittest.TestSuite):
def sortTests(self):
"""Attempt to topographically sort the contained tests.
+ This function biases to reusing a resource: it assumes that resetting
+ a resource is usually cheaper than a teardown + setup; and that most
+ resources are not dirtied by most tests.
+
Feel free to override to improve the sort behaviour.
"""
# We group the tests by the resource combinations they use,
# since there will usually be fewer resource combinations than
- # actual tests and there can never be more.
+ # actual tests and there can never be more: This gives us 'nodes' or
+ # 'resource_sets' that represent many tests using the same set of
+ # resources.
resource_set_tests = split_by_resources(self._tests)
-
- graph = self._getGraph(resource_set_tests.keys())
+ # Partition into separate sets of resources, there is no ordering
+ # preference between sets that do not share members. Rationale:
+ # If resource_set A and B have no common resources, AB and BA are
+ # equally good - the global setup/teardown sums are identical. Secondly
+ # if A shares one or more resources with C, then pairing AC|CA is better
+ # than having B between A and C, because the shared resources can be
+ # reset or reused. Having partitioned we can use connected graph logic
+ # on each partition.
+ resource_set_graph = _resource_graph(resource_set_tests)
no_resources = frozenset()
- # Recursive visit-all-nodes all-permutations.
- def cost(from_set, resource_sets):
- """Get the cost of resource traversal for resource sets.
-
- :return: cost, order
- """
- if not resource_sets:
- # tear down last resources
- return graph[from_set][no_resources], []
- costs = []
- for to_set in resource_sets:
- child_cost, child_order = cost(
- to_set, resource_sets - set([to_set]))
- costs.append((graph[from_set][to_set] + child_cost,
- [to_set] + child_order))
- return min(costs)
- _, order = cost(no_resources,
- set(resource_set_tests) - set([no_resources]))
- order.append(no_resources)
- self._tests = sum(
- (resource_set_tests[resource_set] for resource_set in order), [])
+ # A list of resource_set_tests, all fully internally connected.
+ partitions = _strongly_connected_components(resource_set_graph,
+ no_resources)
+ result = []
+ for partition in partitions:
+ # we process these at the end for no particularly good reason (it makes
+ # testing slightly easier).
+ if partition == [no_resources]:
+ continue
+ order = self._makeOrder(partition)
+ # Spit this partition out into result
+ for resource_set in order:
+ result.extend(resource_set_tests[resource_set])
+ result.extend(resource_set_tests[no_resources])
+ self._tests = result
def _getGraph(self, resource_sets):
"""Build a graph of the resource-using nodes.
+ This special cases set(['root']) to be a node with no resources and
+ edges to everything.
+
:return: A complete directed graph of the switching costs
- between each resource combination.
+ between each resource combination. Note that links from N to N are
+ not included.
"""
+ no_resources = frozenset()
graph = {}
+ root = set(['root'])
+ # bottom = set(['bottom'])
for from_set in resource_sets:
graph[from_set] = {}
+ if from_set == root:
+ from_resources = no_resources
+ #elif from_set == bottom:
+ # continue # no links from bottom
+ else:
+ from_resources = from_set
for to_set in resource_sets:
if from_set is to_set:
- graph[from_set][to_set] = 0
+ continue # no self-edges
+ #if to_set == bottom:
+ # if from_set == root:
+ # continue # no short cuts!
+ # to_resources = no_resources
+ #el
+ if to_set == root:
+ continue # no links to root
else:
- graph[from_set][to_set] = self.cost_of_switching(
- from_set, to_set)
+ to_resources = to_set
+ graph[from_set][to_set] = self.cost_of_switching(
+ from_resources, to_resources)
return graph
+ def _makeOrder(self, partition):
+ """Return a order for the resource sets in partition."""
+ # This problem is NP-C - find the lowest cost hamiltonian path. It
+ # also meets the triangle inequality, so we can use an approximation.
+ # TODO: implement Christofides.
+ # See http://en.wikipedia.org/wiki/Travelling_salesman_problem#Metric_TSP
+ # We need a root
+ root = frozenset(['root'])
+ partition.add(root)
+ # and an end
+ # partition.add(frozenset(['bottom']))
+ # get rid of 'noresources'
+ partition.discard(frozenset())
+ digraph = self._getGraph(partition)
+ # build a prime map
+ primes = {}
+ prime = frozenset(['prime'])
+ for node in digraph:
+ primes[node] = node.union(prime)
+ graph = _digraph_to_graph(digraph, primes)
+ mst = _kruskals_graph_MST(graph)
+ # Because the representation is a digraph, we can build an Eulerian
+ # cycle directly from the representation by just following the links:
+ # a node with only 1 'edge' has two directed edges; and we can only
+ # enter and leave it once, so the edge lookups will match precisely.
+ # As the mst is a spanning tree, the graph will become disconnected
+ # (we choose non-disconnecting edges first)
+ # - for a stub node (1 outgoing link): when exiting it unless it is
+ # the first node started at
+ # - for a non-stub node if choosing an outgoing link where some other
+ # endpoints incoming link has not been traversed. [exit by a
+ # different node than entering, until all exits taken].
+ # We don't need the mst after, so it gets modified in place.
+ node = root
+ cycle = [node]
+ steps = 2 * (len(mst) - 1)
+ for step in xrange(steps):
+ found = False
+ outgoing = None # For clearer debugging.
+ for outgoing in mst[node]:
+ if node in mst[outgoing]:
+ # we have a return path: take it
+ # print node, '->', outgoing, ' can return'
+ del mst[node][outgoing]
+ node = outgoing
+ cycle.append(node)
+ found = True
+ break
+ if not found:
+ # none of the outgoing links have an incoming, so follow an
+ # arbitrary one (the last examined outgoing)
+ # print node, '->', outgoing
+ del mst[node][outgoing]
+ node = outgoing
+ cycle.append(node)
+ # Convert to a path:
+ visited = set()
+ order = []
+ for node in cycle:
+ if node in visited:
+ continue
+ if node in primes:
+ order.append(node)
+ visited.add(node)
+ assert order[0] == root
+ return order[1:]
+
class TestLoader(unittest.TestLoader):
"""Custom TestLoader to set the right TestSuite class."""
diff --git a/lib/testresources/tests/__init__.py b/lib/testresources/tests/__init__.py
index c7ab9e7..fe4958f 100644
--- a/lib/testresources/tests/__init__.py
+++ b/lib/testresources/tests/__init__.py
@@ -28,10 +28,12 @@ def test_suite():
import testresources.tests.test_resourced_test_case
import testresources.tests.test_test_loader
import testresources.tests.test_test_resource
+ import testresources.tests.test_resource_graph
result = TestUtil.TestSuite()
result.addTest(testresources.tests.test_test_loader.test_suite())
result.addTest(testresources.tests.test_test_resource.test_suite())
result.addTest(testresources.tests.test_resourced_test_case.test_suite())
+ result.addTest(testresources.tests.test_resource_graph.test_suite())
result.addTest(
testresources.tests.test_optimising_test_suite.test_suite())
return result
diff --git a/lib/testresources/tests/test_optimising_test_suite.py b/lib/testresources/tests/test_optimising_test_suite.py
index bd09d42..fcc5fc7 100644
--- a/lib/testresources/tests/test_optimising_test_suite.py
+++ b/lib/testresources/tests/test_optimising_test_suite.py
@@ -403,7 +403,7 @@ class TestCostGraph(testtools.TestCase):
resource = self.makeResource()
suite = testresources.OptimisingTestSuite()
graph = suite._getGraph([frozenset()])
- self.assertEqual({frozenset(): {frozenset(): 0}}, graph)
+ self.assertEqual({frozenset(): {}}, graph)
def testTwoCasesInGraph(self):
res1 = self.makeResource()
@@ -415,9 +415,9 @@ class TestCostGraph(testtools.TestCase):
suite = testresources.OptimisingTestSuite()
graph = suite._getGraph([no_resources, set1, set2])
- self.assertEqual({no_resources: {no_resources: 0, set1: 2, set2: 1},
- set1: {no_resources: 2, set1: 0, set2: 1},
- set2: {no_resources: 1, set1: 1, set2: 0}}, graph)
+ self.assertEqual({no_resources: {set1: 2, set2: 1},
+ set1: {no_resources: 2, set2: 1},
+ set2: {no_resources: 1, set1: 1 }}, graph)
class TestGraphStuff(testtools.TestCase):
@@ -489,10 +489,15 @@ class TestGraphStuff(testtools.TestCase):
return permutations
def testBasicSortTests(self):
- # Test every permutation of inputs, with equal cost resources and
- # legacy tests.
+ # Test every permutation of inputs, with legacy tests.
+ # Cannot use equal costs because of the use of
+ # a 2*optimal heuristic for sorting: with equal
+ # costs the wrong sort order is < twice the optimal
+ # weight, and thus can be selected.
resource_one = testresources.TestResource()
resource_two = testresources.TestResource()
+ resource_two.setUpCost = 5
+ resource_two.tearDownCost = 5
resource_three = testresources.TestResource()
self.case1.resources = [
@@ -584,6 +589,43 @@ class TestGraphStuff(testtools.TestCase):
permutation.index(self.case3) < permutation.index(self.case4),
sorted.index(self.case3) < sorted.index(self.case4))
+ def testSortingTwelveIndependentIsFast(self):
+ # Given twelve independent resource sets, my patience is not exhausted.
+ managers = []
+ for pos in range(12):
+ managers.append(testresources.TestResourceManager())
+ # Add more sample tests
+ cases = [self.case1, self.case2, self.case3, self.case4]
+ for pos in range(5,13):
+ cases.append(
+ testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
+ # We care that this is fast in this test, so we don't need to have
+ # overlapping resource usage
+ for case, manager in zip(cases, managers):
+ case.resources = [('_resource', manager)]
+ # Any sort is ok, as long as its the right length :)
+ result = self.sortTests(cases)
+ self.assertEqual(12, len(result))
+
+ def testSortingTwelveOverlappingIsFast(self):
+ # Given twelve connected resource sets, my patience is not exhausted.
+ managers = []
+ for pos in range(12):
+ managers.append(testresources.TestResourceManager())
+ # Add more sample tests
+ cases = [self.case1, self.case2, self.case3, self.case4]
+ for pos in range(5,13):
+ cases.append(
+ testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
+ tempdir = testresources.TestResourceManager()
+ # give all tests a tempdir, enough to provoke a single partition in
+ # the current code.
+ for case, manager in zip(cases, managers):
+ case.resources = [('_resource', manager), ('tempdir', tempdir)]
+ # Any sort is ok, as long as its the right length :)
+ result = self.sortTests(cases)
+ self.assertEqual(12, len(result))
+
def testSortConsidersDependencies(self):
"""Tests with different dependencies are sorted together."""
# We test this by having two resources (one and two) that share a very
diff --git a/lib/testresources/tests/test_resource_graph.py b/lib/testresources/tests/test_resource_graph.py
new file mode 100644
index 0000000..6e1eeca
--- /dev/null
+++ b/lib/testresources/tests/test_resource_graph.py
@@ -0,0 +1,141 @@
+#
+# testresources: extensions to python unittest to allow declaritive use
+# of resources by test cases.
+# Copyright (C) 2010 Robert Collins <robertc@robertcollins.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+"""Test _resource_graph(resource_sets)."""
+
+import testtools
+import testresources
+from testresources import split_by_resources, _resource_graph
+from testresources.tests import ResultWithResourceExtensions
+import unittest
+
+
+def test_suite():
+ from testresources.tests import TestUtil
+ loader = TestUtil.TestLoader()
+ result = loader.loadTestsFromName(__name__)
+ return result
+
+
+class TestResourceGraph(testtools.TestCase):
+
+ def test_empty(self):
+ no_resources = frozenset()
+ resource_sets = [no_resources]
+ self.assertEqual({no_resources:set([])}, _resource_graph(resource_sets))
+
+ def test_discrete(self):
+ resset1 = frozenset([testresources.TestResourceManager()])
+ resset2 = frozenset([testresources.TestResourceManager()])
+ resource_sets = [resset1, resset2]
+ result = _resource_graph(resource_sets)
+ self.assertEqual({resset1:set([]), resset2:set([])}, result)
+
+ def test_overlapping(self):
+ res1 = testresources.TestResourceManager()
+ res2 = testresources.TestResourceManager()
+ resset1 = frozenset([res1])
+ resset2 = frozenset([res2])
+ resset3 = frozenset([res1, res2])
+ resource_sets = [resset1, resset2, resset3]
+ result = _resource_graph(resource_sets)
+ self.assertEqual(
+ {resset1:set([resset3]),
+ resset2:set([resset3]),
+ resset3:set([resset1, resset2])},
+ result)
+
+
+class TestDigraphToGraph(testtools.TestCase):
+
+ def test_wikipedia_example(self):
+ """Converting a digraph mirrors it in the XZ axis (matrix view).
+
+ See http://en.wikipedia.org/wiki/Travelling_salesman_problem \
+ #Solving_by_conversion_to_Symmetric_TSP
+ """
+ # A B C
+ # A 1 2
+ # B 6 3
+ # C 5 4
+ A = "A"
+ Ap = "A'"
+ B = "B"
+ Bp = "B'"
+ C = "C"
+ Cp = "C'"
+ digraph = {A:{ B:1, C:2},
+ B:{A:6, C:3},
+ C:{A:5, B:4 }}
+ # and the output
+ # A B C A' B' C'
+ # A 0 6 5
+ # B 1 0 4
+ # C 2 3 0
+ # A' 0 1 2
+ # B' 6 0 3
+ # C' 5 4 0
+ expected = {
+ A :{ Ap:0, Bp:6, Cp:5},
+ B :{ Ap:1, Bp:0, Cp:4},
+ C :{ Ap:2, Bp:3, Cp:0},
+ Ap:{A:0, B:1, C:2 },
+ Bp:{A:6, B:0, C:3 },
+ Cp:{A:5, B:4, C:0 }}
+ self.assertEqual(expected,
+ testresources._digraph_to_graph(digraph, {A:Ap, B:Bp, C:Cp}))
+
+
+class TestKruskalsMST(testtools.TestCase):
+
+ def test_wikipedia_example(self):
+ """Performing KruskalsMST on a graph returns a spanning tree.
+
+ See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
+ """
+ A = "A"
+ B = "B"
+ C = "C"
+ D = "D"
+ E = "E"
+ F = "F"
+ G = "G"
+ graph = {
+ A:{ B:7, D:5},
+ B:{A:7, C:8, D:9, E:7},
+ C:{ B:8, E:5},
+ D:{A:5, B:9, E:15, F:6},
+ E:{ B:7, C:5, D:15, F:8, G:9},
+ F:{ D:6, E:8, G:11},
+ G:{ E:9, F:11}}
+ expected = {
+ A:{ B:7, D:5},
+ B:{A:7, E:7},
+ C:{ E:5},
+ D:{A:5, F:6},
+ E:{ B:7, C:5, G:9},
+ F:{ D:6},
+ G:{ E:9}}
+ result = testresources._kruskals_graph_MST(graph)
+ e_weight = sum(sum(row.itervalues()) for row in expected.itervalues())
+ r_weight = sum(sum(row.itervalues()) for row in result.itervalues())
+ self.assertEqual(e_weight, r_weight)
+ self.assertEqual(expected,
+ testresources._kruskals_graph_MST(graph))