diff options
author | Robert Collins <robertc@robertcollins.net> | 2010-02-19 17:38:27 +1100 |
---|---|---|
committer | Robert Collins <robertc@robertcollins.net> | 2010-02-19 17:38:27 +1100 |
commit | d05ed270f04b294533b4c80f070edf148d1c7a6c (patch) | |
tree | affabe30a0dd90516a79754bcf026358f7a0bf68 | |
parent | cd86ed1616f62488a71e472cbdf3d0dc03a6d65d (diff) | |
parent | d62d20ad8c83f4032d7e0af10efb207217612094 (diff) | |
download | testresources-d05ed270f04b294533b4c80f070edf148d1c7a6c.tar.gz |
Merge sortTests performance bugfix.
-rw-r--r-- | NEWS | 8 | ||||
-rw-r--r-- | lib/testresources/__init__.py | 276 | ||||
-rw-r--r-- | lib/testresources/tests/__init__.py | 2 | ||||
-rw-r--r-- | lib/testresources/tests/test_optimising_test_suite.py | 54 | ||||
-rw-r--r-- | lib/testresources/tests/test_resource_graph.py | 141 |
5 files changed, 447 insertions, 34 deletions
@@ -13,6 +13,14 @@ IMPROVEMENTS * Rename TestResource to TestResourceManager leaving TestResource as an alias. Fixes bug #520769. +* Test sorting no longer performs N! work on tests that can never benefit from + order optimisations (when resources are not shared an arbitrary order is + sufficient). Partial fix for bug #522338. + +* Test sorting now uses a heuristic on each partition to get a sort that is + no worse than twice the optimal number of setup/teardown operations and is + fast to calculate. Fixes bug #522338 + 0.2.3 ----- diff --git a/lib/testresources/__init__.py b/lib/testresources/__init__.py index d8988e1..d06c820 100644 --- a/lib/testresources/__init__.py +++ b/lib/testresources/__init__.py @@ -19,7 +19,9 @@ """TestResources: declarative management of external resources for tests.""" +import heapq import inspect +import operator import unittest @@ -28,6 +30,105 @@ def test_suite(): return testresources.tests.test_suite() +def _digraph_to_graph(digraph, prime_node_mapping): + """Convert digraph to a graph. + + :param digraph: A directed graph in the form + {from:{to:value}}. + :param prime_node_mapping: A mapping from every + node in digraph to a new unique and not in digraph node. + :return: A symmetric graph in the form {from:to:value}} created by + creating edges in the result between every N to M-prime with the + original N-M value and from every N to N-prime with a cost of 0. + No other edges are created. + """ + result = {} + for from_node, from_prime_node in prime_node_mapping.iteritems(): + result[from_node] = {from_prime_node:0} + result[from_prime_node] = {from_node:0} + for from_node, to_nodes in digraph.iteritems(): + from_prime = prime_node_mapping[from_node] + for to_node, value in to_nodes.iteritems(): + to_prime = prime_node_mapping[to_node] + result[from_prime][to_node] = value + result[to_node][from_prime] = value + return result + + +def _kruskals_graph_MST(graph): + """Find the minimal spanning tree in graph using Kruskals algorithm. + + See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm. + :param graph: A graph in {from:{to:value}} form. Every node present in + graph must be in the outer dict (because graph is not a directed graph. + :return: A graph with all nodes and those vertices that are part of the MST + for graph. If graph is not connected, then the result will also be a + forest. + """ + # forest contains all the nodes -> graph that node is in. + forest = {} + # graphs is the count of graphs we have yet to combine. + for node in graph: + forest[node] = {node:{}} + graphs = len(forest) + # collect edges: every edge is present twice (due to the graph + # representation), so normalise. + edges = set() + for from_node, to_nodes in graph.iteritems(): + for to_node, value in to_nodes.iteritems(): + edge = (value,) + tuple(sorted([from_node, to_node])) + edges.add(edge) + edges = list(edges) + heapq.heapify(edges) + while edges and graphs > 1: + # more edges to go and we haven't gotten a spanning tree yet. + edge = heapq.heappop(edges) + g1 = forest[edge[1]] + g2 = forest[edge[2]] + if g1 is g2: + continue # already joined + # combine g1 and g2 into g1 + graphs -= 1 + for from_node, to_nodes in g2.iteritems(): + #remember its symmetric, don't need to do 'to'. + forest[from_node] = g1 + g1.setdefault(from_node, {}).update(to_nodes) + # add edge + g1[edge[1]][edge[2]] = edge[0] + g1[edge[2]][edge[1]] = edge[0] + # union the remaining graphs + _, result = forest.popitem() + for _, g2 in forest.iteritems(): + if g2 is result: # common case + continue + for from_node, to_nodes in g2.iteritems(): + result.setdefault(from_node, {}).update(to_nodes) + return result + + +def _resource_graph(resource_sets): + """Convert an iterable of resource_sets into a graph. + + Each resource_set in the iterable is treated as a node, and each resource + in that resource_set is used as an edge to other nodes. + """ + nodes = {} + edges = {} + for resource_set in resource_sets: + # put node in nodes + node = frozenset(resource_set) + nodes[node] = set() + # put its contents in as edges + for resource in resource_set: + edges.setdefault(resource, []).append(node) + # populate the adjacent members of nodes + for node, connected in nodes.iteritems(): + for resource in node: + connected.update(edges[resource]) + connected.discard(node) + return nodes + + def split_by_resources(tests): """Split a list of tests by the resources that the tests use. @@ -47,6 +148,30 @@ def split_by_resources(tests): return resource_set_tests +def _strongly_connected_components(graph, no_resources): + """Find the strongly connected components in graph. + + This is essentially a nonrecursive flatterning of Tarjan's method. It + may be worth profiling against an actual Tarjan's implementation at some + point, but sets are often faster than python calls. + + graph gets consumed, but that could be changed easily enough. + """ + partitions = [] + while graph: + node, pending = graph.popitem() + current_partition = set([node]) + while pending: + # add all the nodes connected to a connected node to pending. + node = pending.pop() + current_partition.add(node) + pending.update(graph.pop(node, [])) + # don't try to process things we've allready processed. + pending.difference_update(current_partition) + partitions.append(current_partition) + return partitions + + class OptimisingTestSuite(unittest.TestSuite): """A resource creation optimising TestSuite.""" @@ -126,54 +251,149 @@ class OptimisingTestSuite(unittest.TestSuite): def sortTests(self): """Attempt to topographically sort the contained tests. + This function biases to reusing a resource: it assumes that resetting + a resource is usually cheaper than a teardown + setup; and that most + resources are not dirtied by most tests. + Feel free to override to improve the sort behaviour. """ # We group the tests by the resource combinations they use, # since there will usually be fewer resource combinations than - # actual tests and there can never be more. + # actual tests and there can never be more: This gives us 'nodes' or + # 'resource_sets' that represent many tests using the same set of + # resources. resource_set_tests = split_by_resources(self._tests) - - graph = self._getGraph(resource_set_tests.keys()) + # Partition into separate sets of resources, there is no ordering + # preference between sets that do not share members. Rationale: + # If resource_set A and B have no common resources, AB and BA are + # equally good - the global setup/teardown sums are identical. Secondly + # if A shares one or more resources with C, then pairing AC|CA is better + # than having B between A and C, because the shared resources can be + # reset or reused. Having partitioned we can use connected graph logic + # on each partition. + resource_set_graph = _resource_graph(resource_set_tests) no_resources = frozenset() - # Recursive visit-all-nodes all-permutations. - def cost(from_set, resource_sets): - """Get the cost of resource traversal for resource sets. - - :return: cost, order - """ - if not resource_sets: - # tear down last resources - return graph[from_set][no_resources], [] - costs = [] - for to_set in resource_sets: - child_cost, child_order = cost( - to_set, resource_sets - set([to_set])) - costs.append((graph[from_set][to_set] + child_cost, - [to_set] + child_order)) - return min(costs) - _, order = cost(no_resources, - set(resource_set_tests) - set([no_resources])) - order.append(no_resources) - self._tests = sum( - (resource_set_tests[resource_set] for resource_set in order), []) + # A list of resource_set_tests, all fully internally connected. + partitions = _strongly_connected_components(resource_set_graph, + no_resources) + result = [] + for partition in partitions: + # we process these at the end for no particularly good reason (it makes + # testing slightly easier). + if partition == [no_resources]: + continue + order = self._makeOrder(partition) + # Spit this partition out into result + for resource_set in order: + result.extend(resource_set_tests[resource_set]) + result.extend(resource_set_tests[no_resources]) + self._tests = result def _getGraph(self, resource_sets): """Build a graph of the resource-using nodes. + This special cases set(['root']) to be a node with no resources and + edges to everything. + :return: A complete directed graph of the switching costs - between each resource combination. + between each resource combination. Note that links from N to N are + not included. """ + no_resources = frozenset() graph = {} + root = set(['root']) + # bottom = set(['bottom']) for from_set in resource_sets: graph[from_set] = {} + if from_set == root: + from_resources = no_resources + #elif from_set == bottom: + # continue # no links from bottom + else: + from_resources = from_set for to_set in resource_sets: if from_set is to_set: - graph[from_set][to_set] = 0 + continue # no self-edges + #if to_set == bottom: + # if from_set == root: + # continue # no short cuts! + # to_resources = no_resources + #el + if to_set == root: + continue # no links to root else: - graph[from_set][to_set] = self.cost_of_switching( - from_set, to_set) + to_resources = to_set + graph[from_set][to_set] = self.cost_of_switching( + from_resources, to_resources) return graph + def _makeOrder(self, partition): + """Return a order for the resource sets in partition.""" + # This problem is NP-C - find the lowest cost hamiltonian path. It + # also meets the triangle inequality, so we can use an approximation. + # TODO: implement Christofides. + # See http://en.wikipedia.org/wiki/Travelling_salesman_problem#Metric_TSP + # We need a root + root = frozenset(['root']) + partition.add(root) + # and an end + # partition.add(frozenset(['bottom'])) + # get rid of 'noresources' + partition.discard(frozenset()) + digraph = self._getGraph(partition) + # build a prime map + primes = {} + prime = frozenset(['prime']) + for node in digraph: + primes[node] = node.union(prime) + graph = _digraph_to_graph(digraph, primes) + mst = _kruskals_graph_MST(graph) + # Because the representation is a digraph, we can build an Eulerian + # cycle directly from the representation by just following the links: + # a node with only 1 'edge' has two directed edges; and we can only + # enter and leave it once, so the edge lookups will match precisely. + # As the mst is a spanning tree, the graph will become disconnected + # (we choose non-disconnecting edges first) + # - for a stub node (1 outgoing link): when exiting it unless it is + # the first node started at + # - for a non-stub node if choosing an outgoing link where some other + # endpoints incoming link has not been traversed. [exit by a + # different node than entering, until all exits taken]. + # We don't need the mst after, so it gets modified in place. + node = root + cycle = [node] + steps = 2 * (len(mst) - 1) + for step in xrange(steps): + found = False + outgoing = None # For clearer debugging. + for outgoing in mst[node]: + if node in mst[outgoing]: + # we have a return path: take it + # print node, '->', outgoing, ' can return' + del mst[node][outgoing] + node = outgoing + cycle.append(node) + found = True + break + if not found: + # none of the outgoing links have an incoming, so follow an + # arbitrary one (the last examined outgoing) + # print node, '->', outgoing + del mst[node][outgoing] + node = outgoing + cycle.append(node) + # Convert to a path: + visited = set() + order = [] + for node in cycle: + if node in visited: + continue + if node in primes: + order.append(node) + visited.add(node) + assert order[0] == root + return order[1:] + class TestLoader(unittest.TestLoader): """Custom TestLoader to set the right TestSuite class.""" diff --git a/lib/testresources/tests/__init__.py b/lib/testresources/tests/__init__.py index c7ab9e7..fe4958f 100644 --- a/lib/testresources/tests/__init__.py +++ b/lib/testresources/tests/__init__.py @@ -28,10 +28,12 @@ def test_suite(): import testresources.tests.test_resourced_test_case import testresources.tests.test_test_loader import testresources.tests.test_test_resource + import testresources.tests.test_resource_graph result = TestUtil.TestSuite() result.addTest(testresources.tests.test_test_loader.test_suite()) result.addTest(testresources.tests.test_test_resource.test_suite()) result.addTest(testresources.tests.test_resourced_test_case.test_suite()) + result.addTest(testresources.tests.test_resource_graph.test_suite()) result.addTest( testresources.tests.test_optimising_test_suite.test_suite()) return result diff --git a/lib/testresources/tests/test_optimising_test_suite.py b/lib/testresources/tests/test_optimising_test_suite.py index bd09d42..fcc5fc7 100644 --- a/lib/testresources/tests/test_optimising_test_suite.py +++ b/lib/testresources/tests/test_optimising_test_suite.py @@ -403,7 +403,7 @@ class TestCostGraph(testtools.TestCase): resource = self.makeResource() suite = testresources.OptimisingTestSuite() graph = suite._getGraph([frozenset()]) - self.assertEqual({frozenset(): {frozenset(): 0}}, graph) + self.assertEqual({frozenset(): {}}, graph) def testTwoCasesInGraph(self): res1 = self.makeResource() @@ -415,9 +415,9 @@ class TestCostGraph(testtools.TestCase): suite = testresources.OptimisingTestSuite() graph = suite._getGraph([no_resources, set1, set2]) - self.assertEqual({no_resources: {no_resources: 0, set1: 2, set2: 1}, - set1: {no_resources: 2, set1: 0, set2: 1}, - set2: {no_resources: 1, set1: 1, set2: 0}}, graph) + self.assertEqual({no_resources: {set1: 2, set2: 1}, + set1: {no_resources: 2, set2: 1}, + set2: {no_resources: 1, set1: 1 }}, graph) class TestGraphStuff(testtools.TestCase): @@ -489,10 +489,15 @@ class TestGraphStuff(testtools.TestCase): return permutations def testBasicSortTests(self): - # Test every permutation of inputs, with equal cost resources and - # legacy tests. + # Test every permutation of inputs, with legacy tests. + # Cannot use equal costs because of the use of + # a 2*optimal heuristic for sorting: with equal + # costs the wrong sort order is < twice the optimal + # weight, and thus can be selected. resource_one = testresources.TestResource() resource_two = testresources.TestResource() + resource_two.setUpCost = 5 + resource_two.tearDownCost = 5 resource_three = testresources.TestResource() self.case1.resources = [ @@ -584,6 +589,43 @@ class TestGraphStuff(testtools.TestCase): permutation.index(self.case3) < permutation.index(self.case4), sorted.index(self.case3) < sorted.index(self.case4)) + def testSortingTwelveIndependentIsFast(self): + # Given twelve independent resource sets, my patience is not exhausted. + managers = [] + for pos in range(12): + managers.append(testresources.TestResourceManager()) + # Add more sample tests + cases = [self.case1, self.case2, self.case3, self.case4] + for pos in range(5,13): + cases.append( + testtools.clone_test_with_new_id(cases[0], 'case%d' % pos)) + # We care that this is fast in this test, so we don't need to have + # overlapping resource usage + for case, manager in zip(cases, managers): + case.resources = [('_resource', manager)] + # Any sort is ok, as long as its the right length :) + result = self.sortTests(cases) + self.assertEqual(12, len(result)) + + def testSortingTwelveOverlappingIsFast(self): + # Given twelve connected resource sets, my patience is not exhausted. + managers = [] + for pos in range(12): + managers.append(testresources.TestResourceManager()) + # Add more sample tests + cases = [self.case1, self.case2, self.case3, self.case4] + for pos in range(5,13): + cases.append( + testtools.clone_test_with_new_id(cases[0], 'case%d' % pos)) + tempdir = testresources.TestResourceManager() + # give all tests a tempdir, enough to provoke a single partition in + # the current code. + for case, manager in zip(cases, managers): + case.resources = [('_resource', manager), ('tempdir', tempdir)] + # Any sort is ok, as long as its the right length :) + result = self.sortTests(cases) + self.assertEqual(12, len(result)) + def testSortConsidersDependencies(self): """Tests with different dependencies are sorted together.""" # We test this by having two resources (one and two) that share a very diff --git a/lib/testresources/tests/test_resource_graph.py b/lib/testresources/tests/test_resource_graph.py new file mode 100644 index 0000000..6e1eeca --- /dev/null +++ b/lib/testresources/tests/test_resource_graph.py @@ -0,0 +1,141 @@ +# +# testresources: extensions to python unittest to allow declaritive use +# of resources by test cases. +# Copyright (C) 2010 Robert Collins <robertc@robertcollins.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +"""Test _resource_graph(resource_sets).""" + +import testtools +import testresources +from testresources import split_by_resources, _resource_graph +from testresources.tests import ResultWithResourceExtensions +import unittest + + +def test_suite(): + from testresources.tests import TestUtil + loader = TestUtil.TestLoader() + result = loader.loadTestsFromName(__name__) + return result + + +class TestResourceGraph(testtools.TestCase): + + def test_empty(self): + no_resources = frozenset() + resource_sets = [no_resources] + self.assertEqual({no_resources:set([])}, _resource_graph(resource_sets)) + + def test_discrete(self): + resset1 = frozenset([testresources.TestResourceManager()]) + resset2 = frozenset([testresources.TestResourceManager()]) + resource_sets = [resset1, resset2] + result = _resource_graph(resource_sets) + self.assertEqual({resset1:set([]), resset2:set([])}, result) + + def test_overlapping(self): + res1 = testresources.TestResourceManager() + res2 = testresources.TestResourceManager() + resset1 = frozenset([res1]) + resset2 = frozenset([res2]) + resset3 = frozenset([res1, res2]) + resource_sets = [resset1, resset2, resset3] + result = _resource_graph(resource_sets) + self.assertEqual( + {resset1:set([resset3]), + resset2:set([resset3]), + resset3:set([resset1, resset2])}, + result) + + +class TestDigraphToGraph(testtools.TestCase): + + def test_wikipedia_example(self): + """Converting a digraph mirrors it in the XZ axis (matrix view). + + See http://en.wikipedia.org/wiki/Travelling_salesman_problem \ + #Solving_by_conversion_to_Symmetric_TSP + """ + # A B C + # A 1 2 + # B 6 3 + # C 5 4 + A = "A" + Ap = "A'" + B = "B" + Bp = "B'" + C = "C" + Cp = "C'" + digraph = {A:{ B:1, C:2}, + B:{A:6, C:3}, + C:{A:5, B:4 }} + # and the output + # A B C A' B' C' + # A 0 6 5 + # B 1 0 4 + # C 2 3 0 + # A' 0 1 2 + # B' 6 0 3 + # C' 5 4 0 + expected = { + A :{ Ap:0, Bp:6, Cp:5}, + B :{ Ap:1, Bp:0, Cp:4}, + C :{ Ap:2, Bp:3, Cp:0}, + Ap:{A:0, B:1, C:2 }, + Bp:{A:6, B:0, C:3 }, + Cp:{A:5, B:4, C:0 }} + self.assertEqual(expected, + testresources._digraph_to_graph(digraph, {A:Ap, B:Bp, C:Cp})) + + +class TestKruskalsMST(testtools.TestCase): + + def test_wikipedia_example(self): + """Performing KruskalsMST on a graph returns a spanning tree. + + See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm. + """ + A = "A" + B = "B" + C = "C" + D = "D" + E = "E" + F = "F" + G = "G" + graph = { + A:{ B:7, D:5}, + B:{A:7, C:8, D:9, E:7}, + C:{ B:8, E:5}, + D:{A:5, B:9, E:15, F:6}, + E:{ B:7, C:5, D:15, F:8, G:9}, + F:{ D:6, E:8, G:11}, + G:{ E:9, F:11}} + expected = { + A:{ B:7, D:5}, + B:{A:7, E:7}, + C:{ E:5}, + D:{A:5, F:6}, + E:{ B:7, C:5, G:9}, + F:{ D:6}, + G:{ E:9}} + result = testresources._kruskals_graph_MST(graph) + e_weight = sum(sum(row.itervalues()) for row in expected.itervalues()) + r_weight = sum(sum(row.itervalues()) for row in result.itervalues()) + self.assertEqual(e_weight, r_weight) + self.assertEqual(expected, + testresources._kruskals_graph_MST(graph)) |