Merge sortTests performance bugfix.

author: Robert Collins <robertc@robertcollins.net> 2010-02-19 17:38:27 +1100
committer: Robert Collins <robertc@robertcollins.net> 2010-02-19 17:38:27 +1100
commit: d05ed270f04b294533b4c80f070edf148d1c7a6c (patch)
tree: affabe30a0dd90516a79754bcf026358f7a0bf68
parent: cd86ed1616f62488a71e472cbdf3d0dc03a6d65d (diff)
parent: d62d20ad8c83f4032d7e0af10efb207217612094 (diff)
download: testresources-d05ed270f04b294533b4c80f070edf148d1c7a6c.tar.gz
5 files changed, 447 insertions, 34 deletions
diff --git a/NEWS b/NEWS
index 971a6d1..15c59f0 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,14 @@ IMPROVEMENTS
 * Rename TestResource to TestResourceManager leaving TestResource as an alias.
   Fixes bug #520769.
 
+* Test sorting no longer performs N! work on tests that can never benefit from
+  order optimisations (when resources are not shared an arbitrary order is
+  sufficient). Partial fix for bug #522338.
+
+* Test sorting now uses a heuristic on each partition to get a sort that is
+  no worse than twice the optimal number of setup/teardown operations and is
+  fast to calculate. Fixes bug #522338
+
 0.2.3
 -----
 
diff --git a/lib/testresources/__init__.py b/lib/testresources/__init__.py
index d8988e1..d06c820 100644
--- a/lib/testresources/__init__.py
+++ b/lib/testresources/__init__.py
@@ -19,7 +19,9 @@
 
 """TestResources: declarative management of external resources for tests."""
 
+import heapq
 import inspect
+import operator
 import unittest
 
 
@@ -28,6 +30,105 @@ def test_suite():
     return testresources.tests.test_suite()
 
 
+def _digraph_to_graph(digraph, prime_node_mapping):
+    """Convert digraph to a graph.
+
+    :param digraph: A directed graph in the form
+        {from:{to:value}}.
+    :param prime_node_mapping: A mapping from every
+        node in digraph to a new unique and not in digraph node.
+    :return: A symmetric graph in the form {from:to:value}} created by
+        creating edges in the result between every N to M-prime with the
+        original N-M value and from every N to N-prime with a cost of 0.
+        No other edges are created.
+    """
+    result = {}
+    for from_node, from_prime_node in prime_node_mapping.iteritems():
+        result[from_node] = {from_prime_node:0}
+        result[from_prime_node] = {from_node:0}
+    for from_node, to_nodes in digraph.iteritems():
+        from_prime = prime_node_mapping[from_node]
+        for to_node, value in to_nodes.iteritems():
+            to_prime = prime_node_mapping[to_node]
+            result[from_prime][to_node] = value
+            result[to_node][from_prime] = value
+    return result
+
+
+def _kruskals_graph_MST(graph):
+    """Find the minimal spanning tree in graph using Kruskals algorithm.
+
+    See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
+    :param graph: A graph in {from:{to:value}} form. Every node present in
+        graph must be in the outer dict (because graph is not a directed graph.
+    :return: A graph with all nodes and those vertices that are part of the MST
+        for graph. If graph is not connected, then the result will also be a
+        forest.
+    """
+    # forest contains all the nodes -> graph that node is in.
+    forest = {}
+    # graphs is the count of graphs we have yet to combine.
+    for node in graph:
+        forest[node] = {node:{}}
+    graphs = len(forest)
+    # collect edges: every edge is present twice (due to the graph
+    # representation), so normalise.
+    edges = set()
+    for from_node, to_nodes in graph.iteritems():
+        for to_node, value in to_nodes.iteritems():
+            edge = (value,) + tuple(sorted([from_node, to_node]))
+            edges.add(edge)
+    edges = list(edges)
+    heapq.heapify(edges)
+    while edges and graphs > 1:
+        # more edges to go and we haven't gotten a spanning tree yet.
+        edge = heapq.heappop(edges)
+        g1 = forest[edge[1]]
+        g2 = forest[edge[2]]
+        if g1 is g2:
+            continue # already joined
+        # combine g1 and g2 into g1
+        graphs -= 1
+        for from_node, to_nodes in g2.iteritems():
+            #remember its symmetric, don't need to do 'to'.
+            forest[from_node] = g1
+            g1.setdefault(from_node, {}).update(to_nodes)
+        # add edge
+        g1[edge[1]][edge[2]] = edge[0]
+        g1[edge[2]][edge[1]] = edge[0]
+    # union the remaining graphs
+    _, result = forest.popitem()
+    for _, g2 in forest.iteritems():
+        if g2 is result: # common case
+            continue
+        for from_node, to_nodes in g2.iteritems():
+            result.setdefault(from_node, {}).update(to_nodes)
+    return result
+
+
+def _resource_graph(resource_sets):
+    """Convert an iterable of resource_sets into a graph.
+
+    Each resource_set in the iterable is treated as a node, and each resource
+    in that resource_set is used as an edge to other nodes.
+    """
+    nodes = {}
+    edges = {}
+    for resource_set in resource_sets:
+        # put node in nodes
+        node = frozenset(resource_set)
+        nodes[node] = set()
+        # put its contents in as edges
+        for resource in resource_set:
+            edges.setdefault(resource, []).append(node)
+    # populate the adjacent members of nodes
+    for node, connected in nodes.iteritems():
+        for resource in node:
+            connected.update(edges[resource])
+        connected.discard(node)
+    return nodes
+
+
 def split_by_resources(tests):
     """Split a list of tests by the resources that the tests use.
 
@@ -47,6 +148,30 @@ def split_by_resources(tests):
     return resource_set_tests
 
 
+def _strongly_connected_components(graph, no_resources):
+    """Find the strongly connected components in graph.
+    
+    This is essentially a nonrecursive flatterning of Tarjan's method. It
+    may be worth profiling against an actual Tarjan's implementation at some
+    point, but sets are often faster than python calls.
+
+    graph gets consumed, but that could be changed easily enough.
+    """
+    partitions = []
+    while graph:
+        node, pending = graph.popitem()
+        current_partition = set([node])
+        while pending:
+            # add all the nodes connected to a connected node to pending.
+            node = pending.pop()
+            current_partition.add(node)
+            pending.update(graph.pop(node, []))
+            # don't try to process things we've allready processed.
+            pending.difference_update(current_partition)
+        partitions.append(current_partition)
+    return partitions
+
+
 class OptimisingTestSuite(unittest.TestSuite):
     """A resource creation optimising TestSuite."""
 
@@ -126,54 +251,149 @@ class OptimisingTestSuite(unittest.TestSuite):
     def sortTests(self):
         """Attempt to topographically sort the contained tests.
 
+        This function biases to reusing a resource: it assumes that resetting
+        a resource is usually cheaper than a teardown + setup; and that most
+        resources are not dirtied by most tests.
+
         Feel free to override to improve the sort behaviour.
         """
         # We group the tests by the resource combinations they use,
         # since there will usually be fewer resource combinations than
-        # actual tests and there can never be more.
+        # actual tests and there can never be more: This gives us 'nodes' or
+        # 'resource_sets' that represent many tests using the same set of 
+        # resources.
         resource_set_tests = split_by_resources(self._tests)
-
-        graph = self._getGraph(resource_set_tests.keys())
+        # Partition into separate sets of resources, there is no ordering
+        # preference between sets that do not share members. Rationale:
+        # If resource_set A and B have no common resources, AB and BA are
+        # equally good - the global setup/teardown sums are identical. Secondly
+        # if A shares one or more resources with C, then pairing AC|CA is better
+        # than having B between A and C, because the shared resources can be
+        # reset or reused. Having partitioned we can use connected graph logic
+        # on each partition.
+        resource_set_graph = _resource_graph(resource_set_tests)
         no_resources = frozenset()
-        # Recursive visit-all-nodes all-permutations.
-        def cost(from_set, resource_sets):
-            """Get the cost of resource traversal for resource sets.
-
-            :return: cost, order
-            """
-            if not resource_sets:
-                # tear down last resources
-                return graph[from_set][no_resources], []
-            costs = []
-            for to_set in resource_sets:
-                child_cost, child_order = cost(
-                    to_set, resource_sets - set([to_set]))
-                costs.append((graph[from_set][to_set] + child_cost,
-                              [to_set] + child_order))
-            return min(costs)
-        _, order = cost(no_resources,
-                        set(resource_set_tests) - set([no_resources]))
-        order.append(no_resources)
-        self._tests = sum(
-            (resource_set_tests[resource_set] for resource_set in order), [])
+        # A list of resource_set_tests, all fully internally connected.
+        partitions = _strongly_connected_components(resource_set_graph,
+            no_resources)
+        result = []
+        for partition in partitions:
+            # we process these at the end for no particularly good reason (it makes
+            # testing slightly easier).
+            if partition == [no_resources]:
+                continue
+            order = self._makeOrder(partition)
+            # Spit this partition out into result
+            for resource_set in order:
+                result.extend(resource_set_tests[resource_set])
+        result.extend(resource_set_tests[no_resources])
+        self._tests = result
 
     def _getGraph(self, resource_sets):
         """Build a graph of the resource-using nodes.
 
+        This special cases set(['root']) to be a node with no resources and
+        edges to everything.
+
         :return: A complete directed graph of the switching costs
-            between each resource combination.
+            between each resource combination. Note that links from N to N are
+            not included.
         """
+        no_resources = frozenset()
         graph = {}
+        root = set(['root'])
+        # bottom = set(['bottom'])
         for from_set in resource_sets:
             graph[from_set] = {}
+            if from_set == root:
+                from_resources = no_resources
+            #elif from_set == bottom:
+            #    continue # no links from bottom
+            else:
+                from_resources = from_set
             for to_set in resource_sets:
                 if from_set is to_set:
-                    graph[from_set][to_set] = 0
+                    continue # no self-edges
+                #if to_set == bottom:
+                #   if from_set == root:
+                #       continue # no short cuts!
+                #   to_resources = no_resources
+                #el
+                if to_set == root:
+                    continue # no links to root
                 else:
-                    graph[from_set][to_set] = self.cost_of_switching(
-                        from_set, to_set)
+                    to_resources = to_set
+                graph[from_set][to_set] = self.cost_of_switching(
+                        from_resources, to_resources)
         return graph
 
+    def _makeOrder(self, partition):
+        """Return a order for the resource sets in partition."""
+        # This problem is NP-C - find the lowest cost hamiltonian path. It 
+        # also meets the triangle inequality, so we can use an approximation.
+        # TODO: implement Christofides.
+        # See http://en.wikipedia.org/wiki/Travelling_salesman_problem#Metric_TSP
+        # We need a root
+        root = frozenset(['root'])
+        partition.add(root)
+        # and an end
+        # partition.add(frozenset(['bottom']))
+        # get rid of 'noresources'
+        partition.discard(frozenset())
+        digraph = self._getGraph(partition)
+        # build a prime map
+        primes = {}
+        prime = frozenset(['prime'])
+        for node in digraph:
+            primes[node] = node.union(prime)
+        graph = _digraph_to_graph(digraph, primes)
+        mst = _kruskals_graph_MST(graph)
+        # Because the representation is a digraph, we can build an Eulerian
+        # cycle directly from the representation by just following the links:
+        # a node with only 1 'edge' has two directed edges; and we can only
+        # enter and leave it once, so the edge lookups will match precisely.
+        # As the mst is a spanning tree, the graph will become disconnected
+        # (we choose non-disconnecting edges first)
+        #  - for a stub node (1 outgoing link): when exiting it unless it is
+        #    the first node started at
+        # - for a non-stub node if choosing an outgoing link where some other
+        #   endpoints incoming link has not been traversed. [exit by a
+        #   different node than entering, until all exits taken].
+        # We don't need the mst after, so it gets modified in place.
+        node = root
+        cycle = [node]
+        steps = 2 * (len(mst) - 1)
+        for step in xrange(steps):
+            found = False
+            outgoing = None # For clearer debugging.
+            for outgoing in mst[node]:
+                if node in mst[outgoing]:
+                    # we have a return path: take it
+                    # print node, '->', outgoing, ' can return'
+                    del mst[node][outgoing]
+                    node = outgoing
+                    cycle.append(node)
+                    found = True
+                    break
+            if not found:
+                # none of the outgoing links have an incoming, so follow an
+                # arbitrary one (the last examined outgoing)
+                # print node, '->', outgoing
+                del mst[node][outgoing]
+                node = outgoing
+                cycle.append(node)
+        # Convert to a path:
+        visited = set()
+        order = []
+        for node in cycle:
+            if node in visited:
+                continue
+            if node in primes:
+                order.append(node)
+            visited.add(node)
+        assert order[0] == root
+        return order[1:]
+
 
 class TestLoader(unittest.TestLoader):
     """Custom TestLoader to set the right TestSuite class."""
diff --git a/lib/testresources/tests/__init__.py b/lib/testresources/tests/__init__.py
index c7ab9e7..fe4958f 100644
--- a/lib/testresources/tests/__init__.py
+++ b/lib/testresources/tests/__init__.py
@@ -28,10 +28,12 @@ def test_suite():
     import testresources.tests.test_resourced_test_case
     import testresources.tests.test_test_loader
     import testresources.tests.test_test_resource
+    import testresources.tests.test_resource_graph
     result = TestUtil.TestSuite()
     result.addTest(testresources.tests.test_test_loader.test_suite())
     result.addTest(testresources.tests.test_test_resource.test_suite())
     result.addTest(testresources.tests.test_resourced_test_case.test_suite())
+    result.addTest(testresources.tests.test_resource_graph.test_suite())
     result.addTest(
         testresources.tests.test_optimising_test_suite.test_suite())
     return result
diff --git a/lib/testresources/tests/test_optimising_test_suite.py b/lib/testresources/tests/test_optimising_test_suite.py
index bd09d42..fcc5fc7 100644
--- a/lib/testresources/tests/test_optimising_test_suite.py
+++ b/lib/testresources/tests/test_optimising_test_suite.py
@@ -403,7 +403,7 @@ class TestCostGraph(testtools.TestCase):
         resource = self.makeResource()
         suite = testresources.OptimisingTestSuite()
         graph = suite._getGraph([frozenset()])
-        self.assertEqual({frozenset(): {frozenset(): 0}}, graph)
+        self.assertEqual({frozenset(): {}}, graph)
 
     def testTwoCasesInGraph(self):
         res1 = self.makeResource()
@@ -415,9 +415,9 @@ class TestCostGraph(testtools.TestCase):
 
         suite = testresources.OptimisingTestSuite()
         graph = suite._getGraph([no_resources, set1, set2])
-        self.assertEqual({no_resources: {no_resources: 0, set1: 2, set2: 1},
-                          set1: {no_resources: 2, set1: 0, set2: 1},
-                          set2: {no_resources: 1, set1: 1, set2: 0}}, graph)
+        self.assertEqual({no_resources: {set1: 2, set2: 1},
+                          set1: {no_resources: 2, set2: 1},
+                          set2: {no_resources: 1, set1: 1 }}, graph)
 
 
 class TestGraphStuff(testtools.TestCase):
@@ -489,10 +489,15 @@ class TestGraphStuff(testtools.TestCase):
         return permutations
 
     def testBasicSortTests(self):
-        # Test every permutation of inputs, with equal cost resources and
-        # legacy tests.
+        # Test every permutation of inputs, with legacy tests.
+        # Cannot use equal costs because of the use of
+        # a 2*optimal heuristic for sorting: with equal
+        # costs the wrong sort order is < twice the optimal
+        # weight, and thus can be selected.
         resource_one = testresources.TestResource()
         resource_two = testresources.TestResource()
+        resource_two.setUpCost = 5
+        resource_two.tearDownCost = 5
         resource_three = testresources.TestResource()
 
         self.case1.resources = [
@@ -584,6 +589,43 @@ class TestGraphStuff(testtools.TestCase):
                 permutation.index(self.case3) < permutation.index(self.case4),
                 sorted.index(self.case3) < sorted.index(self.case4))
 
+    def testSortingTwelveIndependentIsFast(self):
+        # Given twelve independent resource sets, my patience is not exhausted.
+        managers = []
+        for pos in range(12):
+            managers.append(testresources.TestResourceManager())
+        # Add more sample tests
+        cases = [self.case1, self.case2, self.case3, self.case4]
+        for pos in range(5,13):
+            cases.append(
+                testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
+        # We care that this is fast in this test, so we don't need to have
+        # overlapping resource usage
+        for case, manager in zip(cases, managers):
+            case.resources = [('_resource', manager)]
+        # Any sort is ok, as long as its the right length :)
+        result = self.sortTests(cases)
+        self.assertEqual(12, len(result))
+
+    def testSortingTwelveOverlappingIsFast(self):
+        # Given twelve connected resource sets, my patience is not exhausted.
+        managers = []
+        for pos in range(12):
+            managers.append(testresources.TestResourceManager())
+        # Add more sample tests
+        cases = [self.case1, self.case2, self.case3, self.case4]
+        for pos in range(5,13):
+            cases.append(
+                testtools.clone_test_with_new_id(cases[0], 'case%d' % pos))
+        tempdir = testresources.TestResourceManager()
+        # give all tests a tempdir, enough to provoke a single partition in
+        # the current code.
+        for case, manager in zip(cases, managers):
+            case.resources = [('_resource', manager), ('tempdir', tempdir)]
+        # Any sort is ok, as long as its the right length :)
+        result = self.sortTests(cases)
+        self.assertEqual(12, len(result))
+
     def testSortConsidersDependencies(self):
         """Tests with different dependencies are sorted together."""
         # We test this by having two resources (one and two) that share a very
diff --git a/lib/testresources/tests/test_resource_graph.py b/lib/testresources/tests/test_resource_graph.py
new file mode 100644
index 0000000..6e1eeca
--- /dev/null
+++ b/lib/testresources/tests/test_resource_graph.py
@@ -0,0 +1,141 @@
+#
+#  testresources: extensions to python unittest to allow declaritive use
+#  of resources by test cases.
+#  Copyright (C) 2010  Robert Collins <robertc@robertcollins.net>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+"""Test _resource_graph(resource_sets)."""
+
+import testtools
+import testresources
+from testresources import split_by_resources, _resource_graph
+from testresources.tests import ResultWithResourceExtensions
+import unittest
+
+
+def test_suite():
+    from testresources.tests import TestUtil
+    loader = TestUtil.TestLoader()
+    result = loader.loadTestsFromName(__name__)
+    return result
+
+
+class TestResourceGraph(testtools.TestCase):
+
+    def test_empty(self):
+        no_resources = frozenset()
+        resource_sets = [no_resources]
+        self.assertEqual({no_resources:set([])}, _resource_graph(resource_sets))
+
+    def test_discrete(self):
+        resset1 = frozenset([testresources.TestResourceManager()])
+        resset2 = frozenset([testresources.TestResourceManager()])
+        resource_sets = [resset1, resset2]
+        result = _resource_graph(resource_sets)
+        self.assertEqual({resset1:set([]), resset2:set([])}, result)
+
+    def test_overlapping(self):
+        res1 = testresources.TestResourceManager()
+        res2 = testresources.TestResourceManager()
+        resset1 = frozenset([res1])
+        resset2 = frozenset([res2])
+        resset3 = frozenset([res1, res2])
+        resource_sets = [resset1, resset2, resset3]
+        result = _resource_graph(resource_sets)
+        self.assertEqual(
+            {resset1:set([resset3]),
+             resset2:set([resset3]),
+             resset3:set([resset1, resset2])},
+            result)
+
+
+class TestDigraphToGraph(testtools.TestCase):
+
+    def test_wikipedia_example(self):
+        """Converting a digraph mirrors it in the XZ axis (matrix view).
+
+        See http://en.wikipedia.org/wiki/Travelling_salesman_problem \
+        #Solving_by_conversion_to_Symmetric_TSP
+        """
+        #       A   B   C
+        #   A       1   2
+        #   B   6       3
+        #   C   5   4   
+        A = "A"
+        Ap = "A'"
+        B = "B"
+        Bp = "B'"
+        C = "C"
+        Cp = "C'"
+        digraph = {A:{     B:1, C:2},
+                   B:{A:6,      C:3},
+                   C:{A:5, B:4     }}
+        # and the output
+        #       A   B   C   A'  B'  C'
+        #   A               0   6   5
+        #   B               1   0   4
+        #   C               2   3   0
+        #   A'  0   1   2           
+        #   B'  6   0   3           
+        #   C'  5   4   0
+        expected = {
+            A :{              Ap:0, Bp:6, Cp:5},
+            B :{              Ap:1, Bp:0, Cp:4},
+            C :{              Ap:2, Bp:3, Cp:0},
+            Ap:{A:0, B:1, C:2                 },
+            Bp:{A:6, B:0, C:3                 },
+            Cp:{A:5, B:4, C:0                 }}
+        self.assertEqual(expected,
+            testresources._digraph_to_graph(digraph, {A:Ap, B:Bp, C:Cp}))
+
+
+class TestKruskalsMST(testtools.TestCase):
+
+    def test_wikipedia_example(self):
+        """Performing KruskalsMST on a graph returns a spanning tree.
+
+        See http://en.wikipedia.org/wiki/Kruskal%27s_algorithm.
+        """
+        A = "A"
+        B = "B"
+        C = "C"
+        D = "D"
+        E = "E"
+        F = "F"
+        G = "G"
+        graph = {
+            A:{     B:7,      D:5},
+            B:{A:7,      C:8, D:9,  E:7},
+            C:{     B:8,            E:5},
+            D:{A:5, B:9,            E:15, F:6},
+            E:{     B:7, C:5, D:15,       F:8, G:9},
+            F:{               D:6,  E:8,       G:11},
+            G:{                     E:9, F:11}}
+        expected = {
+            A:{     B:7,      D:5},
+            B:{A:7,                 E:7},
+            C:{                     E:5},
+            D:{A:5,                      F:6},
+            E:{     B:7, C:5,                  G:9},
+            F:{               D:6},
+            G:{                     E:9}}
+        result = testresources._kruskals_graph_MST(graph)
+        e_weight = sum(sum(row.itervalues()) for row in expected.itervalues())
+        r_weight = sum(sum(row.itervalues()) for row in result.itervalues())
+        self.assertEqual(e_weight, r_weight)
+        self.assertEqual(expected,
+            testresources._kruskals_graph_MST(graph))
author	Robert Collins <robertc@robertcollins.net>	2010-02-19 17:38:27 +1100
committer	Robert Collins <robertc@robertcollins.net>	2010-02-19 17:38:27 +1100
commit	d05ed270f04b294533b4c80f070edf148d1c7a6c (patch)
tree	affabe30a0dd90516a79754bcf026358f7a0bf68
parent	cd86ed1616f62488a71e472cbdf3d0dc03a6d65d (diff)
parent	d62d20ad8c83f4032d7e0af10efb207217612094 (diff)
download	testresources-d05ed270f04b294533b4c80f070edf148d1c7a6c.tar.gz