* The scheduler can now group tests together, permitting co-dependent tests to

always be scheduled onto the same backend. Note that this does not force co-dependent tests to be executed, so partial test runs (e.g. --failing) may still fail. (Matthew Treinish, Robert Collins)
author: Robert Collins <robertc@robertcollins.net> 2013-07-16 00:04:42 +1200
committer: Robert Collins <robertc@robertcollins.net> 2013-07-16 00:04:42 +1200
commit: 18ddf5aa8af1b0d339d20fb9a060451428a7e4dc (patch)
tree: 589de303ea642ea276dec5cc211c0b983027fba7
parent: f92cc3662411925ebbc26e957ca40fb92f00d81f (diff)
parent: bf920ea2002a64f23ded34bff1cd81a4bb7a4637 (diff)
download: testrepository-18ddf5aa8af1b0d339d20fb9a060451428a7e4dc.tar.gz
4 files changed, 99 insertions, 14 deletions
diff --git a/COPYING b/COPYING
index daebc89..632a79e 100644
--- a/COPYING
+++ b/COPYING
@@ -29,6 +29,7 @@ for distributions such as Debian that wish to list all the copyright holders
 in their metadata:
 * Robert Collins <robertc@robertcollins.net>, 2009
 * Hewlett-Packard Development Company, L.P., 2013
+* IBM Corp., 2013
 
 
 Code that has been incorporated into Testrepository from other projects will
diff --git a/NEWS b/NEWS
index 2200e1c..accde30 100644
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,11 @@ CHANGES
   load command to take interactive input without it reading from the raw
   subunit stream on stdin. (Robert Collins)
 
+* The scheduler can now group tests together, permitting co-dependent tests to
+  always be scheduled onto the same backend. Note that this does not force
+  co-dependent tests to be executed, so partial test runs (e.g. --failing)
+  may still fail.  (Matthew Treinish, Robert Collins)
+
 0.0.15
 ++++++
 
diff --git a/testrepository/testcommand.py b/testrepository/testcommand.py
index ef285ab..d93881d 100644
--- a/testrepository/testcommand.py
+++ b/testrepository/testcommand.py
@@ -16,6 +16,7 @@
 
 from extras import try_imports
 
+from collections import defaultdict
 ConfigParser = try_imports(['ConfigParser', 'configparser'])
 import itertools
 import operator
@@ -134,7 +135,7 @@ class TestListingFixture(Fixture):
 
     def __init__(self, test_ids, cmd_template, listopt, idoption, ui,
         repository, parallel=True, listpath=None, parser=None,
-        test_filters=None, instance_source=None):
+        test_filters=None, instance_source=None, group_callback=None):
         """Create a TestListingFixture.
 
         :param test_ids: The test_ids to use. May be None indicating that
@@ -167,6 +168,10 @@ class TestListingFixture(Fixture):
         :param instance_source: A source of test run instances. Must support
             obtain_instance(max_concurrency) -> id and release_instance(id)
             calls.
+        :param group_callback: If supplied, should be a function that accepts a
+            test id and returns a group id. A group id is an arbitrary value
+            used as a dictionary key in the scheduler. All test ids with the
+            same group id are scheduled onto the same backend test process.
         """
         self.test_ids = test_ids
         self.template = cmd_template
@@ -178,6 +183,7 @@ class TestListingFixture(Fixture):
         self._listpath = listpath
         self._parser = parser
         self.test_filters = test_filters
+        self._group_callback = group_callback
         self._instance_source = instance_source
 
     def setUp(self):
@@ -370,22 +376,55 @@ class TestListingFixture(Fixture):
         partitions = [list() for i in range(concurrency)]
         timed_partitions = [[0.0, partition] for partition in partitions]
         time_data = self.repository.get_test_times(test_ids)
-        timed = time_data['known']
-        unknown = time_data['unknown']
+        timed_tests = time_data['known']
+        unknown_tests = time_data['unknown']
+        # Group tests: generate group_id -> test_ids.
+        group_ids = defaultdict(list)
+        if self._group_callback is None:
+            group_callback = lambda _:None
+        else:
+            group_callback = self._group_callback
+        for test_id in test_ids:
+            group_id = group_callback(test_id) or test_id
+            group_ids[group_id].append(test_id)
+        # Time groups: generate three sets of groups:
+        # - fully timed dict(group_id -> time),
+        # - partially timed dict(group_id -> time) and
+        # - unknown (set of group_id)
+        # We may in future schedule partially timed groups differently, but at
+        # least today we just schedule them after the fully timed groups.
+        timed = {}
+        partial = {}
+        unknown = []
+        for group_id, group_tests in group_ids.items():
+            untimed_ids = unknown_tests.intersection(group_tests)
+            group_time = sum([timed_tests[test_id]
+                for test_id in untimed_ids.symmetric_difference(group_tests)])
+            if not untimed_ids:
+                timed[group_id] = group_time
+            elif group_time:
+                partial[group_id] = group_time
+            else:
+                unknown.append(group_id)
         # Scheduling is NP complete in general, so we avoid aiming for
         # perfection. A quick approximation that is sufficient for our general
         # needs:
-        # sort the tests by time
-        # allocate to partitions by putting each test in to the partition with
-        # the current (lowest time, shortest length)
-        queue = sorted(timed.items(), key=operator.itemgetter(1), reverse=True)
-        for test_id, duration in queue:
-            timed_partitions[0][0] = timed_partitions[0][0] + duration
-            timed_partitions[0][1].append(test_id)
-            timed_partitions.sort(key=lambda item:(item[0], len(item[1])))
-        # Assign tests with unknown times in round robin fashion to the partitions.
-        for partition, test_id in zip(itertools.cycle(partitions), unknown):
-            partition.append(test_id)
+        # sort the groups by time
+        # allocate to partitions by putting each group in to the partition with
+        # the current (lowest time, shortest length[in tests])
+        def consume_queue(groups):
+            queue = sorted(
+                groups.items(), key=operator.itemgetter(1), reverse=True)
+            for group_id, duration in queue:
+                timed_partitions[0][0] = timed_partitions[0][0] + duration
+                timed_partitions[0][1].extend(group_ids[group_id])
+                timed_partitions.sort(key=lambda item:(item[0], len(item[1])))
+        consume_queue(timed)
+        consume_queue(partial)
+        # Assign groups with entirely unknown times in round robin fashion to
+        # the partitions. 
+        for partition, group_id in zip(itertools.cycle(partitions), unknown):
+            partition.extend(group_ids[group_id])
         return partitions
 
     def callout_concurrency(self):
diff --git a/testrepository/tests/test_testcommand.py b/testrepository/tests/test_testcommand.py
index 43be8e6..b5b3374 100644
--- a/testrepository/tests/test_testcommand.py
+++ b/testrepository/tests/test_testcommand.py
@@ -362,6 +362,46 @@ class TestTestCommand(ResourcedTestCase):
         self.assertEqual(1, len(partitions[0]))
         self.assertEqual(1, len(partitions[1]))
 
+    def test_partition_tests_with_grouping(self):
+        repo = memory.RepositoryFactory().initialise('memory:')
+        result = repo.get_inserter()
+        result.startTestRun()
+        run_timed("TestCase1.slow", 3, result)
+        run_timed("TestCase2.fast1", 1, result)
+        run_timed("TestCase2.fast2", 1, result)
+        result.stopTestRun()
+        ui, command = self.get_test_ui_and_cmd(repository=repo)
+        self.set_config(
+            '[DEFAULT]\ntest_command=foo $IDLIST $LISTOPT\n'
+            'test_list_option=--list\n')
+        fixture = self.useFixture(command.get_run_command())
+        test_ids = frozenset(['TestCase1.slow', 'TestCase1.fast',
+                              'TestCase1.fast2', 'TestCase2.fast1',
+                              'TestCase3.test1', 'TestCase3.test2',
+                              'TestCase2.fast2', 'TestCase4.test',
+                              'testdir.testfile.TestCase5.test'])
+        regex = 'TestCase[0-5]'
+        def group_id(test_id, regex=re.compile('TestCase[0-5]')):
+            match = regex.match(test_id)
+            if match:
+                return match.group(0)
+        # There isn't a public way to define a group callback [as yet].
+        fixture._group_callback = group_id
+        partitions = fixture.partition_tests(test_ids, 2)
+        # Timed groups are deterministic:
+        self.assertTrue('TestCase2.fast1' in partitions[0])
+        self.assertTrue('TestCase2.fast2' in partitions[0])
+        self.assertTrue('TestCase1.slow' in partitions[1])
+        self.assertTrue('TestCase1.fast' in partitions[1])
+        self.assertTrue('TestCase1.fast2' in partitions[1])
+        # Untimed groups just need to be kept together:
+        if 'TestCase3.test1' in partitions[0]:
+            self.assertTrue('TestCase3.test2' in partitions[0])
+        if 'TestCase4.test' not in partitions[0]:
+            self.assertTrue('TestCase4.test' in partitions[1])
+        if 'testdir.testfile.TestCase5.test' not in partitions[0]:
+            self.assertTrue('testdir.testfile.TestCase5.test' in partitions[1])
+
     def test_run_tests_with_instances(self):
         # when there are instances and no instance_execute, run_tests acts as
         # normal.
author	Robert Collins <robertc@robertcollins.net>	2013-07-16 00:04:42 +1200
committer	Robert Collins <robertc@robertcollins.net>	2013-07-16 00:04:42 +1200
commit	18ddf5aa8af1b0d339d20fb9a060451428a7e4dc (patch)
tree	589de303ea642ea276dec5cc211c0b983027fba7
parent	f92cc3662411925ebbc26e957ca40fb92f00d81f (diff)
parent	bf920ea2002a64f23ded34bff1cd81a4bb7a4637 (diff)
download	testrepository-18ddf5aa8af1b0d339d20fb9a060451428a7e4dc.tar.gz