Add group regex scheduling hint to the test partitioner

This commit adds a new optional parameter, group_regex, to TestListingFixture. The group_regex parameter is used to group the test ids. When a regex string is passed through this parameter, the test partitioner matches the regex against each test id and groups the test ids by the matched text. A test id that does not match the regex is put in a default group. These groups are then used for scheduling the partitions: each test group is scheduled as a unit, so that all tests in a group are run together on the same partition. For the purposes of scheduling, the sum of the recorded run times of all test ids in a group is used to place the whole group in a partition.
author: Matthew Treinish <treinish@linux.vnet.ibm.com> 2013-07-08 16:18:37 -0400
committer: Matthew Treinish <treinish@linux.vnet.ibm.com> 2013-07-08 16:18:37 -0400
commit: bf920ea2002a64f23ded34bff1cd81a4bb7a4637 (patch)
tree: 04c5a4b17eb6726744c87fa065fff14e65437201
parent: 610acebaff00b30303dc11644e0a88931d760a28 (diff)
download: testrepository-bf920ea2002a64f23ded34bff1cd81a4bb7a4637.tar.gz
3 files changed, 110 insertions, 13 deletions
diff --git a/COPYING b/COPYING
index daebc89..632a79e 100644
--- a/COPYING
+++ b/COPYING
@@ -29,6 +29,7 @@ for distributions such as Debian that wish to list all the copyright holders
 in their metadata:
 * Robert Collins <robertc@robertcollins.net>, 2009
 * Hewlett-Packard Development Company, L.P., 2013
+* IBM Corp., 2013
 
 
 Code that has been incorporated into Testrepository from other projects will
diff --git a/testrepository/testcommand.py b/testrepository/testcommand.py
index ef285ab..65a8f61 100644
--- a/testrepository/testcommand.py
+++ b/testrepository/testcommand.py
@@ -134,7 +134,7 @@ class TestListingFixture(Fixture):
 
     def __init__(self, test_ids, cmd_template, listopt, idoption, ui,
         repository, parallel=True, listpath=None, parser=None,
-        test_filters=None, instance_source=None):
+        test_filters=None, instance_source=None, group_regex=None):
         """Create a TestListingFixture.
 
         :param test_ids: The test_ids to use. May be None indicating that
@@ -167,6 +167,8 @@ class TestListingFixture(Fixture):
         :param instance_source: A source of test run instances. Must support
             obtain_instance(max_concurrency) -> id and release_instance(id)
             calls.
+        :param group_regex: An optional regular expression string which is used
+            to provide a grouping hint to the test partitioner
         """
         self.test_ids = test_ids
         self.template = cmd_template
@@ -179,6 +181,7 @@ class TestListingFixture(Fixture):
         self._parser = parser
         self.test_filters = test_filters
         self._instance_source = instance_source
+        self.group_regex = group_regex
 
     def setUp(self):
         super(TestListingFixture, self).setUp()
@@ -327,6 +330,7 @@ class TestListingFixture(Fixture):
         :return: A list of spawned processes.
         """
         result = []
+        group_tags = None
         test_ids = self.test_ids
         if self.concurrency == 1 and (test_ids is None or test_ids):
             # Have to customise cmd here, as instances are allocated
@@ -343,8 +347,11 @@ class TestListingFixture(Fixture):
                 return [CallWhenProcFinishes(run_proc,
                     lambda:self._instance_source.release_instance(instance))]
             else:
-                return [run_proc]
-        test_id_groups = self.partition_tests(test_ids, self.concurrency)
+                return [run_proc] 
+        if self.group_regex:
+            group_tags = self.filter_test_groups(test_ids, self.group_regex)
+        test_id_groups = self.partition_tests(test_ids, self.concurrency,
+                                              group_tags)
         for test_ids in test_id_groups:
             if not test_ids:
                 # No tests in this partition
@@ -356,7 +363,28 @@ class TestListingFixture(Fixture):
             result.extend(fixture.run_tests())
         return result
 
-    def partition_tests(self, test_ids, concurrency):
+    def filter_test_groups(self, test_ids, group_regex):
+        """Add a group tag based on the regex provided
+
+        :return A dict with the group tags as keys and a list of
+            test ids that are a member of the group tag as the value
+        """
+
+        group_dict = {}
+        expr = re.compile(group_regex)
+        for test_id in test_ids:
+            match = expr.match(test_id)
+            if match:
+                group_id = match.group(0)
+            else:
+                group_id = None
+            if group_dict.get(group_id):
+                group_dict[group_id].append(test_id)
+            else:
+                group_dict[group_id] = [test_id]
+        return group_dict
+
+    def partition_tests(self, test_ids, concurrency, group_tags=None):
         """Parition test_ids by concurrency.
 
         Test durations from the repository are used to get partitions which
@@ -367,26 +395,63 @@ class TestListingFixture(Fixture):
         :return: A list where each element is a distinct subset of test_ids,
             and the union of all the elements is equal to set(test_ids).
         """
+
         partitions = [list() for i in range(concurrency)]
         timed_partitions = [[0.0, partition] for partition in partitions]
         time_data = self.repository.get_test_times(test_ids)
         timed = time_data['known']
         unknown = time_data['unknown']
+        # Schedule test groups by the sum of execute time for each test that is
+        # a member of the group
+        if group_tags:
+            group_timed = {}
+            group_unknown = []
+            for group_tag in group_tags.keys():
+                time = 0.0
+                for test_id in group_tags[group_tag]:
+                    # If a test_id is not timed remove the whole group from the
+                    # timed groups dict and
+                    if test_id in unknown:
+                        if group_tag in group_timed.keys():
+                            group_timed.pop(group_tag, None)
+                        group_unknown.append(group_tag)
+                        break
+                    time = time + timed[test_id]
+                group_timed[group_tag] = (group_tags[group_tag], time)
+
+            queue = sorted(group_timed.items(),
+                           key=operator.itemgetter(1),
+                           reverse=True)
+
+            # Sort the tests by runtime
+            for group_tag, test_tuple in queue:
+                test_ids = test_tuple[0]
+                duration = test_tuple[1]
+                timed_partitions[0][0] = timed_partitions[0][0] + duration
+                # Handle groups larger than a single entry
+                timed_partitions[0][1].extend(test_ids)
+                timed_partitions.sort(key=lambda item: (item[0], len(item[1])))
+            for partition, group_id in zip(itertools.cycle(partitions),
+                                           group_unknown):
+                partition = partition + group_tags[group_id]
+            return partitions
+
         # Scheduling is NP complete in general, so we avoid aiming for
         # perfection. A quick approximation that is sufficient for our general
         # needs:
         # sort the tests by time
         # allocate to partitions by putting each test in to the partition with
         # the current (lowest time, shortest length)
-        queue = sorted(timed.items(), key=operator.itemgetter(1), reverse=True)
-        for test_id, duration in queue:
-            timed_partitions[0][0] = timed_partitions[0][0] + duration
-            timed_partitions[0][1].append(test_id)
-            timed_partitions.sort(key=lambda item:(item[0], len(item[1])))
-        # Assign tests with unknown times in round robin fashion to the partitions.
-        for partition, test_id in zip(itertools.cycle(partitions), unknown):
-            partition.append(test_id)
-        return partitions
+        else:
+            queue = sorted(timed.items(), key=operator.itemgetter(1), reverse=True)
+            for test_id, duration in queue:
+                timed_partitions[0][0] = timed_partitions[0][0] + duration
+                timed_partitions[0][1].append(test_id)
+                timed_partitions.sort(key=lambda item:(item[0], len(item[1])))
+           # Assign tests with unknown times in round robin fashion to the partitions. 
+            for partition, test_id in zip(itertools.cycle(partitions), unknown):
+                partition.append(test_id)
+            return partitions
 
     def callout_concurrency(self):
         """Callout for user defined concurrency."""
diff --git a/testrepository/tests/test_testcommand.py b/testrepository/tests/test_testcommand.py
index 43be8e6..bbe4ac8 100644
--- a/testrepository/tests/test_testcommand.py
+++ b/testrepository/tests/test_testcommand.py
@@ -362,6 +362,37 @@ class TestTestCommand(ResourcedTestCase):
         self.assertEqual(1, len(partitions[0]))
         self.assertEqual(1, len(partitions[1]))
 
+    def test_partition_tests_with_group_regex(self):
+        repo = memory.RepositoryFactory().initialise('memory:')
+        result = repo.get_inserter()
+        result.startTestRun()
+        run_timed("TestCase1.slow", 3, result)
+        run_timed("TestCase2.fast1", 1, result)
+        run_timed("TestCase2.fast2", 1, result)
+        result.stopTestRun()
+        ui, command = self.get_test_ui_and_cmd(repository=repo)
+        self.set_config(
+            '[DEFAULT]\ntest_command=foo $IDLIST $LISTOPT\n'
+            'test_list_option=--list\n')
+        fixture = self.useFixture(command.get_run_command())
+        test_ids = frozenset(['TestCase1.slow', 'TestCase1.fast',
+                              'TestCase1.fast2', 'TestCase2.fast1',
+                              'TestCase3.test1', 'TestCase3.test2',
+                              'TestCase2.fast2', 'TestCase4.test',
+                              'testdir.testfile.TestCase5.test'])
+        regex = 'TestCase[0-5]'
+        group_tags = fixture.filter_test_groups(test_ids, regex)
+        partitions = fixture.partition_tests(test_ids, 2, group_tags)
+        self.assertTrue('TestCase1.slow' in partitions[1])
+        self.assertTrue('TestCase1.fast' in partitions[1])
+        self.assertTrue('TestCase1.fast2' in partitions[1])
+        self.assertTrue('TestCase3.test2' in partitions[1])
+        self.assertTrue('TestCase3.test1' in partitions[1])
+        self.assertTrue('TestCase4.test' in partitions[1])
+        self.assertTrue('testdir.testfile.TestCase5.test' in partitions[0])
+        self.assertTrue('TestCase2.fast1' in partitions[0])
+        self.assertTrue('TestCase2.fast2' in partitions[0])
+
     def test_run_tests_with_instances(self):
         # when there are instances and no instance_execute, run_tests acts as
         # normal.
author	Matthew Treinish <treinish@linux.vnet.ibm.com>	2013-07-08 16:18:37 -0400
committer	Matthew Treinish <treinish@linux.vnet.ibm.com>	2013-07-08 16:18:37 -0400
commit	bf920ea2002a64f23ded34bff1cd81a4bb7a4637 (patch)
tree	04c5a4b17eb6726744c87fa065fff14e65437201
parent	610acebaff00b30303dc11644e0a88931d760a28 (diff)
download	testrepository-bf920ea2002a64f23ded34bff1cd81a4bb7a4637.tar.gz