summary | refs | log | tree | commit | diff
path: root/chromium/testing/trigger_scripts
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/testing/trigger_scripts')
-rwxr-xr-x chromium/testing/trigger_scripts/base_test_triggerer.py 2
-rwxr-xr-x chromium/testing/trigger_scripts/trigger_multiple_dimensions.py 163
-rwxr-xr-x chromium/testing/trigger_scripts/trigger_multiple_dimensions_unittest.py 351
3 files changed, 1 insertion, 515 deletions
diff --git a/chromium/testing/trigger_scripts/base_test_triggerer.py b/chromium/testing/trigger_scripts/base_test_triggerer.py
index bf7c44b5c01..d16ab381a0d 100755
--- a/chromium/testing/trigger_scripts/base_test_triggerer.py
+++ b/chromium/testing/trigger_scripts/base_test_triggerer.py
@@ -10,7 +10,7 @@ calls under one logical step. It also gives the subclasses the ability to
define their own logic for pruning the configurations they want to trigger
jobs on and what configurations to use.
-See trigger_multiple_dimensions.py for an example of how to use this base class.
+See perf_device_triggerer.py for an example of how to use this base class.
"""
diff --git a/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py b/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py
deleted file mode 100755
index d4d26f55351..00000000000
--- a/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2018 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Custom swarming triggering script.
-
-This script does custom swarming triggering logic, to allow one bot to
-conceptually span multiple Swarming configurations, while lumping all trigger
-calls under one logical step.
-
-The reason this script is needed is to allow seamless upgrades of the GPU, OS
-version, or graphics driver. Most Chromium tests, GPU tests in particular, are
-triggered with precise values for all of these Swarming dimensions. This ensures
-that if a machine is added to the Swarming pool with a slightly different
-configuration, tests don't fail for unexpected reasons.
-
-During an upgrade of the fleet, it's not feasible to take half of the machines
-offline. Some experience was gained with this during a recent upgrade of the
-GPUs in Chromium's main Windows and Linux NVIDIA bots. In the middle of the
-upgrade, only 50% of the capacity was available, and CQ jobs started to time
-out. Once the hurdle had been passed in the middle of the upgrade, capacity was
-sufficient, but it's crucial that this process remain seamless.
-
-This script receives multiple machine configurations on the command line in the
-form of quoted strings. These strings are JSON dictionaries that represent
-entries in the "dimensions" array of the "swarming" dictionary in the
-src/testing/buildbot JSON files. The script queries the Swarming pool for the
-number of machines of each configuration, and distributes work (shards) among
-them using the following algorithm:
-
-1. If either configuration has machines available (online, not busy at the time
-of the query) then distribute shards to them first.
-
-2. Compute the relative fractions of all of the live (online, not quarantined,
-not dead) machines of all configurations.
-
-3. Distribute the remaining shards probabilistically among these configurations.
-
-The use of random numbers attempts to avoid the pathology where one
-configuration only has a couple of machines, and work is never distributed to it
-once all machines are busy.
-
-This script must have roughly the same command line interface as swarming.py
-trigger. It modifies it in the following ways:
- * Intercepts the dump-json argument, and creates its own by combining the
- results from each trigger call.
- * Scans through the multiple-trigger-configs dictionaries. For any key found,
- deletes that dimension from the originally triggered task's dimensions. This
- is what allows the Swarming dimensions to be replaced.
- * On a per-shard basis, adds the Swarming dimensions chosen from the
- multiple-trigger-configs list to the dimensions for the shard.
-
-This script is normally called from the swarming recipe module in tools/build.
-
-"""
-
-import argparse
-import copy
-import json
-import os
-import random
-import subprocess
-import sys
-import tempfile
-import urllib
-
-import base_test_triggerer
-
-
-class MultiDimensionTestTriggerer(base_test_triggerer.BaseTestTriggerer):
- def __init__(self):
- super(MultiDimensionTestTriggerer, self).__init__()
-
- def choose_random_int(self, max_num):
- return random.randint(1, max_num)
-
- def pick_bot_configuration(self, verbose):
- # These are the rules used:
- # 1. If any configuration has bots available, pick the configuration with
- # the most bots available.
- # 2. If no configuration has bots available, pick a random configuration
- # based on the total number of bots in each configuration.
- #
- # This method updates bot_statuses_ in case (1), and in both cases, returns
- # the index into bot_configs_ that should be used.
- if any(status['available'] > 0 for status in self._bot_statuses):
- # Case 1.
- max_index = 0
- max_val = self._bot_statuses[0]['available']
- for i in xrange(1, len(self._bot_statuses)):
- avail = self._bot_statuses[i]['available']
- if avail > max_val:
- max_index = i
- max_val = avail
- self._bot_statuses[max_index]['available'] -= 1
- assert self._bot_statuses[max_index]['available'] >= 0
- if verbose:
- print 'Chose bot config %d because bots were available' % (max_index)
- return max_index
- # Case 2.
- # We want to choose a bot uniformly at random from all of the bots specified
- # in the bot configs. To do this, we conceptually group the bots into
- # buckets, pick a random number between 1 and the total number of bots, and
- # figure out which bucket of bots it landed in.
- r = self.choose_random_int(self._total_bots)
- for i, status in enumerate(self._bot_statuses):
- if r <= status['total']:
- if verbose:
- print 'Chose bot config %d stochastically' % (i)
- return i
- r -= status['total']
- raise Exception('Should not reach here')
-
- def select_config_indices(self, args, verbose):
- selected_indices = []
- for shard_index in self.indices_to_trigger(args):
- selected_indices.append(
- (shard_index, self.pick_bot_configuration(verbose)))
- return selected_indices
-
- def prune_test_specific_configs(self, args, verbose):
- self.query_swarming_for_bot_configs(verbose)
- # This script doesn't know how long individual test shards take to
- # run, nor how many Swarming jobs are waiting to run on a
- # particular configuration. It can end up scheduling jobs on
- # configurations that have very few machines, and backing them up
- # to the point where the tasks start expiring. To try to prevent
- # this, don't schedule jobs at all on configurations that have
- # less than 10% of the total capacity. crbug.com/886985
- MIN_CONFIG_CAPACITY_PERCENTAGE = 0.1
- filtered_bot_configs = []
- filtered_bot_statuses = []
- for i in xrange(len(self._bot_configs)):
- config = self._bot_configs[i]
- status = self._bot_statuses[i]
- if status['total'] >= MIN_CONFIG_CAPACITY_PERCENTAGE * self._total_bots:
- filtered_bot_configs.append(config)
- filtered_bot_statuses.append(status)
- else:
- if verbose:
- print 'Filtered config because it had too few bots: %s' % str(status)
- if len(filtered_bot_configs) == 0:
- raise Exception('The bot configurations are too fragmented; no single ' +
- 'configuration has even 10% of the total capacity. ' +
- 'Distribution will not work well. Failing.')
- self._bot_configs = filtered_bot_configs
- self._bot_statuses = filtered_bot_statuses
- self._total_bots = sum(x['total'] for x in self._bot_statuses)
- if verbose:
- print 'Total bots after filtering: %d' % (self._total_bots)
-
-def main():
- # setup args for common contract of base class
- parser = base_test_triggerer.BaseTestTriggerer.setup_parser_contract(
- argparse.ArgumentParser(description=__doc__))
- args, remaining = parser.parse_known_args()
-
- triggerer = MultiDimensionTestTriggerer()
- return triggerer.trigger_tasks(args, remaining)
-
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/chromium/testing/trigger_scripts/trigger_multiple_dimensions_unittest.py b/chromium/testing/trigger_scripts/trigger_multiple_dimensions_unittest.py
deleted file mode 100755
index 9a29cd3960e..00000000000
--- a/chromium/testing/trigger_scripts/trigger_multiple_dimensions_unittest.py
+++ /dev/null
@@ -1,351 +0,0 @@
-#!/usr/bin/python
-# Copyright 2018 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Tests for trigger_multiple_dimensions.py."""
-
-import unittest
-
-import trigger_multiple_dimensions
-
-class Args(object):
- def __init__(self):
- self.shards = 1
- self.shard_index = None
- self.dump_json = ''
- self.multiple_trigger_configs = []
- self.multiple_dimension_script_verbose = False
-
-
-class FakeTriggerer(trigger_multiple_dimensions.MultiDimensionTestTriggerer):
- def __init__(self, bot_configs, bot_statuses,
- use_superclass_random_number_generator, first_random_number):
- super(FakeTriggerer, self).__init__()
- self._bot_configs = bot_configs
- self._bot_statuses = bot_statuses
- self._swarming_runs = []
- self._files = {}
- self._temp_file_id = 0
- self._use_superclass_rng = use_superclass_random_number_generator
- self._last_random_number = first_random_number
-
- def set_files(self, files):
- self._files = files
-
- def choose_random_int(self, max_num):
- if self._use_superclass_rng:
- return super(FakeTriggerer, self).choose_random_int(max_num)
- if self._last_random_number > max_num:
- self._last_random_number = 1
- result = self._last_random_number
- self._last_random_number += 1
- return result
-
- def make_temp_file(self, prefix=None, suffix=None):
- result = prefix + str(self._temp_file_id) + suffix
- self._temp_file_id += 1
- return result
-
- def delete_temp_file(self, temp_file):
- pass
-
- def read_json_from_temp_file(self, temp_file):
- return self._files[temp_file]
-
- def write_json_to_file(self, merged_json, output_file):
- self._files[output_file] = merged_json
-
- def parse_bot_configs(self, args):
- pass
-
- def query_swarming_for_bot_configs(self, verbose):
- # Sum up the total count of all bots.
- self._total_bots = sum(x['total'] for x in self._bot_statuses)
-
- def run_swarming(self, args, verbose):
- self._swarming_runs.append(args)
-
-
-WIN_NVIDIA_QUADRO_P400_STABLE_DRIVER = '10de:1cb3-23.21.13.8792'
-WIN7 = 'Windows-2008ServerR2-SP1'
-WIN10 = 'Windows-10'
-
-WIN7_NVIDIA = {
- 'gpu': WIN_NVIDIA_QUADRO_P400_STABLE_DRIVER,
- 'os': WIN7,
- 'pool': 'Chrome-GPU',
-}
-
-WIN10_NVIDIA = {
- 'gpu': WIN_NVIDIA_QUADRO_P400_STABLE_DRIVER,
- 'os': WIN10,
- 'pool': 'Chrome-GPU',
-}
-
-class UnitTest(unittest.TestCase):
- def basic_win7_win10_setup(self, bot_statuses,
- use_superclass_random_number_generator=False,
- first_random_number=1,
- shards=2):
- triggerer = FakeTriggerer(
- [
- WIN7_NVIDIA,
- WIN10_NVIDIA
- ],
- bot_statuses,
- use_superclass_random_number_generator,
- first_random_number
- )
- # Note: the contents of these JSON files don't accurately reflect
- # that produced by "swarming.py trigger". The unit tests only
- # verify that shard 0's JSON is preserved.
- file_map = {}
- for i in xrange(shards):
- file_name = 'base_trigger_dimensions%d.json' % i
- result = {
- 'tasks': {
- 'webgl_conformance_tests on NVIDIA GPU on Windows': {
- 'task_id': hex(0xf000 + i)[2:]
- },
- },
- }
- if i == 0:
- result['base_task_name'] = 'webgl_conformance_tests'
- result['request'] = {
- 'expiration_secs': 3600,
- 'properties': {
- 'execution_timeout_secs': 3600,
- },
- }
- file_map[file_name] = result
- triggerer.set_files(file_map)
- args = Args()
- args.shards = shards
- args.dump_json = 'output.json'
- args.multiple_dimension_script_verbose = False
- triggerer.trigger_tasks(
- args,
- [
- 'trigger',
- '--dimension',
- 'gpu',
- WIN_NVIDIA_QUADRO_P400_STABLE_DRIVER,
- '--dimension',
- 'os',
- WIN7,
- '--dimension',
- 'pool',
- 'Chrome-GPU',
- '--',
- 'webgl_conformance',
- ])
- return triggerer
-
- def list_contains_sublist(self, main_list, sub_list):
- return any(sub_list == main_list[offset:offset + len(sub_list)]
- for offset in xrange(len(main_list) - (len(sub_list) - 1)))
-
- def shard_runs_on_os(self, triggerer, shard_index, os):
- return self.list_contains_sublist(triggerer._swarming_runs[shard_index],
- ['--dimension', 'os', os])
-
- def test_parse_bot_configs(self):
- triggerer = trigger_multiple_dimensions.MultiDimensionTestTriggerer()
- args = Args()
- args.multiple_trigger_configs = "{ foo }"
- self.assertRaisesRegexp(ValueError, "Error while parsing JSON.*",
- triggerer.parse_bot_configs, args)
- args.multiple_trigger_configs = "{ \"foo\": \"bar\" }"
- self.assertRaisesRegexp(ValueError, "Bot configurations must be a list.*",
- triggerer.parse_bot_configs, args)
- args.multiple_trigger_configs = "[]"
- self.assertRaisesRegexp(ValueError,
- "Bot configuration list must have at least.*",
- triggerer.parse_bot_configs, args)
- args.multiple_trigger_configs = "[{}, \"\"]"
- self.assertRaisesRegexp(ValueError,
- "Bot configurations must all be.*",
- triggerer.parse_bot_configs, args)
- args.multiple_trigger_configs = "[{}]"
- triggerer.parse_bot_configs(args)
- self.assertEqual(triggerer._bot_configs, [{}])
-
- def test_split_with_available_machines(self):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 1,
- 'available': 1,
- },
- {
- 'total': 1,
- 'available': 1,
- },
- ],
- )
- # First shard should run on Win7.
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN7))
- # Second shard should run on Win10.
- self.assertTrue(self.shard_runs_on_os(triggerer, 1, WIN10))
- # And not vice versa.
- self.assertFalse(self.shard_runs_on_os(triggerer, 0, WIN10))
- self.assertFalse(self.shard_runs_on_os(triggerer, 1, WIN7))
-
- def test_shard_env_vars(self):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 2,
- 'available': 2,
- },
- {
- 'total': 2,
- 'available': 0,
- },
- ],
- )
- self.assertTrue(self.list_contains_sublist(
- triggerer._swarming_runs[0], ['--env', 'GTEST_SHARD_INDEX', '0']))
- self.assertTrue(self.list_contains_sublist(
- triggerer._swarming_runs[1], ['--env', 'GTEST_SHARD_INDEX', '1']))
- self.assertTrue(self.list_contains_sublist(
- triggerer._swarming_runs[0], ['--env', 'GTEST_TOTAL_SHARDS', '2']))
- self.assertTrue(self.list_contains_sublist(
- triggerer._swarming_runs[1], ['--env', 'GTEST_TOTAL_SHARDS', '2']))
-
- def test_json_merging(self):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 1,
- 'available': 1,
- },
- {
- 'total': 1,
- 'available': 1,
- },
- ],
- )
- self.assertTrue('output.json' in triggerer._files)
- output_json = triggerer._files['output.json']
- self.assertTrue('base_task_name' in output_json)
- self.assertTrue('request' in output_json)
- self.assertEqual(output_json['request']['expiration_secs'], 3600)
- self.assertEqual(
- output_json['request']['properties']['execution_timeout_secs'], 3600)
-
- def test_split_with_only_one_config_available(self):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 2,
- 'available': 2,
- },
- {
- 'total': 2,
- 'available': 0,
- },
- ],
- )
- # Both shards should run on Win7.
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN7))
- self.assertTrue(self.shard_runs_on_os(triggerer, 1, WIN7))
- # Redo with only Win10 bots available.
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 2,
- 'available': 0,
- },
- {
- 'total': 2,
- 'available': 2,
- },
- ],
- )
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN10))
- self.assertTrue(self.shard_runs_on_os(triggerer, 1, WIN10))
-
- def test_split_with_no_bots_available(self):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 1,
- 'available': 0,
- },
- {
- 'total': 1,
- 'available': 0,
- },
- ],
- )
- # Given the fake random number generator above, first shard should
- # run on Win7.
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN7))
- # Second shard should run on Win10.
- self.assertTrue(self.shard_runs_on_os(triggerer, 1, WIN10))
- # Try again with different bot distribution and random numbers.
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 2,
- 'available': 0,
- },
- {
- 'total': 2,
- 'available': 0,
- },
- ],
- first_random_number=3,
- )
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN10))
- self.assertTrue(self.shard_runs_on_os(triggerer, 1, WIN10))
-
- def test_superclass_random_number_generator_works(self):
- # Probe randomly a certain number of times.
- num_runs = 0
- for _ in xrange(100):
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 2,
- 'available': 0,
- },
- {
- 'total': 2,
- 'available': 0,
- },
- ],
- use_superclass_random_number_generator=True
- )
- for _ in xrange(2):
- self.assertTrue(self.shard_runs_on_os(triggerer, 0, WIN7) or
- self.shard_runs_on_os(triggerer, 0, WIN10))
- num_runs += 1
- self.assertEqual(num_runs, 200)
-
- def test_split_with_imbalanced_configs(self):
- num_shards = 20
- triggerer = self.basic_win7_win10_setup(
- [
- {
- 'total': 15,
- 'available': 1,
- },
- {
- 'total': 1,
- 'available': 1,
- },
- ],
- shards=num_shards
- )
- # Because the second configuration (Win10) has so few machines --
- # fewer than 10% of the total -- the triggerer should never
- # schedule any shard on it.
- for i in xrange(num_shards):
- self.assertTrue(self.shard_runs_on_os(triggerer, i, WIN7))
- self.assertFalse(self.shard_runs_on_os(triggerer, i, WIN10))
-
-if __name__ == '__main__':
- unittest.main()