diff options
Diffstat (limited to 'chromium/testing/trigger_scripts/trigger_multiple_dimensions.py')
-rwxr-xr-x | chromium/testing/trigger_scripts/trigger_multiple_dimensions.py | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py b/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py new file mode 100755 index 00000000000..c17984a3838 --- /dev/null +++ b/chromium/testing/trigger_scripts/trigger_multiple_dimensions.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Custom swarming triggering script. + +This script does custom swarming triggering logic, to allow one bot to +conceptually span multiple Swarming configurations, while lumping all trigger +calls under one logical step. + +The reason this script is needed is to allow seamless upgrades of the GPU, OS +version, or graphics driver. Most Chromium tests, GPU tests in particular, are +triggered with precise values for all of these Swarming dimensions. This ensures +that if a machine is added to the Swarming pool with a slightly different +configuration, tests don't fail for unexpected reasons. + +During an upgrade of the fleet, it's not feasible to take half of the machines +offline. Some experience was gained with this during a recent upgrade of the +GPUs in Chromium's main Windows and Linux NVIDIA bots. In the middle of the +upgrade, only 50% of the capacity was available, and CQ jobs started to time +out. Once the hurdle had been passed in the middle of the upgrade, capacity was +sufficient, but it's crucial that this process remain seamless. + +This script receives multiple machine configurations on the command line in the +form of quoted strings. These strings are JSON dictionaries that represent +entries in the "dimensions" array of the "swarming" dictionary in the +src/testing/buildbot JSON files. The script queries the Swarming pool for the +number of machines of each configuration, and distributes work (shards) among +them using the following algorithm: + +1. If either configuration has machines available (online, not busy at the time +of the query) then distribute shards to them first. + +2. Compute the relative fractions of all of the live (online, not quarantined, +not dead) machines of all configurations. + +3. Distribute the remaining shards probabilistically among these configurations. + +The use of random numbers attempts to avoid the pathology where one +configuration only has a couple of machines, and work is never distributed to it +once all machines are busy. + +This script must have roughly the same command line interface as swarming.py +trigger. It modifies it in the following ways: + * Intercepts the dump-json argument, and creates its own by combining the + results from each trigger call. + * Scans through the multiple-trigger-configs dictionaries. For any key found, + deletes that dimension from the originally triggered task's dimensions. This + is what allows the Swarming dimensions to be replaced. + * On a per-shard basis, adds the Swarming dimensions chosen from the + multiple-trigger-configs list to the dimensions for the shard. + +This script is normally called from the swarming recipe module in tools/build. + +""" + +import argparse +import copy +import json +import os +import random +import subprocess +import sys +import tempfile +import urllib + +import base_test_triggerer + + +class MultiDimensionTestTriggerer(base_test_triggerer.BaseTestTriggerer): + def __init__(self): + super(MultiDimensionTestTriggerer, self).__init__() + + def choose_random_int(self, max_num): + return random.randint(1, max_num) + + def pick_bot_configuration(self, verbose): + # These are the rules used: + # 1. If any configuration has bots available, pick the configuration with + # the most bots available. + # 2. If no configuration has bots available, pick a random configuration + # based on the total number of bots in each configuration. + # + # This method updates bot_statuses_ in case (1), and in both cases, returns + # the index into bot_configs_ that should be used. + if any(status['available'] > 0 for status in self._bot_statuses): + # Case 1. + max_index = 0 + max_val = self._bot_statuses[0]['available'] + for i in xrange(1, len(self._bot_statuses)): + avail = self._bot_statuses[i]['available'] + if avail > max_val: + max_index = i + max_val = avail + self._bot_statuses[max_index]['available'] -= 1 + assert self._bot_statuses[max_index]['available'] >= 0 + if verbose: + print 'Chose bot config %d because bots were available' % (max_index) + return max_index + # Case 2. + # We want to choose a bot uniformly at random from all of the bots specified + # in the bot configs. To do this, we conceptually group the bots into + # buckets, pick a random number between 1 and the total number of bots, and + # figure out which bucket of bots it landed in. + r = self.choose_random_int(self._total_bots) + for i, status in enumerate(self._bot_statuses): + if r <= status['total']: + if verbose: + print 'Chose bot config %d stochastically' % (i) + return i + r -= status['total'] + raise Exception('Should not reach here') + + def select_config_indices(self, args, verbose): + selected_indices = [] + for _ in xrange(args.shards): + selected_indices.append(self.pick_bot_configuration(verbose)) + return selected_indices + + def prune_test_specific_configs(self, args, verbose): + self.query_swarming_for_bot_configs(verbose) + +def main(): + triggerer = MultiDimensionTestTriggerer() + # setup args for common contract of base class + parser = triggerer.setup_parser_contract( + argparse.ArgumentParser(description=__doc__)) + args, remaining = parser.parse_known_args() + return triggerer.trigger_tasks(args, remaining) + + +if __name__ == '__main__': + sys.exit(main()) |