diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-02-28 12:14:07 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-02-28 12:14:07 +0000 |
commit | 22ecb1e3fc02bb923c3e9941b1baa849348a036f (patch) | |
tree | c01d9e91564f50e790a63c71675dd0f6e7735153 /scripts | |
parent | 5eab6dcdd923ca375b86d6993f20a3e37dbd7a51 (diff) | |
download | gitlab-ce-22ecb1e3fc02bb923c3e9941b1baa849348a036f.tar.gz |
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/generate-rspec-foss-impact-pipeline | 66 | ||||
-rwxr-xr-x | scripts/generate_rspec_pipeline.rb | 176 |
2 files changed, 176 insertions, 66 deletions
require 'json'
require 'fileutils'
require 'erb'

# Generates an RSpec test child pipeline with dynamically parallelized jobs.
#
# The RSpec files to run are split by test level (see TEST_LEVELS) using the
# regexps provided by Quality::TestLevel, and a node count is computed for each
# level from the average duration of a test file of that level. Those values are
# handed to the ERB pipeline template as `rspec_files_per_test_level`, and the
# rendered YAML is written to "<pipeline_template_path>.yml".
#
# Quality::TestLevel is defined in tooling/quality/test_level and has to be
# loaded before #generate! is called.
class GenerateRspecPipeline
  SKIP_PIPELINE_YML_FILE = ".gitlab/ci/_skip.yml"
  TEST_LEVELS = %i[migration background_migration unit integration system].freeze
  MAX_NODES_COUNT = 50 # Maximum parallelization allowed by GitLab

  OPTIMAL_TEST_JOB_DURATION_IN_SECONDS = 600 # 10 MINUTES
  SETUP_DURATION_IN_SECONDS = 180.0 # 3 MINUTES
  OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS = OPTIMAL_TEST_JOB_DURATION_IN_SECONDS - SETUP_DURATION_IN_SECONDS

  # As of 2022-09-01:
  # $ find spec -type f | wc -l
  #  12825
  # and
  # $ find ee/spec -type f | wc -l
  #  5610
  # which gives a total of 18435 test files (`NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE`).
  #
  # Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/)
  # is 170183 seconds (`DURATION_OF_THE_TEST_SUITE_IN_SECONDS`).
  #
  # This gives an approximate 170183 / 18435 = 9.2 seconds per test file
  # (`DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`).
  #
  # If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`SETUP_DURATION_IN_SECONDS`),
  # then we need to give 7 minutes of testing to each test node (`OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`).
  # (7 * 60) / 9.2 = 45.6
  #
  # So if we'd want to run the full test suites in 10 minutes (`OPTIMAL_TEST_JOB_DURATION_IN_SECONDS`),
  # we'd need to run at max 45 test file per nodes (`#optimal_test_file_count_per_node_per_test_level`).
  NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE = 18_435
  DURATION_OF_THE_TEST_SUITE_IN_SECONDS = 170_183
  # `fdiv` keeps the fractional part (~9.23); plain Integer division would truncate to 9
  # and contradict the computation documented above.
  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS =
    DURATION_OF_THE_TEST_SUITE_IN_SECONDS.fdiv(NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE)

  # pipeline_template_path: A YAML pipeline configuration template to generate the final pipeline config from
  # rspec_files_path: A file containing RSpec files to run, separated by a space
  # knapsack_report_path: A JSON Knapsack report mapping test files to durations (optional)
  #
  # Raises ArgumentError when the pipeline template does not exist.
  def initialize(pipeline_template_path:, rspec_files_path: nil, knapsack_report_path: nil)
    @pipeline_template_path = pipeline_template_path.to_s
    @rspec_files_path = rspec_files_path.to_s
    @knapsack_report_path = knapsack_report_path.to_s

    return if File.exist?(@pipeline_template_path)

    raise ArgumentError, "Pipeline template not found: #{@pipeline_template_path.inspect}"
  end

  # Writes the pipeline configuration to "<pipeline_template_path>.yml".
  #
  # When there are no RSpec files to run, SKIP_PIPELINE_YML_FILE is copied instead of
  # rendering the template.
  def generate!
    if all_rspec_files.empty?
      info "Using #{SKIP_PIPELINE_YML_FILE} due to no RSpec files to run"
      FileUtils.cp(SKIP_PIPELINE_YML_FILE, pipeline_filename)
      return
    end

    pipeline_yaml = ERB.new(File.read(pipeline_template_path)).result_with_hash(**erb_binding)

    # Collapse the blank lines left behind by ERB tags before writing.
    File.write(pipeline_filename, pipeline_yaml.squeeze("\n").strip)
  end

  private

  attr_reader :pipeline_template_path, :rspec_files_path, :knapsack_report_path

  def info(text)
    $stdout.puts "[#{self.class.name}] #{text}"
  end

  # All RSpec files listed in `rspec_files_path` (whitespace-separated), or [] when the file is missing.
  def all_rspec_files
    @all_rspec_files ||= File.exist?(rspec_files_path) ? File.read(rspec_files_path).split : []
  end

  def pipeline_filename
    @pipeline_filename ||= "#{pipeline_template_path}.yml"
  end

  # Local variables exposed to the ERB template.
  def erb_binding
    { rspec_files_per_test_level: rspec_files_per_test_level }
  end

  # Regexp matching the files of the given test level, built once per level.
  def test_level_regexp(test_level)
    @test_level_regexps ||= {}
    @test_level_regexps[test_level] ||= Quality::TestLevel.new.regexp(test_level)
  end

  # Returns { test_level => { files: [...], parallelization: Integer } }.
  #
  # Levels are processed in TEST_LEVELS order and a file is assigned to the first level it
  # matches, so a file never ends up in two levels.
  def rspec_files_per_test_level
    @rspec_files_per_test_level ||= begin
      remaining_files = all_rspec_files.dup

      TEST_LEVELS.each_with_object(Hash.new { |h, k| h[k] = {} }) do |test_level, memo|
        regexp = test_level_regexp(test_level)
        matching_files, remaining_files = remaining_files.partition { |file| regexp.match?(file) }

        memo[test_level][:files] = matching_files
        memo[test_level][:parallelization] = optimal_nodes_count(test_level, matching_files)
      end
    end
  end

  # Number of parallel nodes for the given files of a test level, capped at MAX_NODES_COUNT.
  def optimal_nodes_count(test_level, rspec_files)
    nodes_count = (rspec_files.size / optimal_test_file_count_per_node_per_test_level(test_level)).ceil
    info "Optimal node count for #{rspec_files.size} #{test_level} RSpec files is #{nodes_count}."

    return nodes_count if nodes_count <= MAX_NODES_COUNT

    info "We don't want to parallelize to more than #{MAX_NODES_COUNT} jobs for now! " \
         "Decreasing the parallelization to #{MAX_NODES_COUNT}."

    MAX_NODES_COUNT
  end

  # How many files of the given level fit in the per-node runtime budget (never less than 1).
  def optimal_test_file_count_per_node_per_test_level(test_level)
    [
      (OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS / average_test_file_duration_in_seconds_per_test_level[test_level]),
      1
    ].max
  end

  # Returns { test_level => average duration in seconds of one test file }.
  #
  # Averages come from the Knapsack report when it has data. A level without any entry in
  # the report (or every level, when there is no report) falls back to
  # DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS instead of dividing by zero.
  def average_test_file_duration_in_seconds_per_test_level
    @average_test_file_duration_in_seconds_per_test_level ||=
      if knapsack_report.any?
        remaining_report = knapsack_report.dup

        TEST_LEVELS.each_with_object({}) do |test_level, memo|
          regexp = test_level_regexp(test_level)
          matching_entries, remaining_report = remaining_report.partition { |file, _| regexp.match?(file) }

          memo[test_level] =
            if matching_entries.empty?
              DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
            else
              matching_entries.sum { |_, duration| duration }.fdiv(matching_entries.size)
            end
        end
      else
        TEST_LEVELS.each_with_object({}) do |test_level, memo| # rubocop:disable Rails/IndexWith
          memo[test_level] = DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS
        end
      end
  end

  # Parsed Knapsack report, or {} when the file is missing or is not valid JSON.
  def knapsack_report
    @knapsack_report ||=
      begin
        File.exist?(knapsack_report_path) ? JSON.parse(File.read(knapsack_report_path)) : {}
      rescue JSON::ParserError => e
        info "[ERROR] Knapsack report at #{knapsack_report_path} couldn't be parsed! Error:\n#{e}"
        {}
      end
  end
end