diff options
Diffstat (limited to 'qa')
4 files changed, 43 insertions, 31 deletions
diff --git a/qa/qa/service/praefect_manager.rb b/qa/qa/service/praefect_manager.rb index 718790a3d01..c2eb50a4f7f 100644 --- a/qa/qa/service/praefect_manager.rb +++ b/qa/qa/service/praefect_manager.rb @@ -50,6 +50,7 @@ module QA def stop_primary_node stop_node(@primary_node) + wait_until_node_is_removed_from_healthy_storages(@primary_node) end def start_primary_node @@ -67,6 +68,7 @@ module QA def stop_secondary_node stop_node(@secondary_node) + wait_until_node_is_removed_from_healthy_storages(@stop_secondary_node) end def start_secondary_node @@ -75,6 +77,7 @@ module QA def stop_tertiary_node stop_node(@tertiary_node) + wait_until_node_is_removed_from_healthy_storages(@tertiary_node) end def start_tertiary_node @@ -82,20 +85,39 @@ module QA end def start_node(name) - shell "docker start #{name}" - end + state = node_state(name) + return if state == "running" + + if state == "paused" + shell "docker unpause #{name}" + end + + if state == "stopped" + shell "docker start #{name}" + end - def stop_node(name) - shell "docker stop #{name}" wait_until_shell_command_matches( "docker inspect -f {{.State.Running}} #{name}", - /false/, + /true/, sleep_interval: 3, max_duration: 180, retry_on_exception: true ) end + def stop_node(name) + shell "docker pause #{name}" + end + + def node_state(name) + state = "stopped" + wait_until_shell_command("docker inspect -f {{.State.Status}} #{name}") do |line| + QA::Runtime::Logger.debug(line) + break state = "running" if line.include?("running") + break state = "paused" if line.include?("paused") + end + end + def clear_replication_queue QA::Runtime::Logger.info("Clearing the replication queue") shell sql_to_docker_exec_cmd( @@ -204,9 +226,8 @@ module QA def wait_for_praefect QA::Runtime::Logger.info("Waiting for health check on praefect") Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do - # praefect runs a grpc server on port 2305, which will return an error 'Connection refused' until such 
time it is ready - wait_until_shell_command("docker exec #{@gitaly_cluster} bash -c 'curl #{@praefect}:2305'") do |line| - break if line.include?('curl: (1) Received HTTP/0.9 when not allowed') + wait_until_shell_command("docker exec #{@praefect} gitlab-ctl status praefect") do |line| + break true if line.include?('run: praefect: ') QA::Runtime::Logger.debug(line.chomp) end @@ -269,9 +290,8 @@ module QA def wait_for_gitaly_health_check(node) QA::Runtime::Logger.info("Waiting for health check on #{node}") Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do - # gitaly runs a grpc server on port 8075, which will return an error 'Connection refused' until such time it is ready - wait_until_shell_command("docker exec #{@praefect} bash -c 'curl #{node}:8075'") do |line| - break if line.include?('curl: (1) Received HTTP/0.9 when not allowed') + wait_until_shell_command("docker exec #{node} gitlab-ctl status gitaly") do |line| + break true if line.include?('run: gitaly: ') QA::Runtime::Logger.debug(line.chomp) end diff --git a/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb index 6a9be19efdd..55ae0d215cf 100644 --- a/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb +++ b/qa/qa/specs/features/api/3_create/gitaly/automatic_failover_and_recovery_spec.rb @@ -9,37 +9,30 @@ module QA project = nil let(:intial_commit_message) { 'Initial commit' } - let(:first_added_commit_message) { 'pushed to primary gitaly node' } - let(:second_added_commit_message) { 'commit to failover node' } + let(:first_added_commit_message) { 'first_added_commit_message to primary gitaly node' } + let(:second_added_commit_message) { 'second_added_commit_message to failover node' } before(:context) do - # Reset the cluster in case previous tests left it in a bad state praefect_manager.start_all_nodes project = 
Resource::Project.fabricate! do |project| project.name = "gitaly_cluster" project.initialize_with_readme = true end - end - - after do - praefect_manager.start_all_nodes + # We need to ensure that the project is replicated to all nodes before proceeding with this test + praefect_manager.wait_for_replication(project.id) end it 'automatically fails over', testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347830' do - # Create a new project with a commit and wait for it to replicate - - # make sure that our project is published to the 'primary' node + # stop other nodes, so we can control which node the commit is sent to praefect_manager.stop_secondary_node praefect_manager.stop_tertiary_node - praefect_manager.wait_for_secondary_node_health_check_failure - praefect_manager.wait_for_tertiary_node_health_check_failure Resource::Repository::ProjectPush.fabricate! do |push| push.project = project push.commit_message = first_added_commit_message push.new_branch = false - push.file_content = "This should exist on all nodes" + push.file_content = 'This file created on gitaly1 while gitaly2/gitaly3 not running' end praefect_manager.start_all_nodes @@ -56,7 +49,7 @@ module QA commit.add_files([ { file_path: "file-#{SecureRandom.hex(8)}", - content: 'This should exist on one node before reconciliation' + content: 'This is created on gitaly2/gitaly3 while gitaly1 is unavailable' } ]) end diff --git a/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb b/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb index e7e23124312..d066953d12e 100644 --- a/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb +++ b/qa/qa/specs/features/api/3_create/gitaly/praefect_replication_queue_spec.rb @@ -4,7 +4,7 @@ require 'parallel' module QA RSpec.describe 'Create' do - context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env, quarantine: { issue: 
'https://gitlab.com/gitlab-org/gitlab/-/issues/346453', type: :flaky } do + context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env do let(:praefect_manager) { Service::PraefectManager.new } let(:project) do Resource::Project.fabricate! do |project| @@ -15,12 +15,10 @@ module QA before do praefect_manager.start_all_nodes - praefect_manager.start_praefect end after do praefect_manager.start_all_nodes - praefect_manager.start_praefect praefect_manager.clear_replication_queue end diff --git a/qa/qa/specs/features/api/3_create/merge_request/push_options_mwps_spec.rb b/qa/qa/specs/features/api/3_create/merge_request/push_options_mwps_spec.rb index 83dcb163d56..6eb3060fb59 100644 --- a/qa/qa/specs/features/api/3_create/merge_request/push_options_mwps_spec.rb +++ b/qa/qa/specs/features/api/3_create/merge_request/push_options_mwps_spec.rb @@ -68,9 +68,10 @@ module QA mr.iid = merge_request[:iid] end - expect(merge_request.state).to eq('opened') - expect(merge_request.merge_status).to eq('checking') - expect(merge_request.merge_when_pipeline_succeeds).to be true + aggregate_failures do + expect(merge_request.state).to eq('opened') + expect(merge_request.merge_when_pipeline_succeeds).to be true + end end it 'merges when pipeline succeeds', testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347842' do |