diff options
author | John T Skarbek <jtslear@gmail.com> | 2019-03-04 14:45:08 -0500 |
---|---|---|
committer | John T Skarbek <jtslear@gmail.com> | 2019-03-04 14:45:08 -0500 |
commit | ad220b8b0f8319a9574ed2c490206ade63f22d44 (patch) | |
tree | 37469f16101018bb3fe1eb3728257fac6425da03 | |
parent | e4e0ed92f25027a8873245b02d44003e4e44daef (diff) | |
parent | 5c80bbb33c12490bc5fa711642a40fc16bdb79a4 (diff) | |
download | gitlab-ce-ad220b8b0f8319a9574ed2c490206ade63f22d44.tar.gz |
Merge remote-tracking branch 'origin/master' into 11-9-stable
11 files changed, 159 insertions, 7 deletions
diff --git a/app/assets/javascripts/vue_shared/components/ci_icon.vue b/app/assets/javascripts/vue_shared/components/ci_icon.vue index 2f498c4fa2a..e6f0a1c69cd 100644 --- a/app/assets/javascripts/vue_shared/components/ci_icon.vue +++ b/app/assets/javascripts/vue_shared/components/ci_icon.vue @@ -21,6 +21,7 @@ import Icon from '../../vue_shared/components/icon.vue'; * - Jobs table * - Jobs show view header * - Jobs show view sidebar + * - Linked pipelines */ const validSizes = [8, 12, 16, 18, 24, 32, 48, 72]; diff --git a/changelogs/unreleased/40396-sidekiq-in-process-group.yml b/changelogs/unreleased/40396-sidekiq-in-process-group.yml new file mode 100644 index 00000000000..e41557e20d0 --- /dev/null +++ b/changelogs/unreleased/40396-sidekiq-in-process-group.yml @@ -0,0 +1,5 @@ +--- +title: 'sidekiq: terminate child processes at shutdown' +merge_request: 25669 +author: +type: added diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 4a8d72e37a5..2e4aa9c1053 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -77,6 +77,9 @@ Sidekiq.configure_server do |config| # Avoid autoload issue such as 'Mail::Parsers::AddressStruct' # https://github.com/mikel/mail/issues/912#issuecomment-214850355 Mail.eager_autoload! + + # Ensure the whole process group is terminated if possible + Gitlab::SidekiqSignals.install!(Sidekiq::CLI::SIGNAL_HANDLERS) end Sidekiq.configure_client do |config| diff --git a/doc/administration/operations/sidekiq_memory_killer.md b/doc/administration/operations/sidekiq_memory_killer.md index cbffd883774..8eac42f2fe2 100644 --- a/doc/administration/operations/sidekiq_memory_killer.md +++ b/doc/administration/operations/sidekiq_memory_killer.md @@ -17,6 +17,11 @@ With the default settings, the MemoryKiller will cause a Sidekiq restart no more often than once every 15 minutes, with the restart causing about one minute of delay for incoming background jobs. +Some background jobs rely on long-running external processes. To ensure these +are cleanly terminated when Sidekiq is restarted, each Sidekiq process should be +run as a process group leader (e.g., using `chpst -P`). If using Omnibus or the +`bin/background_jobs` script with `runit` installed, this is handled for you. + ## Configuring the MemoryKiller The MemoryKiller is controlled using environment variables. diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb index 47333d257eb..ed2c7ee9a2d 100644 --- a/lib/gitlab/sidekiq_middleware/memory_killer.rb +++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb @@ -36,11 +36,13 @@ module Gitlab # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish. # Then, tell Sidekiq to gracefully shut down by giving jobs a few more # moments to finish, killing and requeuing them if they didn't, and - # then terminating itself. + # then terminating itself. Sidekiq will replicate the TERM to all its + # children if it can. wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down') # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't. - wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') + # Kill the whole pgroup, so we can be sure no children are left behind + wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') end end @@ -53,6 +55,17 @@ module Gitlab output.to_i end + # If this sidekiq process is pgroup leader, signal to the whole pgroup + def wait_and_signal_pgroup(time, signal, explanation) + return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid + + Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})" + sleep(time) + + Sidekiq.logger.warn "sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})" + Process.kill(signal, "-#{pid}") + end + def wait_and_signal(time, signal, explanation) Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})" sleep(time) diff --git a/lib/gitlab/sidekiq_signals.rb b/lib/gitlab/sidekiq_signals.rb new file mode 100644 index 00000000000..b704ee9a0a9 --- /dev/null +++ b/lib/gitlab/sidekiq_signals.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Gitlab + # As a process group leader, we can ensure that children of sidekiq are killed + # at the same time as sidekiq itself, to stop long-lived children from being + # reparented to init and "escaping". To do this, we override the default + # handlers used by sidekiq for INT and TERM signals + module SidekiqSignals + REPLACE_SIGNALS = %w[INT TERM].freeze + + SIDEKIQ_CHANGED_MESSAGE = + "Intercepting signal handlers: #{REPLACE_SIGNALS.join(", ")} failed. " \ + "Sidekiq should have registered them, but appears not to have done so." + + def self.install!(sidekiq_handlers) + # This only works if we're process group leader + return unless Process.getpgrp == Process.pid + + raise SIDEKIQ_CHANGED_MESSAGE unless + REPLACE_SIGNALS == sidekiq_handlers.keys & REPLACE_SIGNALS + + REPLACE_SIGNALS.each do |signal| + old_handler = sidekiq_handlers[signal] + sidekiq_handlers[signal] = ->(cli) do + blindly_signal_pgroup!(signal) + old_handler.call(cli) + end + end + end + + # The process group leader can forward INT and TERM signals to the whole + # group. However, the forwarded signal is *also* received by the leader, + # which could lead to an infinite loop. We can avoid this by temporarily + # ignoring the forwarded signal. This may cause us to miss some repeated + # signals from outside the process group, but that isn't fatal. + def self.blindly_signal_pgroup!(signal) + old_trap = trap(signal, 'IGNORE') + Process.kill(signal, "-#{Process.getpgrp}") + trap(signal, old_trap) + end + end +end diff --git a/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb b/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb index e172206eb88..d4cedc9362d 100644 --- a/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb +++ b/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb @@ -1,8 +1,7 @@ # frozen_string_literal: true module QA - # Failure issue: https://gitlab.com/gitlab-org/quality/staging/issues/21 - context 'Manage', :quarantine do + context 'Manage' do describe 'Project activity' do it 'user creates an event in the activity page upon Git push' do Runtime::Browser.visit(:gitlab, Page::Main::Login) diff --git a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb index d1535d6519d..01a367fceac 100644 --- a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb +++ b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true module QA - context 'Create' do + # Git protocol v2 is temporarily disabled + # https://gitlab.com/gitlab-org/gitlab-ce/issues/55769 (confidential) + context 'Create', :quarantine do describe 'Push over HTTP using Git protocol version 2', :requires_git_protocol_v2 do it 'user pushes to the repository' do Runtime::Browser.visit(:gitlab, Page::Main::Login) diff --git a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb index 48800cc81e5..eb59b54e9ab 100644 --- a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb +++ b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true module QA - context 'Create' do + # Git protocol v2 is temporarily disabled + # https://gitlab.com/gitlab-org/gitlab-ce/issues/55769 (confidential) + context 'Create', :quarantine do describe 'Push over SSH using Git protocol version 2', :requires_git_protocol_v2 do # Note: If you run this test against GDK make sure you've enabled sshd and # enabled setting the Git protocol by adding `AcceptEnv GIT_PROTOCOL` to diff --git a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb index 5df56178df2..ff8c0825ee4 100644 --- a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb +++ b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb @@ -35,7 +35,7 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do stub_const("#{described_class}::MAX_RSS", 5.kilobytes) end - it 'sends the STP, TERM and KILL signals at expected times' do + it 'sends the TSTP, TERM and KILL signals at expected times' do expect(subject).to receive(:sleep).with(15 * 60).ordered expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered @@ -47,6 +47,17 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do run end + + it 'sends TSTP and TERM to the pid, but KILL to the pgroup, when running as process leader' do + allow(Process).to receive(:getpgrp) { pid } + allow(subject).to receive(:sleep) + + expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered + expect(Process).to receive(:kill).with('SIGTERM', pid).ordered + expect(Process).to receive(:kill).with('SIGKILL', "-#{pid}").ordered + + run + end end context 'when MAX_RSS is not exceeded' do diff --git a/spec/lib/gitlab/sidekiq_signals_spec.rb b/spec/lib/gitlab/sidekiq_signals_spec.rb new file mode 100644 index 00000000000..4483224f49d --- /dev/null +++ b/spec/lib/gitlab/sidekiq_signals_spec.rb @@ -0,0 +1,69 @@ +require 'spec_helper' + +describe Gitlab::SidekiqSignals do + describe '.install' do + let(:result) { Hash.new { |h, k| h[k] = 0 } } + let(:int_handler) { -> (_) { result['INT'] += 1 } } + let(:term_handler) { -> (_) { result['TERM'] += 1 } } + let(:other_handler) { -> (_) { result['OTHER'] += 1 } } + let(:signals) { { 'INT' => int_handler, 'TERM' => term_handler, 'OTHER' => other_handler } } + + context 'not a process group leader' do + before do + allow(Process).to receive(:getpgrp) { Process.pid * 2 } + end + + it 'does nothing' do + expect { described_class.install!(signals) } + .not_to change { signals } + end + end + + context 'as a process group leader' do + before do + allow(Process).to receive(:getpgrp) { Process.pid } + end + + it 'installs its own signal handlers for TERM and INT only' do + described_class.install!(signals) + + expect(signals['INT']).not_to eq(int_handler) + expect(signals['TERM']).not_to eq(term_handler) + expect(signals['OTHER']).to eq(other_handler) + end + + %w[INT TERM].each do |signal| + it "installs a forwarding signal handler for #{signal}" do + described_class.install!(signals) + + expect(described_class) + .to receive(:trap) + .with(signal, 'IGNORE') + .and_return(:original_trap) + .ordered + + expect(Process) + .to receive(:kill) + .with(signal, "-#{Process.pid}") + .ordered + + expect(described_class) + .to receive(:trap) + .with(signal, :original_trap) + .ordered + + signals[signal].call(:cli) + + expect(result[signal]).to eq(1) + end + + it "raises if sidekiq no longer traps SIG#{signal}" do + signals.delete(signal) + + expect { described_class.install!(signals) } + .to raise_error(/Sidekiq should have registered/) + end + end + end + end +end |