summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn T Skarbek <jtslear@gmail.com>2019-03-04 14:45:08 -0500
committerJohn T Skarbek <jtslear@gmail.com>2019-03-04 14:45:08 -0500
commitad220b8b0f8319a9574ed2c490206ade63f22d44 (patch)
tree37469f16101018bb3fe1eb3728257fac6425da03
parente4e0ed92f25027a8873245b02d44003e4e44daef (diff)
parent5c80bbb33c12490bc5fa711642a40fc16bdb79a4 (diff)
downloadgitlab-ce-ad220b8b0f8319a9574ed2c490206ade63f22d44.tar.gz
Merge remote-tracking branch 'origin/master' into 11-9-stable
-rw-r--r--app/assets/javascripts/vue_shared/components/ci_icon.vue1
-rw-r--r--changelogs/unreleased/40396-sidekiq-in-process-group.yml5
-rw-r--r--config/initializers/sidekiq.rb3
-rw-r--r--doc/administration/operations/sidekiq_memory_killer.md5
-rw-r--r--lib/gitlab/sidekiq_middleware/memory_killer.rb17
-rw-r--r--lib/gitlab/sidekiq_signals.rb42
-rw-r--r--qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb3
-rw-r--r--qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb4
-rw-r--r--qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb4
-rw-r--r--spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb13
-rw-r--r--spec/lib/gitlab/sidekiq_signals_spec.rb69
11 files changed, 159 insertions, 7 deletions
diff --git a/app/assets/javascripts/vue_shared/components/ci_icon.vue b/app/assets/javascripts/vue_shared/components/ci_icon.vue
index 2f498c4fa2a..e6f0a1c69cd 100644
--- a/app/assets/javascripts/vue_shared/components/ci_icon.vue
+++ b/app/assets/javascripts/vue_shared/components/ci_icon.vue
@@ -21,6 +21,7 @@ import Icon from '../../vue_shared/components/icon.vue';
* - Jobs table
* - Jobs show view header
* - Jobs show view sidebar
+ * - Linked pipelines
*/
const validSizes = [8, 12, 16, 18, 24, 32, 48, 72];
diff --git a/changelogs/unreleased/40396-sidekiq-in-process-group.yml b/changelogs/unreleased/40396-sidekiq-in-process-group.yml
new file mode 100644
index 00000000000..e41557e20d0
--- /dev/null
+++ b/changelogs/unreleased/40396-sidekiq-in-process-group.yml
@@ -0,0 +1,5 @@
+---
+title: 'sidekiq: terminate child processes at shutdown'
+merge_request: 25669
+author:
+type: added
diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb
index 4a8d72e37a5..2e4aa9c1053 100644
--- a/config/initializers/sidekiq.rb
+++ b/config/initializers/sidekiq.rb
@@ -77,6 +77,9 @@ Sidekiq.configure_server do |config|
# Avoid autoload issue such as 'Mail::Parsers::AddressStruct'
# https://github.com/mikel/mail/issues/912#issuecomment-214850355
Mail.eager_autoload!
+
+ # Ensure the whole process group is terminated if possible
+ Gitlab::SidekiqSignals.install!(Sidekiq::CLI::SIGNAL_HANDLERS)
end
Sidekiq.configure_client do |config|
diff --git a/doc/administration/operations/sidekiq_memory_killer.md b/doc/administration/operations/sidekiq_memory_killer.md
index cbffd883774..8eac42f2fe2 100644
--- a/doc/administration/operations/sidekiq_memory_killer.md
+++ b/doc/administration/operations/sidekiq_memory_killer.md
@@ -17,6 +17,11 @@ With the default settings, the MemoryKiller will cause a Sidekiq restart no
more often than once every 15 minutes, with the restart causing about one
minute of delay for incoming background jobs.
+Some background jobs rely on long-running external processes. To ensure these
+are cleanly terminated when Sidekiq is restarted, each Sidekiq process should be
+run as a process group leader (e.g., using `chpst -P`). If using Omnibus or the
+`bin/background_jobs` script with `runit` installed, this is handled for you.
+
## Configuring the MemoryKiller
The MemoryKiller is controlled using environment variables.
diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb
index 47333d257eb..ed2c7ee9a2d 100644
--- a/lib/gitlab/sidekiq_middleware/memory_killer.rb
+++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb
@@ -36,11 +36,13 @@ module Gitlab
# Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
# Then, tell Sidekiq to gracefully shut down by giving jobs a few more
# moments to finish, killing and requeuing them if they didn't, and
- # then terminating itself.
+ # then terminating itself. Sidekiq will replicate the TERM to all its
+ # children if it can.
wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')
# Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
- wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
+ # Kill the whole pgroup, so we can be sure no children are left behind
+ wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
end
end
@@ -53,6 +55,17 @@ module Gitlab
output.to_i
end
+ # If this sidekiq process is pgroup leader, signal to the whole pgroup
+ def wait_and_signal_pgroup(time, signal, explanation)
+ return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid
+
+ Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
+ sleep(time)
+
+ Sidekiq.logger.warn "sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})"
+ Process.kill(signal, "-#{pid}")
+ end
+
def wait_and_signal(time, signal, explanation)
Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})"
sleep(time)
diff --git a/lib/gitlab/sidekiq_signals.rb b/lib/gitlab/sidekiq_signals.rb
new file mode 100644
index 00000000000..b704ee9a0a9
--- /dev/null
+++ b/lib/gitlab/sidekiq_signals.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+module Gitlab
+ # As a process group leader, we can ensure that children of sidekiq are killed
+ # at the same time as sidekiq itself, to stop long-lived children from being
+ # reparented to init and "escaping". To do this, we override the default
+ # handlers used by sidekiq for INT and TERM signals
+ module SidekiqSignals
+ REPLACE_SIGNALS = %w[INT TERM].freeze
+
+ SIDEKIQ_CHANGED_MESSAGE =
+ "Intercepting signal handlers: #{REPLACE_SIGNALS.join(", ")} failed. " \
+ "Sidekiq should have registered them, but appears not to have done so."
+
+ def self.install!(sidekiq_handlers)
+ # This only works if we're process group leader
+ return unless Process.getpgrp == Process.pid
+
+ raise SIDEKIQ_CHANGED_MESSAGE unless
+ REPLACE_SIGNALS == sidekiq_handlers.keys & REPLACE_SIGNALS
+
+ REPLACE_SIGNALS.each do |signal|
+ old_handler = sidekiq_handlers[signal]
+ sidekiq_handlers[signal] = ->(cli) do
+ blindly_signal_pgroup!(signal)
+ old_handler.call(cli)
+ end
+ end
+ end
+
+ # The process group leader can forward INT and TERM signals to the whole
+ # group. However, the forwarded signal is *also* received by the leader,
+ # which could lead to an infinite loop. We can avoid this by temporarily
+ # ignoring the forwarded signal. This may cause us to miss some repeated
+ # signals from outside the process group, but that isn't fatal.
+ def self.blindly_signal_pgroup!(signal)
+ old_trap = trap(signal, 'IGNORE')
+ Process.kill(signal, "-#{Process.getpgrp}")
+ trap(signal, old_trap)
+ end
+ end
+end
diff --git a/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb b/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb
index e172206eb88..d4cedc9362d 100644
--- a/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb
+++ b/qa/qa/specs/features/browser_ui/1_manage/project/view_project_activity_spec.rb
@@ -1,8 +1,7 @@
# frozen_string_literal: true
module QA
- # Failure issue: https://gitlab.com/gitlab-org/quality/staging/issues/21
- context 'Manage', :quarantine do
+ context 'Manage' do
describe 'Project activity' do
it 'user creates an event in the activity page upon Git push' do
Runtime::Browser.visit(:gitlab, Page::Main::Login)
diff --git a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb
index d1535d6519d..01a367fceac 100644
--- a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb
+++ b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_http_spec.rb
@@ -1,7 +1,9 @@
# frozen_string_literal: true
module QA
- context 'Create' do
+ # Git protocol v2 is temporarily disabled
+ # https://gitlab.com/gitlab-org/gitlab-ce/issues/55769 (confidential)
+ context 'Create', :quarantine do
describe 'Push over HTTP using Git protocol version 2', :requires_git_protocol_v2 do
it 'user pushes to the repository' do
Runtime::Browser.visit(:gitlab, Page::Main::Login)
diff --git a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb
index 48800cc81e5..eb59b54e9ab 100644
--- a/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb
+++ b/qa/qa/specs/features/browser_ui/3_create/repository/protocol_v2_push_ssh_spec.rb
@@ -1,7 +1,9 @@
# frozen_string_literal: true
module QA
- context 'Create' do
+ # Git protocol v2 is temporarily disabled
+ # https://gitlab.com/gitlab-org/gitlab-ce/issues/55769 (confidential)
+ context 'Create', :quarantine do
describe 'Push over SSH using Git protocol version 2', :requires_git_protocol_v2 do
# Note: If you run this test against GDK make sure you've enabled sshd and
# enabled setting the Git protocol by adding `AcceptEnv GIT_PROTOCOL` to
diff --git a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
index 5df56178df2..ff8c0825ee4 100644
--- a/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
+++ b/spec/lib/gitlab/sidekiq_middleware/memory_killer_spec.rb
@@ -35,7 +35,7 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
stub_const("#{described_class}::MAX_RSS", 5.kilobytes)
end
- it 'sends the STP, TERM and KILL signals at expected times' do
+ it 'sends the TSTP, TERM and KILL signals at expected times' do
expect(subject).to receive(:sleep).with(15 * 60).ordered
expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
@@ -47,6 +47,17 @@ describe Gitlab::SidekiqMiddleware::MemoryKiller do
run
end
+
+ it 'sends TSTP and TERM to the pid, but KILL to the pgroup, when running as process leader' do
+ allow(Process).to receive(:getpgrp) { pid }
+ allow(subject).to receive(:sleep)
+
+ expect(Process).to receive(:kill).with('SIGTSTP', pid).ordered
+ expect(Process).to receive(:kill).with('SIGTERM', pid).ordered
+ expect(Process).to receive(:kill).with('SIGKILL', "-#{pid}").ordered
+
+ run
+ end
end
context 'when MAX_RSS is not exceeded' do
diff --git a/spec/lib/gitlab/sidekiq_signals_spec.rb b/spec/lib/gitlab/sidekiq_signals_spec.rb
new file mode 100644
index 00000000000..4483224f49d
--- /dev/null
+++ b/spec/lib/gitlab/sidekiq_signals_spec.rb
@@ -0,0 +1,69 @@
+require 'spec_helper'
+
+describe Gitlab::SidekiqSignals do
+ describe '.install' do
+ let(:result) { Hash.new { |h, k| h[k] = 0 } }
+ let(:int_handler) { -> (_) { result['INT'] += 1 } }
+ let(:term_handler) { -> (_) { result['TERM'] += 1 } }
+ let(:other_handler) { -> (_) { result['OTHER'] += 1 } }
+ let(:signals) { { 'INT' => int_handler, 'TERM' => term_handler, 'OTHER' => other_handler } }
+
+ context 'not a process group leader' do
+ before do
+ allow(Process).to receive(:getpgrp) { Process.pid * 2 }
+ end
+
+ it 'does nothing' do
+ expect { described_class.install!(signals) }
+ .not_to change { signals }
+ end
+ end
+
+ context 'as a process group leader' do
+ before do
+ allow(Process).to receive(:getpgrp) { Process.pid }
+ end
+
+ it 'installs its own signal handlers for TERM and INT only' do
+ described_class.install!(signals)
+
+ expect(signals['INT']).not_to eq(int_handler)
+ expect(signals['TERM']).not_to eq(term_handler)
+ expect(signals['OTHER']).to eq(other_handler)
+ end
+
+ %w[INT TERM].each do |signal|
+ it "installs a forwarding signal handler for #{signal}" do
+ described_class.install!(signals)
+
+ expect(described_class)
+ .to receive(:trap)
+ .with(signal, 'IGNORE')
+ .and_return(:original_trap)
+ .ordered
+
+ expect(Process)
+ .to receive(:kill)
+ .with(signal, "-#{Process.pid}")
+ .ordered
+
+ expect(described_class)
+ .to receive(:trap)
+ .with(signal, :original_trap)
+ .ordered
+
+ signals[signal].call(:cli)
+
+ expect(result[signal]).to eq(1)
+ end
+
+ it "raises if sidekiq no longer traps SIG#{signal}" do
+ signals.delete(signal)
+
+ expect { described_class.install!(signals) }
+ .to raise_error(/Sidekiq should have registered/)
+ end
+ end
+ end
+ end
+end