From 4d4e99a2f163408de44d39ea98131a4231667c24 Mon Sep 17 00:00:00 2001 From: Tomasz Maczukin Date: Fri, 3 Mar 2017 00:43:39 +0100 Subject: Renable StuckCiBuildsWorker to StucjCiJobsWorker --- app/workers/stuck_ci_builds_worker.rb | 59 ------------- app/workers/stuck_ci_jobs_worker.rb | 59 +++++++++++++ config/gitlab.yml.example | 4 +- config/initializers/1_settings.rb | 6 +- spec/workers/stuck_ci_builds_worker_spec.rb | 129 ---------------------------- spec/workers/stuck_ci_jobs_worker_spec.rb | 129 ++++++++++++++++++++++++++++ 6 files changed, 193 insertions(+), 193 deletions(-) delete mode 100644 app/workers/stuck_ci_builds_worker.rb create mode 100644 app/workers/stuck_ci_jobs_worker.rb delete mode 100644 spec/workers/stuck_ci_builds_worker_spec.rb create mode 100644 spec/workers/stuck_ci_jobs_worker_spec.rb diff --git a/app/workers/stuck_ci_builds_worker.rb b/app/workers/stuck_ci_builds_worker.rb deleted file mode 100644 index 0c51c34a47f..00000000000 --- a/app/workers/stuck_ci_builds_worker.rb +++ /dev/null @@ -1,59 +0,0 @@ -class StuckCiBuildsWorker - include Sidekiq::Worker - include CronjobQueue - - EXCLUSIVE_LEASE_KEY = 'stuck_ci_builds_worker_lease'.freeze - - BUILD_RUNNING_OUTDATED_TIMEOUT = 1.hour - BUILD_PENDING_OUTDATED_TIMEOUT = 1.day - BUILD_PENDING_STUCK_TIMEOUT = 1.hour - - def perform - return unless try_obtain_lease - - Rails.logger.info "#{self.class}: Cleaning stuck builds" - - drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT - drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT - drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT - - remove_lease - end - - private - - def try_obtain_lease - @uuid = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes).try_obtain - end - - def remove_lease - Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) - end - - def drop(status, timeout) - search(status, timeout) do |build| - drop_build :outdated, build, status, timeout - end - end - - def drop_stuck(status, timeout) - search(status, timeout) do |build| - return unless build.stuck? - drop_build :stuck, build, status, timeout - end - end - - def search(status, timeout) - builds = Ci::Build.where(status: status).where('ci_builds.updated_at < ?', timeout.ago) - builds.joins(:project).includes(:tags, :runner, project: :namespace).find_each(batch_size: 50).each do |build| - yield(build) - end - end - - def drop_build(type, build, status, timeout) - Rails.logger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status}, timeout: #{timeout})" - Gitlab::OptimisticLocking.retry_lock(build, 3) do |b| - b.drop - end - end -end diff --git a/app/workers/stuck_ci_jobs_worker.rb b/app/workers/stuck_ci_jobs_worker.rb new file mode 100644 index 00000000000..ae8c980c9e4 --- /dev/null +++ b/app/workers/stuck_ci_jobs_worker.rb @@ -0,0 +1,59 @@ +class StuckCiJobsWorker + include Sidekiq::Worker + include CronjobQueue + + EXCLUSIVE_LEASE_KEY = 'stuck_ci_builds_worker_lease'.freeze + + BUILD_RUNNING_OUTDATED_TIMEOUT = 1.hour + BUILD_PENDING_OUTDATED_TIMEOUT = 1.day + BUILD_PENDING_STUCK_TIMEOUT = 1.hour + + def perform + return unless try_obtain_lease + + Rails.logger.info "#{self.class}: Cleaning stuck builds" + + drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT + drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT + drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT + + remove_lease + end + + private + + def try_obtain_lease + @uuid = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes).try_obtain + end + + def remove_lease + Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) + end + + def drop(status, timeout) + search(status, timeout) do |build| + drop_build :outdated, build, status, timeout + end + end + + def drop_stuck(status, timeout) + search(status, timeout) do |build| + return unless build.stuck? + drop_build :stuck, build, status, timeout + end + end + + def search(status, timeout) + builds = Ci::Build.where(status: status).where('ci_builds.updated_at < ?', timeout.ago) + builds.joins(:project).includes(:tags, :runner, project: :namespace).find_each(batch_size: 50).each do |build| + yield(build) + end + end + + def drop_build(type, build, status, timeout) + Rails.logger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status}, timeout: #{timeout})" + Gitlab::OptimisticLocking.retry_lock(build, 3) do |b| + b.drop + end + end +end diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index b4f47b30622..8f99a4d541f 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -177,8 +177,8 @@ production: &base # Periodically executed jobs, to self-heal Gitlab, do external synchronizations, etc. # Please read here for more information: https://github.com/ondrejbartas/sidekiq-cron#adding-cron-job cron_jobs: - # Flag stuck CI builds as failed - stuck_ci_builds_worker: + # Flag stuck CI jobs as failed + stuck_ci_jobs_worker: cron: "0 * * * *" # Remove expired build artifacts expire_build_artifacts_worker: diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 3ec57c5bb52..5aeaa7eab81 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -308,9 +308,9 @@ Settings.gravatar['host'] = Settings.host_without_www(Settings.gravatar[ # Cron Jobs # Settings['cron_jobs'] ||= Settingslogic.new({}) -Settings.cron_jobs['stuck_ci_builds_worker'] ||= Settingslogic.new({}) -Settings.cron_jobs['stuck_ci_builds_worker']['cron'] ||= '0 * * * *' -Settings.cron_jobs['stuck_ci_builds_worker']['job_class'] = 'StuckCiBuildsWorker' +Settings.cron_jobs['stuck_ci_jobs_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['stuck_ci_jobs_worker']['cron'] ||= '0 * * * *' +Settings.cron_jobs['stuck_ci_jobs_worker']['job_class'] = 'StuckCiJobsWorker' Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *' Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker' diff --git a/spec/workers/stuck_ci_builds_worker_spec.rb b/spec/workers/stuck_ci_builds_worker_spec.rb deleted file mode 100644 index 82bdc2b14f3..00000000000 --- a/spec/workers/stuck_ci_builds_worker_spec.rb +++ /dev/null @@ -1,129 +0,0 @@ -require 'spec_helper' - -describe StuckCiBuildsWorker do - let!(:runner) { create :ci_runner } - let!(:build) { create :ci_build, runner: runner } - let(:worker) { described_class.new } - let(:exclusive_lease_uuid) { SecureRandom.uuid } - - subject do - build.reload - build.status - end - - before do - build.update!(status: status, updated_at: updated_at) - allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(exclusive_lease_uuid) - end - - shared_examples 'build is dropped' do - it 'changes status' do - worker.perform - is_expected.to eq('failed') - end - end - - shared_examples 'build is unchanged' do - it "doesn't change status" do - worker.perform - is_expected.to eq(status) - end - end - - context 'when build is pending' do - let(:status) { 'pending' } - - context 'when build is not stuck' do - before { allow_any_instance_of(Ci::Build).to receive(:stuck?).and_return(false) } - - context 'when build was not updated for more than 1 day ago' do - let(:updated_at) { 2.days.ago } - it_behaves_like 'build is dropped' - end - - context 'when build was updated in less than 1 day ago' do - let(:updated_at) { 6.hours.ago } - it_behaves_like 'build is unchanged' - end - - context 'when build was not updated for more than 1 hour ago' do - let(:updated_at) { 2.hours.ago } - it_behaves_like 'build is unchanged' - end - end - - context 'when build is stuck' do - before { allow_any_instance_of(Ci::Build).to receive(:stuck?).and_return(true) } - - context 'when build was not updated for more than 1 hour ago' do - let(:updated_at) { 2.hours.ago } - it_behaves_like 'build is dropped' - end - - context 'when build was updated in less than 1 hour ago' do - let(:updated_at) { 30.minutes.ago } - it_behaves_like 'build is unchanged' - end - end - end - - context 'when build is running' do - let(:status) { 'running' } - - context 'when build was not updated for more than 1 hour ago' do - let(:updated_at) { 2.hours.ago } - it_behaves_like 'build is dropped' - end - - context 'when build was updated in less than 1 hour ago' do - let(:updated_at) { 30.minutes.ago } - it_behaves_like 'build is unchanged' - end - end - - %w(success skipped failed canceled).each do |status| - context "when build is #{status}" do - let(:status) { status } - let(:updated_at) { 2.days.ago } - it_behaves_like 'build is unchanged' - end - end - - context 'for deleted project' do - let(:status) { 'running' } - let(:updated_at) { 2.days.ago } - - before { build.project.update(pending_delete: true) } - - it 'does not drop build' do - expect_any_instance_of(Ci::Build).not_to receive(:drop) - worker.perform - end - end - - describe 'exclusive lease' do - let(:status) { 'running' } - let(:updated_at) { 2.days.ago } - let(:worker2) { described_class.new } - - it 'is guard by exclusive lease when executed concurrently' do - expect(worker).to receive(:drop).at_least(:once) - expect(worker2).not_to receive(:drop) - worker.perform - allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(false) - worker2.perform - end - - it 'can be executed in sequence' do - expect(worker).to receive(:drop).at_least(:once) - expect(worker2).to receive(:drop).at_least(:once) - worker.perform - worker2.perform - end - - it 'cancels exclusive lease after worker perform' do - expect(Gitlab::ExclusiveLease).to receive(:cancel).with(described_class::EXCLUSIVE_LEASE_KEY, exclusive_lease_uuid) - worker.perform - end - end -end diff --git a/spec/workers/stuck_ci_jobs_worker_spec.rb b/spec/workers/stuck_ci_jobs_worker_spec.rb new file mode 100644 index 00000000000..8434b0c8e5b --- /dev/null +++ b/spec/workers/stuck_ci_jobs_worker_spec.rb @@ -0,0 +1,129 @@ +require 'spec_helper' + +describe StuckCiJobsWorker do + let!(:runner) { create :ci_runner } + let!(:job) { create :ci_build, runner: runner } + let(:worker) { described_class.new } + let(:exclusive_lease_uuid) { SecureRandom.uuid } + + subject do + job.reload + job.status + end + + before do + job.update!(status: status, updated_at: updated_at) + allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(exclusive_lease_uuid) + end + + shared_examples 'job is dropped' do + it 'changes status' do + worker.perform + is_expected.to eq('failed') + end + end + + shared_examples 'job is unchanged' do + it "doesn't change status" do + worker.perform + is_expected.to eq(status) + end + end + + context 'when job is pending' do + let(:status) { 'pending' } + + context 'when job is not stuck' do + before { allow_any_instance_of(Ci::Build).to receive(:stuck?).and_return(false) } + + context 'when job was not updated for more than 1 day ago' do + let(:updated_at) { 2.days.ago } + it_behaves_like 'job is dropped' + end + + context 'when job was updated in less than 1 day ago' do + let(:updated_at) { 6.hours.ago } + it_behaves_like 'job is unchanged' + end + + context 'when job was not updated for more than 1 hour ago' do + let(:updated_at) { 2.hours.ago } + it_behaves_like 'job is unchanged' + end + end + + context 'when job is stuck' do + before { allow_any_instance_of(Ci::Build).to receive(:stuck?).and_return(true) } + + context 'when job was not updated for more than 1 hour ago' do + let(:updated_at) { 2.hours.ago } + it_behaves_like 'job is dropped' + end + + context 'when job was updated in less than 1 hour ago' do + let(:updated_at) { 30.minutes.ago } + it_behaves_like 'job is unchanged' + end + end + end + + context 'when job is running' do + let(:status) { 'running' } + + context 'when job was not updated for more than 1 hour ago' do + let(:updated_at) { 2.hours.ago } + it_behaves_like 'job is dropped' + end + + context 'when job was updated in less than 1 hour ago' do + let(:updated_at) { 30.minutes.ago } + it_behaves_like 'job is unchanged' + end + end + + %w(success skipped failed canceled).each do |status| + context "when job is #{status}" do + let(:status) { status } + let(:updated_at) { 2.days.ago } + it_behaves_like 'job is unchanged' + end + end + + context 'for deleted project' do + let(:status) { 'running' } + let(:updated_at) { 2.days.ago } + + before { job.project.update(pending_delete: true) } + + it 'does not drop job' do + expect_any_instance_of(Ci::Build).not_to receive(:drop) + worker.perform + end + end + + describe 'exclusive lease' do + let(:status) { 'running' } + let(:updated_at) { 2.days.ago } + let(:worker2) { described_class.new } + + it 'is guard by exclusive lease when executed concurrently' do + expect(worker).to receive(:drop).at_least(:once) + expect(worker2).not_to receive(:drop) + worker.perform + allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(false) + worker2.perform + end + + it 'can be executed in sequence' do + expect(worker).to receive(:drop).at_least(:once) + expect(worker2).to receive(:drop).at_least(:once) + worker.perform + worker2.perform + end + + it 'cancels exclusive lease after worker perform' do + expect(Gitlab::ExclusiveLease).to receive(:cancel).with(described_class::EXCLUSIVE_LEASE_KEY, exclusive_lease_uuid) + worker.perform + end + end +end -- cgit v1.2.1