summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKamil Trzciński <ayufan@ayufan.eu>2018-11-29 10:11:14 +0000
committerKamil Trzciński <ayufan@ayufan.eu>2018-11-29 10:11:14 +0000
commit938b891f89403ced0268699a6295531db508be17 (patch)
tree61dc5668629ccad697d6285098d516e9ede43180
parentf6e51e83a597e2f7cf3cf977c7c2edd67899c4c4 (diff)
parent9fa869778fcb5f4445f4981dd7087f31bcbb440b (diff)
downloadgitlab-ce-938b891f89403ced0268699a6295531db508be17.tar.gz
Merge branch 'add-counter-for-trace-chunks' into 'master'
Improve traceability for failed attempts of archiving traces Closes #51502 See merge request gitlab-org/gitlab-ce!21826
-rw-r--r--app/services/ci/archive_trace_service.rb35
-rw-r--r--app/workers/archive_trace_worker.rb2
-rw-r--r--app/workers/ci/archive_traces_cron_worker.rb14
-rw-r--r--spec/services/ci/archive_trace_service_spec.rb51
-rw-r--r--spec/workers/archive_trace_worker_spec.rb8
-rw-r--r--spec/workers/ci/archive_traces_cron_worker_spec.rb10
6 files changed, 102 insertions, 18 deletions
diff --git a/app/services/ci/archive_trace_service.rb b/app/services/ci/archive_trace_service.rb
new file mode 100644
index 00000000000..a1dd00721b5
--- /dev/null
+++ b/app/services/ci/archive_trace_service.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Ci
+ class ArchiveTraceService
+ def execute(job)
+ job.trace.archive!
+ rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
+ # It's already archived, thus we can safely ignore this exception.
+ rescue => e
+ # Tracks this error with application logs, Sentry, and Prometheus.
+ # If `archive!` keeps failing for over a week, that could incur data loss.
+ # (See more https://docs.gitlab.com/ee/administration/job_traces.html#new-live-trace-architecture)
+ # In order to avoid interrupting the system, we do not raise an exception here.
+ archive_error(e, job)
+ end
+
+ private
+
+ def failed_archive_counter
+ @failed_archive_counter ||=
+ Gitlab::Metrics.counter(:job_trace_archive_failed_total,
+ "Counter of failed attempts of trace archiving")
+ end
+
+ def archive_error(error, job)
+ failed_archive_counter.increment
+ Rails.logger.error "Failed to archive trace. id: #{job.id} message: #{error.message}"
+
+ Gitlab::Sentry
+ .track_exception(error,
+ issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
+ extra: { job_id: job.id })
+ end
+ end
+end
diff --git a/app/workers/archive_trace_worker.rb b/app/workers/archive_trace_worker.rb
index c1283e9b2fc..4a9becf0ca7 100644
--- a/app/workers/archive_trace_worker.rb
+++ b/app/workers/archive_trace_worker.rb
@@ -7,7 +7,7 @@ class ArchiveTraceWorker
# rubocop: disable CodeReuse/ActiveRecord
def perform(job_id)
Ci::Build.without_archived_trace.find_by(id: job_id).try do |job|
- job.trace.archive!
+ Ci::ArchiveTraceService.new.execute(job)
end
end
# rubocop: enable CodeReuse/ActiveRecord
diff --git a/app/workers/ci/archive_traces_cron_worker.rb b/app/workers/ci/archive_traces_cron_worker.rb
index 7443aad1380..f65ff239866 100644
--- a/app/workers/ci/archive_traces_cron_worker.rb
+++ b/app/workers/ci/archive_traces_cron_worker.rb
@@ -11,21 +11,9 @@ module Ci
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
- begin
- build.trace.archive!
- rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
- rescue => e
- failed_archive_counter.increment
- Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
- end
+ Ci::ArchiveTraceService.new.execute(build)
end
end
# rubocop: enable CodeReuse/ActiveRecord
-
- private
-
- def failed_archive_counter
- @failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of traces archiving")
- end
end
end
diff --git a/spec/services/ci/archive_trace_service_spec.rb b/spec/services/ci/archive_trace_service_spec.rb
new file mode 100644
index 00000000000..8e9cb65f3bc
--- /dev/null
+++ b/spec/services/ci/archive_trace_service_spec.rb
@@ -0,0 +1,51 @@
+require 'spec_helper'
+
+describe Ci::ArchiveTraceService, '#execute' do
+ subject { described_class.new.execute(job) }
+
+ context 'when job is finished' do
+ let(:job) { create(:ci_build, :success, :trace_live) }
+
+ it 'creates an archived trace' do
+ expect { subject }.not_to raise_error
+
+ expect(job.reload.job_artifacts_trace).to be_exist
+ end
+
+ context 'when trace is already archived' do
+ let!(:job) { create(:ci_build, :success, :trace_artifact) }
+
+ it 'ignores an exception' do
+ expect { subject }.not_to raise_error
+ end
+
+ it 'does not create an archived trace' do
+ expect { subject }.not_to change { Ci::JobArtifact.trace.count }
+ end
+ end
+ end
+
+ context 'when job is running' do
+ let(:job) { create(:ci_build, :running, :trace_live) }
+
+ it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
+ expect(Gitlab::Sentry)
+ .to receive(:track_exception)
+ .with(::Gitlab::Ci::Trace::ArchiveError,
+ issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
+ extra: { job_id: job.id } ).once
+
+ expect(Rails.logger)
+ .to receive(:error)
+ .with("Failed to archive trace. id: #{job.id} message: Job is not finished yet")
+ .and_call_original
+
+ expect(Gitlab::Metrics)
+ .to receive(:counter)
+ .with(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
+ .and_call_original
+
+ expect { subject }.not_to raise_error
+ end
+ end
+end
diff --git a/spec/workers/archive_trace_worker_spec.rb b/spec/workers/archive_trace_worker_spec.rb
index b768588c6e1..7244ad4f199 100644
--- a/spec/workers/archive_trace_worker_spec.rb
+++ b/spec/workers/archive_trace_worker_spec.rb
@@ -5,10 +5,11 @@ describe ArchiveTraceWorker do
subject { described_class.new.perform(job&.id) }
context 'when job is found' do
- let(:job) { create(:ci_build) }
+ let(:job) { create(:ci_build, :trace_live) }
it 'executes service' do
- expect_any_instance_of(Gitlab::Ci::Trace).to receive(:archive!)
+ expect_any_instance_of(Ci::ArchiveTraceService)
+ .to receive(:execute).with(job)
subject
end
@@ -18,7 +19,8 @@ describe ArchiveTraceWorker do
let(:job) { nil }
it 'does not execute service' do
- expect_any_instance_of(Gitlab::Ci::Trace).not_to receive(:archive!)
+ expect_any_instance_of(Ci::ArchiveTraceService)
+ .not_to receive(:execute)
subject
end
diff --git a/spec/workers/ci/archive_traces_cron_worker_spec.rb b/spec/workers/ci/archive_traces_cron_worker_spec.rb
index 23f5dda298a..478fb7d2c0f 100644
--- a/spec/workers/ci/archive_traces_cron_worker_spec.rb
+++ b/spec/workers/ci/archive_traces_cron_worker_spec.rb
@@ -30,6 +30,13 @@ describe Ci::ArchiveTracesCronWorker do
it_behaves_like 'archives trace'
+ it 'executes service' do
+ expect_any_instance_of(Ci::ArchiveTraceService)
+ .to receive(:execute).with(build)
+
+ subject
+ end
+
context 'when a trace had already been archived' do
let!(:build) { create(:ci_build, :success, :trace_live, :trace_artifact) }
let!(:build2) { create(:ci_build, :success, :trace_live) }
@@ -46,11 +53,12 @@ describe Ci::ArchiveTracesCronWorker do
let!(:build) { create(:ci_build, :success, :trace_live) }
before do
+ allow(Gitlab::Sentry).to receive(:track_exception)
allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive!).and_raise('Unexpected error')
end
it 'puts a log' do
- expect(Rails.logger).to receive(:error).with("Failed to archive stale live trace. id: #{build.id} message: Unexpected error")
+ expect(Rails.logger).to receive(:error).with("Failed to archive trace. id: #{build.id} message: Unexpected error")
subject
end