From 3fbd48e127053517e9ee0f6307989758a4d59f9a Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 29 Nov 2018 14:34:14 +0900 Subject: Squashed commit of the following: commit 10456b1e9240886432f565dd17689080bbb133b9 Merge: 312c1a9bdf8 a5f4627857b Author: Shinya Maeda Date: Thu Nov 29 14:33:21 2018 +0900 Merge branch 'master-ce' into add-counter-for-trace-chunks commit 312c1a9bdf8efc45c3fed5ff50f05cc589bbb4ed Author: Shinya Maeda Date: Wed Nov 28 20:06:18 2018 +0900 Fix coding offence commit e397cc2ccc1b2cf7f8b3558b8fa81fe2aa0ab366 Author: Shinya Maeda Date: Wed Nov 28 14:40:24 2018 +0900 Fix tracking archive failure --- app/services/ci/archive_trace_service.rb | 25 +++++++++++++++++++++++++ app/workers/archive_trace_worker.rb | 2 +- app/workers/ci/archive_traces_cron_worker.rb | 14 +------------- 3 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 app/services/ci/archive_trace_service.rb (limited to 'app') diff --git a/app/services/ci/archive_trace_service.rb b/app/services/ci/archive_trace_service.rb new file mode 100644 index 00000000000..8504ceb2327 --- /dev/null +++ b/app/services/ci/archive_trace_service.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Ci + class ArchiveTraceService + def execute(job) + job.trace.archive! + rescue ::Gitlab::Ci::Trace::AlreadyArchivedError + # It's already archived, thus we can safely ignore this exception. + rescue => e + archive_error(e, job) + end + + private + + def failed_archive_counter + @failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving") + end + + def archive_error(error, job) + failed_archive_counter.increment + Gitlab::Sentry.track_exception(error, issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502', extra: { job_id: job.id }) + Rails.logger.error "Failed to archive trace. id: #{job.id} message: #{error.message}" + end + end +end diff --git a/app/workers/archive_trace_worker.rb b/app/workers/archive_trace_worker.rb index c1283e9b2fc..4a9becf0ca7 100644 --- a/app/workers/archive_trace_worker.rb +++ b/app/workers/archive_trace_worker.rb @@ -7,7 +7,7 @@ class ArchiveTraceWorker # rubocop: disable CodeReuse/ActiveRecord def perform(job_id) Ci::Build.without_archived_trace.find_by(id: job_id).try do |job| - job.trace.archive! + Ci::ArchiveTraceService.new.execute(job) end end # rubocop: enable CodeReuse/ActiveRecord diff --git a/app/workers/ci/archive_traces_cron_worker.rb b/app/workers/ci/archive_traces_cron_worker.rb index 7443aad1380..f65ff239866 100644 --- a/app/workers/ci/archive_traces_cron_worker.rb +++ b/app/workers/ci/archive_traces_cron_worker.rb @@ -11,21 +11,9 @@ module Ci # This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL # More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791 Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build| - begin - build.trace.archive! - rescue ::Gitlab::Ci::Trace::AlreadyArchivedError - rescue => e - failed_archive_counter.increment - Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}" - end + Ci::ArchiveTraceService.new.execute(build) end end # rubocop: enable CodeReuse/ActiveRecord - - private - - def failed_archive_counter - @failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of traces archiving") - end end end -- cgit v1.2.1 From 137541c0414c0cf93170221d11e6a75ed9d89b19 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 29 Nov 2018 15:13:36 +0900 Subject: Improve comments --- app/services/ci/archive_trace_service.rb | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'app') diff --git a/app/services/ci/archive_trace_service.rb b/app/services/ci/archive_trace_service.rb index 8504ceb2327..a1dd00721b5 100644 --- a/app/services/ci/archive_trace_service.rb +++ b/app/services/ci/archive_trace_service.rb @@ -7,19 +7,29 @@ module Ci rescue ::Gitlab::Ci::Trace::AlreadyArchivedError # It's already archived, thus we can safely ignore this exception. rescue => e + # Tracks this error with application logs, Sentry, and Prometheus. + # If `archive!` keeps failing for over a week, that could incur data loss. + # (See more https://docs.gitlab.com/ee/administration/job_traces.html#new-live-trace-architecture) + # In order to avoid interrupting the system, we do not raise an exception here. archive_error(e, job) end private def failed_archive_counter - @failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving") + @failed_archive_counter ||= + Gitlab::Metrics.counter(:job_trace_archive_failed_total, + "Counter of failed attempts of trace archiving") end def archive_error(error, job) failed_archive_counter.increment - Gitlab::Sentry.track_exception(error, issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502', extra: { job_id: job.id }) Rails.logger.error "Failed to archive trace. id: #{job.id} message: #{error.message}" + + Gitlab::Sentry + .track_exception(error, + issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502', + extra: { job_id: job.id }) end end end -- cgit v1.2.1