From b5f5b6dc50b2963f3190caecf12f63d4ba2da878 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Mon, 26 Feb 2018 18:21:03 +0900 Subject: Add checksum to ci_job_artifacts --- app/models/ci/job_artifact.rb | 5 +++++ app/uploaders/job_artifact_uploader.rb | 10 ++++++++++ db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb | 8 ++++++++ db/schema.rb | 1 + 4 files changed, 24 insertions(+) create mode 100644 db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index 0a599f72bc7..a06a4bc502e 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -6,6 +6,7 @@ module Ci belongs_to :job, class_name: "Ci::Build", foreign_key: :job_id before_save :set_size, if: :file_changed? + before_save :set_checksum, if: :file_changed? mount_uploader :file, JobArtifactUploader @@ -25,6 +26,10 @@ module Ci self.size = file.size end + def set_checksum + self.checksum = file.checksum + end + def expire_in expire_at - Time.now if expire_at end diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb index ad5385f45a4..5560700f442 100644 --- a/app/uploaders/job_artifact_uploader.rb +++ b/app/uploaders/job_artifact_uploader.rb @@ -9,6 +9,12 @@ class JobArtifactUploader < GitlabUploader model.size end + def checksum + return calc_checksum if model.checksum.nil? + + model.checksum + end + def store_dir dynamic_segment end @@ -21,6 +27,10 @@ class JobArtifactUploader < GitlabUploader private + def calc_checksum + Digest::SHA256.file(file.path).hexdigest + end + def dynamic_segment creation_date = model.created_at.utc.strftime('%Y_%m_%d') diff --git a/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb new file mode 100644 index 00000000000..30973f6f5c5 --- /dev/null +++ b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb @@ -0,0 +1,8 @@ +class AddChecksumToCiJobArtifacts < ActiveRecord::Migration + DOWNTIME = false + + def change + add_column :ci_job_artifacts, :checksum, :string, limit: 64 + end +end + diff --git a/db/schema.rb b/db/schema.rb index 9e117440ed2..6a752593c96 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -346,6 +346,7 @@ ActiveRecord::Schema.define(version: 20180304204842) do t.datetime_with_timezone "updated_at", null: false t.datetime_with_timezone "expire_at" t.string "file" + t.string "checksum", limit: 64 end add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree -- cgit v1.2.1 From c2954f38150fa5654fe63dd37f36de550e5f3679 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Wed, 28 Feb 2018 20:49:50 +0900 Subject: Change column type to binary from string --- db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb | 3 +-- db/schema.rb | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb index 30973f6f5c5..dd1b9339b28 100644 --- a/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb +++ b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb @@ -2,7 +2,6 @@ class AddChecksumToCiJobArtifacts < ActiveRecord::Migration DOWNTIME = false def change - add_column :ci_job_artifacts, :checksum, :string, limit: 64 + add_column :ci_job_artifacts, :checksum, :binary end end - diff --git a/db/schema.rb b/db/schema.rb index 6a752593c96..bfe95199e83 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -346,7 +346,7 @@ ActiveRecord::Schema.define(version: 20180304204842) do t.datetime_with_timezone "updated_at", null: false t.datetime_with_timezone "expire_at" t.string "file" - t.string "checksum", limit: 64 + t.binary "checksum" end add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree -- cgit v1.2.1 From 4dddd5858af5b9770fad43b58185373153e0f653 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Wed, 28 Feb 2018 20:50:48 +0900 Subject: Import use_file method from EE and use it for calculation of checksum --- app/uploaders/gitlab_uploader.rb | 8 ++++++++ app/uploaders/job_artifact_uploader.rb | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/app/uploaders/gitlab_uploader.rb b/app/uploaders/gitlab_uploader.rb index 010100f2da1..9f693269808 100644 --- a/app/uploaders/gitlab_uploader.rb +++ b/app/uploaders/gitlab_uploader.rb @@ -71,6 +71,14 @@ class GitlabUploader < CarrierWave::Uploader::Base !!model end + ## + # ObjectStorage::Concern extends this method for remote files + def use_file + if file_storage? + return yield path + end + end + private # Designed to be overridden by child uploaders that have a dynamic path diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb index 5560700f442..f994b89dd00 100644 --- a/app/uploaders/job_artifact_uploader.rb +++ b/app/uploaders/job_artifact_uploader.rb @@ -28,7 +28,9 @@ class JobArtifactUploader < GitlabUploader private def calc_checksum - Digest::SHA256.file(file.path).hexdigest + use_file do |file_path| + return Digest::SHA256.file(file_path).hexdigest + end end def dynamic_segment -- cgit v1.2.1 From 22a7f1f9ad4504c51657b1957b01a7646eee78cd Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Wed, 28 Feb 2018 18:43:35 +0900 Subject: Add ObjectStorageQueue concern and test --- app/workers/concerns/object_storage_queue.rb | 8 ++++++++ spec/workers/concerns/object_storage_queue_spec.rb | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 app/workers/concerns/object_storage_queue.rb create mode 100644 spec/workers/concerns/object_storage_queue_spec.rb diff --git a/app/workers/concerns/object_storage_queue.rb b/app/workers/concerns/object_storage_queue.rb new file mode 100644 index 00000000000..a80f473a6d4 --- /dev/null +++ b/app/workers/concerns/object_storage_queue.rb @@ -0,0 +1,8 @@ +# Concern for setting Sidekiq settings for the various GitLab ObjectStorage workers. +module ObjectStorageQueue + extend ActiveSupport::Concern + + included do + queue_namespace :object_storage + end +end diff --git a/spec/workers/concerns/object_storage_queue_spec.rb b/spec/workers/concerns/object_storage_queue_spec.rb new file mode 100644 index 00000000000..f725fc9a4b1 --- /dev/null +++ b/spec/workers/concerns/object_storage_queue_spec.rb @@ -0,0 +1,19 @@ +require 'spec_helper' + +describe ObjectStorageQueue do + let(:worker) do + Class.new do + def self.name + 'DummyWorker' + end + + include ApplicationWorker + include ObjectStorageQueue + end + end + + it 'sets a default object storage queue automatically' do + expect(worker.sidekiq_options['queue']) + .to eq 'object_storage:dummy' + end +end -- cgit v1.2.1 From 99b0542cb0be67fd9af2de74133efb8911519947 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Wed, 28 Feb 2018 21:11:38 +0900 Subject: Add post migration for checksum calculation --- app/workers/all_queues.yml | 2 ++ app/workers/update_artifact_checksum_worker.rb | 11 ++++++++++ ...8121020_update_checksum_for_ci_job_artifacts.rb | 25 ++++++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 app/workers/update_artifact_checksum_worker.rb create mode 100644 db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 328db19be29..e47a8ca3985 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -104,3 +104,5 @@ - update_user_activity - upload_checksum - web_hook + +- object_storage:update_artifact_checksum_worker \ No newline at end of file diff --git a/app/workers/update_artifact_checksum_worker.rb b/app/workers/update_artifact_checksum_worker.rb new file mode 100644 index 00000000000..a83fad04b95 --- /dev/null +++ b/app/workers/update_artifact_checksum_worker.rb @@ -0,0 +1,11 @@ +class UpdateArtifactChecksumWorker + include ApplicationWorker + include ObjectStorageQueue + + def perform(job_artifact_id) + Ci::JobArtifact.find_by(id: job_artifact_id).try do |job_artifact| + job_artifact.set_checksum + job_artifact.save! + end + end +end diff --git a/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb b/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb new file mode 100644 index 00000000000..bf69c647b4d --- /dev/null +++ b/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb @@ -0,0 +1,25 @@ +class UpdateChecksumForCiJobArtifacts < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 2500 + + class JobArtifact < ActiveRecord::Base + include EachBatch + self.table_name = 'ci_job_artifacts' + end + + def up + UpdateChecksumForCiJobArtifacts::JobArtifact + .where('checksum IS NULL') + .each_batch(of: BATCH_SIZE) do |relation| + ids = relation.pluck(:id).map { |id| [id] } + + UpdateArtifactChecksumWorker.bulk_perform_async(ids) + end + end + + def down + # no-op + end +end -- cgit v1.2.1 From f00cec607f6ffc99c6170e66c6ecfa99c9e15b75 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 1 Mar 2018 18:27:57 +0900 Subject: Revert logic of calculating checksum --- app/models/ci/job_artifact.rb | 5 ----- app/uploaders/gitlab_uploader.rb | 8 ------- app/uploaders/job_artifact_uploader.rb | 12 ----------- app/workers/all_queues.yml | 2 -- app/workers/concerns/object_storage_queue.rb | 8 ------- app/workers/update_artifact_checksum_worker.rb | 11 ---------- ...8121020_update_checksum_for_ci_job_artifacts.rb | 25 ---------------------- spec/workers/concerns/object_storage_queue_spec.rb | 19 ---------------- 8 files changed, 90 deletions(-) delete mode 100644 app/workers/concerns/object_storage_queue.rb delete mode 100644 app/workers/update_artifact_checksum_worker.rb delete mode 100644 db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb delete mode 100644 spec/workers/concerns/object_storage_queue_spec.rb diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index a06a4bc502e..0a599f72bc7 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -6,7 +6,6 @@ module Ci belongs_to :job, class_name: "Ci::Build", foreign_key: :job_id before_save :set_size, if: :file_changed? - before_save :set_checksum, if: :file_changed? mount_uploader :file, JobArtifactUploader @@ -26,10 +25,6 @@ module Ci self.size = file.size end - def set_checksum - self.checksum = file.checksum - end - def expire_in expire_at - Time.now if expire_at end diff --git a/app/uploaders/gitlab_uploader.rb b/app/uploaders/gitlab_uploader.rb index 9f693269808..010100f2da1 100644 --- a/app/uploaders/gitlab_uploader.rb +++ b/app/uploaders/gitlab_uploader.rb @@ -71,14 +71,6 @@ class GitlabUploader < CarrierWave::Uploader::Base !!model end - ## - # ObjectStorage::Concern extends this method for remote files - def use_file - if file_storage? - return yield path - end - end - private # Designed to be overridden by child uploaders that have a dynamic path diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb index f994b89dd00..ad5385f45a4 100644 --- a/app/uploaders/job_artifact_uploader.rb +++ b/app/uploaders/job_artifact_uploader.rb @@ -9,12 +9,6 @@ class JobArtifactUploader < GitlabUploader model.size end - def checksum - return calc_checksum if model.checksum.nil? - - model.checksum - end - def store_dir dynamic_segment end @@ -27,12 +21,6 @@ class JobArtifactUploader < GitlabUploader private - def calc_checksum - use_file do |file_path| - return Digest::SHA256.file(file_path).hexdigest - end - end - def dynamic_segment creation_date = model.created_at.utc.strftime('%Y_%m_%d') diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index e47a8ca3985..328db19be29 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -104,5 +104,3 @@ - update_user_activity - upload_checksum - web_hook - -- object_storage:update_artifact_checksum_worker \ No newline at end of file diff --git a/app/workers/concerns/object_storage_queue.rb b/app/workers/concerns/object_storage_queue.rb deleted file mode 100644 index a80f473a6d4..00000000000 --- a/app/workers/concerns/object_storage_queue.rb +++ /dev/null @@ -1,8 +0,0 @@ -# Concern for setting Sidekiq settings for the various GitLab ObjectStorage workers. -module ObjectStorageQueue - extend ActiveSupport::Concern - - included do - queue_namespace :object_storage - end -end diff --git a/app/workers/update_artifact_checksum_worker.rb b/app/workers/update_artifact_checksum_worker.rb deleted file mode 100644 index a83fad04b95..00000000000 --- a/app/workers/update_artifact_checksum_worker.rb +++ /dev/null @@ -1,11 +0,0 @@ -class UpdateArtifactChecksumWorker - include ApplicationWorker - include ObjectStorageQueue - - def perform(job_artifact_id) - Ci::JobArtifact.find_by(id: job_artifact_id).try do |job_artifact| - job_artifact.set_checksum - job_artifact.save! - end - end -end diff --git a/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb b/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb deleted file mode 100644 index bf69c647b4d..00000000000 --- a/db/post_migrate/20180228121020_update_checksum_for_ci_job_artifacts.rb +++ /dev/null @@ -1,25 +0,0 @@ -class UpdateChecksumForCiJobArtifacts < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - DOWNTIME = false - BATCH_SIZE = 2500 - - class JobArtifact < ActiveRecord::Base - include EachBatch - self.table_name = 'ci_job_artifacts' - end - - def up - UpdateChecksumForCiJobArtifacts::JobArtifact - .where('checksum IS NULL') - .each_batch(of: BATCH_SIZE) do |relation| - ids = relation.pluck(:id).map { |id| [id] } - - UpdateArtifactChecksumWorker.bulk_perform_async(ids) - end - end - - def down - # no-op - end -end diff --git a/spec/workers/concerns/object_storage_queue_spec.rb b/spec/workers/concerns/object_storage_queue_spec.rb deleted file mode 100644 index f725fc9a4b1..00000000000 --- a/spec/workers/concerns/object_storage_queue_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -require 'spec_helper' - -describe ObjectStorageQueue do - let(:worker) do - Class.new do - def self.name - 'DummyWorker' - end - - include ApplicationWorker - include ObjectStorageQueue - end - end - - it 'sets a default object storage queue automatically' do - expect(worker.sidekiq_options['queue']) - .to eq 'object_storage:dummy' - end -end -- cgit v1.2.1 From a1c612ce2dc2b42137664f73c3a70f3a283bcc0a Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 1 Mar 2018 22:10:44 +0900 Subject: Add checksum at runner grape api --- lib/api/runner.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/api/runner.rb b/lib/api/runner.rb index 91cdc564002..8d2bcc73bb3 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -204,6 +204,7 @@ module API optional 'file.path', type: String, desc: %q(path to locally stored body (generated by Workhorse)) optional 'file.name', type: String, desc: %q(real filename as send in Content-Disposition (generated by Workhorse)) optional 'file.type', type: String, desc: %q(real content type as send in Content-Type (generated by Workhorse)) + optional 'file.sha256', type: String, desc: %q(checksum of the file) optional 'metadata.path', type: String, desc: %q(path to locally stored body (generated by Workhorse)) optional 'metadata.name', type: String, desc: %q(filename (generated by Workhorse)) end @@ -224,7 +225,7 @@ module API expire_in = params['expire_in'] || Gitlab::CurrentSettings.current_application_settings.default_artifacts_expire_in - job.build_job_artifacts_archive(project: job.project, file_type: :archive, file: artifacts, expire_in: expire_in) + job.build_job_artifacts_archive(project: job.project, file_type: :archive, file: artifacts, checksum: params['file.sha256'], expire_in: expire_in) job.build_job_artifacts_metadata(project: job.project, file_type: :metadata, file: metadata, expire_in: expire_in) if metadata job.artifacts_expire_in = expire_in -- cgit v1.2.1 From 03438886e1de94df6bd89471e67aa6347fd5fb2e Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Tue, 6 Mar 2018 17:16:33 +0900 Subject: Change column to file_sha256. Add test. Add changelog --- changelogs/unreleased/feature-sm-add-check-sum-to-job-artifacts.yml | 5 +++++ db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb | 2 +- db/schema.rb | 2 +- lib/api/runner.rb | 4 ++-- spec/requests/api/runner_spec.rb | 4 ++++ 5 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 changelogs/unreleased/feature-sm-add-check-sum-to-job-artifacts.yml diff --git a/changelogs/unreleased/feature-sm-add-check-sum-to-job-artifacts.yml b/changelogs/unreleased/feature-sm-add-check-sum-to-job-artifacts.yml new file mode 100644 index 00000000000..23a870d6e9f --- /dev/null +++ b/changelogs/unreleased/feature-sm-add-check-sum-to-job-artifacts.yml @@ -0,0 +1,5 @@ +--- +title: Store sha256 checksum to job artifacts +merge_request: 17354 +author: +type: performance diff --git a/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb index dd1b9339b28..54e6e35449e 100644 --- a/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb +++ b/db/migrate/20180226050030_add_checksum_to_ci_job_artifacts.rb @@ -2,6 +2,6 @@ class AddChecksumToCiJobArtifacts < ActiveRecord::Migration DOWNTIME = false def change - add_column :ci_job_artifacts, :checksum, :binary + add_column :ci_job_artifacts, :file_sha256, :binary end end diff --git a/db/schema.rb b/db/schema.rb index bfe95199e83..eb92cf030ee 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -346,7 +346,7 @@ ActiveRecord::Schema.define(version: 20180304204842) do t.datetime_with_timezone "updated_at", null: false t.datetime_with_timezone "expire_at" t.string "file" - t.binary "checksum" + t.binary "file_sha256" end add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree diff --git a/lib/api/runner.rb b/lib/api/runner.rb index 8d2bcc73bb3..7e6c33ec33d 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -204,7 +204,7 @@ module API optional 'file.path', type: String, desc: %q(path to locally stored body (generated by Workhorse)) optional 'file.name', type: String, desc: %q(real filename as send in Content-Disposition (generated by Workhorse)) optional 'file.type', type: String, desc: %q(real content type as send in Content-Type (generated by Workhorse)) - optional 'file.sha256', type: String, desc: %q(checksum of the file) + optional 'file.sha256', type: String, desc: %q(sha256 checksum of the file) optional 'metadata.path', type: String, desc: %q(path to locally stored body (generated by Workhorse)) optional 'metadata.name', type: String, desc: %q(filename (generated by Workhorse)) end @@ -225,7 +225,7 @@ module API expire_in = params['expire_in'] || Gitlab::CurrentSettings.current_application_settings.default_artifacts_expire_in - job.build_job_artifacts_archive(project: job.project, file_type: :archive, file: artifacts, checksum: params['file.sha256'], expire_in: expire_in) + job.build_job_artifacts_archive(project: job.project, file_type: :archive, file: artifacts, file_sha256: params['file.sha256'], expire_in: expire_in) job.build_job_artifacts_metadata(project: job.project, file_type: :metadata, file: metadata, expire_in: expire_in) if metadata job.artifacts_expire_in = expire_in diff --git a/spec/requests/api/runner_spec.rb b/spec/requests/api/runner_spec.rb index 72cafac3f90..ce1311ac97c 100644 --- a/spec/requests/api/runner_spec.rb +++ b/spec/requests/api/runner_spec.rb @@ -1100,11 +1100,13 @@ describe API::Runner do context 'posts artifacts file and metadata file' do let!(:artifacts) { file_upload } + let!(:artifacts_sha256) { Digest::SHA256.file(artifacts.path).hexdigest } let!(:metadata) { file_upload2 } let(:stored_artifacts_file) { job.reload.artifacts_file.file } let(:stored_metadata_file) { job.reload.artifacts_metadata.file } let(:stored_artifacts_size) { job.reload.artifacts_size } + let(:stored_artifacts_sha256) { job.reload.job_artifacts_archive.file_sha256 } before do post(api("/jobs/#{job.id}/artifacts"), post_data, headers_with_token) @@ -1114,6 +1116,7 @@ describe API::Runner do let(:post_data) do { 'file.path' => artifacts.path, 'file.name' => artifacts.original_filename, + 'file.sha256' => artifacts_sha256, 'metadata.path' => metadata.path, 'metadata.name' => metadata.original_filename } end @@ -1123,6 +1126,7 @@ describe API::Runner do expect(stored_artifacts_file.original_filename).to eq(artifacts.original_filename) expect(stored_metadata_file.original_filename).to eq(metadata.original_filename) expect(stored_artifacts_size).to eq(72821) + expect(stored_artifacts_sha256).to eq(artifacts_sha256) end end -- cgit v1.2.1