diff options
author | Jarka Kadlecová <jarka@gitlab.com> | 2018-07-30 20:14:38 +0200 |
---|---|---|
committer | Jarka Kadlecová <jarka@gitlab.com> | 2018-08-07 12:51:58 +0200 |
commit | 6aaeb6dc411d6a92e9dc8d7968aa774c9e8ae044 (patch) | |
tree | f9877c182926456459693cd09c0aa63da21baa01 | |
parent | ab08f9986de070b8d6bc15c36115653bc3ef3000 (diff) | |
download | gitlab-ce-6aaeb6dc411d6a92e9dc8d7968aa774c9e8ae044.tar.gz |
Clean orphaned files in object storage
mode | file | lines changed |
---|---|---|
-rw-r--r-- | changelogs/unreleased/46535-orphaned-uploads.yml | 5 |
-rw-r--r-- | doc/raketasks/cleanup.md | 31 |
-rw-r--r-- | lib/gitlab/cleanup/remote_uploads.rb | 80 |
-rw-r--r-- | lib/tasks/gitlab/cleanup.rake | 10 |
-rw-r--r-- | spec/lib/gitlab/cleanup/remote_uploads_spec.rb | 74 |

5 files changed, 199 insertions, 1 deletion
diff --git a/changelogs/unreleased/46535-orphaned-uploads.yml b/changelogs/unreleased/46535-orphaned-uploads.yml new file mode 100644 index 00000000000..1cd087a6aad --- /dev/null +++ b/changelogs/unreleased/46535-orphaned-uploads.yml @@ -0,0 +1,5 @@ +--- +title: Clean orphaned files in object storage +merge_request: 20918 +author: +type: added diff --git a/doc/raketasks/cleanup.md b/doc/raketasks/cleanup.md index e2eb342361a..e70a009323e 100644 --- a/doc/raketasks/cleanup.md +++ b/doc/raketasks/cleanup.md @@ -52,4 +52,33 @@ D, [2018-07-27T12:08:33.293568 #89817] DEBUG -- : Processing batch of 500 projec I, [2018-07-27T12:08:33.689869 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out I, [2018-07-27T12:08:33.755624 #89817] INFO -- : Did fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt I, [2018-07-27T12:08:33.760257 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -```
\ No newline at end of file +``` + +Move object store upload files to the `lost_and_found` directory if they don't exist in the GitLab database. + +``` +# omnibus-gitlab +sudo gitlab-rake gitlab:cleanup:remote_upload_files + +# installation from source +bundle exec rake gitlab:cleanup:remote_upload_files RAILS_ENV=production +``` + +Example output: + +``` +$ sudo gitlab-rake gitlab:cleanup:remote_upload_files + +I, [2018-08-02T10:26:13.995978 #45011] INFO -- : Looking for orphaned remote uploads to remove. Dry run... +I, [2018-08-02T10:26:14.120400 #45011] INFO -- : Can be moved to lost and found: @hashed/6b/DSC_6152.JPG +I, [2018-08-02T10:26:14.120482 #45011] INFO -- : Can be moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg +I, [2018-08-02T10:26:14.120634 #45011] INFO -- : To cleanup these files run this command with DRY_RUN=false +``` + +``` +$ sudo gitlab-rake gitlab:cleanup:remote_upload_files DRY_RUN=false + +I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote uploads to remove... 
+I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG +I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg +``` diff --git a/lib/gitlab/cleanup/remote_uploads.rb b/lib/gitlab/cleanup/remote_uploads.rb new file mode 100644 index 00000000000..45a5aea4fcd --- /dev/null +++ b/lib/gitlab/cleanup/remote_uploads.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true +module Gitlab + module Cleanup + class RemoteUploads + attr_reader :logger + + BATCH_SIZE = 100 + + def initialize(logger: nil) + @logger = logger || Rails.logger + end + + def run!(dry_run: false) + unless configuration.enabled + logger.warn "Object storage not enabled. Exit".color(:yellow) + + return + end + + logger.info "Looking for orphaned remote uploads to remove#{'. Dry run' if dry_run}..." + + each_orphan_file do |file| + info = if dry_run + "Can be moved to lost and found: #{file.key}" + else + new_path = move_to_lost_and_found(file) + "Moved to lost and found: #{file.key} -> #{new_path}" + end + + logger.info(info) + end + end + + private + + def each_orphan_file + # we want to skip files already moved to lost_and_found directory + lost_dir_match = "^#{lost_and_found_dir}\/" + + remote_directory.files.each_slice(BATCH_SIZE) do |remote_files| + remote_files.reject! { |file| file.key.match(/#{lost_dir_match}/) } + file_paths = remote_files.map(&:key) + tracked_paths = Upload + .where(store: ObjectStorage::Store::REMOTE, path: file_paths) + .pluck(:path) + + remote_files.reject! 
{ |file| tracked_paths.include?(file.key) } + remote_files.each do |file| + yield file + end + end + end + + def move_to_lost_and_found(file) + new_path = "#{lost_and_found_dir}/#{file.key}" + + file.copy(configuration['remote_directory'], new_path) + file.destroy + + new_path + end + + def lost_and_found_dir + 'lost_and_found' + end + + def remote_directory + connection.directories.get(configuration['remote_directory']) + end + + def connection + ::Fog::Storage.new(configuration['connection'].symbolize_keys) + end + + def configuration + Gitlab.config.uploads.object_store + end + end + end +end diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake index a2feb074b1d..c8a8863443e 100644 --- a/lib/tasks/gitlab/cleanup.rake +++ b/lib/tasks/gitlab/cleanup.rake @@ -116,6 +116,16 @@ namespace :gitlab do end end + desc 'GitLab | Cleanup | Clean orphan remote upload files that do not exist in the db' + task remote_upload_files: :environment do + cleaner = Gitlab::Cleanup::RemoteUploads.new(logger: logger) + cleaner.run!(dry_run: dry_run?) + + if dry_run? + logger.info "To cleanup these files run this command with DRY_RUN=false".color(:yellow) + end + end + def remove? 
ENV['REMOVE'] == 'true' end diff --git a/spec/lib/gitlab/cleanup/remote_uploads_spec.rb b/spec/lib/gitlab/cleanup/remote_uploads_spec.rb new file mode 100644 index 00000000000..8d03baeb07b --- /dev/null +++ b/spec/lib/gitlab/cleanup/remote_uploads_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true +require 'spec_helper' + +describe Gitlab::Cleanup::RemoteUploads do + context 'when object_storage is enabled' do + let(:connection) { double } + let(:directory) { double } + let!(:uploads) do + [ + create(:upload, path: 'dir/file1', store: ObjectStorage::Store::REMOTE), + create(:upload, path: 'dir/file2', store: ObjectStorage::Store::LOCAL) + ] + end + let(:remote_files) do + [ + double(key: 'dir/file1'), + double(key: 'dir/file2'), + double(key: 'dir/file3'), + double(key: 'lost_and_found/dir/file3') + ] + end + + before do + stub_uploads_object_storage(FileUploader) + + expect(::Fog::Storage).to receive(:new).and_return(connection) + + expect(connection).to receive(:directories).and_return(double(get: directory)) + expect(directory).to receive(:files).and_return(remote_files) + end + + context 'when dry_run is set to false' do + subject { described_class.new.run!(dry_run: false) } + + it 'moves files that are not in uploads table' do + expect(remote_files[0]).not_to receive(:copy) + expect(remote_files[0]).not_to receive(:destroy) + expect(remote_files[1]).to receive(:copy) + expect(remote_files[1]).to receive(:destroy) + expect(remote_files[2]).to receive(:copy) + expect(remote_files[2]).to receive(:destroy) + expect(remote_files[3]).not_to receive(:copy) + expect(remote_files[3]).not_to receive(:destroy) + + subject + end + end + + context 'when dry_run is set to true' do + subject { described_class.new.run!(dry_run: true) } + + it 'does not move filese' do + expect(remote_files[0]).not_to receive(:copy) + expect(remote_files[0]).not_to receive(:destroy) + expect(remote_files[1]).not_to receive(:copy) + expect(remote_files[1]).not_to receive(:destroy) + 
expect(remote_files[2]).not_to receive(:copy) + expect(remote_files[2]).not_to receive(:destroy) + expect(remote_files[3]).not_to receive(:copy) + expect(remote_files[3]).not_to receive(:destroy) + + subject + end + end + end + + context 'when object_storage is not enabled' do + it 'does not connect to any storage' do + expect(::Fog::Storage).not_to receive(:new) + + subject + end + end +end |