diff options
author | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2018-09-07 11:16:34 +0200 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2018-09-07 13:58:31 +0200 |
commit | 3aedccb17a5dbee40b5a08014c92cab8ea11e9fb (patch) | |
tree | acd5edd14ea25f8a7bb7db251f1de1dbaa673065 | |
parent | c380d3acebd181f13629a25d2e2acca46ffe1e00 (diff) | |
download | gitlab-ce-3aedccb17a5dbee40b5a08014c92cab8ea11e9fb.tar.gz |
Port cleanup tasks to use Gitaly
Rake tasks cleaning up the Git storage were still using direct disk
access, which won't work if these aren't attached. To mitigate a
migration issue was created.
To port gitlab:cleanup:dirs, and gitlab:cleanup:repos, a new RPC was
required, ListDirectories. This was implemented in Gitaly, through
https://gitlab.com/gitlab-org/gitaly/merge_requests/868.
To be able to use the new RPC the Gitaly server was bumped to v0.120.
This is an RPC that will not use feature gates, as this doesn't scale on
.com so there is no way to test it at scale. Futhermore, we _know_ it
doesn't scale, but this might be a useful task for smaller instances.
Lastly, the tests are slightly updated to also work when the disk isn't
attached. Eventhough this is not planned, it was very little effort and
thus I applied the boy scout rule.
Closes https://gitlab.com/gitlab-org/gitaly/issues/954
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
-rw-r--r-- | GITALY_SERVER_VERSION | 2 | ||||
-rw-r--r-- | changelogs/unreleased/zj-cleanup-port-gitaly.yml | 5 | ||||
-rw-r--r-- | lib/gitlab/gitaly_client/storage_service.rb | 8 | ||||
-rw-r--r-- | lib/tasks/gitlab/cleanup.rake | 89 | ||||
-rw-r--r-- | spec/tasks/gitlab/cleanup_rake_spec.rb | 35 |
5 files changed, 73 insertions, 66 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION index f34340fc21c..99e0d1ed987 100644 --- a/GITALY_SERVER_VERSION +++ b/GITALY_SERVER_VERSION @@ -1 +1 @@ -0.119.0 +0.120.0 diff --git a/changelogs/unreleased/zj-cleanup-port-gitaly.yml b/changelogs/unreleased/zj-cleanup-port-gitaly.yml new file mode 100644 index 00000000000..25e13b0fd50 --- /dev/null +++ b/changelogs/unreleased/zj-cleanup-port-gitaly.yml @@ -0,0 +1,5 @@ +--- +title: Administrative cleanup rake tasks now leverage Gitaly +merge_request: 21588 +author: +type: changed diff --git a/lib/gitlab/gitaly_client/storage_service.rb b/lib/gitlab/gitaly_client/storage_service.rb index eb0e910665b..3a26dd58ff4 100644 --- a/lib/gitlab/gitaly_client/storage_service.rb +++ b/lib/gitlab/gitaly_client/storage_service.rb @@ -5,6 +5,14 @@ module Gitlab @storage = storage end + # Returns all directories in the git storage directory, lexically ordered + def list_directories(depth: 1) + request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth) + + GitalyClient.call(@storage, :storage_service, :list_directories, request) + .flat_map(&:paths) + end + # Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation. def delete_all_repositories request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage) diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake index c8a8863443e..e8ae5dfa540 100644 --- a/lib/tasks/gitlab/cleanup.rake +++ b/lib/tasks/gitlab/cleanup.rake @@ -1,40 +1,29 @@ -# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954 -# +# frozen_string_literal: true +require 'set' + namespace :gitlab do namespace :cleanup do - HASHED_REPOSITORY_NAME = '@hashed'.freeze - desc "GitLab | Cleanup | Clean namespaces" task dirs: :gitlab_environment do - warn_user_is_not_gitlab + namespaces = Set.new(Namespace.pluck(:path)) + namespaces << Storage::HashedProject::ROOT_PATH_PREFIX - namespaces = Namespace.pluck(:path) - namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored - Gitlab.config.repositories.storages.each do |name, repository_storage| - git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } - all_dirs = Dir.glob(git_base_path + '/*') + Gitaly::Server.all.each do |server| + all_dirs = Gitlab::GitalyClient::StorageService + .new(server.storage) + .list_directories(depth: 0) + .reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) } - puts git_base_path.color(:yellow) puts "Looking for directories to remove... " - - all_dirs.reject! do |dir| - # skip if git repo - dir =~ /.git$/ - end - - all_dirs.reject! do |dir| - dir_name = File.basename dir - - # skip if namespace present - namespaces.include?(dir_name) - end - all_dirs.each do |dir_path| if remove? - if FileUtils.rm_rf dir_path - puts "Removed...#{dir_path}".color(:red) - else - puts "Cannot remove #{dir_path}".color(:red) + begin + Gitlab::GitalyClient::NamespaceService.new(server.storage) + .remove(dir_path) + + puts "Removed...#{dir_path}" + rescue StandardError => e + puts "Cannot remove #{dir_path}: #{e.message}".color(:red) end else puts "Can be removed: #{dir_path}".color(:red) @@ -49,29 +38,29 @@ namespace :gitlab do desc "GitLab | Cleanup | Clean repositories" task repos: :gitlab_environment do - warn_user_is_not_gitlab - move_suffix = "+orphaned+#{Time.now.to_i}" - Gitlab.config.repositories.storages.each do |name, repository_storage| - repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } - - # Look for global repos (legacy, depth 1) and normal repos (depth 2) - IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find| - find.each_line do |path| - path.chomp! - repo_with_namespace = path - .sub(repo_root, '') - .sub(%r{^/*}, '') - .chomp('.git') - .chomp('.wiki') - - # TODO ignoring hashed repositories for now. But revisit to fully support - # possible orphaned hashed repos - next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace) - - new_path = path + move_suffix - puts path.inspect + ' -> ' + new_path.inspect - File.rename(path, new_path) + + Gitaly::Server.all.each do |server| + Gitlab::GitalyClient::StorageService + .new(server.storage) + .list_directories + .each do |path| + repo_with_namespace = path.chomp('.git').chomp('.wiki') + + # TODO ignoring hashed repositories for now. But revisit to fully support + # possible orphaned hashed repos + next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX) + next if Project.find_by_full_path(repo_with_namespace) + + new_path = path + move_suffix + puts path.inspect + ' -> ' + new_path.inspect + + begin + Gitlab::GitalyClient::NamespaceService + .new(server.storage) + .rename(path, new_path) + rescue StandardError => e + puts "Error occured while moving the repository: #{e.message}".color(:red) end end end diff --git a/spec/tasks/gitlab/cleanup_rake_spec.rb b/spec/tasks/gitlab/cleanup_rake_spec.rb index cc2cca10f58..19794227d9f 100644 --- a/spec/tasks/gitlab/cleanup_rake_spec.rb +++ b/spec/tasks/gitlab/cleanup_rake_spec.rb @@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do end describe 'cleanup namespaces and repos' do + let(:gitlab_shell) { Gitlab::Shell.new } + let(:storage) { storages.keys.first } let(:storages) do { 'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage')) @@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do end before do - FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage')) allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) end after do - FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage')) + Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories end describe 'cleanup:repos' do before do - FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git')) - FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) + gitlab_shell.add_namespace(storage, 'broken/project.git') + gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git') end it 'moves it to an orphaned path' do - run_rake_task('gitlab:cleanup:repos') - repo_list = Dir['tmp/tests/default_storage/broken/*'] + now = Time.now + + Timecop.freeze(now) do + run_rake_task('gitlab:cleanup:repos') + repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0) - expect(repo_list.first).to include('+orphaned+') + expect(repo_list.last).to include("broken+orphaned+#{now.to_i}") + end end it 'ignores @hashed repos' do run_rake_task('gitlab:cleanup:repos') - expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy + expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true) end end describe 'cleanup:dirs' do it 'removes missing namespaces' do - FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git")) - FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git")) - allow(Namespace).to receive(:pluck).and_return('namespace_1') + gitlab_shell.add_namespace(storage, "namespace_1/project.git") + gitlab_shell.add_namespace(storage, "namespace_2/project.git") + allow(Namespace).to receive(:pluck).and_return(['namespace_1']) stub_env('REMOVE', 'true') run_rake_task('gitlab:cleanup:dirs') - expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy - expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey + expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true) + expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false) end it 'ignores @hashed directory' do - FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) + gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git') run_rake_task('gitlab:cleanup:dirs') - expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy + expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true) end end end |