summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZeger-Jan van de Weg <git@zjvandeweg.nl>2018-09-07 11:16:34 +0200
committerZeger-Jan van de Weg <git@zjvandeweg.nl>2018-09-07 13:58:31 +0200
commit3aedccb17a5dbee40b5a08014c92cab8ea11e9fb (patch)
treeacd5edd14ea25f8a7bb7db251f1de1dbaa673065
parentc380d3acebd181f13629a25d2e2acca46ffe1e00 (diff)
downloadgitlab-ce-3aedccb17a5dbee40b5a08014c92cab8ea11e9fb.tar.gz
Port cleanup tasks to use Gitaly
Rake tasks cleaning up the Git storage were still using direct disk access, which won't work if these aren't attached. To mitigate a migration issue was created. To port gitlab:cleanup:dirs, and gitlab:cleanup:repos, a new RPC was required, ListDirectories. This was implemented in Gitaly, through https://gitlab.com/gitlab-org/gitaly/merge_requests/868. To be able to use the new RPC the Gitaly server was bumped to v0.120. This is an RPC that will not use feature gates, as this doesn't scale on .com so there is no way to test it at scale. Futhermore, we _know_ it doesn't scale, but this might be a useful task for smaller instances. Lastly, the tests are slightly updated to also work when the disk isn't attached. Eventhough this is not planned, it was very little effort and thus I applied the boy scout rule. Closes https://gitlab.com/gitlab-org/gitaly/issues/954 Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
-rw-r--r--GITALY_SERVER_VERSION2
-rw-r--r--changelogs/unreleased/zj-cleanup-port-gitaly.yml5
-rw-r--r--lib/gitlab/gitaly_client/storage_service.rb8
-rw-r--r--lib/tasks/gitlab/cleanup.rake89
-rw-r--r--spec/tasks/gitlab/cleanup_rake_spec.rb35
5 files changed, 73 insertions, 66 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION
index f34340fc21c..99e0d1ed987 100644
--- a/GITALY_SERVER_VERSION
+++ b/GITALY_SERVER_VERSION
@@ -1 +1 @@
-0.119.0
+0.120.0
diff --git a/changelogs/unreleased/zj-cleanup-port-gitaly.yml b/changelogs/unreleased/zj-cleanup-port-gitaly.yml
new file mode 100644
index 00000000000..25e13b0fd50
--- /dev/null
+++ b/changelogs/unreleased/zj-cleanup-port-gitaly.yml
@@ -0,0 +1,5 @@
+---
+title: Administrative cleanup rake tasks now leverage Gitaly
+merge_request: 21588
+author:
+type: changed
diff --git a/lib/gitlab/gitaly_client/storage_service.rb b/lib/gitlab/gitaly_client/storage_service.rb
index eb0e910665b..3a26dd58ff4 100644
--- a/lib/gitlab/gitaly_client/storage_service.rb
+++ b/lib/gitlab/gitaly_client/storage_service.rb
@@ -5,6 +5,14 @@ module Gitlab
@storage = storage
end
+ # Returns all directories in the git storage directory, lexically ordered
+ def list_directories(depth: 1)
+ request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
+
+ GitalyClient.call(@storage, :storage_service, :list_directories, request)
+ .flat_map(&:paths)
+ end
+
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake
index c8a8863443e..e8ae5dfa540 100644
--- a/lib/tasks/gitlab/cleanup.rake
+++ b/lib/tasks/gitlab/cleanup.rake
@@ -1,40 +1,29 @@
-# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954
-#
+# frozen_string_literal: true
+require 'set'
+
namespace :gitlab do
namespace :cleanup do
- HASHED_REPOSITORY_NAME = '@hashed'.freeze
-
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
- warn_user_is_not_gitlab
+ namespaces = Set.new(Namespace.pluck(:path))
+ namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
- namespaces = Namespace.pluck(:path)
- namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
- Gitlab.config.repositories.storages.each do |name, repository_storage|
- git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
- all_dirs = Dir.glob(git_base_path + '/*')
+ Gitaly::Server.all.each do |server|
+ all_dirs = Gitlab::GitalyClient::StorageService
+ .new(server.storage)
+ .list_directories(depth: 0)
+ .reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
- puts git_base_path.color(:yellow)
puts "Looking for directories to remove... "
-
- all_dirs.reject! do |dir|
- # skip if git repo
- dir =~ /.git$/
- end
-
- all_dirs.reject! do |dir|
- dir_name = File.basename dir
-
- # skip if namespace present
- namespaces.include?(dir_name)
- end
-
all_dirs.each do |dir_path|
if remove?
- if FileUtils.rm_rf dir_path
- puts "Removed...#{dir_path}".color(:red)
- else
- puts "Cannot remove #{dir_path}".color(:red)
+ begin
+ Gitlab::GitalyClient::NamespaceService.new(server.storage)
+ .remove(dir_path)
+
+ puts "Removed...#{dir_path}"
+ rescue StandardError => e
+ puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end
else
puts "Can be removed: #{dir_path}".color(:red)
@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do
- warn_user_is_not_gitlab
-
move_suffix = "+orphaned+#{Time.now.to_i}"
- Gitlab.config.repositories.storages.each do |name, repository_storage|
- repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
-
- # Look for global repos (legacy, depth 1) and normal repos (depth 2)
- IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find|
- find.each_line do |path|
- path.chomp!
- repo_with_namespace = path
- .sub(repo_root, '')
- .sub(%r{^/*}, '')
- .chomp('.git')
- .chomp('.wiki')
-
- # TODO ignoring hashed repositories for now. But revisit to fully support
- # possible orphaned hashed repos
- next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
-
- new_path = path + move_suffix
- puts path.inspect + ' -> ' + new_path.inspect
- File.rename(path, new_path)
+
+ Gitaly::Server.all.each do |server|
+ Gitlab::GitalyClient::StorageService
+ .new(server.storage)
+ .list_directories
+ .each do |path|
+ repo_with_namespace = path.chomp('.git').chomp('.wiki')
+
+ # TODO ignoring hashed repositories for now. But revisit to fully support
+ # possible orphaned hashed repos
+ next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
+ next if Project.find_by_full_path(repo_with_namespace)
+
+ new_path = path + move_suffix
+ puts path.inspect + ' -> ' + new_path.inspect
+
+ begin
+ Gitlab::GitalyClient::NamespaceService
+ .new(server.storage)
+ .rename(path, new_path)
+ rescue StandardError => e
+ puts "Error occured while moving the repository: #{e.message}".color(:red)
end
end
end
diff --git a/spec/tasks/gitlab/cleanup_rake_spec.rb b/spec/tasks/gitlab/cleanup_rake_spec.rb
index cc2cca10f58..19794227d9f 100644
--- a/spec/tasks/gitlab/cleanup_rake_spec.rb
+++ b/spec/tasks/gitlab/cleanup_rake_spec.rb
@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end
describe 'cleanup namespaces and repos' do
+ let(:gitlab_shell) { Gitlab::Shell.new }
+ let(:storage) { storages.keys.first }
let(:storages) do
{
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end
before do
- FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end
after do
- FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage'))
+ Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end
describe 'cleanup:repos' do
before do
- FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git'))
- FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
+ gitlab_shell.add_namespace(storage, 'broken/project.git')
+ gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end
it 'moves it to an orphaned path' do
- run_rake_task('gitlab:cleanup:repos')
- repo_list = Dir['tmp/tests/default_storage/broken/*']
+ now = Time.now
+
+ Timecop.freeze(now) do
+ run_rake_task('gitlab:cleanup:repos')
+ repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
- expect(repo_list.first).to include('+orphaned+')
+ expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
+ end
end
it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos')
- expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
+ expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
describe 'cleanup:dirs' do
it 'removes missing namespaces' do
- FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git"))
- FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git"))
- allow(Namespace).to receive(:pluck).and_return('namespace_1')
+ gitlab_shell.add_namespace(storage, "namespace_1/project.git")
+ gitlab_shell.add_namespace(storage, "namespace_2/project.git")
+ allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs')
- expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy
- expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey
+ expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
+ expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end
it 'ignores @hashed directory' do
- FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
+ gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
run_rake_task('gitlab:cleanup:dirs')
- expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
+ expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
end