diff options
author | Douwe Maan <douwe@gitlab.com> | 2016-07-21 21:44:53 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2016-07-21 21:44:53 +0000 |
commit | d2598f6273d4a714134c26ee520b99a40579e8fa (patch) | |
tree | 49a147aa44b3df4b664c1aecdcbcbb52b88130e9 | |
parent | 95c4825a456a4d1df8dba1def8735203368356c9 (diff) | |
parent | 3b2c17a9a203aa8e82a3367a77d28eacaa5a0ab7 (diff) | |
download | gitlab-ce-d2598f6273d4a714134c26ee520b99a40579e8fa.tar.gz |
Merge branch 'fix-data-integrity-issue-with-repository-downloads-path' into 'master'
Avoid data-integrity issue when cleaning up repository archive cache
## What does this MR do?
Sets the default value for `repository_downloads_path` if someone has it configured incorrectly, and it points to the path where repositories are stored. It's also replace invocation of `find` with Ruby code that matches old cached files in a better, and safe way to avoid data-integrity issues.
## Why was this MR needed?
The `repository_downloads_path` is used by the `RepositoryArchiveCacheWorker` to remove outdated repository archives, if it points to the wrong directory can cause some data-integrity issue.
## What are the relevant issue numbers?
Closes #14222
See merge request !5285
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | app/models/repository.rb | 10 | ||||
-rw-r--r-- | app/services/repository_archive_clean_up_service.rb | 33 | ||||
-rw-r--r-- | app/workers/repository_archive_cache_worker.rb | 2 | ||||
-rw-r--r-- | config/gitlab.yml.example | 4 | ||||
-rw-r--r-- | config/initializers/1_settings.rb | 16 | ||||
-rw-r--r-- | spec/models/repository_spec.rb | 24 | ||||
-rw-r--r-- | spec/services/repository_archive_clean_up_service_spec.rb | 81 |
8 files changed, 133 insertions, 38 deletions
diff --git a/CHANGELOG b/CHANGELOG index 8fe2caac7bd..938e3a496a6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -22,6 +22,7 @@ v 8.10.0 (unreleased) - Delete award emoji when deleting a user - Remove pinTo from Flash and make inline flash messages look nicer. !4854 (winniehell) - Add an API for downloading latest successful build from a particular branch or tag. !5347 + - Avoid data-integrity issue when cleaning up repository archive cache. - Add link to profile to commit avatar. !5163 (winniehell) - Wrap code blocks on Activies and Todos page. !4783 (winniehell) - Align flash messages with left side of page content. !4959 (winniehell) diff --git a/app/models/repository.rb b/app/models/repository.rb index 511df2d67c6..a6580e85498 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -11,16 +11,6 @@ class Repository attr_accessor :path_with_namespace, :project - def self.clean_old_archives - Gitlab::Metrics.measure(:clean_old_archives) do - repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path - - return unless File.directory?(repository_downloads_path) - - Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete)) - end - end - def initialize(path_with_namespace, project) @path_with_namespace = path_with_namespace @project = project diff --git a/app/services/repository_archive_clean_up_service.rb b/app/services/repository_archive_clean_up_service.rb new file mode 100644 index 00000000000..0b56b09738d --- /dev/null +++ b/app/services/repository_archive_clean_up_service.rb @@ -0,0 +1,33 @@ +class RepositoryArchiveCleanUpService + LAST_MODIFIED_TIME_IN_MINUTES = 120 + + def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES) + @mmin = mmin + @path = Gitlab.config.gitlab.repository_downloads_path + end + + def execute + Gitlab::Metrics.measure(:repository_archive_clean_up) do + return unless File.directory?(path) + + clean_up_old_archives + clean_up_empty_directories + end + end + + private + + attr_reader :mmin, :path + + def clean_up_old_archives + run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete)) + end + + def clean_up_empty_directories + run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete)) + end + + def run(cmd) + Gitlab::Popen.popen(cmd) + end +end diff --git a/app/workers/repository_archive_cache_worker.rb b/app/workers/repository_archive_cache_worker.rb index 47c5a670ed4..a2e49c61f59 100644 --- a/app/workers/repository_archive_cache_worker.rb +++ b/app/workers/repository_archive_cache_worker.rb @@ -4,6 +4,6 @@ class RepositoryArchiveCacheWorker sidekiq_options queue: :default def perform - Repository.clean_old_archives + RepositoryArchiveCleanUpService.new.execute end end diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 325eca72862..1470a6e2550 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -106,8 +106,8 @@ production: &base ## Repository downloads directory # When a user clicks e.g. 'Download zip' on a project, a temporary zip file is created in the following directory. - # The default is 'tmp/repositories' relative to the root of the Rails app. - # repository_downloads_path: tmp/repositories + # The default is 'shared/cache/archive/' relative to the root of the Rails app. + # repository_downloads_path: shared/cache/archive/ ## Reply by email # Allow users to comment on issues and merge requests by replying to notification emails. diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 693507e0bec..86f55210487 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -211,7 +211,6 @@ Settings.gitlab.default_projects_features['snippets'] = false if Setti Settings.gitlab.default_projects_features['builds'] = true if Settings.gitlab.default_projects_features['builds'].nil? Settings.gitlab.default_projects_features['container_registry'] = true if Settings.gitlab.default_projects_features['container_registry'].nil? Settings.gitlab.default_projects_features['visibility_level'] = Settings.send(:verify_constant, Gitlab::VisibilityLevel, Settings.gitlab.default_projects_features['visibility_level'], Gitlab::VisibilityLevel::PRIVATE) -Settings.gitlab['repository_downloads_path'] = File.join(Settings.shared['path'], 'cache/archive') if Settings.gitlab['repository_downloads_path'].nil? Settings.gitlab['domain_whitelist'] ||= [] Settings.gitlab['import_sources'] ||= %w[github bitbucket gitlab gitorious google_code fogbugz git gitlab_project] Settings.gitlab['trusted_proxies'] ||= [] @@ -316,6 +315,21 @@ Settings.repositories['storages'] ||= {} Settings.repositories.storages['default'] ||= Settings.gitlab_shell['repos_path'] || Settings.gitlab['user_home'] + '/repositories/' # +# The repository_downloads_path is used to remove outdated repository +# archives, if someone has it configured incorrectly, and it points +# to the path where repositories are stored this can cause some +# data-integrity issue. In this case, we sets it to the default +# repository_downloads_path value. +# +repositories_storages_path = Settings.repositories.storages.values +repository_downloads_path = Settings.gitlab['repository_downloads_path'].to_s.gsub(/\/$/, '') +repository_downloads_full_path = File.expand_path(repository_downloads_path, Settings.gitlab['user_home']) + +if repository_downloads_path.blank? || repositories_storages_path.any? { |path| [repository_downloads_path, repository_downloads_full_path].include?(path.gsub(/\/$/, '')) } + Settings.gitlab['repository_downloads_path'] = File.join(Settings.shared['path'], 'cache/archive') +end + +# # Backup # Settings['backup'] ||= Settingslogic.new({}) diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 59c5732c075..38b0c345b48 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1172,30 +1172,6 @@ describe Repository, models: true do end end - describe '.clean_old_archives' do - let(:path) { Gitlab.config.gitlab.repository_downloads_path } - - context 'when the downloads directory does not exist' do - it 'does not remove any archives' do - expect(File).to receive(:directory?).with(path).and_return(false) - - expect(Gitlab::Popen).not_to receive(:popen) - - described_class.clean_old_archives - end - end - - context 'when the downloads directory exists' do - it 'removes old archives' do - expect(File).to receive(:directory?).with(path).and_return(true) - - expect(Gitlab::Popen).to receive(:popen) - - described_class.clean_old_archives - end - end - end - describe "#keep_around" do it "stores a reference to the specified commit sha so it isn't garbage collected" do repository.keep_around(sample_commit.id) diff --git a/spec/services/repository_archive_clean_up_service_spec.rb b/spec/services/repository_archive_clean_up_service_spec.rb new file mode 100644 index 00000000000..842585f9e54 --- /dev/null +++ b/spec/services/repository_archive_clean_up_service_spec.rb @@ -0,0 +1,81 @@ +require 'spec_helper' + +describe RepositoryArchiveCleanUpService, services: true do + describe '#execute' do + subject(:service) { described_class.new } + + context 'when the downloads directory does not exist' do + it 'does not remove any archives' do + path = '/invalid/path/' + stub_repository_downloads_path(path) + + expect(File).to receive(:directory?).with(path).and_return(false) + expect(service).not_to receive(:clean_up_old_archives) + expect(service).not_to receive(:clean_up_empty_directories) + + service.execute + end + end + + context 'when the downloads directory exists' do + shared_examples 'invalid archive files' do |dirname, extensions, mtime| + it 'does not remove files and directoy' do + in_directory_with_files(dirname, extensions, mtime) do |dir, files| + service.execute + + files.each { |file| expect(File.exist?(file)).to eq true } + expect(File.directory?(dir)).to eq true + end + end + end + + it 'removes files older than 2 hours that matches valid archive extensions' do + in_directory_with_files('sample.git', %w[tar tar.bz2 tar.gz zip], 2.hours) do |dir, files| + service.execute + + files.each { |file| expect(File.exist?(file)).to eq false } + expect(File.directory?(dir)).to eq false + end + end + + context 'with files older than 2 hours that does not matches valid archive extensions' do + it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 2.hours + end + + context 'with files older than 2 hours inside invalid directories' do + it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours + end + + context 'with files newer than 2 hours that matches valid archive extensions' do + it_behaves_like 'invalid archive files', 'sample.git', %w[tar tar.bz2 tar.gz zip], 1.hour + end + + context 'with files newer than 2 hours that does not matches valid archive extensions' do + it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 1.hour + end + + context 'with files newer than 2 hours inside invalid directories' do + it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour + end + end + + def in_directory_with_files(dirname, extensions, mtime) + Dir.mktmpdir do |tmpdir| + stub_repository_downloads_path(tmpdir) + dir = File.join(tmpdir, dirname) + files = create_temporary_files(dir, extensions, mtime) + + yield(dir, files) + end + end + + def stub_repository_downloads_path(path) + allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path) + end + + def create_temporary_files(dir, extensions, mtime) + FileUtils.mkdir_p(dir) + FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime) + end + end +end |