diff options
author | Robert Speicher <robert@gitlab.com> | 2017-04-03 21:26:41 +0000 |
---|---|---|
committer | Robert Speicher <robert@gitlab.com> | 2017-04-03 21:26:41 +0000 |
commit | d9aca741155f456711e37ea68a28a46d349af698 (patch) | |
tree | 96967336d0e3387de1d261517537e6b0196b9f97 | |
parent | e473a90700cad4dea6c883122f793b5d6453c7c8 (diff) | |
parent | a9af86250c5226505ccce03f4e4aae08fe976be7 (diff) | |
download | gitlab-ce-d9aca741155f456711e37ea68a28a46d349af698.tar.gz |
Merge branch 'fix-github-importer-slowness' into 'master'
Improve performance of GitHub importer
Closes #28183
See merge request !10273
-rw-r--r-- | app/models/concerns/importable.rb | 3 | ||||
-rw-r--r-- | app/models/concerns/issuable.rb | 3 | ||||
-rw-r--r-- | app/models/concerns/repository_mirroring.rb | 17 | ||||
-rw-r--r-- | app/models/merge_request.rb | 1 | ||||
-rw-r--r-- | app/models/project.rb | 4 | ||||
-rw-r--r-- | app/models/repository.rb | 28 | ||||
-rw-r--r-- | app/services/projects/import_service.rb | 31 | ||||
-rw-r--r-- | app/workers/repository_import_worker.rb | 1 | ||||
-rw-r--r-- | changelogs/unreleased/fix-github-importer-slowness.yml | 4 | ||||
-rw-r--r-- | lib/gitlab/github_import/importer.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/github_import/pull_request_formatter.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/shell.rb | 20 | ||||
-rw-r--r-- | spec/lib/gitlab/github_import/pull_request_formatter_spec.rb | 9 | ||||
-rw-r--r-- | spec/models/repository_spec.rb | 8 | ||||
-rw-r--r-- | spec/services/projects/import_service_spec.rb | 83 |
15 files changed, 159 insertions, 58 deletions
diff --git a/app/models/concerns/importable.rb b/app/models/concerns/importable.rb index 019ef755849..c9331eaf4cc 100644 --- a/app/models/concerns/importable.rb +++ b/app/models/concerns/importable.rb @@ -3,4 +3,7 @@ module Importable attr_accessor :importing alias_method :importing?, :importing + + attr_accessor :imported + alias_method :imported?, :imported end diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb index 4d54426b79e..b4dded7e27e 100644 --- a/app/models/concerns/issuable.rb +++ b/app/models/concerns/issuable.rb @@ -14,6 +14,7 @@ module Issuable include Awardable include Taskable include TimeTrackable + include Importable # This object is used to gather issuable meta data for displaying # upvotes, downvotes, notes and closing merge requests count for issues and merge requests @@ -99,7 +100,7 @@ module Issuable acts_as_paranoid after_save :update_assignee_cache_counts, if: :assignee_id_changed? - after_save :record_metrics + after_save :record_metrics, unless: :imported? def update_assignee_cache_counts # make sure we flush the cache for both the old *and* new assignees(if they exist) diff --git a/app/models/concerns/repository_mirroring.rb b/app/models/concerns/repository_mirroring.rb new file mode 100644 index 00000000000..fed336c29d6 --- /dev/null +++ b/app/models/concerns/repository_mirroring.rb @@ -0,0 +1,17 @@ +module RepositoryMirroring + def set_remote_as_mirror(name) + config = raw_repository.rugged.config + + # This is used to define repository as equivalent as "git clone --mirror" + config["remote.#{name}.fetch"] = 'refs/*:refs/*' + config["remote.#{name}.mirror"] = true + config["remote.#{name}.prune"] = true + end + + def fetch_mirror(remote, url) + add_remote(remote, url) + set_remote_as_mirror(remote) + fetch_remote(remote, forced: true) + remove_remote(remote) + end +end diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 5ff83944d8c..8d740adb771 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -3,7 +3,6 @@ class MergeRequest < ActiveRecord::Base include Issuable include Referable include Sortable - include Importable belongs_to :target_project, class_name: "Project" belongs_to :source_project, class_name: "Project" diff --git a/app/models/project.rb b/app/models/project.rb index f1bba56d32c..83660d8c431 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -551,6 +551,10 @@ class Project < ActiveRecord::Base import_type == 'gitea' end + def github_import? + import_type == 'github' + end + def check_limit unless creator.can_create_project? || namespace.kind == 'group' projects_limit = creator.projects_limit diff --git a/app/models/repository.rb b/app/models/repository.rb index 596650353fc..6b2383b73eb 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -2,6 +2,7 @@ require 'securerandom' class Repository include Gitlab::ShellAdapter + include RepositoryMirroring attr_accessor :path_with_namespace, :project @@ -64,7 +65,7 @@ class Repository # Return absolute path to repository def path_to_repo @path_to_repo ||= File.expand_path( - File.join(@project.repository_storage_path, path_with_namespace + ".git") + File.join(repository_storage_path, path_with_namespace + ".git") ) end @@ -401,10 +402,6 @@ class Repository expire_tags_cache end - def before_import - expire_content_cache - end - # Runs code after the HEAD of a repository is changed. def after_change_head expire_method_caches(METHOD_CACHES_FOR_FILE_TYPES.keys) @@ -1033,6 +1030,23 @@ class Repository rugged.references.delete(tmp_ref) if tmp_ref end + def add_remote(name, url) + raw_repository.remote_add(name, url) + rescue Rugged::ConfigError + raw_repository.remote_update(name, url: url) + end + + def remove_remote(name) + raw_repository.remote_delete(name) + true + rescue Rugged::ConfigError + false + end + + def fetch_remote(remote, forced: false, no_tags: false) + gitlab_shell.fetch_remote(repository_storage_path, path_with_namespace, remote, forced: forced, no_tags: no_tags) + end + def fetch_ref(source_path, source_ref, target_ref) args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref}) Gitlab::Popen.popen(args, path_to_repo) @@ -1150,4 +1164,8 @@ class Repository def repository_event(event, tags = {}) Gitlab::Metrics.add_event(event, { path: path_with_namespace }.merge(tags)) end + + def repository_storage_path + @project.repository_storage_path + end end diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb index d484a96f785..4c72d5e117d 100644 --- a/app/services/projects/import_service.rb +++ b/app/services/projects/import_service.rb @@ -11,7 +11,7 @@ module Projects success rescue => e - error(e.message) + error("Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}") end private @@ -32,23 +32,40 @@ module Projects end def import_repository + raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url) + begin - raise Error, "Blocked import URL." if Gitlab::UrlBlocker.blocked_url?(project.import_url) - gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url) - rescue => e + if project.github_import? || project.gitea_import? + fetch_repository + else + clone_repository + end + rescue Gitlab::Shell::Error => e # Expire cache to prevent scenarios such as: # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true # 2. Retried import, repo is broken or not imported but +exists?+ still returns true - project.repository.before_import if project.repository_exists? + project.repository.expire_content_cache if project.repository_exists? - raise Error, "Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}" + raise Error, e.message end end + def clone_repository + gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url) + end + + def fetch_repository + project.create_repository + project.repository.add_remote(project.import_type, project.import_url) + project.repository.set_remote_as_mirror(project.import_type) + project.repository.fetch_remote(project.import_type, forced: true) + project.repository.remove_remote(project.import_type) + end + def import_data return unless has_importer? - project.repository.before_import unless project.gitlab_project_import? + project.repository.expire_content_cache unless project.gitlab_project_import? unless importer.execute raise Error, 'The remote data could not be imported.' diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb index c8a77e21c12..68a6fd76e70 100644 --- a/app/workers/repository_import_worker.rb +++ b/app/workers/repository_import_worker.rb @@ -1,6 +1,5 @@ class RepositoryImportWorker include Sidekiq::Worker - include Gitlab::ShellAdapter include DedicatedSidekiqQueue attr_accessor :project, :current_user diff --git a/changelogs/unreleased/fix-github-importer-slowness.yml b/changelogs/unreleased/fix-github-importer-slowness.yml new file mode 100644 index 00000000000..c1f8d0e02d5 --- /dev/null +++ b/changelogs/unreleased/fix-github-importer-slowness.yml @@ -0,0 +1,4 @@ +--- +title: Improve performance of GitHub importer for large repositories. +merge_request: 10273 +author: diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb index eea4a91f17d..a8c0b47e786 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/github_import/importer.rb @@ -157,7 +157,7 @@ module Gitlab end def restore_source_branch(pull_request) - project.repository.fetch_ref(repo_url, "pull/#{pull_request.number}/head", pull_request.source_branch_name) + project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha) end def restore_target_branch(pull_request) diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb index 38660a7ccca..150afa31432 100644 --- a/lib/gitlab/github_import/pull_request_formatter.rb +++ b/lib/gitlab/github_import/pull_request_formatter.rb @@ -20,7 +20,8 @@ module Gitlab author_id: author_id, assignee_id: assignee_id, created_at: raw_data.created_at, - updated_at: raw_data.updated_at + updated_at: raw_data.updated_at, + imported: true } end diff --git a/lib/gitlab/shell.rb b/lib/gitlab/shell.rb index 9864a9c7f1a..b631ef11ce7 100644 --- a/lib/gitlab/shell.rb +++ b/lib/gitlab/shell.rb @@ -88,6 +88,26 @@ module Gitlab true end + # Fetch remote for repository + # + # name - project path with namespace + # remote - remote name + # forced - should we use --force flag? + # no_tags - should we use --no-tags flag? + # + # Ex. + # fetch_remote("gitlab/gitlab-ci", "upstream") + # + def fetch_remote(storage, name, remote, forced: false, no_tags: false) + args = [gitlab_shell_projects_path, 'fetch-remote', storage, "#{name}.git", remote, '800'] + args << '--force' if forced + args << '--no-tags' if no_tags + + output, status = Popen.popen(args) + raise Error, output unless status.zero? + true + end + # Move repository # storage - project's storage path # path - project path with namespace diff --git a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb index 9b9f7e4d34e..b7c59918a76 100644 --- a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb +++ b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb @@ -64,7 +64,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do author_id: project.creator_id, assignee_id: nil, created_at: created_at, - updated_at: updated_at + updated_at: updated_at, + imported: true } expect(pull_request.attributes).to eq(expected) @@ -90,7 +91,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do author_id: project.creator_id, assignee_id: nil, created_at: created_at, - updated_at: updated_at + updated_at: updated_at, + imported: true } expect(pull_request.attributes).to eq(expected) @@ -117,7 +119,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do author_id: project.creator_id, assignee_id: nil, created_at: created_at, - updated_at: updated_at + updated_at: updated_at, + imported: true } expect(pull_request.attributes).to eq(expected) diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index df742ee8084..b16848f6f4f 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1293,14 +1293,6 @@ describe Repository, models: true do end end - describe '#before_import' do - it 'flushes the repository caches' do - expect(repository).to receive(:expire_content_cache) - - repository.before_import - end - end - describe '#after_import' do it 'flushes and builds the cache' do expect(repository).to receive(:expire_content_cache) diff --git a/spec/services/projects/import_service_spec.rb b/spec/services/projects/import_service_spec.rb index e5917bb0b7a..09cfa36b3b9 100644 --- a/spec/services/projects/import_service_spec.rb +++ b/spec/services/projects/import_service_spec.rb @@ -26,30 +26,59 @@ describe Projects::ImportService, services: true do result = subject.execute expect(result[:status]).to eq :error - expect(result[:message]).to eq 'The repository could not be created.' + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The repository could not be created." end end context 'with known url' do before do project.import_url = 'https://github.com/vim/vim.git' + project.import_type = 'github' end - it 'succeeds if repository import is successfully' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) + context 'with a Github repository' do + it 'succeeds if repository import is successfully' do + expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) + expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) - result = subject.execute + result = subject.execute - expect(result[:status]).to eq :success + expect(result[:status]).to eq :success + end + + it 'fails if repository import fails' do + expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) + + result = subject.execute + + expect(result[:status]).to eq :error + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository" + end end - it 'fails if repository import fails' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) + context 'with a non Github repository' do + before do + project.import_url = 'https://bitbucket.org/vim/vim.git' + project.import_type = 'bitbucket' + end - result = subject.execute + it 'succeeds if repository import is successfully' do + expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true) + expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true) - expect(result[:status]).to eq :error - expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository" + result = subject.execute + + expect(result[:status]).to eq :success + end + + it 'fails if repository import fails' do + expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) + + result = subject.execute + + expect(result[:status]).to eq :error + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository" + end end end @@ -64,8 +93,8 @@ describe Projects::ImportService, services: true do end it 'succeeds if importer succeeds' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) + allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) + allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) result = subject.execute @@ -73,48 +102,42 @@ describe Projects::ImportService, services: true do end it 'flushes various caches' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository). - with(project.repository_storage_path, project.path_with_namespace, project.import_url). + allow_any_instance_of(Repository).to receive(:fetch_remote). and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute). + allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute). and_return(true) - expect_any_instance_of(Repository).to receive(:expire_emptiness_caches). - and_call_original - - expect_any_instance_of(Repository).to receive(:expire_exists_cache). - and_call_original + expect_any_instance_of(Repository).to receive(:expire_content_cache) subject.execute end it 'fails if importer fails' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false) + allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) + allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false) result = subject.execute expect(result[:status]).to eq :error - expect(result[:message]).to eq 'The remote data could not be imported.' + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported." end it 'fails if importer raise an error' do - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API')) + allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true) + allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API')) result = subject.execute expect(result[:status]).to eq :error - expect(result[:message]).to eq 'Github: failed to connect API' + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Github: failed to connect API" end - it 'expires existence cache after error' do + it 'expires content cache after error' do allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true) - expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) - expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original - expect_any_instance_of(Repository).to receive(:expire_exists_cache).and_call_original + expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) + expect_any_instance_of(Repository).to receive(:expire_content_cache) subject.execute end |