summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Speicher <robert@gitlab.com>2017-04-03 21:26:41 +0000
committerRobert Speicher <robert@gitlab.com>2017-04-03 21:26:41 +0000
commitd9aca741155f456711e37ea68a28a46d349af698 (patch)
tree96967336d0e3387de1d261517537e6b0196b9f97
parente473a90700cad4dea6c883122f793b5d6453c7c8 (diff)
parenta9af86250c5226505ccce03f4e4aae08fe976be7 (diff)
downloadgitlab-ce-d9aca741155f456711e37ea68a28a46d349af698.tar.gz
Merge branch 'fix-github-importer-slowness' into 'master'
Improve performance of GitHub importer Closes #28183 See merge request !10273
-rw-r--r--app/models/concerns/importable.rb3
-rw-r--r--app/models/concerns/issuable.rb3
-rw-r--r--app/models/concerns/repository_mirroring.rb17
-rw-r--r--app/models/merge_request.rb1
-rw-r--r--app/models/project.rb4
-rw-r--r--app/models/repository.rb28
-rw-r--r--app/services/projects/import_service.rb31
-rw-r--r--app/workers/repository_import_worker.rb1
-rw-r--r--changelogs/unreleased/fix-github-importer-slowness.yml4
-rw-r--r--lib/gitlab/github_import/importer.rb2
-rw-r--r--lib/gitlab/github_import/pull_request_formatter.rb3
-rw-r--r--lib/gitlab/shell.rb20
-rw-r--r--spec/lib/gitlab/github_import/pull_request_formatter_spec.rb9
-rw-r--r--spec/models/repository_spec.rb8
-rw-r--r--spec/services/projects/import_service_spec.rb83
15 files changed, 159 insertions, 58 deletions
diff --git a/app/models/concerns/importable.rb b/app/models/concerns/importable.rb
index 019ef755849..c9331eaf4cc 100644
--- a/app/models/concerns/importable.rb
+++ b/app/models/concerns/importable.rb
@@ -3,4 +3,7 @@ module Importable
attr_accessor :importing
alias_method :importing?, :importing
+
+ attr_accessor :imported
+ alias_method :imported?, :imported
end
diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb
index 4d54426b79e..b4dded7e27e 100644
--- a/app/models/concerns/issuable.rb
+++ b/app/models/concerns/issuable.rb
@@ -14,6 +14,7 @@ module Issuable
include Awardable
include Taskable
include TimeTrackable
+ include Importable
# This object is used to gather issuable meta data for displaying
# upvotes, downvotes, notes and closing merge requests count for issues and merge requests
@@ -99,7 +100,7 @@ module Issuable
acts_as_paranoid
after_save :update_assignee_cache_counts, if: :assignee_id_changed?
- after_save :record_metrics
+ after_save :record_metrics, unless: :imported?
def update_assignee_cache_counts
# make sure we flush the cache for both the old *and* new assignees(if they exist)
diff --git a/app/models/concerns/repository_mirroring.rb b/app/models/concerns/repository_mirroring.rb
new file mode 100644
index 00000000000..fed336c29d6
--- /dev/null
+++ b/app/models/concerns/repository_mirroring.rb
@@ -0,0 +1,17 @@
+module RepositoryMirroring
+ def set_remote_as_mirror(name)
+ config = raw_repository.rugged.config
+
+ # This is used to define repository as equivalent as "git clone --mirror"
+ config["remote.#{name}.fetch"] = 'refs/*:refs/*'
+ config["remote.#{name}.mirror"] = true
+ config["remote.#{name}.prune"] = true
+ end
+
+ def fetch_mirror(remote, url)
+ add_remote(remote, url)
+ set_remote_as_mirror(remote)
+ fetch_remote(remote, forced: true)
+ remove_remote(remote)
+ end
+end
diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb
index 5ff83944d8c..8d740adb771 100644
--- a/app/models/merge_request.rb
+++ b/app/models/merge_request.rb
@@ -3,7 +3,6 @@ class MergeRequest < ActiveRecord::Base
include Issuable
include Referable
include Sortable
- include Importable
belongs_to :target_project, class_name: "Project"
belongs_to :source_project, class_name: "Project"
diff --git a/app/models/project.rb b/app/models/project.rb
index f1bba56d32c..83660d8c431 100644
--- a/app/models/project.rb
+++ b/app/models/project.rb
@@ -551,6 +551,10 @@ class Project < ActiveRecord::Base
import_type == 'gitea'
end
+ def github_import?
+ import_type == 'github'
+ end
+
def check_limit
unless creator.can_create_project? || namespace.kind == 'group'
projects_limit = creator.projects_limit
diff --git a/app/models/repository.rb b/app/models/repository.rb
index 596650353fc..6b2383b73eb 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -2,6 +2,7 @@ require 'securerandom'
class Repository
include Gitlab::ShellAdapter
+ include RepositoryMirroring
attr_accessor :path_with_namespace, :project
@@ -64,7 +65,7 @@ class Repository
# Return absolute path to repository
def path_to_repo
@path_to_repo ||= File.expand_path(
- File.join(@project.repository_storage_path, path_with_namespace + ".git")
+ File.join(repository_storage_path, path_with_namespace + ".git")
)
end
@@ -401,10 +402,6 @@ class Repository
expire_tags_cache
end
- def before_import
- expire_content_cache
- end
-
# Runs code after the HEAD of a repository is changed.
def after_change_head
expire_method_caches(METHOD_CACHES_FOR_FILE_TYPES.keys)
@@ -1033,6 +1030,23 @@ class Repository
rugged.references.delete(tmp_ref) if tmp_ref
end
+ def add_remote(name, url)
+ raw_repository.remote_add(name, url)
+ rescue Rugged::ConfigError
+ raw_repository.remote_update(name, url: url)
+ end
+
+ def remove_remote(name)
+ raw_repository.remote_delete(name)
+ true
+ rescue Rugged::ConfigError
+ false
+ end
+
+ def fetch_remote(remote, forced: false, no_tags: false)
+ gitlab_shell.fetch_remote(repository_storage_path, path_with_namespace, remote, forced: forced, no_tags: no_tags)
+ end
+
def fetch_ref(source_path, source_ref, target_ref)
args = %W(#{Gitlab.config.git.bin_path} fetch --no-tags -f #{source_path} #{source_ref}:#{target_ref})
Gitlab::Popen.popen(args, path_to_repo)
@@ -1150,4 +1164,8 @@ class Repository
def repository_event(event, tags = {})
Gitlab::Metrics.add_event(event, { path: path_with_namespace }.merge(tags))
end
+
+ def repository_storage_path
+ @project.repository_storage_path
+ end
end
diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb
index d484a96f785..4c72d5e117d 100644
--- a/app/services/projects/import_service.rb
+++ b/app/services/projects/import_service.rb
@@ -11,7 +11,7 @@ module Projects
success
rescue => e
- error(e.message)
+ error("Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}")
end
private
@@ -32,23 +32,40 @@ module Projects
end
def import_repository
+ raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)
+
begin
- raise Error, "Blocked import URL." if Gitlab::UrlBlocker.blocked_url?(project.import_url)
- gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url)
- rescue => e
+ if project.github_import? || project.gitea_import?
+ fetch_repository
+ else
+ clone_repository
+ end
+ rescue Gitlab::Shell::Error => e
# Expire cache to prevent scenarios such as:
# 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
# 2. Retried import, repo is broken or not imported but +exists?+ still returns true
- project.repository.before_import if project.repository_exists?
+ project.repository.expire_content_cache if project.repository_exists?
- raise Error, "Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}"
+ raise Error, e.message
end
end
+ def clone_repository
+ gitlab_shell.import_repository(project.repository_storage_path, project.path_with_namespace, project.import_url)
+ end
+
+ def fetch_repository
+ project.create_repository
+ project.repository.add_remote(project.import_type, project.import_url)
+ project.repository.set_remote_as_mirror(project.import_type)
+ project.repository.fetch_remote(project.import_type, forced: true)
+ project.repository.remove_remote(project.import_type)
+ end
+
def import_data
return unless has_importer?
- project.repository.before_import unless project.gitlab_project_import?
+ project.repository.expire_content_cache unless project.gitlab_project_import?
unless importer.execute
raise Error, 'The remote data could not be imported.'
diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb
index c8a77e21c12..68a6fd76e70 100644
--- a/app/workers/repository_import_worker.rb
+++ b/app/workers/repository_import_worker.rb
@@ -1,6 +1,5 @@
class RepositoryImportWorker
include Sidekiq::Worker
- include Gitlab::ShellAdapter
include DedicatedSidekiqQueue
attr_accessor :project, :current_user
diff --git a/changelogs/unreleased/fix-github-importer-slowness.yml b/changelogs/unreleased/fix-github-importer-slowness.yml
new file mode 100644
index 00000000000..c1f8d0e02d5
--- /dev/null
+++ b/changelogs/unreleased/fix-github-importer-slowness.yml
@@ -0,0 +1,4 @@
+---
+title: Improve performance of GitHub importer for large repositories.
+merge_request: 10273
+author:
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index eea4a91f17d..a8c0b47e786 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -157,7 +157,7 @@ module Gitlab
end
def restore_source_branch(pull_request)
- project.repository.fetch_ref(repo_url, "pull/#{pull_request.number}/head", pull_request.source_branch_name)
+ project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
end
def restore_target_branch(pull_request)
diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb
index 38660a7ccca..150afa31432 100644
--- a/lib/gitlab/github_import/pull_request_formatter.rb
+++ b/lib/gitlab/github_import/pull_request_formatter.rb
@@ -20,7 +20,8 @@ module Gitlab
author_id: author_id,
assignee_id: assignee_id,
created_at: raw_data.created_at,
- updated_at: raw_data.updated_at
+ updated_at: raw_data.updated_at,
+ imported: true
}
end
diff --git a/lib/gitlab/shell.rb b/lib/gitlab/shell.rb
index 9864a9c7f1a..b631ef11ce7 100644
--- a/lib/gitlab/shell.rb
+++ b/lib/gitlab/shell.rb
@@ -88,6 +88,26 @@ module Gitlab
true
end
+ # Fetch remote for repository
+ #
+ # name - project path with namespace
+ # remote - remote name
+ # forced - should we use --force flag?
+ # no_tags - should we use --no-tags flag?
+ #
+ # Ex.
+ # fetch_remote("gitlab/gitlab-ci", "upstream")
+ #
+ def fetch_remote(storage, name, remote, forced: false, no_tags: false)
+ args = [gitlab_shell_projects_path, 'fetch-remote', storage, "#{name}.git", remote, '800']
+ args << '--force' if forced
+ args << '--no-tags' if no_tags
+
+ output, status = Popen.popen(args)
+ raise Error, output unless status.zero?
+ true
+ end
+
# Move repository
# storage - project's storage path
# path - project path with namespace
diff --git a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
index 9b9f7e4d34e..b7c59918a76 100644
--- a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
+++ b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
@@ -64,7 +64,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id,
assignee_id: nil,
created_at: created_at,
- updated_at: updated_at
+ updated_at: updated_at,
+ imported: true
}
expect(pull_request.attributes).to eq(expected)
@@ -90,7 +91,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id,
assignee_id: nil,
created_at: created_at,
- updated_at: updated_at
+ updated_at: updated_at,
+ imported: true
}
expect(pull_request.attributes).to eq(expected)
@@ -117,7 +119,8 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
author_id: project.creator_id,
assignee_id: nil,
created_at: created_at,
- updated_at: updated_at
+ updated_at: updated_at,
+ imported: true
}
expect(pull_request.attributes).to eq(expected)
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index df742ee8084..b16848f6f4f 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -1293,14 +1293,6 @@ describe Repository, models: true do
end
end
- describe '#before_import' do
- it 'flushes the repository caches' do
- expect(repository).to receive(:expire_content_cache)
-
- repository.before_import
- end
- end
-
describe '#after_import' do
it 'flushes and builds the cache' do
expect(repository).to receive(:expire_content_cache)
diff --git a/spec/services/projects/import_service_spec.rb b/spec/services/projects/import_service_spec.rb
index e5917bb0b7a..09cfa36b3b9 100644
--- a/spec/services/projects/import_service_spec.rb
+++ b/spec/services/projects/import_service_spec.rb
@@ -26,30 +26,59 @@ describe Projects::ImportService, services: true do
result = subject.execute
expect(result[:status]).to eq :error
- expect(result[:message]).to eq 'The repository could not be created.'
+ expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The repository could not be created."
end
end
context 'with known url' do
before do
project.import_url = 'https://github.com/vim/vim.git'
+ project.import_type = 'github'
end
- it 'succeeds if repository import is successfully' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true)
+ context 'with a Github repository' do
+ it 'succeeds if repository import is successfully' do
+ expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
+ expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
- result = subject.execute
+ result = subject.execute
- expect(result[:status]).to eq :success
+ expect(result[:status]).to eq :success
+ end
+
+ it 'fails if repository import fails' do
+ expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
+
+ result = subject.execute
+
+ expect(result[:status]).to eq :error
+ expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository"
+ end
end
- it 'fails if repository import fails' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
+ context 'with a non Github repository' do
+ before do
+ project.import_url = 'https://bitbucket.org/vim/vim.git'
+ project.import_type = 'bitbucket'
+ end
- result = subject.execute
+ it 'succeeds if repository import is successfully' do
+ expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true)
+ expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true)
- expect(result[:status]).to eq :error
- expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository"
+ result = subject.execute
+
+ expect(result[:status]).to eq :success
+ end
+
+ it 'fails if repository import fails' do
+ expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
+
+ result = subject.execute
+
+ expect(result[:status]).to eq :error
+ expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Failed to import the repository"
+ end
end
end
@@ -64,8 +93,8 @@ describe Projects::ImportService, services: true do
end
it 'succeeds if importer succeeds' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true)
- expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
+ allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
+ allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
result = subject.execute
@@ -73,48 +102,42 @@ describe Projects::ImportService, services: true do
end
it 'flushes various caches' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).
- with(project.repository_storage_path, project.path_with_namespace, project.import_url).
+ allow_any_instance_of(Repository).to receive(:fetch_remote).
and_return(true)
- expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).
+ allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).
and_return(true)
- expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).
- and_call_original
-
- expect_any_instance_of(Repository).to receive(:expire_exists_cache).
- and_call_original
+ expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute
end
it 'fails if importer fails' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true)
- expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false)
+ allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
+ allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false)
result = subject.execute
expect(result[:status]).to eq :error
- expect(result[:message]).to eq 'The remote data could not be imported.'
+ expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported."
end
it 'fails if importer raise an error' do
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_return(true)
- expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
+ allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true)
+ allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
result = subject.execute
expect(result[:status]).to eq :error
- expect(result[:message]).to eq 'Github: failed to connect API'
+ expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - Github: failed to connect API"
end
- it 'expires existence cache after error' do
+ it 'expires content cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true)
- expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
- expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original
- expect_any_instance_of(Repository).to receive(:expire_exists_cache).and_call_original
+ expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
+ expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute
end