From 4b785df27baa78b2ebe51e66de25edcb8566ab2d Mon Sep 17 00:00:00 2001 From: Nick Thomas Date: Mon, 11 Dec 2017 17:52:07 +0000 Subject: Import gitlab_projects.rb from gitlab-shell By importing this Ruby code into gitlab-rails (and gitaly-ruby), we avoid 200ms of startup time for each gitlab_projects subprocess we are eliminating. By not having a gitlab_projects subprocess between gitlab-rails / sidekiq and any git subprocesses (e.g. for fork_project, fetch_remote, etc, calls), we can also manage these git processes more cleanly, and avoid sending SIGKILL to them --- lib/gitlab/git/gitlab_projects.rb | 258 ++++++++++++++++++++++++++++++++++++++ lib/gitlab/git/repository.rb | 29 ++++- lib/gitlab/shell.rb | 113 ++++++++++++----- 3 files changed, 365 insertions(+), 35 deletions(-) create mode 100644 lib/gitlab/git/gitlab_projects.rb (limited to 'lib') diff --git a/lib/gitlab/git/gitlab_projects.rb b/lib/gitlab/git/gitlab_projects.rb new file mode 100644 index 00000000000..d948d7895ed --- /dev/null +++ b/lib/gitlab/git/gitlab_projects.rb @@ -0,0 +1,258 @@ +module Gitlab + module Git + class GitlabProjects + include Gitlab::Git::Popen + + # Absolute path to directory where repositories are stored. + # Example: /home/git/repositories + attr_reader :shard_path + + # Relative path is a directory name for repository with .git at the end. + # Example: gitlab-org/gitlab-test.git + attr_reader :repository_relative_path + + # Absolute path to the repository. + # Example: /home/git/repositorities/gitlab-org/gitlab-test.git + attr_reader :repository_absolute_path + + # This is the path at which the gitlab-shell hooks directory can be found. + # It's essential for integration between git and GitLab proper. All new + # repositories should have their hooks directory symlinked here. + attr_reader :global_hooks_path + + attr_reader :logger + + def initialize(shard_path, repository_relative_path, global_hooks_path:, logger:) + @shard_path = shard_path + @repository_relative_path = repository_relative_path + + @logger = logger + @global_hooks_path = global_hooks_path + @repository_absolute_path = File.join(shard_path, repository_relative_path) + @output = StringIO.new + end + + def output + io = @output.dup + io.rewind + io.read + end + + def rm_project + logger.info "Removing repository <#{repository_absolute_path}>." + FileUtils.rm_rf(repository_absolute_path) + end + + # Move repository from one directory to another + # + # Example: gitlab/gitlab-ci.git -> randx/six.git + # + # Won't work if target namespace directory does not exist + # + def mv_project(new_path) + new_absolute_path = File.join(shard_path, new_path) + + # verify that the source repo exists + unless File.exist?(repository_absolute_path) + logger.error "mv-project failed: source path <#{repository_absolute_path}> does not exist." + return false + end + + # ...and that the target repo does not exist + if File.exist?(new_absolute_path) + logger.error "mv-project failed: destination path <#{new_absolute_path}> already exists." + return false + end + + logger.info "Moving repository from <#{repository_absolute_path}> to <#{new_absolute_path}>." + FileUtils.mv(repository_absolute_path, new_absolute_path) + end + + # Import project via git clone --bare + # URL must be publicly cloneable + def import_project(source, timeout) + # Skip import if repo already exists + return false if File.exist?(repository_absolute_path) + + masked_source = mask_password_in_url(source) + + logger.info "Importing project from <#{masked_source}> to <#{repository_absolute_path}>." + cmd = %W(git clone --bare -- #{source} #{repository_absolute_path}) + + success = run_with_timeout(cmd, timeout, nil) + + unless success + logger.error("Importing project from <#{masked_source}> to <#{repository_absolute_path}> failed.") + FileUtils.rm_rf(repository_absolute_path) + return false + end + + Gitlab::Git::Repository.create_hooks(repository_absolute_path, global_hooks_path) + + # The project was imported successfully. + # Remove the origin URL since it may contain password. + remove_origin_in_repo + + true + end + + def fork_repository(new_shard_path, new_repository_relative_path) + from_path = repository_absolute_path + to_path = File.join(new_shard_path, new_repository_relative_path) + + # The repository cannot already exist + if File.exist?(to_path) + logger.error "fork-repository failed: destination repository <#{to_path}> already exists." + return false + end + + # Ensure the namepsace / hashed storage directory exists + FileUtils.mkdir_p(File.dirname(to_path), mode: 0770) + + logger.info "Forking repository from <#{from_path}> to <#{to_path}>." + cmd = %W(git clone --bare --no-local -- #{from_path} #{to_path}) + + run(cmd, nil) && Gitlab::Git::Repository.create_hooks(to_path, global_hooks_path) + end + + def fetch_remote(name, timeout, force:, tags:, ssh_key: nil, known_hosts: nil) + tags_option = tags ? '--tags' : '--no-tags' + + logger.info "Fetching remote #{name} for repository #{repository_absolute_path}." + cmd = %W(git fetch #{name} --prune --quiet) + cmd << '--force' if force + cmd << tags_option + + setup_ssh_auth(ssh_key, known_hosts) do |env| + success = run_with_timeout(cmd, timeout, repository_absolute_path, env) + + unless success + logger.error "Fetching remote #{name} for repository #{repository_absolute_path} failed." + end + + success + end + end + + def push_branches(remote_name, timeout, force, branch_names) + logger.info "Pushing branches from #{repository_absolute_path} to remote #{remote_name}: #{branch_names}" + cmd = %w(git push) + cmd << '--force' if force + cmd += %W(-- #{remote_name}).concat(branch_names) + + success = run_with_timeout(cmd, timeout, repository_absolute_path) + + unless success + logger.error("Pushing branches to remote #{remote_name} failed.") + end + + success + end + + def delete_remote_branches(remote_name, branch_names) + branches = branch_names.map { |branch_name| ":#{branch_name}" } + + logger.info "Pushing deleted branches from #{repository_absolute_path} to remote #{remote_name}: #{branch_names}" + cmd = %W(git push -- #{remote_name}).concat(branches) + + success = run(cmd, repository_absolute_path) + + unless success + logger.error("Pushing deleted branches to remote #{remote_name} failed.") + end + + success + end + + protected + + def run(*args) + output, exitstatus = popen(*args) + @output << output + + exitstatus&.zero? + end + + def run_with_timeout(*args) + output, exitstatus = popen_with_timeout(*args) + @output << output + + exitstatus&.zero? + rescue Timeout::Error + @output.puts('Timed out') + + false + end + + def mask_password_in_url(url) + result = URI(url) + result.password = "*****" unless result.password.nil? + result.user = "*****" unless result.user.nil? # it's needed for oauth access_token + result + rescue + url + end + + def remove_origin_in_repo + cmd = %w(git remote rm origin) + run(cmd, repository_absolute_path) + end + + # Builds a small shell script that can be used to execute SSH with a set of + # custom options. + # + # Options are expanded as `'-oKey="Value"'`, so SSH will correctly interpret + # paths with spaces in them. We trust the user not to embed single or double + # quotes in the key or value. + def custom_ssh_script(options = {}) + args = options.map { |k, v| %Q{'-o#{k}="#{v}"'} }.join(' ') + + [ + "#!/bin/sh", + "exec ssh #{args} \"$@\"" + ].join("\n") + end + + # Known hosts data and private keys can be passed to gitlab-shell in the + # environment. If present, this method puts them into temporary files, writes + # a script that can substitute as `ssh`, setting the options to respect those + # files, and yields: { "GIT_SSH" => "/tmp/myScript" } + def setup_ssh_auth(key, known_hosts) + options = {} + + if key + key_file = Tempfile.new('gitlab-shell-key-file') + key_file.chmod(0o400) + key_file.write(key) + key_file.close + + options['IdentityFile'] = key_file.path + options['IdentitiesOnly'] = 'yes' + end + + if known_hosts + known_hosts_file = Tempfile.new('gitlab-shell-known-hosts') + known_hosts_file.chmod(0o400) + known_hosts_file.write(known_hosts) + known_hosts_file.close + + options['StrictHostKeyChecking'] = 'yes' + options['UserKnownHostsFile'] = known_hosts_file.path + end + + return yield({}) if options.empty? + + script = Tempfile.new('gitlab-shell-ssh-wrapper') + script.chmod(0o755) + script.write(custom_ssh_script(options)) + script.close + + yield('GIT_SSH' => script.path) + ensure + key_file&.close! + known_hosts_file&.close! + script&.close! + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 0e0a1987c7d..c4c6ed7b619 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -39,10 +39,31 @@ module Gitlab repo = Rugged::Repository.init_at(repo_path, bare) repo.close - if symlink_hooks_to.present? - hooks_path = File.join(repo_path, 'hooks') - FileUtils.rm_rf(hooks_path) - FileUtils.ln_s(symlink_hooks_to, hooks_path) + create_hooks(repo_path, symlink_hooks_to) if symlink_hooks_to.present? + + true + end + + def create_hooks(repo_path, global_hooks_path) + local_hooks_path = File.join(repo_path, 'hooks') + real_local_hooks_path = :not_found + + begin + real_local_hooks_path = File.realpath(local_hooks_path) + rescue Errno::ENOENT + # real_local_hooks_path == :not_found + end + + # Do nothing if hooks already exist + unless real_local_hooks_path == File.realpath(global_hooks_path) + # Move the existing hooks somewhere safe + FileUtils.mv( + local_hooks_path, + "#{local_hooks_path}.old.#{Time.now.to_i}" + ) if File.exist?(local_hooks_path) + + # Create the hooks symlink + FileUtils.ln_sf(global_hooks_path, local_hooks_path) end true diff --git a/lib/gitlab/shell.rb b/lib/gitlab/shell.rb index a22a63665be..9cdd3d22f18 100644 --- a/lib/gitlab/shell.rb +++ b/lib/gitlab/shell.rb @@ -66,7 +66,7 @@ module Gitlab # Init new repository # # storage - project's storage name - # name - project path with namespace + # name - project disk path # # Ex. # add_repository("/path/to/storage", "gitlab/gitlab-ci") @@ -94,23 +94,28 @@ module Gitlab # Import repository # # storage - project's storage path - # name - project path with namespace + # name - project disk path + # url - URL to import from # # Ex. - # import_repository("/path/to/storage", "gitlab/gitlab-ci", "https://github.com/randx/six.git") + # import_repository("/path/to/storage", "gitlab/gitlab-ci", "https://gitlab.com/gitlab-org/gitlab-test.git") # # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/387 def import_repository(storage, name, url) # The timeout ensures the subprocess won't hang forever - cmd = [gitlab_shell_projects_path, 'import-project', - storage, "#{name}.git", url, "#{Gitlab.config.gitlab_shell.git_timeout}"] - gitlab_shell_fast_execute_raise_error(cmd) + cmd = gitlab_projects(storage, "#{name}.git") + success = cmd.import_project(url, git_timeout) + + raise Error, cmd.output unless success + + success end # Fetch remote for repository # # repository - an instance of Git::Repository # remote - remote name + # ssh_auth - SSH known_hosts data and a private key to use for public-key authentication # forced - should we use --force flag? # no_tags - should we use --no-tags flag? # @@ -131,16 +136,15 @@ module Gitlab # Move repository # storage - project's storage path - # path - project path with namespace - # new_path - new project path with namespace + # path - project disk path + # new_path - new project disk path # # Ex. # mv_repository("/path/to/storage", "gitlab/gitlab-ci", "randx/gitlab-ci-new") # # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/387 def mv_repository(storage, path, new_path) - gitlab_shell_fast_execute([gitlab_shell_projects_path, 'mv-project', - storage, "#{path}.git", "#{new_path}.git"]) + gitlab_projects(storage, "#{path}.git").mv_project("#{new_path}.git") end # Fork repository to new path @@ -154,30 +158,21 @@ module Gitlab # # Gitaly note: JV: not easy to migrate because this involves two Gitaly servers, not one. def fork_repository(forked_from_storage, forked_from_disk_path, forked_to_storage, forked_to_disk_path) - gitlab_shell_fast_execute( - [ - gitlab_shell_projects_path, - 'fork-repository', - forked_from_storage, - "#{forked_from_disk_path}.git", - forked_to_storage, - "#{forked_to_disk_path}.git" - ] - ) + gitlab_projects(forked_from_storage, "#{forked_from_disk_path}.git") + .fork_repository(forked_to_storage, "#{forked_to_disk_path}.git") end # Remove repository from file system # # storage - project's storage path - # name - project path with namespace + # name - project disk path # # Ex. # remove_repository("/path/to/storage", "gitlab/gitlab-ci") # # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/387 def remove_repository(storage, name) - gitlab_shell_fast_execute([gitlab_shell_projects_path, - 'rm-project', storage, "#{name}.git"]) + gitlab_projects(storage, "#{name}.git").rm_project end # Add new key to gitlab-shell @@ -311,6 +306,47 @@ module Gitlab end end + # Push branch to remote repository + # + # storage - project's storage path + # project_name - project's disk path + # remote_name - remote name + # branch_names - remote branch names to push + # forced - should we use --force flag + # + # Ex. + # push_remote_branches('/path/to/storage', 'gitlab-org/gitlab-test' 'upstream', ['feature']) + # + def push_remote_branches(storage, project_name, remote_name, branch_names, forced: true) + cmd = gitlab_projects(storage, "#{project_name}.git") + + success = cmd.push_branches(remote_name, git_timeout, forced, branch_names) + + raise Error, cmd.output unless success + + success + end + + # Delete branch from remote repository + # + # storage - project's storage path + # project_name - project's disk path + # remote_name - remote name + # branch_names - remote branch names + # + # Ex. + # delete_remote_branches('/path/to/storage', 'gitlab-org/gitlab-test', 'upstream', ['feature']) + # + def delete_remote_branches(storage, project_name, remote_name, branch_names) + cmd = gitlab_projects(storage, "#{project_name}.git") + + success = cmd.delete_remote_branches(remote_name, branch_names) + + raise Error, cmd.output unless success + + success + end + protected def gitlab_shell_path @@ -341,24 +377,35 @@ module Gitlab private - def local_fetch_remote(storage, name, remote, ssh_auth: nil, forced: false, no_tags: false) - args = [gitlab_shell_projects_path, 'fetch-remote', storage, name, remote, "#{Gitlab.config.gitlab_shell.git_timeout}"] - args << '--force' if forced - args << '--no-tags' if no_tags + def gitlab_projects(shard_path, disk_path) + Gitlab::Git::GitlabProjects.new( + shard_path, + disk_path, + global_hooks_path: Gitlab.config.gitlab_shell.hooks_path, + logger: Rails.logger + ) + end - vars = {} + def local_fetch_remote(storage_path, repository_relative_path, remote, ssh_auth: nil, forced: false, no_tags: false) + vars = { force: forced, tags: !no_tags } if ssh_auth&.ssh_import? if ssh_auth.ssh_key_auth? && ssh_auth.ssh_private_key.present? - vars['GITLAB_SHELL_SSH_KEY'] = ssh_auth.ssh_private_key + vars[:ssh_key] = ssh_auth.ssh_private_key end if ssh_auth.ssh_known_hosts.present? - vars['GITLAB_SHELL_KNOWN_HOSTS'] = ssh_auth.ssh_known_hosts + vars[:known_hosts] = ssh_auth.ssh_known_hosts end end - gitlab_shell_fast_execute_raise_error(args, vars) + cmd = gitlab_projects(storage_path, repository_relative_path) + + success = cmd.fetch_remote(remote, git_timeout, vars) + + raise Error, cmd.output unless success + + success end def gitlab_shell_fast_execute(cmd) @@ -394,6 +441,10 @@ module Gitlab Gitlab::GitalyClient::NamespaceService.new(storage) end + def git_timeout + Gitlab.config.gitlab_shell.git_timeout + end + def gitaly_migrate(method, &block) Gitlab::GitalyClient.migrate(method, &block) rescue GRPC::NotFound, GRPC::BadStatus => e -- cgit v1.2.1