summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--app/services/git/branch_hooks_service.rb25
-rw-r--r--app/workers/process_commit_worker.rb17
-rw-r--r--changelogs/unreleased/dm-process-commit-worker-n-1.yml5
-rw-r--r--spec/services/git/branch_hooks_service_spec.rb70
-rw-r--r--spec/workers/process_commit_worker_spec.rb40
5 files changed, 90 insertions, 67 deletions
diff --git a/app/services/git/branch_hooks_service.rb b/app/services/git/branch_hooks_service.rb
index 431a5aedf2e..d2b037a680c 100644
--- a/app/services/git/branch_hooks_service.rb
+++ b/app/services/git/branch_hooks_service.rb
@@ -83,8 +83,16 @@ module Git
# Schedules processing of commit messages
def enqueue_process_commit_messages
- limited_commits.each do |commit|
- next unless commit.matches_cross_reference_regex?
+ referencing_commits = limited_commits.select(&:matches_cross_reference_regex?)
+
+ upstream_commit_ids = upstream_commit_ids(referencing_commits)
+
+ referencing_commits.each do |commit|
+ # Avoid reprocessing commits that already exist upstream if the project
+ # is a fork. This will prevent duplicated/superfluous system notes on
+ # mentionables referenced by a commit that is pushed to the upstream,
+ # that is then also pushed to forks when these get synced by users.
+ next if upstream_commit_ids.include?(commit.id)
ProcessCommitWorker.perform_async(
project.id,
@@ -142,5 +150,18 @@ module Git
def branch_name
strong_memoize(:branch_name) { Gitlab::Git.ref_name(params[:ref]) }
end
+
+ def upstream_commit_ids(commits)
+ set = Set.new
+
+ upstream_project = project.fork_source
+ if upstream_project
+ upstream_project
+ .commits_by(oids: commits.map(&:id))
+ .each { |commit| set << commit.id }
+ end
+
+ set
+ end
end
end
diff --git a/app/workers/process_commit_worker.rb b/app/workers/process_commit_worker.rb
index 3efb5343a96..f6ebe4ab006 100644
--- a/app/workers/process_commit_worker.rb
+++ b/app/workers/process_commit_worker.rb
@@ -2,7 +2,8 @@
# Worker for processing individual commit messages pushed to a repository.
#
-# Jobs for this worker are scheduled for every commit that is being pushed. As a
+# Jobs for this worker are scheduled for every commit that contains mentionable
+# references in its message and does not exist in the upstream project. As a
# result of this the workload of this worker should be kept to a bare minimum.
# Consider using an extra worker if you need to add any extra (and potentially
# slow) processing of commits.
@@ -19,7 +20,6 @@ class ProcessCommitWorker
project = Project.find_by(id: project_id)
return unless project
- return if commit_exists_in_upstream?(project, commit_hash)
user = User.find_by(id: user_id)
@@ -77,17 +77,4 @@ class ProcessCommitWorker
Commit.from_hash(hash, project)
end
-
- private
-
- # Avoid reprocessing commits that already exist in the upstream
- # when project is forked. This will also prevent duplicated system notes.
- def commit_exists_in_upstream?(project, commit_hash)
- upstream_project = project.fork_source
-
- return false unless upstream_project
-
- commit_id = commit_hash.with_indifferent_access[:id]
- upstream_project.commit(commit_id).present?
- end
end
diff --git a/changelogs/unreleased/dm-process-commit-worker-n-1.yml b/changelogs/unreleased/dm-process-commit-worker-n-1.yml
new file mode 100644
index 00000000000..0bd7de6730a
--- /dev/null
+++ b/changelogs/unreleased/dm-process-commit-worker-n-1.yml
@@ -0,0 +1,5 @@
+---
+title: Look up upstream commits once before queuing ProcessCommitWorkers
+merge_request:
+author:
+type: performance
diff --git a/spec/services/git/branch_hooks_service_spec.rb b/spec/services/git/branch_hooks_service_spec.rb
index 3929f51a0e2..2bf7dc32436 100644
--- a/spec/services/git/branch_hooks_service_spec.rb
+++ b/spec/services/git/branch_hooks_service_spec.rb
@@ -4,6 +4,7 @@ require 'spec_helper'
describe Git::BranchHooksService do
include RepoHelpers
+ include ProjectForksHelper
let(:project) { create(:project, :repository) }
let(:user) { project.creator }
@@ -272,10 +273,10 @@ describe Git::BranchHooksService do
end
describe 'Processing commit messages' do
- # Create 4 commits, 2 of which have references. Limiting to 2 commits, we
- # expect to see one commit message processor enqueued.
- let(:commit_ids) do
- Array.new(4) do |i|
+ # Create 6 commits, 3 of which have references. Limiting to 4 commits, we
+ # expect to see two commit message processors enqueued.
+ let!(:commit_ids) do
+ Array.new(6) do |i|
message = "Issue #{'#' if i.even?}#{i}"
project.repository.update_file(
user, 'README.md', '', message: message, branch_name: branch
@@ -283,18 +284,18 @@ describe Git::BranchHooksService do
end
end
- let(:oldrev) { commit_ids.first }
+ let(:oldrev) { project.commit(commit_ids.first).parent_id }
let(:newrev) { commit_ids.last }
before do
- stub_const("::Git::BaseHooksService::PROCESS_COMMIT_LIMIT", 2)
+ stub_const("::Git::BaseHooksService::PROCESS_COMMIT_LIMIT", 4)
end
context 'creating the default branch' do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
- expect(ProcessCommitWorker).to receive(:perform_async).once
+ expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
@@ -302,7 +303,7 @@ describe Git::BranchHooksService do
context 'updating the default branch' do
it 'processes a limited number of commit messages' do
- expect(ProcessCommitWorker).to receive(:perform_async).once
+ expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
@@ -323,7 +324,7 @@ describe Git::BranchHooksService do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
- expect(ProcessCommitWorker).to receive(:perform_async).once
+ expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
@@ -333,7 +334,7 @@ describe Git::BranchHooksService do
let(:branch) { 'fix' }
it 'processes a limited number of commit messages' do
- expect(ProcessCommitWorker).to receive(:perform_async).once
+ expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
@@ -349,6 +350,55 @@ describe Git::BranchHooksService do
service.execute
end
end
+
+ context 'when the project is forked' do
+ let(:upstream_project) { project }
+ let(:forked_project) { fork_project(upstream_project, user, repository: true) }
+
+ let!(:forked_service) do
+ described_class.new(forked_project, user, oldrev: oldrev, newrev: newrev, ref: ref)
+ end
+
+ context 'when commits already exists in the upstream project' do
+ it 'does not process commit messages' do
+ expect(ProcessCommitWorker).not_to receive(:perform_async)
+
+ forked_service.execute
+ end
+ end
+
+ context 'when a commit does not exist in the upstream repo' do
+ # On top of the existing 6 commits, 3 of which have references,
+ # create 2 more, 1 of which has a reference. Limiting to 4 commits, we
+ # expect to see one commit message processor enqueued.
+ let!(:forked_commit_ids) do
+ Array.new(2) do |i|
+ message = "Issue #{'#' if i.even?}#{i}"
+ forked_project.repository.update_file(
+ user, 'README.md', '', message: message, branch_name: branch
+ )
+ end
+ end
+
+ let(:newrev) { forked_commit_ids.last }
+
+ it 'processes the commit message' do
+ expect(ProcessCommitWorker).to receive(:perform_async).once
+
+ forked_service.execute
+ end
+ end
+
+ context 'when the upstream project no longer exists' do
+ it 'processes the commit messages' do
+ upstream_project.destroy!
+
+ expect(ProcessCommitWorker).to receive(:perform_async).twice
+
+ forked_service.execute
+ end
+ end
+ end
end
describe 'New branch detection' do
diff --git a/spec/workers/process_commit_worker_spec.rb b/spec/workers/process_commit_worker_spec.rb
index 47bac63511e..eb1d3c364ac 100644
--- a/spec/workers/process_commit_worker_spec.rb
+++ b/spec/workers/process_commit_worker_spec.rb
@@ -3,8 +3,6 @@
require 'spec_helper'
describe ProcessCommitWorker do
- include ProjectForksHelper
-
let(:worker) { described_class.new }
let(:user) { create(:user) }
let(:project) { create(:project, :public, :repository) }
@@ -35,44 +33,6 @@ describe ProcessCommitWorker do
worker.perform(project.id, user.id, commit.to_hash)
end
-
- context 'when the project is forked' do
- context 'when commit already exists in the upstream project' do
- it 'does not process the commit message' do
- forked = fork_project(project, user, repository: true)
-
- expect(worker).not_to receive(:process_commit_message)
-
- worker.perform(forked.id, user.id, forked.commit.to_hash)
- end
- end
-
- context 'when the commit does not exist in the upstream project' do
- it 'processes the commit message' do
- empty_project = create(:project, :public)
- forked = fork_project(empty_project, user, repository: true)
-
- TestEnv.copy_repo(forked,
- bare_repo: TestEnv.factory_repo_path_bare,
- refs: TestEnv::BRANCH_SHA)
-
- expect(worker).to receive(:process_commit_message)
-
- worker.perform(forked.id, user.id, forked.commit.to_hash)
- end
- end
-
- context 'when the upstream project no longer exists' do
- it 'processes the commit message' do
- forked = fork_project(project, user, repository: true)
- project.destroy!
-
- expect(worker).to receive(:process_commit_message)
-
- worker.perform(forked.id, user.id, forked.commit.to_hash)
- end
- end
- end
end
describe '#process_commit_message' do