From 8ef0a9d5cae654eb6d5515f776bf7eb519327915 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Thu, 14 Mar 2019 17:20:40 +1300 Subject: Enrich commits with full data in CommitCollection Allow incomplete commit records to load their full data from gitaly. Commits can be based on a Hash of data retrieved from PostgreSQL, and this data can be intentionally incomplete in order to save space. A new method #gitaly? has been added to Gitlab::Git::Commit, which returns true if the underlying data source of the Commit is a Gitaly::GitCommit. CommitCollection now has a method #enrich which replaces non-gitaly commits in place with commits from gitaly. CommitCollection#without_merge_commits has been updated to call this method, as in order to determine a merge commit we need to have parent data. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/58805 --- app/models/commit_collection.rb | 31 +++++++- lib/gitlab/git/commit.rb | 6 +- spec/lib/gitlab/git/commit_spec.rb | 12 +++ spec/models/commit_collection_spec.rb | 86 +++++++++++++++++++++- spec/models/merge_request_spec.rb | 33 ++++----- .../merge_request_widget_entity_spec.rb | 15 ++-- 6 files changed, 154 insertions(+), 29 deletions(-) diff --git a/app/models/commit_collection.rb b/app/models/commit_collection.rb index a9a2e9c81eb..f4e64abf362 100644 --- a/app/models/commit_collection.rb +++ b/app/models/commit_collection.rb @@ -28,10 +28,39 @@ class CommitCollection def without_merge_commits strong_memoize(:without_merge_commits) do - commits.reject(&:merge_commit?) + # `#enrich!` the collection to ensure all commits contain + # the necessary parent data + enrich!.commits.reject(&:merge_commit?) end end + def unenriched + commits.reject(&:gitaly_commit?) + end + + def fully_enriched? + unenriched.empty? + end + + # Batch load any commits that are not backed by full gitaly data, and + # replace them in the collection. + def enrich! + return self if fully_enriched? + + # Batch load full Commits from the repository + # and map to a Hash of id => Commit + replacements = Hash[unenriched.map do |c| + [c.id, Commit.lazy(project, c.id)] + end.compact] + + # Replace the commits, keeping the same order + @commits = @commits.map do |c| + replacements.fetch(c.id, c) + end + + self + end + # Sets the pipeline status for every commit. # # Setting this status ahead of time removes the need for running a query for diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index e5bbd500e98..798deb1be4a 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -311,6 +311,10 @@ module Gitlab parent_ids.size > 1 end + def gitaly_commit? + raw_commit.is_a?(Gitaly::GitCommit) + end + def tree_entry(path) return unless path.present? @@ -333,7 +337,7 @@ module Gitlab end def to_gitaly_commit - return raw_commit if raw_commit.is_a?(Gitaly::GitCommit) + return raw_commit if gitaly_commit? message_split = raw_commit.message.split("\n", 2) Gitaly::GitCommit.new( diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb index 3fb41a626b2..4a4ac833e39 100644 --- a/spec/lib/gitlab/git/commit_spec.rb +++ b/spec/lib/gitlab/git/commit_spec.rb @@ -537,6 +537,18 @@ describe Gitlab::Git::Commit, :seed_helper do end end + describe '#gitaly_commit?' do + context 'when the commit data comes from gitaly' do + it { expect(commit.gitaly_commit?).to eq(true) } + end + + context 'when the commit data comes from a Hash' do + let(:commit) { described_class.new(repository, sample_commit_hash) } + + it { expect(commit.gitaly_commit?).to eq(false) } + end + end + describe '#has_zero_stats?' do it { expect(commit.has_zero_stats?).to eq(false) } end diff --git a/spec/models/commit_collection_spec.rb b/spec/models/commit_collection_spec.rb index 0f5d03ff458..30c504ebea8 100644 --- a/spec/models/commit_collection_spec.rb +++ b/spec/models/commit_collection_spec.rb @@ -37,12 +37,92 @@ describe CommitCollection do describe '#without_merge_commits' do it 'returns all commits except merge commits' do + merge_commit = project.commit("60ecb67744cb56576c30214ff52294f8ce2def98") + expect(merge_commit).to receive(:merge_commit?).and_return(true) + collection = described_class.new(project, [ - build(:commit), - build(:commit, :merge_commit) + commit, + merge_commit ]) - expect(collection.without_merge_commits.size).to eq(1) + expect(collection.without_merge_commits).to contain_exactly(commit) + end + end + + describe 'enrichment methods' do + let(:gitaly_commit) { commit } + let(:hash_commit) { Commit.from_hash(gitaly_commit.to_hash, project) } + + describe '#unenriched' do + it 'returns all commits that are not backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, hash_commit]) + + expect(collection.unenriched).to contain_exactly(hash_commit) + end + end + + describe '#fully_enriched?' do + it 'returns true when all commits are backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, gitaly_commit]) + + expect(collection.fully_enriched?).to eq(true) + end + + it 'returns false when any commits are not backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, hash_commit]) + + expect(collection.fully_enriched?).to eq(false) + end + + it 'returns true when the collection is empty' do + collection = described_class.new(project, []) + + expect(collection.fully_enriched?).to eq(true) + end + end + + describe '#enrich!' do + it 'replaces commits in the collection with those backed by gitaly data' do + collection = described_class.new(project, [hash_commit]) + + collection.enrich! + + new_commit = collection.commits.first + expect(new_commit.id).to eq(hash_commit.id) + expect(hash_commit.gitaly_commit?).to eq(false) + expect(new_commit.gitaly_commit?).to eq(true) + end + + it 'maintains the original order of the commits' do + gitaly_commits = [gitaly_commit] * 3 + hash_commits = [hash_commit] * 3 + # Interleave the gitaly and hash commits together + original_commits = gitaly_commits.zip(hash_commits).flatten + collection = described_class.new(project, original_commits) + + collection.enrich! + + original_commits.each_with_index do |original_commit, i| + new_commit = collection.commits[i] + expect(original_commit.id).to eq(new_commit.id) + end + end + + it 'fetches data if there are unenriched commits' do + collection = described_class.new(project, [hash_commit]) + + expect(Commit).to receive(:lazy).exactly(:once) + + collection.enrich! + end + + it 'does not fetch data if all commits are enriched' do + collection = described_class.new(project, [gitaly_commit]) + + expect(Commit).not_to receive(:lazy) + + collection.enrich! + end end end diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index 42c49e330cc..c487ae45ceb 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -84,32 +84,27 @@ describe MergeRequest do describe '#default_squash_commit_message' do let(:project) { subject.project } - - def commit_collection(commit_hashes) - raw_commits = commit_hashes.map { |raw| Commit.from_hash(raw, project) } - - CommitCollection.new(project, raw_commits) - end + let(:is_multiline) { -> (c) { c.description.present? } } + let(:multiline_commits) { subject.commits.select(&is_multiline) } + let(:singleline_commits) { subject.commits.reject(&is_multiline) } it 'returns the oldest multiline commit message' do - commits = commit_collection([ - { message: 'Singleline', parent_ids: [] }, - { message: "Second multiline\nCommit message", parent_ids: [] }, - { message: "First multiline\nCommit message", parent_ids: [] } - ]) - - expect(subject).to receive(:commits).and_return(commits) - - expect(subject.default_squash_commit_message).to eq("First multiline\nCommit message") + expect(subject.default_squash_commit_message).to eq(multiline_commits.last.message) end it 'returns the merge request title if there are no multiline commits' do - commits = commit_collection([ - { message: 'Singleline', parent_ids: [] } - ]) + expect(subject).to receive(:commits).and_return( + CommitCollection.new(project, singleline_commits) + ) + + expect(subject.default_squash_commit_message).to eq(subject.title) + end - expect(subject).to receive(:commits).and_return(commits) + it 'does not return commit messages from multiline merge commits' do + collection = CommitCollection.new(project, multiline_commits).enrich! + expect(collection.commits).to all( receive(:merge_commit?).and_return(true) ) + expect(subject).to receive(:commits).and_return(collection) expect(subject.default_squash_commit_message).to eq(subject.title) end end diff --git a/spec/serializers/merge_request_widget_entity_spec.rb b/spec/serializers/merge_request_widget_entity_spec.rb index 4dbd79f2fc0..727fd8951f2 100644 --- a/spec/serializers/merge_request_widget_entity_spec.rb +++ b/spec/serializers/merge_request_widget_entity_spec.rb @@ -279,13 +279,18 @@ describe MergeRequestWidgetEntity do end describe 'commits_without_merge_commits' do + def find_matching_commit(short_id) + resource.commits.find { |c| c.short_id == short_id } + end + it 'should not include merge commits' do - # Mock all but the first 5 commits to be merge commits - resource.commits.each_with_index do |commit, i| - expect(commit).to receive(:merge_commit?).at_least(:once).and_return(i > 4) - end + commits_in_widget = subject[:commits_without_merge_commits] - expect(subject[:commits_without_merge_commits].size).to eq(5) + expect(commits_in_widget.length).to be < resource.commits.length + expect(commits_in_widget.length).to eq(resource.commits.without_merge_commits.length) + commits_in_widget.each do |c| + expect(find_matching_commit(c[:short_id]).merge_commit?).to eq(false) + end end end end -- cgit v1.2.1 From 0420c2393beff2dcf045c9eff12ec44b13759c14 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 15 Mar 2019 11:49:28 +1300 Subject: Scope out merge commits in MergeRequest spec Previously the code for excluding merge commits from the commit collection (CommitCollection#without_merge_commits) was not working when the commits had come from a merge request. Now that this has been fixed, these tests fails. They should always have been written to exclude merge commits when comparing. --- spec/models/merge_request_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index c487ae45ceb..fad73613989 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -1040,7 +1040,7 @@ describe MergeRequest do describe '#commit_authors' do it 'returns all the authors of every commit in the merge request' do - users = subject.commits.map(&:author_email).uniq.map do |email| + users = subject.commits.without_merge_commits.map(&:author_email).uniq.map do |email| create(:user, email: email) end @@ -1054,7 +1054,7 @@ describe MergeRequest do describe '#authors' do it 'returns a list with all the commit authors in the merge request and author' do - users = subject.commits.map(&:author_email).uniq.map do |email| + users = subject.commits.without_merge_commits.map(&:author_email).uniq.map do |email| create(:user, email: email) end -- cgit v1.2.1 From 5d4e02f17318a21a5c2b4fc588cad066a4cf7e59 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 20 Mar 2019 12:07:51 +1300 Subject: Handle blank projects in CommitCollection#enrich! A project is needed in order to fetch data from gitaly. Projects can be absent from commits in certain rare situations (like when viewing a MR of a deleted fork). In these cases, assume that the enriched data is not needed. See this comment: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/26144#note_152191106 "It's led to a dilemma about where to "fix" this in code. I'm going to fix it by allowing CommitCollection#enrich! to just return unenriched commits when a project is missing, essentially "silently failing". I hope this is the right decision. It's going with the assumption that calls in these situations in the future are not needing the full data. The alternative would be to allow CommitCollection#enrich! to still error, but handle it in the methods that call #enrich!, however that might lead to brittleness in future when working with project-less MRs." --- app/models/commit_collection.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/models/commit_collection.rb b/app/models/commit_collection.rb index f4e64abf362..52524456439 100644 --- a/app/models/commit_collection.rb +++ b/app/models/commit_collection.rb @@ -45,7 +45,11 @@ class CommitCollection # Batch load any commits that are not backed by full gitaly data, and # replace them in the collection. def enrich! - return self if fully_enriched? + # A project is needed in order to fetch data from gitaly. Projects + # can be absent from commits in certain rare situations (like when + # viewing a MR of a deleted fork). In these cases, assume that the + # enriched data is not needed. + return self if project.blank? || fully_enriched? # Batch load full Commits from the repository # and map to a Hash of id => Commit -- cgit v1.2.1