diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2018-08-17 14:33:27 +0000 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2018-08-17 14:33:27 +0000 |
commit | a58d0a0182d06efbbde57821e71c305518325d6f (patch) | |
tree | 4caf920f90d558f57fd2e858ecb709bc3fa177de | |
parent | c2f14af75e9b65ae8d88d3d025ea63da7bd2106b (diff) | |
parent | 729de4f1ba04f1a9a44b7f3f3b9bd5fb9165e4ca (diff) | |
download | gitlab-ce-a58d0a0182d06efbbde57821e71c305518325d6f.tar.gz |
Merge branch 'ab-49754-gh-importer-internal-ids' into 'master'
GitHub importer: Keep track of internal_ids
Closes #49754
See merge request gitlab-org/gitlab-ce!20926
12 files changed, 257 insertions, 8 deletions
diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb index 4eb211eff61..e7168d49db9 100644 --- a/app/models/internal_id.rb +++ b/app/models/internal_id.rb @@ -111,7 +111,7 @@ class InternalId < ActiveRecord::Base # Generates next internal id and returns it def generate - subject.transaction do + InternalId.transaction do # Create a record in internal_ids if one does not yet exist # and increment its last value # @@ -125,7 +125,7 @@ class InternalId < ActiveRecord::Base # # Note this will acquire a ROW SHARE lock on the InternalId record def track_greatest(new_value) - subject.transaction do + InternalId.transaction do (lookup || create_record).track_greatest_and_save!(new_value) end end @@ -148,7 +148,7 @@ class InternalId < ActiveRecord::Base # violation. We can safely roll-back the nested transaction and perform # a lookup instead to retrieve the record. def create_record - subject.transaction(requires_new: true) do + InternalId.transaction(requires_new: true) do InternalId.create!( **scope, usage: usage_value, diff --git a/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml b/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml new file mode 100644 index 00000000000..bfea57d79e0 --- /dev/null +++ b/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml @@ -0,0 +1,5 @@ +--- +title: Add migration to cleanup internal_ids inconsistency. +merge_request: 20926 +author: +type: fixed diff --git a/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb b/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb new file mode 100644 index 00000000000..3b9b95ec9ca --- /dev/null +++ b/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true +class DeleteInconsistentInternalIdRecords < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + # This migration cleans up any inconsistent records in internal_ids. + # + # That is, it deletes records that track a `last_value` that is + # smaller than the maximum internal id (usually `iid`) found in + # the corresponding model records. + + def up + disable_statement_timeout do + delete_internal_id_records('issues', 'project_id') + delete_internal_id_records('merge_requests', 'project_id', 'target_project_id') + delete_internal_id_records('deployments', 'project_id') + delete_internal_id_records('milestones', 'project_id') + delete_internal_id_records('milestones', 'namespace_id', 'group_id') + delete_internal_id_records('ci_pipelines', 'project_id') + end + end + + class InternalId < ActiveRecord::Base + self.table_name = 'internal_ids' + enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 } + end + + private + + def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name) + sql = <<~SQL + SELECT id FROM ( -- workaround for MySQL + SELECT internal_ids.id FROM ( + SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name} + ) maxima JOIN internal_ids USING (#{scope_column_name}) + WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value + ) internal_ids + SQL + + InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection + say "Deleting internal_id records for #{base_table}: #{ids.pluck(:project_id, :last_value)}" unless ids.empty? + end.delete_all + end +end diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index 147597289cf..da2f96b5c4b 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -15,10 +15,12 @@ module Gitlab end # Bulk inserts the given rows into the database. - def bulk_insert(model, rows, batch_size: 100) + def bulk_insert(model, rows, batch_size: 100, pre_hook: nil) rows.each_slice(batch_size) do |slice| + pre_hook.call(slice) if pre_hook Gitlab::Database.bulk_insert(model.table_name, slice) end + rows end end end diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index 31fefebf787..ead4215810f 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -55,7 +55,11 @@ module Gitlab updated_at: issue.updated_at } - GithubImport.insert_and_return_id(attributes, project.issues) + GithubImport.insert_and_return_id(attributes, project.issues).tap do |id| + # We use .insert_and_return_id which effectively disables all callbacks. + # Trigger iid logic here to make sure we track internal id values consistently. + project.issues.find(id).ensure_project_iid! + end rescue ActiveRecord::InvalidForeignKey # It's possible the project has been deleted since scheduling this # job. In this case we'll just skip creating the issue. diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index c53480e828a..94eb9136b9a 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -17,10 +17,20 @@ module Gitlab end def execute - bulk_insert(Milestone, build_milestones) + # We insert records in bulk, by-passing any standard model callbacks. + # The pre_hook here makes sure we track internal ids consistently. + # Note this has to be called before performing an insert of a batch + # because we're outside a transaction scope here. + bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid)) build_milestones_cache end + def track_greatest_iid(slice) + greatest_iid = slice.max { |e| e[:iid] }[:iid] + + InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) }) + end + def build_milestones build_database_rows(each_milestone) end diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb index 6b3688c4381..e4b49d2143a 100644 --- a/lib/gitlab/github_import/importer/pull_request_importer.rb +++ b/lib/gitlab/github_import/importer/pull_request_importer.rb @@ -76,7 +76,13 @@ module Gitlab merge_request_id = GithubImport .insert_and_return_id(attributes, project.merge_requests) - [project.merge_requests.find(merge_request_id), false] + merge_request = project.merge_requests.find(merge_request_id) + + # We use .insert_and_return_id which effectively disables all callbacks. + # Trigger iid logic here to make sure we track internal id values consistently. + merge_request.ensure_target_project_iid! + + [merge_request, false] end rescue ActiveRecord::InvalidForeignKey # It's possible the project has been deleted since scheduling this diff --git a/spec/lib/gitlab/github_import/bulk_importing_spec.rb b/spec/lib/gitlab/github_import/bulk_importing_spec.rb index 91229d9c7d4..861710f7e9b 100644 --- a/spec/lib/gitlab/github_import/bulk_importing_spec.rb +++ b/spec/lib/gitlab/github_import/bulk_importing_spec.rb @@ -58,5 +58,17 @@ describe Gitlab::GithubImport::BulkImporting do importer.bulk_insert(model, rows, batch_size: 5) end + + it 'calls pre_hook for each slice if given' do + rows = [{ title: 'Foo' }] * 10 + model = double(:model, table_name: 'kittens') + pre_hook = double('pre_hook', call: nil) + allow(Gitlab::Database).to receive(:bulk_insert) + + expect(pre_hook).to receive(:call).with(rows[0..4]) + expect(pre_hook).to receive(:call).with(rows[5..9]) + + importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook) + end end end diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb index 81fe97c1e49..3f7a12144d5 100644 --- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb @@ -78,6 +78,11 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach .to receive(:id_for) .with(issue) .and_return(milestone.id) + + allow(importer.user_finder) + .to receive(:author_id_for) + .with(issue) + .and_return([user.id, true]) end context 'when the issue author could be found' do @@ -172,6 +177,23 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach expect(importer.create_issue).to be_a_kind_of(Numeric) end + + it 'triggers internal_id functionality to track greatest iids' do + allow(importer.user_finder) + .to receive(:author_id_for) + .with(issue) + .and_return([user.id, true]) + + issue = build_stubbed(:issue, project: project) + allow(Gitlab::GithubImport) + .to receive(:insert_and_return_id) + .and_return(issue.id) + allow(project.issues).to receive(:find).with(issue.id).and_return(issue) + + expect(issue).to receive(:ensure_project_iid!) + + importer.create_issue + end end describe '#create_assignees' do diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb index b1cac3b6e46..db0be760c7b 100644 --- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb @@ -29,13 +29,25 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis expect(importer) .to receive(:bulk_insert) - .with(Milestone, [milestone_hash]) + .with(Milestone, [milestone_hash], any_args) expect(importer) .to receive(:build_milestones_cache) importer.execute end + + it 'tracks internal ids' do + milestone_hash = { iid: 1, title: '1.0', project_id: project.id } + allow(importer) + .to receive(:build_milestones) + .and_return([milestone_hash]) + + expect(InternalId).to receive(:track_greatest) + .with(nil, { project: project }, :milestones, 1, any_args) + + importer.execute + end end describe '#build_milestones' do diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb index 3422a1e82fc..44c920043b4 100644 --- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb @@ -111,6 +111,16 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi expect(mr).to be_instance_of(MergeRequest) expect(exists).to eq(false) end + + it 'triggers internal_id functionality to track greatest iids' do + mr = build_stubbed(:merge_request, source_project: project, target_project: project) + allow(Gitlab::GithubImport).to receive(:insert_and_return_id).and_return(mr.id) + allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr) + + expect(mr).to receive(:ensure_target_project_iid!) + + importer.create_merge_request + end end context 'when the author could not be found' do diff --git a/spec/migrations/delete_inconsistent_internal_id_records_spec.rb b/spec/migrations/delete_inconsistent_internal_id_records_spec.rb new file mode 100644 index 00000000000..becb71cf427 --- /dev/null +++ b/spec/migrations/delete_inconsistent_internal_id_records_spec.rb @@ -0,0 +1,119 @@ +# frozen_string_literal: true +# rubocop:disable RSpec/FactoriesInMigrationSpecs +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20180723130817_delete_inconsistent_internal_id_records.rb') + +describe DeleteInconsistentInternalIdRecords, :migration do + let!(:project1) { create(:project) } + let!(:project2) { create(:project) } + let!(:project3) { create(:project) } + + let(:internal_id_query) { ->(project) { InternalId.where(usage: InternalId.usages[scope.to_s.tableize], project: project) } } + + let(:create_models) do + 3.times { create(scope, project: project1) } + 3.times { create(scope, project: project2) } + 3.times { create(scope, project: project3) } + end + + shared_examples_for 'deleting inconsistent internal_id records' do + before do + create_models + + internal_id_query.call(project1).first.tap do |iid| + iid.last_value = iid.last_value - 2 + # This is an inconsistent record + iid.save! + end + + internal_id_query.call(project3).first.tap do |iid| + iid.last_value = iid.last_value + 2 + # This is a consistent record + iid.save! + end + end + + it "deletes inconsistent issues" do + expect { migrate! }.to change { internal_id_query.call(project1).size }.from(1).to(0) + end + + it "retains consistent issues" do + expect { migrate! }.not_to change { internal_id_query.call(project2).size } + end + + it "retains consistent records, especially those with a greater last_value" do + expect { migrate! }.not_to change { internal_id_query.call(project3).size } + end + end + + context 'for issues' do + let(:scope) { :issue } + it_behaves_like 'deleting inconsistent internal_id records' + end + + context 'for merge_requests' do + let(:scope) { :merge_request } + + let(:create_models) do + 3.times { |i| create(scope, target_project: project1, source_project: project1, source_branch: i.to_s) } + 3.times { |i| create(scope, target_project: project2, source_project: project2, source_branch: i.to_s) } + 3.times { |i| create(scope, target_project: project3, source_project: project3, source_branch: i.to_s) } + end + + it_behaves_like 'deleting inconsistent internal_id records' + end + + context 'for deployments' do + let(:scope) { :deployment } + it_behaves_like 'deleting inconsistent internal_id records' + end + + context 'for milestones (by project)' do + let(:scope) { :milestone } + it_behaves_like 'deleting inconsistent internal_id records' + end + + context 'for ci_pipelines' do + let(:scope) { :ci_pipeline } + it_behaves_like 'deleting inconsistent internal_id records' + end + + context 'for milestones (by group)' do + # milestones (by group) is a little different than all of the other models + let!(:group1) { create(:group) } + let!(:group2) { create(:group) } + let!(:group3) { create(:group) } + + let(:internal_id_query) { ->(group) { InternalId.where(usage: InternalId.usages['milestones'], namespace: group) } } + + before do + 3.times { create(:milestone, group: group1) } + 3.times { create(:milestone, group: group2) } + 3.times { create(:milestone, group: group3) } + + internal_id_query.call(group1).first.tap do |iid| + iid.last_value = iid.last_value - 2 + # This is an inconsistent record + iid.save! + end + + internal_id_query.call(group3).first.tap do |iid| + iid.last_value = iid.last_value + 2 + # This is a consistent record + iid.save! + end + end + + it "deletes inconsistent issues" do + expect { migrate! }.to change { internal_id_query.call(group1).size }.from(1).to(0) + end + + it "retains consistent issues" do + expect { migrate! }.not_to change { internal_id_query.call(group2).size } + end + + it "retains consistent records, especially those with a greater last_value" do + expect { migrate! }.not_to change { internal_id_query.call(group3).size } + end + end +end |