From 7e00adcc71877677637a8ebe0680f1d38176e7c8 Mon Sep 17 00:00:00 2001 From: Bob Van Landuyt Date: Thu, 8 Feb 2018 08:51:29 +0100 Subject: Early migrations populating fork-networks: no-op Since populating the fork networks was scheduled multiple times because of bugs that needed to be fixed: - https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/15595/ Creating fork networks for projects that were missing the root of the fork network. - https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/15709 The API allowed creating forked_project_links without fork_network_members. Scheduling this migration multiple times would case it to run concurrently. Which in turn would try to insert the same data into `fork_network_members` causing duplicate key errors. This avoids running the migration multiple times. --- .../bvl-fix-concurrent-fork-network-migrations.yml | 5 +++++ db/migrate/20170929131201_populate_fork_networks.rb | 16 +--------------- .../20171124150326_reschedule_fork_network_creation.rb | 16 +--------------- 3 files changed, 7 insertions(+), 30 deletions(-) create mode 100644 changelogs/unreleased/bvl-fix-concurrent-fork-network-migrations.yml diff --git a/changelogs/unreleased/bvl-fix-concurrent-fork-network-migrations.yml b/changelogs/unreleased/bvl-fix-concurrent-fork-network-migrations.yml new file mode 100644 index 00000000000..b2a77f75e55 --- /dev/null +++ b/changelogs/unreleased/bvl-fix-concurrent-fork-network-migrations.yml @@ -0,0 +1,5 @@ +--- +title: Avoid running `PopulateForkNetworksRange`-migration multiple times +merge_request: 16988 +author: +type: fixed diff --git a/db/migrate/20170929131201_populate_fork_networks.rb b/db/migrate/20170929131201_populate_fork_networks.rb index 1214962770f..ddbf27e1852 100644 --- a/db/migrate/20170929131201_populate_fork_networks.rb +++ b/db/migrate/20170929131201_populate_fork_networks.rb @@ -6,22 +6,8 @@ class PopulateForkNetworks < ActiveRecord::Migration DOWNTIME = false - MIGRATION = 'PopulateForkNetworksRange'.freeze - BATCH_SIZE = 100 - DELAY_INTERVAL = 15.seconds - - disable_ddl_transaction! - - class ForkedProjectLink < ActiveRecord::Base - include EachBatch - - self.table_name = 'forked_project_links' - end - def up - say 'Populating the `fork_networks` based on existing `forked_project_links`' - - queue_background_migration_jobs_by_range_at_intervals(ForkedProjectLink, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + say 'Fork networks will be populated in 20171205190711 - RescheduleForkNetworkCreationCaller' end def down diff --git a/db/post_migrate/20171124150326_reschedule_fork_network_creation.rb b/db/post_migrate/20171124150326_reschedule_fork_network_creation.rb index 05430efe1f6..26f917d5a1e 100644 --- a/db/post_migrate/20171124150326_reschedule_fork_network_creation.rb +++ b/db/post_migrate/20171124150326_reschedule_fork_network_creation.rb @@ -3,22 +3,8 @@ class RescheduleForkNetworkCreation < ActiveRecord::Migration DOWNTIME = false - MIGRATION = 'PopulateForkNetworksRange'.freeze - BATCH_SIZE = 100 - DELAY_INTERVAL = 15.seconds - - disable_ddl_transaction! - - class ForkedProjectLink < ActiveRecord::Base - include EachBatch - - self.table_name = 'forked_project_links' - end - def up - say 'Populating the `fork_networks` based on existing `forked_project_links`' - - queue_background_migration_jobs_by_range_at_intervals(ForkedProjectLink, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + say 'Fork networks will be populated in 20171205190711 - RescheduleForkNetworkCreationCaller' end def down -- cgit v1.2.1 From 0478ff7cd44d291e931436b8fcd4c5c53b249741 Mon Sep 17 00:00:00 2001 From: Bob Van Landuyt Date: Thu, 8 Feb 2018 09:11:00 +0100 Subject: Handle uniqueness on fork_network_member creation Since the migration might be queued already and be rescheduled when it fails on a uniqueness error, this should help clearing the background migration queue faster. --- .../create_fork_network_memberships_range.rb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/background_migration/create_fork_network_memberships_range.rb b/lib/gitlab/background_migration/create_fork_network_memberships_range.rb index 03b17b319fa..1b4a9e8a194 100644 --- a/lib/gitlab/background_migration/create_fork_network_memberships_range.rb +++ b/lib/gitlab/background_migration/create_fork_network_memberships_range.rb @@ -14,6 +14,14 @@ module Gitlab def perform(start_id, end_id) log("Creating memberships for forks: #{start_id} - #{end_id}") + insert_members(start_id, end_id) + + if missing_members?(start_id, end_id) + BackgroundMigrationWorker.perform_in(RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [start_id, end_id]) + end + end + + def insert_members(start_id, end_id) ActiveRecord::Base.connection.execute <<~INSERT_MEMBERS INSERT INTO fork_network_members (fork_network_id, project_id, forked_from_project_id) @@ -33,10 +41,9 @@ module Gitlab WHERE existing_members.project_id = forked_project_links.forked_to_project_id ) INSERT_MEMBERS - - if missing_members?(start_id, end_id) - BackgroundMigrationWorker.perform_in(RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [start_id, end_id]) - end + rescue ActiveRecord::RecordNotUnique => e + # `fork_network_member` was created concurrently in another migration + log(e.message) end def missing_members?(start_id, end_id) -- cgit v1.2.1 From 917fb1744c10cdddd50c22bda690dfcca9a47eff Mon Sep 17 00:00:00 2001 From: Bob Van Landuyt Date: Thu, 8 Feb 2018 09:15:45 +0100 Subject: [docs] Info rescheduling background migrations --- doc/development/background_migrations.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/development/background_migrations.md b/doc/development/background_migrations.md index af2026c483e..fc1b202b5eb 100644 --- a/doc/development/background_migrations.md +++ b/doc/development/background_migrations.md @@ -94,6 +94,18 @@ jobs = [['BackgroundMigrationClassName', [1]], BackgroundMigrationWorker.bulk_perform_in(5.minutes, jobs) ``` +### Rescheduling background migrations + +If one of the background migrations contains a bug that is fixed in a patch +release, the background migration needs to be rescheduled so the migration would +be repeated on systems that already performed the initial migration. + +When you reschedule the background migration, make sure to turn the original +scheduling into a no-op by clearing up the `#up` and `#down` methods of the +migration performing the scheduling. Otherwise the background migration would be +scheduled multiple times on systems that are upgrading multiple patch releases at +once. + ## Cleaning Up >**Note:** -- cgit v1.2.1