summaryrefslogtreecommitdiff
path: root/db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2021-01-04 18:10:11 +0000
committerGitLab Bot <gitlab-bot@gitlab.com>2021-01-04 18:10:11 +0000
commitdb3acec198e92833482b9ce5c055f562b06a14a1 (patch)
tree8e7642c700b84ac613c4766f9a65ada7090b2ee2 /db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb
parentca89460cfa70ffe0a6d7cb289b31bd72ce8d6da3 (diff)
downloadgitlab-ce-db3acec198e92833482b9ce5c055f562b06a14a1.tar.gz
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb')
-rw-r--r--db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb135
1 files changed, 135 insertions, 0 deletions
diff --git a/db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb b/db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb
new file mode 100644
index 00000000000..f19a209092b
--- /dev/null
+++ b/db/post_migrate/20200716234259_remove_duplicate_labels_from_group.rb
@@ -0,0 +1,135 @@
+# frozen_string_literal: true
+
+class RemoveDuplicateLabelsFromGroup < ActiveRecord::Migration[6.0]
+ DOWNTIME = false
+
+ CREATE = 1
+ RENAME = 2
+
+ disable_ddl_transaction!
+
+ class BackupLabel < ApplicationRecord
+ include EachBatch
+
+ self.table_name = 'backup_labels'
+ end
+
+ class Label < ApplicationRecord
+ self.table_name = 'labels'
+ end
+
+ class Group < ApplicationRecord
+ include EachBatch
+
+ self.table_name = 'namespaces'
+ end
+
+ BATCH_SIZE = 10_000
+
+ def up
+ # Split to smaller chunks
+ # Loop rather than background job, every 10,000
+ # there are ~1,800,000 groups in total (excluding personal namespaces, which can't have labels)
+ Group.where(type: 'Group').each_batch(of: BATCH_SIZE) do |batch|
+ range = batch.pluck('MIN(id)', 'MAX(id)').first
+
+ transaction do
+ remove_full_duplicates(*range)
+ end
+
+ transaction do
+ rename_partial_duplicates(*range)
+ end
+ end
+ end
+
+ DOWN_BATCH_SIZE = 1000
+
+ def down
+ BackupLabel.where('project_id IS NULL AND group_id IS NOT NULL').each_batch(of: DOWN_BATCH_SIZE) do |batch|
+ range = batch.pluck('MIN(id)', 'MAX(id)').first
+
+ restore_renamed_labels(*range)
+ restore_deleted_labels(*range)
+ end
+ end
+
+ def remove_full_duplicates(start_id, stop_id)
+ # Fields that are considered duplicate:
+ # group_id title template description type color
+
+ duplicate_labels = ApplicationRecord.connection.execute(<<-SQL.squish)
+WITH data AS (
+ SELECT labels.*,
+ row_number() OVER (PARTITION BY labels.group_id, labels.title, labels.template, labels.description, labels.type, labels.color ORDER BY labels.id) AS row_number,
+ #{CREATE} AS restore_action
+ FROM labels
+ WHERE labels.group_id BETWEEN #{start_id} AND #{stop_id}
+ AND NOT EXISTS (SELECT * FROM board_labels WHERE board_labels.label_id = labels.id)
+ AND NOT EXISTS (SELECT * FROM label_links WHERE label_links.label_id = labels.id)
+ AND NOT EXISTS (SELECT * FROM label_priorities WHERE label_priorities.label_id = labels.id)
+ AND NOT EXISTS (SELECT * FROM lists WHERE lists.label_id = labels.id)
+ AND NOT EXISTS (SELECT * FROM resource_label_events WHERE resource_label_events.label_id = labels.id)
+) SELECT * FROM data WHERE row_number > 1;
+ SQL
+
+ if duplicate_labels.any?
+ # create backup records
+ BackupLabel.insert_all!(duplicate_labels.map { |label| label.except("row_number") })
+
+ Label.unscoped.where(id: duplicate_labels.pluck("id")).delete_all
+ end
+ end
+
+ def rename_partial_duplicates(start_id, stop_id)
+ # We need to ensure that the new title (with `_duplicate#{ID}`) doesn't exceed the limit.
+ # Truncate the original title (if needed) to 245 characters minus the length of the ID
+ # then add `_duplicate#{ID}`
+
+ soft_duplicates = ApplicationRecord.connection.execute(<<-SQL.squish)
+WITH data AS (
+ SELECT
+ *,
+ substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text as new_title,
+ #{RENAME} AS restore_action,
+ row_number() OVER (PARTITION BY group_id, title ORDER BY id) AS row_number
+ FROM labels
+ WHERE group_id BETWEEN #{start_id} AND #{stop_id}
+) SELECT * FROM data WHERE row_number > 1;
+ SQL
+
+ if soft_duplicates.any?
+ # create backup records
+ BackupLabel.insert_all!(soft_duplicates.map { |label| label.except("row_number") })
+
+ ApplicationRecord.connection.execute(<<-SQL.squish)
+UPDATE labels SET title = substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text
+WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
+ SQL
+ end
+ end
+
+ def restore_renamed_labels(start_id, stop_id)
+ # the backup label IDs are not incremental, they are copied directly from the Labels table
+ ApplicationRecord.connection.execute(<<-SQL.squish)
+WITH backups AS (
+ SELECT id, title
+ FROM backup_labels
+ WHERE id BETWEEN #{start_id} AND #{stop_id}
+ AND restore_action = #{RENAME}
+) UPDATE labels SET title = backups.title
+FROM backups
+WHERE labels.id = backups.id;
+ SQL
+ end
+
+ def restore_deleted_labels(start_id, stop_id)
+ ActiveRecord::Base.connection.execute(<<-SQL.squish)
+INSERT INTO labels
+SELECT id, title, color, group_id, created_at, updated_at, template, description, description_html, type, cached_markdown_version FROM backup_labels
+ WHERE backup_labels.id BETWEEN #{start_id} AND #{stop_id}
+ AND backup_labels.project_id IS NULL AND backup_labels.group_id IS NOT NULL
+ AND backup_labels.restore_action = #{CREATE}
+ SQL
+ end
+end