diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2018-01-02 17:21:28 +0100 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2018-01-08 17:04:45 +0100 |
commit | d0b8f536a1865af3741fc3255325b7e211ed1d42 (patch) | |
tree | ef9c424adac8cb2954cb52026859a08e62a8ead9 /db | |
parent | 33fb2f99e92acc96ab322f4594a13f11218db87e (diff) | |
download | gitlab-ce-d0b8f536a1865af3741fc3255325b7e211ed1d42.tar.gz |
Remove soft removals related code
This removes all usage of soft removals except for the "pending delete"
system implemented for projects. This in turn simplifies all the query
plans of the models that used soft removals. Since we don't really use
soft removals for anything useful, there's no point in keeping them around.
This _does_ mean that hard removals of issues (which only admins can do,
if I'm not mistaken) can influence the "iid" values, but the code that
produces those values is broken to begin with. More on this (and how to fix it) can be found in
https://gitlab.com/gitlab-org/gitlab-ce/issues/31114.
Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/37447
Diffstat (limited to 'db')
-rw-r--r-- | db/post_migrate/20171207150343_remove_soft_removed_objects.rb | 210 | ||||
-rw-r--r-- | db/post_migrate/20171207150344_remove_deleted_at_columns.rb | 31 | ||||
-rw-r--r-- | db/schema.rb | 8 |
3 files changed, 241 insertions, 8 deletions
diff --git a/db/post_migrate/20171207150343_remove_soft_removed_objects.rb b/db/post_migrate/20171207150343_remove_soft_removed_objects.rb new file mode 100644 index 00000000000..542cfb42fdc --- /dev/null +++ b/db/post_migrate/20171207150343_remove_soft_removed_objects.rb @@ -0,0 +1,210 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class RemoveSoftRemovedObjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + module SoftRemoved + extend ActiveSupport::Concern + + included do + scope :soft_removed, -> { where('deleted_at IS NOT NULL') } + end + end + + class User < ActiveRecord::Base + self.table_name = 'users' + + include EachBatch + end + + class Issue < ActiveRecord::Base + self.table_name = 'issues' + + include EachBatch + include SoftRemoved + end + + class MergeRequest < ActiveRecord::Base + self.table_name = 'merge_requests' + + include EachBatch + include SoftRemoved + end + + class Namespace < ActiveRecord::Base + self.table_name = 'namespaces' + + include EachBatch + include SoftRemoved + + scope :soft_removed_personal, -> { soft_removed.where(type: nil) } + scope :soft_removed_group, -> { soft_removed.where(type: 'Group') } + end + + class Route < ActiveRecord::Base + self.table_name = 'routes' + + include EachBatch + include SoftRemoved + end + + class Project < ActiveRecord::Base + self.table_name = 'projects' + + include EachBatch + include SoftRemoved + end + + class CiPipelineSchedule < ActiveRecord::Base + self.table_name = 'ci_pipeline_schedules' + + include EachBatch + include SoftRemoved + end + + class CiTrigger < ActiveRecord::Base + self.table_name = 'ci_triggers' + + include EachBatch + include SoftRemoved + end + + MODELS = [Issue, MergeRequest, CiPipelineSchedule, CiTrigger].freeze + + def up + 
disable_statement_timeout + + remove_personal_routes + remove_personal_namespaces + remove_group_namespaces + remove_simple_soft_removed_rows + end + + def down + # The data removed by this migration can't be restored in an automated way. + end + + def remove_simple_soft_removed_rows + create_temporary_indexes + + MODELS.each do |model| + say_with_time("Removing soft removed rows from #{model.table_name}") do + model.soft_removed.each_batch do |batch, index| + batch.delete_all + end + end + end + ensure + remove_temporary_indexes + end + + def create_temporary_indexes + MODELS.each do |model| + index_name = temporary_index_name_for(model) + + # Without this index the removal process can take a very long time. For + # example, getting the next ID of a batch for the `issues` table in + # staging would take between 15 and 20 seconds. + next if temporary_index_exists?(model) + + say_with_time("Creating temporary index #{index_name}") do + add_concurrent_index( + model.table_name, + [:deleted_at, :id], + name: index_name, + where: 'deleted_at IS NOT NULL' + ) + end + end + end + + def remove_temporary_indexes + MODELS.each do |model| + index_name = temporary_index_name_for(model) + + next unless temporary_index_exists?(model) + + say_with_time("Removing temporary index #{index_name}") do + remove_concurrent_index_by_name(model.table_name, index_name) + end + end + end + + def temporary_index_name_for(model) + "index_on_#{model.table_name}_tmp" + end + + def temporary_index_exists?(model) + index_name = temporary_index_name_for(model) + + index_exists?(model.table_name, [:deleted_at, :id], name: index_name) + end + + def remove_personal_namespaces + # Some personal namespaces are left behind in case of GitLab.com. In these + # cases the associated data such as the projects and users has already been + # removed. 
+ Namespace.soft_removed_personal.each_batch do |batch| + batch.delete_all + end + end + + def remove_group_namespaces + # Left over groups can't be easily removed because we may also need to + # remove memberships, repositories, and other associated data. As a result + # we'll just schedule a Sidekiq job to remove these. + # + # As of January 5th, 2018 there are 36 groups that will be removed using + # this code. + Namespace.select(:id).soft_removed_group.each_batch(of: 10) do |batch, index| + # We need the ID of an admin user as the owners of the group may no longer + # exist (or might not even be set in `namespaces.owner_id`). + admin_id = id_for_admin_user + + batch.each do |ns| + schedule_group_removal(index * 5.minutes, ns.id, admin_id) + end + end + end + + def schedule_group_removal(delay, group_id, user_id) + if migrate_inline? + GroupDestroyWorker.new.perform(group_id, user_id) + else + GroupDestroyWorker.perform_in(delay, group_id, user_id) + end + end + + def remove_personal_routes + namespaces = Namespace.select(1) + .soft_removed + .where('namespaces.type IS NULL') + .where('routes.source_type = ?', 'Namespace') + .where('routes.source_id = namespaces.id') + + Route.where('EXISTS (?)', namespaces).each_batch do |batch| + batch.delete_all + end + end + + def id_for_admin_user + return @id_for_admin_user if @id_for_admin_user + + if (admin_id = User.where(admin: true).limit(1).pluck(:id).first) + @id_for_admin_user = admin_id + else + raise 'Can not remove soft removed groups as no admin user exists. ' \ + 'Please make sure at least one user with `admin` set to TRUE exists before proceeding.' + end + end + + def migrate_inline? + Rails.env.test? || Rails.env.development? 
+ end +end diff --git a/db/post_migrate/20171207150344_remove_deleted_at_columns.rb b/db/post_migrate/20171207150344_remove_deleted_at_columns.rb new file mode 100644 index 00000000000..154d7a1b926 --- /dev/null +++ b/db/post_migrate/20171207150344_remove_deleted_at_columns.rb @@ -0,0 +1,31 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class RemoveDeletedAtColumns < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + TABLES = %i[issues merge_requests namespaces ci_pipeline_schedules ci_triggers].freeze + COLUMN = :deleted_at + + def up + TABLES.each do |table| + remove_column(table, COLUMN) if column_exists?(table, COLUMN) + end + end + + def down + TABLES.each do |table| + unless column_exists?(table, COLUMN) + add_column(table, COLUMN, :datetime_with_timezone) + end + + unless index_exists?(table, COLUMN) + add_concurrent_index(table, COLUMN) + end + end + end +end diff --git a/db/schema.rb b/db/schema.rb index e6a2ea4c862..544a1bcc439 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -356,7 +356,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do t.integer "project_id" t.integer "owner_id" t.boolean "active", default: true - t.datetime "deleted_at" t.datetime "created_at" t.datetime "updated_at" end @@ -466,7 +465,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do create_table "ci_triggers", force: :cascade do |t| t.string "token" - t.datetime "deleted_at" t.datetime "created_at" t.datetime "updated_at" t.integer "project_id" @@ -860,7 +858,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do t.integer "iid" t.integer "updated_by_id" t.boolean "confidential", default: false, null: false - t.datetime "deleted_at" t.date "due_date" t.integer "moved_to_id" t.integer "lock_version" @@ -877,7 +874,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do add_index "issues", 
["author_id"], name: "index_issues_on_author_id", using: :btree add_index "issues", ["confidential"], name: "index_issues_on_confidential", using: :btree - add_index "issues", ["deleted_at"], name: "index_issues_on_deleted_at", using: :btree add_index "issues", ["description"], name: "index_issues_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"} add_index "issues", ["milestone_id"], name: "index_issues_on_milestone_id", using: :btree add_index "issues", ["moved_to_id"], name: "index_issues_on_moved_to_id", where: "(moved_to_id IS NOT NULL)", using: :btree @@ -1086,7 +1082,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do t.boolean "merge_when_pipeline_succeeds", default: false, null: false t.integer "merge_user_id" t.string "merge_commit_sha" - t.datetime "deleted_at" t.string "in_progress_merge_commit_sha" t.integer "lock_version" t.text "title_html" @@ -1105,7 +1100,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do add_index "merge_requests", ["assignee_id"], name: "index_merge_requests_on_assignee_id", using: :btree add_index "merge_requests", ["author_id"], name: "index_merge_requests_on_author_id", using: :btree add_index "merge_requests", ["created_at"], name: "index_merge_requests_on_created_at", using: :btree - add_index "merge_requests", ["deleted_at"], name: "index_merge_requests_on_deleted_at", using: :btree add_index "merge_requests", ["description"], name: "index_merge_requests_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"} add_index "merge_requests", ["head_pipeline_id"], name: "index_merge_requests_on_head_pipeline_id", using: :btree add_index "merge_requests", ["latest_merge_request_diff_id"], name: "index_merge_requests_on_latest_merge_request_diff_id", using: :btree @@ -1165,7 +1159,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do t.boolean "share_with_group_lock", default: false t.integer "visibility_level", default: 20, null: false t.boolean 
"request_access_enabled", default: false, null: false - t.datetime "deleted_at" t.text "description_html" t.boolean "lfs_enabled" t.integer "parent_id" @@ -1175,7 +1168,6 @@ ActiveRecord::Schema.define(version: 20171230123729) do end add_index "namespaces", ["created_at"], name: "index_namespaces_on_created_at", using: :btree - add_index "namespaces", ["deleted_at"], name: "index_namespaces_on_deleted_at", using: :btree add_index "namespaces", ["name", "parent_id"], name: "index_namespaces_on_name_and_parent_id", unique: true, using: :btree add_index "namespaces", ["name"], name: "index_namespaces_on_name_trigram", using: :gin, opclasses: {"name"=>"gin_trgm_ops"} add_index "namespaces", ["owner_id"], name: "index_namespaces_on_owner_id", using: :btree |