From c63e3221587daf9e7464f7d2079ca8ed3111f6ff Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 30 May 2017 13:54:59 +0200 Subject: Add many foreign keys to the projects table This removes the need for relying on Rails' "dependent" option for data removal, which is _incredibly_ slow (even when using :delete_all) when deleting large amounts of data. This also ensures data consistency is enforced on DB level and not on application level (something Rails is really bad at). This commit also includes various migrations to add foreign keys to tables that eventually point to "projects" to ensure no rows get orphaned upon removing a project. --- ..._project_foreign_keys_with_cascading_deletes.rb | 187 +++++++++++++++++++++ ...0029_correct_protected_branches_foreign_keys.rb | 40 +++++ ...2212_add_foreign_key_for_merge_request_diffs.rb | 30 ++++ db/schema.rb | 39 ++++- 4 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb create mode 100644 db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb create mode 100644 db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb (limited to 'db') diff --git a/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb new file mode 100644 index 00000000000..3eaafac321d --- /dev/null +++ b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb @@ -0,0 +1,187 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ProjectForeignKeysWithCascadingDeletes < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + CONCURRENCY = 4 + + disable_ddl_transaction! + + # The tables/columns for which to remove orphans and add foreign keys. Order + # matters as some tables/columns should be processed before others. + TABLES = [ + [:boards, :projects, :project_id], + [:lists, :labels, :label_id], + [:lists, :boards, :board_id], + [:services, :projects, :project_id], + [:forked_project_links, :projects, :forked_to_project_id], + [:merge_requests, :projects, :target_project_id], + [:labels, :projects, :project_id], + [:issues, :projects, :project_id], + [:events, :projects, :project_id], + [:milestones, :projects, :project_id], + [:notes, :projects, :project_id], + [:snippets, :projects, :project_id], + [:web_hooks, :projects, :project_id], + [:protected_branch_merge_access_levels, :protected_branches, :protected_branch_id], + [:protected_branch_push_access_levels, :protected_branches, :protected_branch_id], + [:protected_branches, :projects, :project_id], + [:protected_tags, :projects, :project_id], + [:deploy_keys_projects, :projects, :project_id], + [:users_star_projects, :projects, :project_id], + [:releases, :projects, :project_id], + [:project_group_links, :projects, :project_id], + [:pages_domains, :projects, :project_id], + [:todos, :projects, :project_id], + [:project_import_data, :projects, :project_id], + [:project_features, :projects, :project_id], + [:ci_builds, :projects, :project_id], + [:ci_pipelines, :projects, :project_id], + [:ci_runner_projects, :projects, :project_id], + [:ci_triggers, :projects, :project_id], + [:environments, :projects, :project_id], + [:deployments, :projects, :project_id] + ] + + def up + # These existing foreign keys don't have an "ON DELETE CASCADE" clause. + remove_foreign_key_without_error(:boards, :project_id) + remove_foreign_key_without_error(:lists, :label_id) + remove_foreign_key_without_error(:lists, :board_id) + remove_foreign_key_without_error(:protected_branch_merge_access_levels, + :protected_branch_id) + + remove_foreign_key_without_error(:protected_branch_push_access_levels, + :protected_branch_id) + + remove_orphaned_rows + add_foreign_keys + + # These columns are not indexed yet, meaning a cascading delete would take + # forever. + add_concurrent_index(:project_group_links, :project_id) + add_concurrent_index(:pages_domains, :project_id) + end + + def down + TABLES.each do |(source, _, column)| + remove_foreign_key_without_error(source, column) + end + + add_concurrent_foreign_key(:boards, :projects, column: :project_id) + add_concurrent_foreign_key(:lists, :labels, column: :label_id) + add_concurrent_foreign_key(:lists, :boards, column: :board_id) + + add_concurrent_foreign_key(:protected_branch_merge_access_levels, + :protected_branches, + column: :protected_branch_id) + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + + remove_index_without_error(:project_group_links, :project_id) + remove_index_without_error(:pages_domains, :project_id) + end + + def add_foreign_keys + TABLES.each do |(source, target, column)| + add_concurrent_foreign_key(source, target, column: column) + end + end + + # Removes orphans from various tables concurrently. + def remove_orphaned_rows + Gitlab::Database.with_connection_pool(CONCURRENCY) do |pool| + queues = queues_for_rows(TABLES) + + threads = queues.map do |queue| + Thread.new do + pool.with_connection do |connection| + Thread.current[:foreign_key_connection] = connection + + # Disables statement timeouts for the current connection. This is + # necessary as removing of orphaned data might otherwise exceed the + # statement timeout. + disable_statement_timeout + + remove_orphans(*queue.pop) until queue.empty? + + steal_from_queues(queues - [queue]) + end + end + end + + threads.each(&:join) + end + end + + def steal_from_queues(queues) + loop do + stolen = false + + queues.each do |queue| + # Stealing is racy so it's possible a pop might be called on an + # already-empty queue. + begin + remove_orphans(*queue.pop(true)) + stolen = true + rescue ThreadError + end + end + + break unless stolen + end + end + + def remove_orphans(source, target, column) + quoted_source = quote_table_name(source) + quoted_target = quote_table_name(target) + quoted_column = quote_column_name(column) + + execute <<-EOF.strip_heredoc + DELETE FROM #{quoted_source} + WHERE NOT EXISTS ( + SELECT true + FROM #{quoted_target} + WHERE #{quoted_target}.id = #{quoted_source}.#{quoted_column} + ) + AND #{quoted_source}.#{quoted_column} IS NOT NULL + EOF + end + + def remove_foreign_key_without_error(table, column) + remove_foreign_key(table, column: column) + rescue ArgumentError + end + + def remove_index_without_error(table, column) + remove_concurrent_index(table, column) + rescue ArgumentError + end + + def connection + # Rails memoizes connection objects, but this causes them to be shared + # amongst threads; we don't want that. + Thread.current[:foreign_key_connection] || ActiveRecord::Base.connection + end + + def queues_for_rows(rows) + queues = Array.new(CONCURRENCY) { Queue.new } + slice_size = rows.length / CONCURRENCY + + # Divide all the tuples as evenly as possible amongst the queues. + rows.each_slice(slice_size).each_with_index do |slice, index| + bucket = index % CONCURRENCY + + slice.each do |row| + queues[bucket] << row + end + end + + queues + end +end diff --git a/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb new file mode 100644 index 00000000000..46497775527 --- /dev/null +++ b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb @@ -0,0 +1,40 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CorrectProtectedBranchesForeignKeys < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + remove_foreign_key_without_error(:protected_branch_push_access_levels, + column: :protected_branch_id) + + execute <<-EOF + DELETE FROM protected_branch_push_access_levels + WHERE NOT EXISTS ( + SELECT true + FROM protected_branches + WHERE protected_branch_push_access_levels.protected_branch_id = protected_branches.id + ) + AND protected_branch_id IS NOT NULL + EOF + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + end + + def down + # Previously there was a foreign key without a CASCADING DELETE, so we'll + # just leave the foreign key in place. + end + + def remove_foreign_key_without_error(*args) + remove_foreign_key(*args) + rescue ArgumentError + end +end diff --git a/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb new file mode 100644 index 00000000000..9f524fac8a7 --- /dev/null +++ b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb @@ -0,0 +1,30 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddForeignKeyForMergeRequestDiffs < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + execute <<-EOF + DELETE FROM merge_request_diffs + WHERE NOT EXISTS ( + SELECT true + FROM merge_requests + WHERE merge_requests.id = merge_request_diffs.merge_request_id + ) + EOF + + add_concurrent_foreign_key(:merge_request_diffs, + :merge_requests, + column: :merge_request_id) + end + + def down + remove_foreign_key(:merge_request_diffs, column: :merge_request_id) + end +end diff --git a/db/schema.rb b/db/schema.rb index 40f30a10a01..f12fdf903c5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -971,6 +971,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "pages_domains", ["domain"], name: "index_pages_domains_on_domain", unique: true, using: :btree + add_index "pages_domains", ["project_id"], name: "index_pages_domains_on_project_id", using: :btree create_table "personal_access_tokens", force: :cascade do |t| t.integer "user_id", null: false @@ -1020,6 +1021,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree + add_index "project_group_links", ["project_id"], name: "index_project_group_links_on_project_id", using: :btree create_table "project_import_data", force: :cascade do |t| t.integer "project_id" @@ -1528,48 +1530,75 @@ ActiveRecord::Schema.define(version: 20170703102400) do add_index "web_hooks", ["project_id"], name: "index_web_hooks_on_project_id", using: :btree add_index "web_hooks", ["type"], name: "index_web_hooks_on_type", using: :btree - add_foreign_key "boards", "projects" + add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade add_foreign_key "chat_teams", "namespaces", on_delete: :cascade add_foreign_key "ci_builds", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_a2141b1522", on_delete: :nullify add_foreign_key "ci_builds", "ci_stages", column: "stage_id", name: "fk_3a9eaa254d", on_delete: :cascade + add_foreign_key "ci_builds", "projects", name: "fk_befce0568a", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "projects", name: "fk_8ead60fcc4", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "users", column: "owner_id", name: "fk_9ea99f58d2", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipeline_schedules", column: "pipeline_schedule_id", name: "fk_3d34ab2e06", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_262d4c2d19", on_delete: :nullify + add_foreign_key "ci_pipelines", "projects", name: "fk_86635dbd80", on_delete: :cascade + add_foreign_key "ci_runner_projects", "projects", name: "fk_4478a6f1e4", on_delete: :cascade add_foreign_key "ci_stages", "ci_pipelines", column: "pipeline_id", name: "fk_fb57e6cc56", on_delete: :cascade add_foreign_key "ci_stages", "projects", name: "fk_2360681d1d", on_delete: :cascade add_foreign_key "ci_trigger_requests", "ci_triggers", column: "trigger_id", name: "fk_b8ec8b7245", on_delete: :cascade + add_foreign_key "ci_triggers", "projects", name: "fk_e3e63f966e", on_delete: :cascade add_foreign_key "ci_triggers", "users", column: "owner_id", name: "fk_e8e10d1964", on_delete: :cascade add_foreign_key "ci_variables", "projects", name: "fk_ada5eb64b3", on_delete: :cascade add_foreign_key "container_repositories", "projects" + add_foreign_key "deploy_keys_projects", "projects", name: "fk_58a901ca7e", on_delete: :cascade + add_foreign_key "deployments", "projects", name: "fk_b9a3851b82", on_delete: :cascade + add_foreign_key "environments", "projects", name: "fk_d1c8c1da6a", on_delete: :cascade + add_foreign_key "events", "projects", name: "fk_0434b48643", on_delete: :cascade + add_foreign_key "forked_project_links", "projects", column: "forked_to_project_id", name: "fk_434510edb0", on_delete: :cascade add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade add_foreign_key "issue_metrics", "issues", on_delete: :cascade + add_foreign_key "issues", "projects", name: "fk_899c8f3231", on_delete: :cascade add_foreign_key "label_priorities", "labels", on_delete: :cascade add_foreign_key "label_priorities", "projects", on_delete: :cascade add_foreign_key "labels", "namespaces", column: "group_id", on_delete: :cascade - add_foreign_key "lists", "boards" - add_foreign_key "lists", "labels" + add_foreign_key "labels", "projects", name: "fk_7de4989a69", on_delete: :cascade + add_foreign_key "lists", "boards", name: "fk_0d3f677137", on_delete: :cascade + add_foreign_key "lists", "labels", name: "fk_7a5553d60f", on_delete: :cascade add_foreign_key "merge_request_diff_files", "merge_request_diffs", on_delete: :cascade + add_foreign_key "merge_request_diffs", "merge_requests", name: "fk_8483f3258f", on_delete: :cascade add_foreign_key "merge_request_metrics", "ci_pipelines", column: "pipeline_id", on_delete: :cascade add_foreign_key "merge_request_metrics", "merge_requests", on_delete: :cascade + add_foreign_key "merge_requests", "projects", column: "target_project_id", name: "fk_a6963e8447", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "issues", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "merge_requests", on_delete: :cascade + add_foreign_key "milestones", "projects", name: "fk_9bd0a0c791", on_delete: :cascade + add_foreign_key "notes", "projects", name: "fk_99e097b079", on_delete: :cascade add_foreign_key "oauth_openid_requests", "oauth_access_grants", column: "access_grant_id", name: "fk_oauth_openid_requests_oauth_access_grants_access_grant_id" + add_foreign_key "pages_domains", "projects", name: "fk_ea2f6dfc6f", on_delete: :cascade add_foreign_key "personal_access_tokens", "users" add_foreign_key "project_authorizations", "projects", on_delete: :cascade add_foreign_key "project_authorizations", "users", on_delete: :cascade + add_foreign_key "project_features", "projects", name: "fk_18513d9b92", on_delete: :cascade + add_foreign_key "project_group_links", "projects", name: "fk_daa8cee94c", on_delete: :cascade + add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade add_foreign_key "project_statistics", "projects", on_delete: :cascade - add_foreign_key "protected_branch_merge_access_levels", "protected_branches" - add_foreign_key "protected_branch_push_access_levels", "protected_branches" + add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade + add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade + add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade add_foreign_key "protected_tag_create_access_levels", "namespaces", column: "group_id" add_foreign_key "protected_tag_create_access_levels", "protected_tags" add_foreign_key "protected_tag_create_access_levels", "users" + add_foreign_key "protected_tags", "projects", name: "fk_8e4af87648", on_delete: :cascade + add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade + add_foreign_key "services", "projects", name: "fk_71cce407f9", on_delete: :cascade + add_foreign_key "snippets", "projects", name: "fk_be41fd4bb7", on_delete: :cascade add_foreign_key "subscriptions", "projects", on_delete: :cascade add_foreign_key "system_note_metadata", "notes", name: "fk_d83a918cb1", on_delete: :cascade add_foreign_key "timelogs", "issues", name: "fk_timelogs_issues_issue_id", on_delete: :cascade add_foreign_key "timelogs", "merge_requests", name: "fk_timelogs_merge_requests_merge_request_id", on_delete: :cascade + add_foreign_key "todos", "projects", name: "fk_45054f9c45", on_delete: :cascade add_foreign_key "trending_projects", "projects", on_delete: :cascade add_foreign_key "u2f_registrations", "users" + add_foreign_key "users_star_projects", "projects", name: "fk_22cd27ddfc", on_delete: :cascade add_foreign_key "web_hook_logs", "web_hooks", on_delete: :cascade + add_foreign_key "web_hooks", "projects", name: "fk_0c8ca6d9d1", on_delete: :cascade end -- cgit v1.2.1