diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2017-05-30 13:54:59 +0200 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2017-07-06 12:01:36 +0200 |
commit | c63e3221587daf9e7464f7d2079ca8ed3111f6ff (patch) | |
tree | 386eb322d420d4ddf5a4bff514334f686b58c9fb /db | |
parent | 050eae8c4dff87fef63e79eb60d927d0171b5f7b (diff) | |
download | gitlab-ce-c63e3221587daf9e7464f7d2079ca8ed3111f6ff.tar.gz |
Add many foreign keys to the projects table
This removes the need for relying on Rails' "dependent" option for data
removal, which is _incredibly_ slow (even when using :delete_all) when
deleting large amounts of data. This also ensures data consistency is
enforced on DB level and not on application level (something Rails is
really bad at).
This commit also includes various migrations to add foreign keys to
tables that eventually point to "projects" to ensure no rows get
orphaned upon removing a project.
Diffstat (limited to 'db')
4 files changed, 291 insertions, 5 deletions
diff --git a/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb new file mode 100644 index 00000000000..3eaafac321d --- /dev/null +++ b/db/migrate/20170530130129_project_foreign_keys_with_cascading_deletes.rb @@ -0,0 +1,187 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ProjectForeignKeysWithCascadingDeletes < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + CONCURRENCY = 4 + + disable_ddl_transaction! + + # The tables/columns for which to remove orphans and add foreign keys. Order + # matters as some tables/columns should be processed before others. + TABLES = [ + [:boards, :projects, :project_id], + [:lists, :labels, :label_id], + [:lists, :boards, :board_id], + [:services, :projects, :project_id], + [:forked_project_links, :projects, :forked_to_project_id], + [:merge_requests, :projects, :target_project_id], + [:labels, :projects, :project_id], + [:issues, :projects, :project_id], + [:events, :projects, :project_id], + [:milestones, :projects, :project_id], + [:notes, :projects, :project_id], + [:snippets, :projects, :project_id], + [:web_hooks, :projects, :project_id], + [:protected_branch_merge_access_levels, :protected_branches, :protected_branch_id], + [:protected_branch_push_access_levels, :protected_branches, :protected_branch_id], + [:protected_branches, :projects, :project_id], + [:protected_tags, :projects, :project_id], + [:deploy_keys_projects, :projects, :project_id], + [:users_star_projects, :projects, :project_id], + [:releases, :projects, :project_id], + [:project_group_links, :projects, :project_id], + [:pages_domains, :projects, :project_id], + [:todos, :projects, :project_id], + [:project_import_data, :projects, :project_id], + [:project_features, :projects, :project_id], + [:ci_builds, :projects, :project_id], + [:ci_pipelines, :projects, :project_id], + [:ci_runner_projects, :projects, :project_id], + [:ci_triggers, :projects, :project_id], + [:environments, :projects, :project_id], + [:deployments, :projects, :project_id] + ] + + def up + # These existing foreign keys don't have an "ON DELETE CASCADE" clause. + remove_foreign_key_without_error(:boards, :project_id) + remove_foreign_key_without_error(:lists, :label_id) + remove_foreign_key_without_error(:lists, :board_id) + remove_foreign_key_without_error(:protected_branch_merge_access_levels, + :protected_branch_id) + + remove_foreign_key_without_error(:protected_branch_push_access_levels, + :protected_branch_id) + + remove_orphaned_rows + add_foreign_keys + + # These columns are not indexed yet, meaning a cascading delete would take + # forever. + add_concurrent_index(:project_group_links, :project_id) + add_concurrent_index(:pages_domains, :project_id) + end + + def down + TABLES.each do |(source, _, column)| + remove_foreign_key_without_error(source, column) + end + + add_concurrent_foreign_key(:boards, :projects, column: :project_id) + add_concurrent_foreign_key(:lists, :labels, column: :label_id) + add_concurrent_foreign_key(:lists, :boards, column: :board_id) + + add_concurrent_foreign_key(:protected_branch_merge_access_levels, + :protected_branches, + column: :protected_branch_id) + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + + remove_index_without_error(:project_group_links, :project_id) + remove_index_without_error(:pages_domains, :project_id) + end + + def add_foreign_keys + TABLES.each do |(source, target, column)| + add_concurrent_foreign_key(source, target, column: column) + end + end + + # Removes orphans from various tables concurrently. + def remove_orphaned_rows + Gitlab::Database.with_connection_pool(CONCURRENCY) do |pool| + queues = queues_for_rows(TABLES) + + threads = queues.map do |queue| + Thread.new do + pool.with_connection do |connection| + Thread.current[:foreign_key_connection] = connection + + # Disables statement timeouts for the current connection. This is + # necessary as removing of orphaned data might otherwise exceed the + # statement timeout. + disable_statement_timeout + + remove_orphans(*queue.pop) until queue.empty? + + steal_from_queues(queues - [queue]) + end + end + end + + threads.each(&:join) + end + end + + def steal_from_queues(queues) + loop do + stolen = false + + queues.each do |queue| + # Stealing is racy so it's possible a pop might be called on an + # already-empty queue. + begin + remove_orphans(*queue.pop(true)) + stolen = true + rescue ThreadError + end + end + + break unless stolen + end + end + + def remove_orphans(source, target, column) + quoted_source = quote_table_name(source) + quoted_target = quote_table_name(target) + quoted_column = quote_column_name(column) + + execute <<-EOF.strip_heredoc + DELETE FROM #{quoted_source} + WHERE NOT EXISTS ( + SELECT true + FROM #{quoted_target} + WHERE #{quoted_target}.id = #{quoted_source}.#{quoted_column} + ) + AND #{quoted_source}.#{quoted_column} IS NOT NULL + EOF + end + + def remove_foreign_key_without_error(table, column) + remove_foreign_key(table, column: column) + rescue ArgumentError + end + + def remove_index_without_error(table, column) + remove_concurrent_index(table, column) + rescue ArgumentError + end + + def connection + # Rails memoizes connection objects, but this causes them to be shared + # amongst threads; we don't want that. + Thread.current[:foreign_key_connection] || ActiveRecord::Base.connection + end + + def queues_for_rows(rows) + queues = Array.new(CONCURRENCY) { Queue.new } + slice_size = rows.length / CONCURRENCY + + # Divide all the tuples as evenly as possible amongst the queues. + rows.each_slice(slice_size).each_with_index do |slice, index| + bucket = index % CONCURRENCY + + slice.each do |row| + queues[bucket] << row + end + end + + queues + end +end diff --git a/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb new file mode 100644 index 00000000000..46497775527 --- /dev/null +++ b/db/migrate/20170622130029_correct_protected_branches_foreign_keys.rb @@ -0,0 +1,40 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CorrectProtectedBranchesForeignKeys < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + remove_foreign_key_without_error(:protected_branch_push_access_levels, + column: :protected_branch_id) + + execute <<-EOF + DELETE FROM protected_branch_push_access_levels + WHERE NOT EXISTS ( + SELECT true + FROM protected_branches + WHERE protected_branch_push_access_levels.protected_branch_id = protected_branches.id + ) + AND protected_branch_id IS NOT NULL + EOF + + add_concurrent_foreign_key(:protected_branch_push_access_levels, + :protected_branches, + column: :protected_branch_id) + end + + def down + # Previously there was a foreign key without a CASCADING DELETE, so we'll + # just leave the foreign key in place. + end + + def remove_foreign_key_without_error(*args) + remove_foreign_key(*args) + rescue ArgumentError + end +end diff --git a/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb new file mode 100644 index 00000000000..9f524fac8a7 --- /dev/null +++ b/db/migrate/20170622132212_add_foreign_key_for_merge_request_diffs.rb @@ -0,0 +1,30 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddForeignKeyForMergeRequestDiffs < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + disable_ddl_transaction! + + def up + execute <<-EOF + DELETE FROM merge_request_diffs + WHERE NOT EXISTS ( + SELECT true + FROM merge_requests + WHERE merge_requests.id = merge_request_diffs.merge_request_id + ) + EOF + + add_concurrent_foreign_key(:merge_request_diffs, + :merge_requests, + column: :merge_request_id) + end + + def down + remove_foreign_key(:merge_request_diffs, column: :merge_request_id) + end +end diff --git a/db/schema.rb b/db/schema.rb index 40f30a10a01..f12fdf903c5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -971,6 +971,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "pages_domains", ["domain"], name: "index_pages_domains_on_domain", unique: true, using: :btree + add_index "pages_domains", ["project_id"], name: "index_pages_domains_on_project_id", using: :btree create_table "personal_access_tokens", force: :cascade do |t| t.integer "user_id", null: false @@ -1020,6 +1021,7 @@ ActiveRecord::Schema.define(version: 20170703102400) do end add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree + add_index "project_group_links", ["project_id"], name: "index_project_group_links_on_project_id", using: :btree create_table "project_import_data", force: :cascade do |t| t.integer "project_id" @@ -1528,48 +1530,75 @@ ActiveRecord::Schema.define(version: 20170703102400) do add_index "web_hooks", ["project_id"], name: "index_web_hooks_on_project_id", using: :btree add_index "web_hooks", ["type"], name: "index_web_hooks_on_type", using: :btree - add_foreign_key "boards", "projects" + add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade add_foreign_key "chat_teams", "namespaces", on_delete: :cascade add_foreign_key "ci_builds", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_a2141b1522", on_delete: :nullify add_foreign_key "ci_builds", "ci_stages", column: "stage_id", name: "fk_3a9eaa254d", on_delete: :cascade + add_foreign_key "ci_builds", "projects", name: "fk_befce0568a", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "projects", name: "fk_8ead60fcc4", on_delete: :cascade add_foreign_key "ci_pipeline_schedules", "users", column: "owner_id", name: "fk_9ea99f58d2", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipeline_schedules", column: "pipeline_schedule_id", name: "fk_3d34ab2e06", on_delete: :nullify add_foreign_key "ci_pipelines", "ci_pipelines", column: "auto_canceled_by_id", name: "fk_262d4c2d19", on_delete: :nullify + add_foreign_key "ci_pipelines", "projects", name: "fk_86635dbd80", on_delete: :cascade + add_foreign_key "ci_runner_projects", "projects", name: "fk_4478a6f1e4", on_delete: :cascade add_foreign_key "ci_stages", "ci_pipelines", column: "pipeline_id", name: "fk_fb57e6cc56", on_delete: :cascade add_foreign_key "ci_stages", "projects", name: "fk_2360681d1d", on_delete: :cascade add_foreign_key "ci_trigger_requests", "ci_triggers", column: "trigger_id", name: "fk_b8ec8b7245", on_delete: :cascade + add_foreign_key "ci_triggers", "projects", name: "fk_e3e63f966e", on_delete: :cascade add_foreign_key "ci_triggers", "users", column: "owner_id", name: "fk_e8e10d1964", on_delete: :cascade add_foreign_key "ci_variables", "projects", name: "fk_ada5eb64b3", on_delete: :cascade add_foreign_key "container_repositories", "projects" + add_foreign_key "deploy_keys_projects", "projects", name: "fk_58a901ca7e", on_delete: :cascade + add_foreign_key "deployments", "projects", name: "fk_b9a3851b82", on_delete: :cascade + add_foreign_key "environments", "projects", name: "fk_d1c8c1da6a", on_delete: :cascade + add_foreign_key "events", "projects", name: "fk_0434b48643", on_delete: :cascade + add_foreign_key "forked_project_links", "projects", column: "forked_to_project_id", name: "fk_434510edb0", on_delete: :cascade add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade add_foreign_key "issue_metrics", "issues", on_delete: :cascade + add_foreign_key "issues", "projects", name: "fk_899c8f3231", on_delete: :cascade add_foreign_key "label_priorities", "labels", on_delete: :cascade add_foreign_key "label_priorities", "projects", on_delete: :cascade add_foreign_key "labels", "namespaces", column: "group_id", on_delete: :cascade - add_foreign_key "lists", "boards" - add_foreign_key "lists", "labels" + add_foreign_key "labels", "projects", name: "fk_7de4989a69", on_delete: :cascade + add_foreign_key "lists", "boards", name: "fk_0d3f677137", on_delete: :cascade + add_foreign_key "lists", "labels", name: "fk_7a5553d60f", on_delete: :cascade add_foreign_key "merge_request_diff_files", "merge_request_diffs", on_delete: :cascade + add_foreign_key "merge_request_diffs", "merge_requests", name: "fk_8483f3258f", on_delete: :cascade add_foreign_key "merge_request_metrics", "ci_pipelines", column: "pipeline_id", on_delete: :cascade add_foreign_key "merge_request_metrics", "merge_requests", on_delete: :cascade + add_foreign_key "merge_requests", "projects", column: "target_project_id", name: "fk_a6963e8447", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "issues", on_delete: :cascade add_foreign_key "merge_requests_closing_issues", "merge_requests", on_delete: :cascade + add_foreign_key "milestones", "projects", name: "fk_9bd0a0c791", on_delete: :cascade + add_foreign_key "notes", "projects", name: "fk_99e097b079", on_delete: :cascade add_foreign_key "oauth_openid_requests", "oauth_access_grants", column: "access_grant_id", name: "fk_oauth_openid_requests_oauth_access_grants_access_grant_id" + add_foreign_key "pages_domains", "projects", name: "fk_ea2f6dfc6f", on_delete: :cascade add_foreign_key "personal_access_tokens", "users" add_foreign_key "project_authorizations", "projects", on_delete: :cascade add_foreign_key "project_authorizations", "users", on_delete: :cascade + add_foreign_key "project_features", "projects", name: "fk_18513d9b92", on_delete: :cascade + add_foreign_key "project_group_links", "projects", name: "fk_daa8cee94c", on_delete: :cascade + add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade add_foreign_key "project_statistics", "projects", on_delete: :cascade - add_foreign_key "protected_branch_merge_access_levels", "protected_branches" - add_foreign_key "protected_branch_push_access_levels", "protected_branches" + add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade + add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade + add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade add_foreign_key "protected_tag_create_access_levels", "namespaces", column: "group_id" add_foreign_key "protected_tag_create_access_levels", "protected_tags" add_foreign_key "protected_tag_create_access_levels", "users" + add_foreign_key "protected_tags", "projects", name: "fk_8e4af87648", on_delete: :cascade + add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade + add_foreign_key "services", "projects", name: "fk_71cce407f9", on_delete: :cascade + add_foreign_key "snippets", "projects", name: "fk_be41fd4bb7", on_delete: :cascade add_foreign_key "subscriptions", "projects", on_delete: :cascade add_foreign_key "system_note_metadata", "notes", name: "fk_d83a918cb1", on_delete: :cascade add_foreign_key "timelogs", "issues", name: "fk_timelogs_issues_issue_id", on_delete: :cascade add_foreign_key "timelogs", "merge_requests", name: "fk_timelogs_merge_requests_merge_request_id", on_delete: :cascade + add_foreign_key "todos", "projects", name: "fk_45054f9c45", on_delete: :cascade add_foreign_key "trending_projects", "projects", on_delete: :cascade add_foreign_key "u2f_registrations", "users" + add_foreign_key "users_star_projects", "projects", name: "fk_22cd27ddfc", on_delete: :cascade add_foreign_key "web_hook_logs", "web_hooks", on_delete: :cascade + add_foreign_key "web_hooks", "projects", name: "fk_0c8ca6d9d1", on_delete: :cascade end |