diff options
author | Stan Hu <stanhu@gmail.com> | 2018-02-21 22:50:07 +0000 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-02-21 22:50:07 +0000 |
commit | a5b7f27373550f0c7d3e302cc537e74e36b1a6ed (patch) | |
tree | 0e4967f6b1ab926c5316fd519caca01dc8c4853e /lib | |
parent | 4bc17b01f5d61615db3848dcb58ae9b06d70539f (diff) | |
parent | 0c357ac83b941e3a3b5d13e8430ec555b384b967 (diff) | |
download | gitlab-ce-a5b7f27373550f0c7d3e302cc537e74e36b1a6ed.tar.gz |
Merge branch 'mk-improve-background-migration-specs' into 'master'
Improve background migration specs
See merge request gitlab-org/gitlab-ce!17162
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/background_migration/populate_untracked_uploads.rb | 160 | ||||
-rw-r--r-- | lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb | 201 |
2 files changed, 208 insertions, 153 deletions
diff --git a/lib/gitlab/background_migration/populate_untracked_uploads.rb b/lib/gitlab/background_migration/populate_untracked_uploads.rb index ee55fabd6f0..9232f20a063 100644 --- a/lib/gitlab/background_migration/populate_untracked_uploads.rb +++ b/lib/gitlab/background_migration/populate_untracked_uploads.rb @@ -5,157 +5,10 @@ module Gitlab # This class processes a batch of rows in `untracked_files_for_uploads` by # adding each file to the `uploads` table if it does not exist. class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength - # This class is responsible for producing the attributes necessary to - # track an uploaded file in the `uploads` table. - class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength - self.table_name = 'untracked_files_for_uploads' - - # Ends with /:random_hex/:filename - FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z} - FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/ - - # These regex patterns are tested against a relative path, relative to - # the upload directory. - # For convenience, if there exists a capture group in the pattern, then - # it indicates the model_id. - PATH_PATTERNS = [ - { - pattern: %r{\A-/system/appearance/logo/(\d+)/}, - uploader: 'AttachmentUploader', - model_type: 'Appearance' - }, - { - pattern: %r{\A-/system/appearance/header_logo/(\d+)/}, - uploader: 'AttachmentUploader', - model_type: 'Appearance' - }, - { - pattern: %r{\A-/system/note/attachment/(\d+)/}, - uploader: 'AttachmentUploader', - model_type: 'Note' - }, - { - pattern: %r{\A-/system/user/avatar/(\d+)/}, - uploader: 'AvatarUploader', - model_type: 'User' - }, - { - pattern: %r{\A-/system/group/avatar/(\d+)/}, - uploader: 'AvatarUploader', - model_type: 'Namespace' - }, - { - pattern: %r{\A-/system/project/avatar/(\d+)/}, - uploader: 'AvatarUploader', - model_type: 'Project' - }, - { - pattern: FILE_UPLOADER_PATH, - uploader: 'FileUploader', - model_type: 'Project' - } - ].freeze - - def to_h - @upload_hash ||= { - path: upload_path, - uploader: uploader, - model_type: model_type, - model_id: model_id, - size: file_size, - checksum: checksum - } - end - - def upload_path - # UntrackedFile#path is absolute, but Upload#path depends on uploader - @upload_path ||= - if uploader == 'FileUploader' - # Path relative to project directory in uploads - matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH) - matchd[0].sub(%r{\A/}, '') # remove leading slash - else - path - end - end - - def uploader - matching_pattern_map[:uploader] - end - - def model_type - matching_pattern_map[:model_type] - end - - def model_id - return @model_id if defined?(@model_id) - - pattern = matching_pattern_map[:pattern] - matchd = path_relative_to_upload_dir.match(pattern) - - # If something is captured (matchd[1] is not nil), it is a model_id - # Only the FileUploader pattern will not match an ID - @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id - end - - def file_size - File.size(absolute_path) - end - - def checksum - Digest::SHA256.file(absolute_path).hexdigest - end - - private - - def matching_pattern_map - @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map| - path_relative_to_upload_dir.match(path_pattern_map[:pattern]) - end - - unless @matching_pattern_map - raise "Unknown upload path pattern \"#{path}\"" - end - - @matching_pattern_map - end - - def file_uploader_model_id - matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE) - not_found_msg = <<~MSG - Could not capture project full_path from a FileUploader path: - "#{path_relative_to_upload_dir}" - MSG - raise not_found_msg unless matchd - - full_path = matchd[1] - project = Project.find_by_full_path(full_path) - return nil unless project - - project.id - end - - # Not including a leading slash - def path_relative_to_upload_dir - upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength - base = %r{\A#{Regexp.escape(upload_dir)}/} - @path_relative_to_upload_dir ||= path.sub(base, '') - end - - def absolute_path - File.join(Gitlab.config.uploads.storage_path, path) - end - end - - # This class is used to query the `uploads` table. - class Upload < ActiveRecord::Base - self.table_name = 'uploads' - end - def perform(start_id, end_id) return unless migrate? - files = UntrackedFile.where(id: start_id..end_id) + files = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.where(id: start_id..end_id) processed_files = insert_uploads_if_needed(files) processed_files.delete_all @@ -165,7 +18,8 @@ module Gitlab private def migrate? - UntrackedFile.table_exists? && Upload.table_exists? + Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.table_exists? && + Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.table_exists? end def insert_uploads_if_needed(files) @@ -197,7 +51,7 @@ module Gitlab def filter_existing_uploads(files) paths = files.map(&:upload_path) - existing_paths = Upload.where(path: paths).pluck(:path).to_set + existing_paths = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.where(path: paths).pluck(:path).to_set files.reject do |file| existing_paths.include?(file.upload_path) @@ -229,7 +83,7 @@ module Gitlab end ids.each do |model_type, model_ids| - model_class = Object.const_get(model_type) + model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize found_ids = model_class.where(id: model_ids.uniq).pluck(:id) deleted_ids = ids[model_type] - found_ids ids[model_type] = deleted_ids @@ -249,8 +103,8 @@ module Gitlab end def drop_temp_table_if_finished - if UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup - UntrackedFile.connection.drop_table(:untracked_files_for_uploads, + if Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup + Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.connection.drop_table(:untracked_files_for_uploads, if_exists: true) end end diff --git a/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb b/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb new file mode 100644 index 00000000000..a2c5acbde71 --- /dev/null +++ b/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb @@ -0,0 +1,201 @@ +# frozen_string_literal: true +module Gitlab + module BackgroundMigration + module PopulateUntrackedUploadsDependencies + # This class is responsible for producing the attributes necessary to + # track an uploaded file in the `uploads` table. + class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength + self.table_name = 'untracked_files_for_uploads' + + # Ends with /:random_hex/:filename + FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z} + FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/ + + # These regex patterns are tested against a relative path, relative to + # the upload directory. + # For convenience, if there exists a capture group in the pattern, then + # it indicates the model_id. + PATH_PATTERNS = [ + { + pattern: %r{\A-/system/appearance/logo/(\d+)/}, + uploader: 'AttachmentUploader', + model_type: 'Appearance' + }, + { + pattern: %r{\A-/system/appearance/header_logo/(\d+)/}, + uploader: 'AttachmentUploader', + model_type: 'Appearance' + }, + { + pattern: %r{\A-/system/note/attachment/(\d+)/}, + uploader: 'AttachmentUploader', + model_type: 'Note' + }, + { + pattern: %r{\A-/system/user/avatar/(\d+)/}, + uploader: 'AvatarUploader', + model_type: 'User' + }, + { + pattern: %r{\A-/system/group/avatar/(\d+)/}, + uploader: 'AvatarUploader', + model_type: 'Namespace' + }, + { + pattern: %r{\A-/system/project/avatar/(\d+)/}, + uploader: 'AvatarUploader', + model_type: 'Project' + }, + { + pattern: FILE_UPLOADER_PATH, + uploader: 'FileUploader', + model_type: 'Project' + } + ].freeze + + def to_h + @upload_hash ||= { + path: upload_path, + uploader: uploader, + model_type: model_type, + model_id: model_id, + size: file_size, + checksum: checksum + } + end + + def upload_path + # UntrackedFile#path is absolute, but Upload#path depends on uploader + @upload_path ||= + if uploader == 'FileUploader' + # Path relative to project directory in uploads + matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH) + matchd[0].sub(%r{\A/}, '') # remove leading slash + else + path + end + end + + def uploader + matching_pattern_map[:uploader] + end + + def model_type + matching_pattern_map[:model_type] + end + + def model_id + return @model_id if defined?(@model_id) + + pattern = matching_pattern_map[:pattern] + matchd = path_relative_to_upload_dir.match(pattern) + + # If something is captured (matchd[1] is not nil), it is a model_id + # Only the FileUploader pattern will not match an ID + @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id + end + + def file_size + File.size(absolute_path) + end + + def checksum + Digest::SHA256.file(absolute_path).hexdigest + end + + private + + def matching_pattern_map + @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map| + path_relative_to_upload_dir.match(path_pattern_map[:pattern]) + end + + unless @matching_pattern_map + raise "Unknown upload path pattern \"#{path}\"" + end + + @matching_pattern_map + end + + def file_uploader_model_id + matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE) + not_found_msg = <<~MSG + Could not capture project full_path from a FileUploader path: + "#{path_relative_to_upload_dir}" + MSG + raise not_found_msg unless matchd + + full_path = matchd[1] + project = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Project.find_by_full_path(full_path) + return nil unless project + + project.id + end + + # Not including a leading slash + def path_relative_to_upload_dir + upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength + base = %r{\A#{Regexp.escape(upload_dir)}/} + @path_relative_to_upload_dir ||= path.sub(base, '') + end + + def absolute_path + File.join(Gitlab.config.uploads.storage_path, path) + end + end + + # Avoid using application code + class Upload < ActiveRecord::Base + self.table_name = 'uploads' + end + + # Avoid using application code + class Appearance < ActiveRecord::Base + self.table_name = 'appearances' + end + + # Avoid using application code + class Namespace < ActiveRecord::Base + self.table_name = 'namespaces' + end + + # Avoid using application code + class Note < ActiveRecord::Base + self.table_name = 'notes' + end + + # Avoid using application code + class User < ActiveRecord::Base + self.table_name = 'users' + end + + # Since project Markdown upload paths don't contain the project ID, we have to find the + # project by its full_path. Due to MySQL/PostgreSQL differences, and historical reasons, + # the logic is somewhat complex, so I've mostly copied it in here. + class Project < ActiveRecord::Base + self.table_name = 'projects' + + def self.find_by_full_path(path) + binary = Gitlab::Database.mysql? ? 'BINARY' : '' + order_sql = "(CASE WHEN #{binary} routes.path = #{connection.quote(path)} THEN 0 ELSE 1 END)" + where_full_path_in(path).reorder(order_sql).take + end + + def self.where_full_path_in(path) + cast_lower = Gitlab::Database.postgresql? + + path = connection.quote(path) + + where = + if cast_lower + "(LOWER(routes.path) = LOWER(#{path}))" + else + "(routes.path = #{path})" + end + + joins("INNER JOIN routes ON routes.source_id = projects.id AND routes.source_type = 'Project'").where(where) + end + end + end + end +end |