summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Stan Hu <stanhu@gmail.com> 2018-02-21 22:50:07 +0000
committer Stan Hu <stanhu@gmail.com> 2018-02-21 22:50:07 +0000
commit a5b7f27373550f0c7d3e302cc537e74e36b1a6ed (patch)
tree 0e4967f6b1ab926c5316fd519caca01dc8c4853e /lib
parent 4bc17b01f5d61615db3848dcb58ae9b06d70539f (diff)
parent 0c357ac83b941e3a3b5d13e8430ec555b384b967 (diff)
download gitlab-ce-a5b7f27373550f0c7d3e302cc537e74e36b1a6ed.tar.gz
Merge branch 'mk-improve-background-migration-specs' into 'master'
Improve background migration specs. See merge request gitlab-org/gitlab-ce!17162
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/background_migration/populate_untracked_uploads.rb 160
-rw-r--r-- lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb 201
2 files changed, 208 insertions, 153 deletions
diff --git a/lib/gitlab/background_migration/populate_untracked_uploads.rb b/lib/gitlab/background_migration/populate_untracked_uploads.rb
index ee55fabd6f0..9232f20a063 100644
--- a/lib/gitlab/background_migration/populate_untracked_uploads.rb
+++ b/lib/gitlab/background_migration/populate_untracked_uploads.rb
@@ -5,157 +5,10 @@ module Gitlab
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
- # This class is responsible for producing the attributes necessary to
- # track an uploaded file in the `uploads` table.
- class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
- self.table_name = 'untracked_files_for_uploads'
-
- # Ends with /:random_hex/:filename
- FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
- FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/
-
- # These regex patterns are tested against a relative path, relative to
- # the upload directory.
- # For convenience, if there exists a capture group in the pattern, then
- # it indicates the model_id.
- PATH_PATTERNS = [
- {
- pattern: %r{\A-/system/appearance/logo/(\d+)/},
- uploader: 'AttachmentUploader',
- model_type: 'Appearance'
- },
- {
- pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
- uploader: 'AttachmentUploader',
- model_type: 'Appearance'
- },
- {
- pattern: %r{\A-/system/note/attachment/(\d+)/},
- uploader: 'AttachmentUploader',
- model_type: 'Note'
- },
- {
- pattern: %r{\A-/system/user/avatar/(\d+)/},
- uploader: 'AvatarUploader',
- model_type: 'User'
- },
- {
- pattern: %r{\A-/system/group/avatar/(\d+)/},
- uploader: 'AvatarUploader',
- model_type: 'Namespace'
- },
- {
- pattern: %r{\A-/system/project/avatar/(\d+)/},
- uploader: 'AvatarUploader',
- model_type: 'Project'
- },
- {
- pattern: FILE_UPLOADER_PATH,
- uploader: 'FileUploader',
- model_type: 'Project'
- }
- ].freeze
-
- def to_h
- @upload_hash ||= {
- path: upload_path,
- uploader: uploader,
- model_type: model_type,
- model_id: model_id,
- size: file_size,
- checksum: checksum
- }
- end
-
- def upload_path
- # UntrackedFile#path is absolute, but Upload#path depends on uploader
- @upload_path ||=
- if uploader == 'FileUploader'
- # Path relative to project directory in uploads
- matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
- matchd[0].sub(%r{\A/}, '') # remove leading slash
- else
- path
- end
- end
-
- def uploader
- matching_pattern_map[:uploader]
- end
-
- def model_type
- matching_pattern_map[:model_type]
- end
-
- def model_id
- return @model_id if defined?(@model_id)
-
- pattern = matching_pattern_map[:pattern]
- matchd = path_relative_to_upload_dir.match(pattern)
-
- # If something is captured (matchd[1] is not nil), it is a model_id
- # Only the FileUploader pattern will not match an ID
- @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
- end
-
- def file_size
- File.size(absolute_path)
- end
-
- def checksum
- Digest::SHA256.file(absolute_path).hexdigest
- end
-
- private
-
- def matching_pattern_map
- @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
- path_relative_to_upload_dir.match(path_pattern_map[:pattern])
- end
-
- unless @matching_pattern_map
- raise "Unknown upload path pattern \"#{path}\""
- end
-
- @matching_pattern_map
- end
-
- def file_uploader_model_id
- matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
- not_found_msg = <<~MSG
- Could not capture project full_path from a FileUploader path:
- "#{path_relative_to_upload_dir}"
- MSG
- raise not_found_msg unless matchd
-
- full_path = matchd[1]
- project = Project.find_by_full_path(full_path)
- return nil unless project
-
- project.id
- end
-
- # Not including a leading slash
- def path_relative_to_upload_dir
- upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
- base = %r{\A#{Regexp.escape(upload_dir)}/}
- @path_relative_to_upload_dir ||= path.sub(base, '')
- end
-
- def absolute_path
- File.join(Gitlab.config.uploads.storage_path, path)
- end
- end
-
- # This class is used to query the `uploads` table.
- class Upload < ActiveRecord::Base
- self.table_name = 'uploads'
- end
-
def perform(start_id, end_id)
return unless migrate?
- files = UntrackedFile.where(id: start_id..end_id)
+ files = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.where(id: start_id..end_id)
processed_files = insert_uploads_if_needed(files)
processed_files.delete_all
@@ -165,7 +18,8 @@ module Gitlab
private
def migrate?
- UntrackedFile.table_exists? && Upload.table_exists?
+ Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.table_exists? &&
+ Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.table_exists?
end
def insert_uploads_if_needed(files)
@@ -197,7 +51,7 @@ module Gitlab
def filter_existing_uploads(files)
paths = files.map(&:upload_path)
- existing_paths = Upload.where(path: paths).pluck(:path).to_set
+ existing_paths = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.where(path: paths).pluck(:path).to_set
files.reject do |file|
existing_paths.include?(file.upload_path)
@@ -229,7 +83,7 @@ module Gitlab
end
ids.each do |model_type, model_ids|
- model_class = Object.const_get(model_type)
+ model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize
found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
deleted_ids = ids[model_type] - found_ids
ids[model_type] = deleted_ids
@@ -249,8 +103,8 @@ module Gitlab
end
def drop_temp_table_if_finished
- if UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup
- UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
+ if Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup
+ Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if_exists: true)
end
end
diff --git a/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb b/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb
new file mode 100644
index 00000000000..a2c5acbde71
--- /dev/null
+++ b/lib/gitlab/background_migration/populate_untracked_uploads_dependencies.rb
@@ -0,0 +1,201 @@
+# frozen_string_literal: true
+module Gitlab
+ module BackgroundMigration
+ module PopulateUntrackedUploadsDependencies
+ # This class is responsible for producing the attributes necessary to
+ # track an uploaded file in the `uploads` table.
+ class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
+ self.table_name = 'untracked_files_for_uploads'
+
+ # Ends with /:random_hex/:filename
+ FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
+ FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/
+
+ # These regex patterns are tested against a relative path, relative to
+ # the upload directory.
+ # For convenience, if there exists a capture group in the pattern, then
+ # it indicates the model_id.
+ PATH_PATTERNS = [
+ {
+ pattern: %r{\A-/system/appearance/logo/(\d+)/},
+ uploader: 'AttachmentUploader',
+ model_type: 'Appearance'
+ },
+ {
+ pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
+ uploader: 'AttachmentUploader',
+ model_type: 'Appearance'
+ },
+ {
+ pattern: %r{\A-/system/note/attachment/(\d+)/},
+ uploader: 'AttachmentUploader',
+ model_type: 'Note'
+ },
+ {
+ pattern: %r{\A-/system/user/avatar/(\d+)/},
+ uploader: 'AvatarUploader',
+ model_type: 'User'
+ },
+ {
+ pattern: %r{\A-/system/group/avatar/(\d+)/},
+ uploader: 'AvatarUploader',
+ model_type: 'Namespace'
+ },
+ {
+ pattern: %r{\A-/system/project/avatar/(\d+)/},
+ uploader: 'AvatarUploader',
+ model_type: 'Project'
+ },
+ {
+ pattern: FILE_UPLOADER_PATH,
+ uploader: 'FileUploader',
+ model_type: 'Project'
+ }
+ ].freeze
+
+ def to_h
+ @upload_hash ||= {
+ path: upload_path,
+ uploader: uploader,
+ model_type: model_type,
+ model_id: model_id,
+ size: file_size,
+ checksum: checksum
+ }
+ end
+
+ def upload_path
+ # UntrackedFile#path is absolute, but Upload#path depends on uploader
+ @upload_path ||=
+ if uploader == 'FileUploader'
+ # Path relative to project directory in uploads
+ matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
+ matchd[0].sub(%r{\A/}, '') # remove leading slash
+ else
+ path
+ end
+ end
+
+ def uploader
+ matching_pattern_map[:uploader]
+ end
+
+ def model_type
+ matching_pattern_map[:model_type]
+ end
+
+ def model_id
+ return @model_id if defined?(@model_id)
+
+ pattern = matching_pattern_map[:pattern]
+ matchd = path_relative_to_upload_dir.match(pattern)
+
+ # If something is captured (matchd[1] is not nil), it is a model_id
+ # Only the FileUploader pattern will not match an ID
+ @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
+ end
+
+ def file_size
+ File.size(absolute_path)
+ end
+
+ def checksum
+ Digest::SHA256.file(absolute_path).hexdigest
+ end
+
+ private
+
+ def matching_pattern_map
+ @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
+ path_relative_to_upload_dir.match(path_pattern_map[:pattern])
+ end
+
+ unless @matching_pattern_map
+ raise "Unknown upload path pattern \"#{path}\""
+ end
+
+ @matching_pattern_map
+ end
+
+ def file_uploader_model_id
+ matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
+ not_found_msg = <<~MSG
+ Could not capture project full_path from a FileUploader path:
+ "#{path_relative_to_upload_dir}"
+ MSG
+ raise not_found_msg unless matchd
+
+ full_path = matchd[1]
+ project = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Project.find_by_full_path(full_path)
+ return nil unless project
+
+ project.id
+ end
+
+ # Not including a leading slash
+ def path_relative_to_upload_dir
+ upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
+ base = %r{\A#{Regexp.escape(upload_dir)}/}
+ @path_relative_to_upload_dir ||= path.sub(base, '')
+ end
+
+ def absolute_path
+ File.join(Gitlab.config.uploads.storage_path, path)
+ end
+ end
+
+ # Avoid using application code
+ class Upload < ActiveRecord::Base
+ self.table_name = 'uploads'
+ end
+
+ # Avoid using application code
+ class Appearance < ActiveRecord::Base
+ self.table_name = 'appearances'
+ end
+
+ # Avoid using application code
+ class Namespace < ActiveRecord::Base
+ self.table_name = 'namespaces'
+ end
+
+ # Avoid using application code
+ class Note < ActiveRecord::Base
+ self.table_name = 'notes'
+ end
+
+ # Avoid using application code
+ class User < ActiveRecord::Base
+ self.table_name = 'users'
+ end
+
+ # Since project Markdown upload paths don't contain the project ID, we have to find the
+ # project by its full_path. Due to MySQL/PostgreSQL differences, and historical reasons,
+ # the logic is somewhat complex, so I've mostly copied it in here.
+ class Project < ActiveRecord::Base
+ self.table_name = 'projects'
+
+ def self.find_by_full_path(path)
+ binary = Gitlab::Database.mysql? ? 'BINARY' : ''
+ order_sql = "(CASE WHEN #{binary} routes.path = #{connection.quote(path)} THEN 0 ELSE 1 END)"
+ where_full_path_in(path).reorder(order_sql).take
+ end
+
+ def self.where_full_path_in(path)
+ cast_lower = Gitlab::Database.postgresql?
+
+ path = connection.quote(path)
+
+ where =
+ if cast_lower
+ "(LOWER(routes.path) = LOWER(#{path}))"
+ else
+ "(routes.path = #{path})"
+ end
+
+ joins("INNER JOIN routes ON routes.source_id = projects.id AND routes.source_type = 'Project'").where(where)
+ end
+ end
+ end
+ end
+end