diff options
author | Robert Speicher <robert@gitlab.com> | 2016-07-06 15:06:01 +0000 |
---|---|---|
committer | Robert Speicher <robert@gitlab.com> | 2016-07-06 15:06:01 +0000 |
commit | be018ba8c4f61babfea494a3946df9931d476a8a (patch) | |
tree | cf5acc63374a7a570ae03deaf1800b183806be07 /db | |
parent | 400f9f72233c6c5390367a95bf11ebee09c86d2c (diff) | |
parent | 54a50bf81d7bb304adaedffd8eb3e0bc0fc348a9 (diff) | |
download | gitlab-ce-be018ba8c4f61babfea494a3946df9931d476a8a.tar.gz |
Merge branch 'fix/import-url-validator' into 'master'
Fixing URL validation for import_url on projects
Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/17536
This MR fixes problems related to bypassing `import_url` validation on projects. It makes sure the URL is properly validated so that we don't store invalid values and then fail while running the workers that handle this URL.
It also adds a migration to fix existing invalid `import_url`s.
See merge request !4753
Diffstat (limited to 'db')
-rw-r--r-- | db/migrate/20160620110927_fix_no_validatable_import_url.rb | 86 |
1 files changed, 86 insertions, 0 deletions
# File: db/migrate/20160620110927_fix_no_validatable_import_url.rb (new file, mode 100644)
#
# Updates project records containing invalid URLs using the AddressableUrlValidator.
# This is optimized assuming the number of invalid records is low, but
# we still need to loop through all the projects with an +import_url+
# so we use batching for the latter.
#
# This migration is non-reversible as we would have to keep the old data.

class FixNoValidatableImportUrl < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Pages through the rows of a raw SQL query using LIMIT/OFFSET.
  #
  # Each call to +next?+ runs the query for the current window, stores the
  # rows in +results+ and advances the offset by +batch_size+.
  #
  # The supplied query should contain an ORDER BY clause that yields a stable
  # row order; without one the database is free to return rows in a different
  # order for every window, so rows could be skipped or seen twice.
  class SqlBatches
    attr_reader :results, :query

    # batch_size - number of rows requested per window (defaults to 100).
    # query      - SQL string to page through; LIMIT and OFFSET are appended
    #              to it, so it must not already contain them.
    def initialize(batch_size: 100, query:)
      @offset = 0
      @batch_size = batch_size
      @query = query
      @results = []
    end

    # Fetches the next window into +results+ and moves the offset forward.
    #
    # Returns true when the window contained at least one row, false once the
    # query is exhausted.
    def next?
      @results = ActiveRecord::Base.connection.exec_query(batched_sql)
      @offset += @batch_size
      @results.any?
    end

    private

    # SQL for the current window: the base query plus LIMIT/OFFSET.
    def batched_sql
      "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
    end
  end

  # Snapshot of AddressableUrlValidator, copied into the migration so that the
  # validation rules used here are the ones written below.
  module AddressableUrlValidatorSnap
    extend self

    # Returns a truthy value when +value+ parses as a URI and matches one of
    # the allowed protocols; returns false for nil or when Addressable raises
    # InvalidURIError while parsing.
    def valid_url?(value)
      return false unless value

      valid_uri?(value) && valid_protocol?(value)
    rescue Addressable::URI::InvalidURIError
      false
    end

    # True when Addressable parses +value+ into an Addressable::URI.
    # Raises Addressable::URI::InvalidURIError for unparseable input
    # (rescued by +valid_url?+).
    def valid_uri?(value)
      Addressable::URI.parse(value).is_a?(Addressable::URI)
    end

    # Match position (truthy) when the whole string is a URI using the
    # http, https, ssh or git scheme; nil otherwise.
    def valid_protocol?(value)
      value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/
    end
  end

  # Sets +import_url+ to NULL on every project whose URL fails validation.
  # Does nothing (apart from logging) when Addressable is not loaded.
  def up
    unless defined?(Addressable::URI::InvalidURIError)
      say('Skipping cleaning up invalid import URLs as class from Addressable is missing')
      return
    end

    say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')

    invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) }
  end

  # Scans all projects that have an +import_url+ in batches and returns the
  # ids of those whose URL is not valid.
  #
  # The query is ordered by id so that consecutive LIMIT/OFFSET windows are
  # disjoint and together cover every matching row.
  def invalid_import_url_project_ids
    ids = []
    batches = SqlBatches.new(
      query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL ORDER BY id"
    )

    while batches.next?
      batches.results.each do |result|
        ids << result['id'] unless valid_url?(result['import_url'])
      end
    end

    ids
  end

  # Delegates to the validator snapshot defined above.
  def valid_url?(url)
    AddressableUrlValidatorSnap.valid_url?(url)
  end

  # Clears the +import_url+ of a single project.
  def cleanup_import_url(project_id)
    execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
  end
end