diff options
author | Robert Speicher <robert@gitlab.com> | 2016-07-06 15:06:01 +0000 |
---|---|---|
committer | Robert Speicher <robert@gitlab.com> | 2016-07-06 15:06:01 +0000 |
commit | be018ba8c4f61babfea494a3946df9931d476a8a (patch) | |
tree | cf5acc63374a7a570ae03deaf1800b183806be07 | |
parent | 400f9f72233c6c5390367a95bf11ebee09c86d2c (diff) | |
parent | 54a50bf81d7bb304adaedffd8eb3e0bc0fc348a9 (diff) | |
download | gitlab-ce-be018ba8c4f61babfea494a3946df9931d476a8a.tar.gz |
Merge branch 'fix/import-url-validator' into 'master'
Fixing URL validation for import_url on projects
Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/17536
This MR fixes problems related to bypassing `import_url` validation on projects. This makes sure the URL is properly validated so we don't enter crap and fail while running workers that handle this URL.
It also adds a migration to fix current invalid `import_url`s
See merge request !4753
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | app/models/project.rb | 6 | ||||
-rw-r--r-- | app/validators/addressable_url_validator.rb | 45 | ||||
-rw-r--r-- | db/migrate/20160620110927_fix_no_validatable_import_url.rb | 86 | ||||
-rw-r--r-- | lib/gitlab/url_sanitizer.rb | 10 | ||||
-rw-r--r-- | spec/models/project_spec.rb | 12 |
6 files changed, 156 insertions, 4 deletions
diff --git a/CHANGELOG b/CHANGELOG index 8ef934bf80d..406dec09e79 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -37,6 +37,7 @@ v 8.10.0 (unreleased) - Metrics for Rouge::Plugins::Redcarpet and Rouge::Formatters::HTMLGitlab - RailsCache metris now includes fetch_hit/fetch_miss and read_hit/read_miss info. - Allow [ci skip] to be in any case and allow [skip ci]. !4785 (simon_w) + - Set import_url validation to be more strict - Add basic system information like memory and disk usage to the admin panel - Don't garbage collect commits that have related DB records like comments - More descriptive message for git hooks and file locks diff --git a/app/models/project.rb b/app/models/project.rb index e5fae15cb19..029026a4e56 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -162,9 +162,7 @@ class Project < ActiveRecord::Base validates :namespace, presence: true validates_uniqueness_of :name, scope: :namespace_id validates_uniqueness_of :path, scope: :namespace_id - validates :import_url, - url: { protocols: %w(ssh git http https) }, - if: :external_import? + validates :import_url, addressable_url: true, if: :external_import? validates :star_count, numericality: { greater_than_or_equal_to: 0 } validate :check_limit, on: :create validate :avatar_type, @@ -463,6 +461,8 @@ class Project < ActiveRecord::Base end def import_url=(value) + return super(value) unless Gitlab::UrlSanitizer.valid?(value) + import_url = Gitlab::UrlSanitizer.new(value) create_or_update_import_data(credentials: import_url.credentials) super(import_url.sanitized_url) diff --git a/app/validators/addressable_url_validator.rb b/app/validators/addressable_url_validator.rb new file mode 100644 index 00000000000..09bfa613cbe --- /dev/null +++ b/app/validators/addressable_url_validator.rb @@ -0,0 +1,45 @@ +# AddressableUrlValidator +# +# Custom validator for URLs. This is a stricter version of UrlValidator - it also checks +# for using the right protocol, but it actually parses the URL checking for any syntax errors. +# The regex is also different from `URI` as we use `Addressable::URI` here. +# +# By default, only URLs for http, https, ssh, and git protocols will be considered valid. +# Provide a `:protocols` option to configure accepted protocols. +# +# Example: +# +# class User < ActiveRecord::Base +# validates :personal_url, addressable_url: true +# +# validates :ftp_url, addressable_url: { protocols: %w(ftp) } +# +# validates :git_url, addressable_url: { protocols: %w(http https ssh git) } +# end +# +class AddressableUrlValidator < ActiveModel::EachValidator + DEFAULT_OPTIONS = { protocols: %w(http https ssh git) } + + def validate_each(record, attribute, value) + unless valid_url?(value) + record.errors.add(attribute, "must be a valid URL") + end + end + + private + + def valid_url?(value) + return false unless value + + valid_protocol?(value) && valid_uri?(value) + end + + def valid_uri?(value) + Gitlab::UrlSanitizer.valid?(value) + end + + def valid_protocol?(value) + options = DEFAULT_OPTIONS.merge(self.options) + value =~ /\A#{URI.regexp(options[:protocols])}\z/ + end +end diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb new file mode 100644 index 00000000000..82a616c62d9 --- /dev/null +++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb @@ -0,0 +1,86 @@ +# Updates project records containing invalid URLs using the AddressableUrlValidator. +# This is optimized assuming the number of invalid records is low, but +# we still need to loop through all the projects with an +import_url+ +# so we use batching for the latter. +# +# This migration is non-reversible as we would have to keep the old data. + +class FixNoValidatableImportUrl < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + class SqlBatches + + attr_reader :results, :query + + def initialize(batch_size: 100, query:) + @offset = 0 + @batch_size = batch_size + @query = query + @results = [] + end + + def next? + @results = ActiveRecord::Base.connection.exec_query(batched_sql) + @offset += @batch_size + @results.any? + end + + private + + def batched_sql + "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}" + end + end + + # AddressableValidator - Snapshot of AddressableUrlValidator + module AddressableUrlValidatorSnap + extend self + + def valid_url?(value) + return false unless value + + valid_uri?(value) && valid_protocol?(value) + rescue Addressable::URI::InvalidURIError + false + end + + def valid_uri?(value) + Addressable::URI.parse(value).is_a?(Addressable::URI) + end + + def valid_protocol?(value) + value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/ + end + end + + def up + unless defined?(Addressable::URI::InvalidURIError) + say('Skipping cleaning up invalid import URLs as class from Addressable is missing') + return + end + + say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') + + invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } + end + + def invalid_import_url_project_ids + ids = [] + batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") + + while batches.next? + batches.results.each do |result| + ids << result['id'] unless valid_url?(result['import_url']) + end + end + + ids + end + + def valid_url?(url) + AddressableUrlValidatorSnap.valid_url?(url) + end + + def cleanup_import_url(project_id) + execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}") + end +end diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb index 7d02fe3c971..86ed18fb50d 100644 --- a/lib/gitlab/url_sanitizer.rb +++ b/lib/gitlab/url_sanitizer.rb @@ -6,8 +6,16 @@ module Gitlab content.gsub(regexp) { |url| new(url).masked_url } end + def self.valid?(url) + Addressable::URI.parse(url.strip) + + true + rescue Addressable::URI::InvalidURIError + false + end + def initialize(url, credentials: nil) - @url = Addressable::URI.parse(url) + @url = Addressable::URI.parse(url.strip) @credentials = credentials end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index a8c777d1e3e..2e89d6de3a2 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -129,6 +129,18 @@ describe Project, models: true do expect(project2.errors[:repository_storage].first).to match(/is not included in the list/) end end + + it 'should not allow an invalid URI as import_url' do + project2 = build(:project, import_url: 'invalid://') + + expect(project2).not_to be_valid + end + + it 'should allow a valid URI as import_url' do + project2 = build(:project, import_url: 'ssh://test@gitlab.com/project.git') + + expect(project2).to be_valid + end end describe 'default_scope' do |