diff options
Diffstat (limited to 'lib/gitlab/git')
-rw-r--r-- | lib/gitlab/git/blame.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/git/commit.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/git/diff.rb | 99 | ||||
-rw-r--r-- | lib/gitlab/git/diff_collection.rb | 20 | ||||
-rw-r--r-- | lib/gitlab/git/encoding_helper.rb | 64 | ||||
-rw-r--r-- | lib/gitlab/git/ref.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/git/repository.rb | 47 | ||||
-rw-r--r-- | lib/gitlab/git/tree.rb | 2 |
9 files changed, 104 insertions, 137 deletions
diff --git a/lib/gitlab/git/blame.rb b/lib/gitlab/git/blame.rb index 58193391926..66829a03c2e 100644 --- a/lib/gitlab/git/blame.rb +++ b/lib/gitlab/git/blame.rb @@ -1,7 +1,7 @@ module Gitlab module Git class Blame - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper attr_reader :lines, :blames diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index c1b31618e0d..d60e607b02b 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -2,7 +2,7 @@ module Gitlab module Git class Blob include Linguist::BlobHelper - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper # This number is the maximum amount of data that we want to display to # the user. We load as much as we can for encoding detection @@ -88,6 +88,7 @@ module Gitlab new( id: blob_entry[:oid], name: blob_entry[:name], + size: 0, data: '', path: path, commit_id: sha diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 297531db4cc..bb04731f08c 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -2,7 +2,7 @@ module Gitlab module Git class Commit - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper attr_accessor :raw_commit, :head, :refs diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index deade337354..8926aa19925 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -3,7 +3,7 @@ module Gitlab module Git class Diff TimeoutError = Class.new(StandardError) - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper # Diff properties attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff @@ -15,15 +15,30 @@ module Gitlab alias_method :deleted_file?, :deleted_file alias_method :renamed_file?, :renamed_file + attr_accessor :expanded + + # We need this accessor because of `to_hash` and `init_from_hash` attr_accessor :too_large - # The maximum size of a diff to display. - DIFF_SIZE_LIMIT = 102400 # 100 KB + class << self + # The maximum size of a diff to display. + def size_limit + if Feature.enabled?('gitlab_git_diff_size_limit_increase') + 200.kilobytes + else + 100.kilobytes + end + end - # The maximum size before a diff is collapsed. - DIFF_COLLAPSE_LIMIT = 10240 # 10 KB + # The maximum size before a diff is collapsed. + def collapse_limit + if Feature.enabled?('gitlab_git_diff_size_limit_increase') + 100.kilobytes + else + 10.kilobytes + end + end - class << self def between(repo, head, base, options = {}, *paths) straight = options.delete(:straight) || false @@ -152,7 +167,7 @@ module Gitlab :include_untracked_content, :skip_binary_check, :include_typechange, :include_typechange_trees, :ignore_filemode, :recurse_ignored_dirs, :paths, - :max_files, :max_lines, :all_diffs, :no_collapse] + :max_files, :max_lines, :limits, :expanded] if default_options actual_defaults = default_options.dup @@ -177,16 +192,18 @@ module Gitlab end end - def initialize(raw_diff, collapse: false) + def initialize(raw_diff, expanded: true) + @expanded = expanded + case raw_diff when Hash init_from_hash(raw_diff) - prune_diff_if_eligible(collapse) + prune_diff_if_eligible when Rugged::Patch, Rugged::Diff::Delta - init_from_rugged(raw_diff, collapse: collapse) - when Gitaly::CommitDiffResponse + init_from_rugged(raw_diff) + when Gitlab::GitalyClient::Diff init_from_gitaly(raw_diff) - prune_diff_if_eligible(collapse) + prune_diff_if_eligible when Gitaly::CommitDelta init_from_gitaly(raw_diff) when nil @@ -226,17 +243,13 @@ module Gitlab def too_large? if @too_large.nil? - @too_large = @diff.bytesize >= DIFF_SIZE_LIMIT + @too_large = @diff.bytesize >= self.class.size_limit else @too_large end end - def collapsible? - @diff.bytesize >= DIFF_COLLAPSE_LIMIT - end - - def prune_large_diff! + def too_large! @diff = '' @line_count = 0 @too_large = true @@ -244,10 +257,11 @@ module Gitlab def collapsed? return @collapsed if defined?(@collapsed) - false + + @collapsed = !expanded && @diff.bytesize >= self.class.collapse_limit end - def prune_collapsed_diff! + def collapse! @diff = '' @line_count = 0 @collapsed = true @@ -255,9 +269,9 @@ module Gitlab private - def init_from_rugged(rugged, collapse: false) + def init_from_rugged(rugged) if rugged.is_a?(Rugged::Patch) - init_from_rugged_patch(rugged, collapse: collapse) + init_from_rugged_patch(rugged) d = rugged.delta else d = rugged @@ -272,10 +286,10 @@ module Gitlab @deleted_file = d.deleted? end - def init_from_rugged_patch(patch, collapse: false) + def init_from_rugged_patch(patch) # Don't bother initializing diffs that are too large. If a diff is # binary we're not going to display anything so we skip the size check. - return if !patch.delta.binary? && prune_large_patch(patch, collapse) + return if !patch.delta.binary? && prune_large_patch(patch) @diff = encode!(strip_diff_headers(patch.to_s)) end @@ -288,40 +302,43 @@ module Gitlab end end - def init_from_gitaly(msg) - @diff = msg.raw_chunks.join if msg.respond_to?(:raw_chunks) - @new_path = encode!(msg.to_path.dup) - @old_path = encode!(msg.from_path.dup) - @a_mode = msg.old_mode.to_s(8) - @b_mode = msg.new_mode.to_s(8) - @new_file = msg.from_id == BLANK_SHA - @renamed_file = msg.from_path != msg.to_path - @deleted_file = msg.to_id == BLANK_SHA + def init_from_gitaly(diff) + @diff = diff.patch if diff.respond_to?(:patch) + @new_path = encode!(diff.to_path.dup) + @old_path = encode!(diff.from_path.dup) + @a_mode = diff.old_mode.to_s(8) + @b_mode = diff.new_mode.to_s(8) + @new_file = diff.from_id == BLANK_SHA + @renamed_file = diff.from_path != diff.to_path + @deleted_file = diff.to_id == BLANK_SHA end - def prune_diff_if_eligible(collapse = false) - prune_large_diff! if too_large? - prune_collapsed_diff! if collapse && collapsible? + def prune_diff_if_eligible + if too_large? + too_large! + elsif collapsed? + collapse! + end end # If the patch surpasses any of the diff limits it calls the appropiate # prune method and returns true. Otherwise returns false. - def prune_large_patch(patch, collapse) + def prune_large_patch(patch) size = 0 patch.each_hunk do |hunk| hunk.each_line do |line| size += line.content.bytesize - if size >= DIFF_SIZE_LIMIT - prune_large_diff! + if size >= self.class.size_limit + too_large! return true end end end - if collapse && size >= DIFF_COLLAPSE_LIMIT - prune_collapsed_diff! + if !expanded && size >= self.class.collapse_limit + collapse! return true end diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb index 898a5ae15f2..334e06a6eca 100644 --- a/lib/gitlab/git/diff_collection.rb +++ b/lib/gitlab/git/diff_collection.rb @@ -9,12 +9,12 @@ module Gitlab @iterator = iterator @max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files]) @max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines]) - @max_bytes = @max_files * 5120 # Average 5 KB per file + @max_bytes = @max_files * 5.kilobytes # Average 5 KB per file @safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min @safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min - @safe_max_bytes = @safe_max_files * 5120 # Average 5 KB per file - @all_diffs = !!options.fetch(:all_diffs, false) - @no_collapse = !!options.fetch(:no_collapse, true) + @safe_max_bytes = @safe_max_files * 5.kilobytes # Average 5 KB per file + @enforce_limits = !!options.fetch(:limits, true) + @expanded = !!options.fetch(:expanded, true) @line_count = 0 @byte_count = 0 @@ -88,23 +88,23 @@ module Gitlab @iterator.each do |raw| @empty = false - if !@all_diffs && i >= @max_files + if @enforce_limits && i >= @max_files @overflow = true break end - collapse = !@all_diffs && !@no_collapse + expanded = !@enforce_limits || @expanded - diff = Gitlab::Git::Diff.new(raw, collapse: collapse) + diff = Gitlab::Git::Diff.new(raw, expanded: expanded) - if collapse && over_safe_limits?(i) - diff.prune_collapsed_diff! + if !expanded && over_safe_limits?(i) + diff.collapse! end @line_count += diff.line_count @byte_count += diff.diff.bytesize - if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes) + if @enforce_limits && (@line_count >= @max_lines || @byte_count >= @max_bytes) # This last Diff instance pushes us over the lines limit. We stop and # discard it. @overflow = true diff --git a/lib/gitlab/git/encoding_helper.rb b/lib/gitlab/git/encoding_helper.rb deleted file mode 100644 index f918074cb14..00000000000 --- a/lib/gitlab/git/encoding_helper.rb +++ /dev/null @@ -1,64 +0,0 @@ -module Gitlab - module Git - module EncodingHelper - extend self - - # This threshold is carefully tweaked to prevent usage of encodings detected - # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low, - # we're better off sticking with utf8 encoding. - # Reason: git diff can return strings with invalid utf8 byte sequences if it - # truncates a diff in the middle of a multibyte character. In this case - # CharlockHolmes will try to guess the encoding and will likely suggest an - # obscure encoding with low confidence. - # There is a lot more info with this merge request: - # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193 - ENCODING_CONFIDENCE_THRESHOLD = 40 - - def encode!(message) - return nil unless message.respond_to? :force_encoding - - # if message is utf-8 encoding, just return it - message.force_encoding("UTF-8") - return message if message.valid_encoding? - - # return message if message type is binary - detect = CharlockHolmes::EncodingDetector.detect(message) - return message.force_encoding("BINARY") if detect && detect[:type] == :binary - - # force detected encoding if we have sufficient confidence. - if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD - message.force_encoding(detect[:encoding]) - end - - # encode and clean the bad chars - message.replace clean(message) - rescue - encoding = detect ? detect[:encoding] : "unknown" - "--broken encoding: #{encoding}" - end - - def encode_utf8(message) - detect = CharlockHolmes::EncodingDetector.detect(message) - if detect - begin - CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8') - rescue ArgumentError => e - Rails.logger.warn("Ignoring error converting #{detect[:encoding]} into UTF8: #{e.message}") - - '' - end - else - clean(message) - end - end - - private - - def clean(message) - message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") - .encode("UTF-8") - .gsub("\0".encode("UTF-8"), "") - end - end - end -end diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb index 37ef6836742..ebf7393dc61 100644 --- a/lib/gitlab/git/ref.rb +++ b/lib/gitlab/git/ref.rb @@ -1,7 +1,7 @@ module Gitlab module Git class Ref - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper # Branch or tag name # without "refs/tags|heads" prefix diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index b9f1ac144b6..9d6adbdb4ac 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -1006,31 +1006,39 @@ module Gitlab # Parses the contents of a .gitmodules file and returns a hash of # submodule information. def parse_gitmodules(commit, content) - results = {} + modules = {} - current = "" - content.split("\n").each do |txt| - if txt =~ /^\s*\[/ - current = txt.match(/(?<=").*(?=")/)[0] - results[current] = {} - else - next unless results[current] - match_data = txt.match(/(\w+)\s*=\s*(.*)/) - next unless match_data - target = match_data[2].chomp - results[current][match_data[1]] = target + name = nil + content.each_line do |line| + case line.strip + when /\A\[submodule "(?<name>[^"]+)"\]\z/ # Submodule header + name = $~[:name] + modules[name] = {} + when /\A(?<key>\w+)\s*=\s*(?<value>.*)\z/ # Key/value pair + key = $~[:key] + value = $~[:value].chomp + + next unless name && modules[name] + + modules[name][key] = value - if match_data[1] == "path" + if key == 'path' begin - results[current]["id"] = blob_content(commit, target) + modules[name]['id'] = blob_content(commit, value) rescue InvalidBlobName - results.delete(current) + # The current entry is invalid + modules.delete(name) + name = nil end end + when /\A#/ # Comment + next + else # Invalid line + name = nil end end - results + modules end # Returns true if +commit+ introduced changes to +path+, using commit @@ -1086,7 +1094,12 @@ module Gitlab elsif tmp_entry.nil? return nil else - tmp_entry = rugged.lookup(tmp_entry[:oid]) + begin + tmp_entry = rugged.lookup(tmp_entry[:oid]) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + return nil + end + return nil unless tmp_entry.type == :tree tmp_entry = tmp_entry[dir] end diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb index d41256d9a84..b9afa05c819 100644 --- a/lib/gitlab/git/tree.rb +++ b/lib/gitlab/git/tree.rb @@ -1,7 +1,7 @@ module Gitlab module Git class Tree - include Gitlab::Git::EncodingHelper + include Gitlab::EncodingHelper attr_accessor :id, :root_id, :name, :path, :type, :mode, :commit_id, :submodule_url |