summaryrefslogtreecommitdiff
path: root/lib/gitlab/git
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/git')
-rw-r--r--lib/gitlab/git/blame.rb2
-rw-r--r--lib/gitlab/git/blob.rb3
-rw-r--r--lib/gitlab/git/commit.rb2
-rw-r--r--lib/gitlab/git/diff.rb99
-rw-r--r--lib/gitlab/git/diff_collection.rb20
-rw-r--r--lib/gitlab/git/encoding_helper.rb64
-rw-r--r--lib/gitlab/git/ref.rb2
-rw-r--r--lib/gitlab/git/repository.rb47
-rw-r--r--lib/gitlab/git/tree.rb2
9 files changed, 104 insertions, 137 deletions
diff --git a/lib/gitlab/git/blame.rb b/lib/gitlab/git/blame.rb
index 58193391926..66829a03c2e 100644
--- a/lib/gitlab/git/blame.rb
+++ b/lib/gitlab/git/blame.rb
@@ -1,7 +1,7 @@
module Gitlab
module Git
class Blame
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
attr_reader :lines, :blames
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index c1b31618e0d..d60e607b02b 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -2,7 +2,7 @@ module Gitlab
module Git
class Blob
include Linguist::BlobHelper
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
# This number is the maximum amount of data that we want to display to
# the user. We load as much as we can for encoding detection
@@ -88,6 +88,7 @@ module Gitlab
new(
id: blob_entry[:oid],
name: blob_entry[:name],
+ size: 0,
data: '',
path: path,
commit_id: sha
diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb
index 297531db4cc..bb04731f08c 100644
--- a/lib/gitlab/git/commit.rb
+++ b/lib/gitlab/git/commit.rb
@@ -2,7 +2,7 @@
module Gitlab
module Git
class Commit
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
attr_accessor :raw_commit, :head, :refs
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index deade337354..8926aa19925 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -3,7 +3,7 @@ module Gitlab
module Git
class Diff
TimeoutError = Class.new(StandardError)
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
# Diff properties
attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff
@@ -15,15 +15,30 @@ module Gitlab
alias_method :deleted_file?, :deleted_file
alias_method :renamed_file?, :renamed_file
+ attr_accessor :expanded
+
+ # We need this accessor because of `to_hash` and `init_from_hash`
attr_accessor :too_large
- # The maximum size of a diff to display.
- DIFF_SIZE_LIMIT = 102400 # 100 KB
+ class << self
+ # The maximum size of a diff to display.
+ def size_limit
+ if Feature.enabled?('gitlab_git_diff_size_limit_increase')
+ 200.kilobytes
+ else
+ 100.kilobytes
+ end
+ end
- # The maximum size before a diff is collapsed.
- DIFF_COLLAPSE_LIMIT = 10240 # 10 KB
+ # The maximum size before a diff is collapsed.
+ def collapse_limit
+ if Feature.enabled?('gitlab_git_diff_size_limit_increase')
+ 100.kilobytes
+ else
+ 10.kilobytes
+ end
+ end
- class << self
def between(repo, head, base, options = {}, *paths)
straight = options.delete(:straight) || false
@@ -152,7 +167,7 @@ module Gitlab
:include_untracked_content, :skip_binary_check,
:include_typechange, :include_typechange_trees,
:ignore_filemode, :recurse_ignored_dirs, :paths,
- :max_files, :max_lines, :all_diffs, :no_collapse]
+ :max_files, :max_lines, :limits, :expanded]
if default_options
actual_defaults = default_options.dup
@@ -177,16 +192,18 @@ module Gitlab
end
end
- def initialize(raw_diff, collapse: false)
+ def initialize(raw_diff, expanded: true)
+ @expanded = expanded
+
case raw_diff
when Hash
init_from_hash(raw_diff)
- prune_diff_if_eligible(collapse)
+ prune_diff_if_eligible
when Rugged::Patch, Rugged::Diff::Delta
- init_from_rugged(raw_diff, collapse: collapse)
- when Gitaly::CommitDiffResponse
+ init_from_rugged(raw_diff)
+ when Gitlab::GitalyClient::Diff
init_from_gitaly(raw_diff)
- prune_diff_if_eligible(collapse)
+ prune_diff_if_eligible
when Gitaly::CommitDelta
init_from_gitaly(raw_diff)
when nil
@@ -226,17 +243,13 @@ module Gitlab
def too_large?
if @too_large.nil?
- @too_large = @diff.bytesize >= DIFF_SIZE_LIMIT
+ @too_large = @diff.bytesize >= self.class.size_limit
else
@too_large
end
end
- def collapsible?
- @diff.bytesize >= DIFF_COLLAPSE_LIMIT
- end
-
- def prune_large_diff!
+ def too_large!
@diff = ''
@line_count = 0
@too_large = true
@@ -244,10 +257,11 @@ module Gitlab
def collapsed?
return @collapsed if defined?(@collapsed)
- false
+
+ @collapsed = !expanded && @diff.bytesize >= self.class.collapse_limit
end
- def prune_collapsed_diff!
+ def collapse!
@diff = ''
@line_count = 0
@collapsed = true
@@ -255,9 +269,9 @@ module Gitlab
private
- def init_from_rugged(rugged, collapse: false)
+ def init_from_rugged(rugged)
if rugged.is_a?(Rugged::Patch)
- init_from_rugged_patch(rugged, collapse: collapse)
+ init_from_rugged_patch(rugged)
d = rugged.delta
else
d = rugged
@@ -272,10 +286,10 @@ module Gitlab
@deleted_file = d.deleted?
end
- def init_from_rugged_patch(patch, collapse: false)
+ def init_from_rugged_patch(patch)
# Don't bother initializing diffs that are too large. If a diff is
# binary we're not going to display anything so we skip the size check.
- return if !patch.delta.binary? && prune_large_patch(patch, collapse)
+ return if !patch.delta.binary? && prune_large_patch(patch)
@diff = encode!(strip_diff_headers(patch.to_s))
end
@@ -288,40 +302,43 @@ module Gitlab
end
end
- def init_from_gitaly(msg)
- @diff = msg.raw_chunks.join if msg.respond_to?(:raw_chunks)
- @new_path = encode!(msg.to_path.dup)
- @old_path = encode!(msg.from_path.dup)
- @a_mode = msg.old_mode.to_s(8)
- @b_mode = msg.new_mode.to_s(8)
- @new_file = msg.from_id == BLANK_SHA
- @renamed_file = msg.from_path != msg.to_path
- @deleted_file = msg.to_id == BLANK_SHA
+ def init_from_gitaly(diff)
+ @diff = diff.patch if diff.respond_to?(:patch)
+ @new_path = encode!(diff.to_path.dup)
+ @old_path = encode!(diff.from_path.dup)
+ @a_mode = diff.old_mode.to_s(8)
+ @b_mode = diff.new_mode.to_s(8)
+ @new_file = diff.from_id == BLANK_SHA
+ @renamed_file = diff.from_path != diff.to_path
+ @deleted_file = diff.to_id == BLANK_SHA
end
- def prune_diff_if_eligible(collapse = false)
- prune_large_diff! if too_large?
- prune_collapsed_diff! if collapse && collapsible?
+ def prune_diff_if_eligible
+ if too_large?
+ too_large!
+ elsif collapsed?
+ collapse!
+ end
end
# If the patch surpasses any of the diff limits it calls the appropiate
# prune method and returns true. Otherwise returns false.
- def prune_large_patch(patch, collapse)
+ def prune_large_patch(patch)
size = 0
patch.each_hunk do |hunk|
hunk.each_line do |line|
size += line.content.bytesize
- if size >= DIFF_SIZE_LIMIT
- prune_large_diff!
+ if size >= self.class.size_limit
+ too_large!
return true
end
end
end
- if collapse && size >= DIFF_COLLAPSE_LIMIT
- prune_collapsed_diff!
+ if !expanded && size >= self.class.collapse_limit
+ collapse!
return true
end
diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb
index 898a5ae15f2..334e06a6eca 100644
--- a/lib/gitlab/git/diff_collection.rb
+++ b/lib/gitlab/git/diff_collection.rb
@@ -9,12 +9,12 @@ module Gitlab
@iterator = iterator
@max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files])
@max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines])
- @max_bytes = @max_files * 5120 # Average 5 KB per file
+ @max_bytes = @max_files * 5.kilobytes # Average 5 KB per file
@safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min
@safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min
- @safe_max_bytes = @safe_max_files * 5120 # Average 5 KB per file
- @all_diffs = !!options.fetch(:all_diffs, false)
- @no_collapse = !!options.fetch(:no_collapse, true)
+ @safe_max_bytes = @safe_max_files * 5.kilobytes # Average 5 KB per file
+ @enforce_limits = !!options.fetch(:limits, true)
+ @expanded = !!options.fetch(:expanded, true)
@line_count = 0
@byte_count = 0
@@ -88,23 +88,23 @@ module Gitlab
@iterator.each do |raw|
@empty = false
- if !@all_diffs && i >= @max_files
+ if @enforce_limits && i >= @max_files
@overflow = true
break
end
- collapse = !@all_diffs && !@no_collapse
+ expanded = !@enforce_limits || @expanded
- diff = Gitlab::Git::Diff.new(raw, collapse: collapse)
+ diff = Gitlab::Git::Diff.new(raw, expanded: expanded)
- if collapse && over_safe_limits?(i)
- diff.prune_collapsed_diff!
+ if !expanded && over_safe_limits?(i)
+ diff.collapse!
end
@line_count += diff.line_count
@byte_count += diff.diff.bytesize
- if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes)
+ if @enforce_limits && (@line_count >= @max_lines || @byte_count >= @max_bytes)
# This last Diff instance pushes us over the lines limit. We stop and
# discard it.
@overflow = true
diff --git a/lib/gitlab/git/encoding_helper.rb b/lib/gitlab/git/encoding_helper.rb
deleted file mode 100644
index f918074cb14..00000000000
--- a/lib/gitlab/git/encoding_helper.rb
+++ /dev/null
@@ -1,64 +0,0 @@
-module Gitlab
- module Git
- module EncodingHelper
- extend self
-
- # This threshold is carefully tweaked to prevent usage of encodings detected
- # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
- # we're better off sticking with utf8 encoding.
- # Reason: git diff can return strings with invalid utf8 byte sequences if it
- # truncates a diff in the middle of a multibyte character. In this case
- # CharlockHolmes will try to guess the encoding and will likely suggest an
- # obscure encoding with low confidence.
- # There is a lot more info with this merge request:
- # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
- ENCODING_CONFIDENCE_THRESHOLD = 40
-
- def encode!(message)
- return nil unless message.respond_to? :force_encoding
-
- # if message is utf-8 encoding, just return it
- message.force_encoding("UTF-8")
- return message if message.valid_encoding?
-
- # return message if message type is binary
- detect = CharlockHolmes::EncodingDetector.detect(message)
- return message.force_encoding("BINARY") if detect && detect[:type] == :binary
-
- # force detected encoding if we have sufficient confidence.
- if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
- message.force_encoding(detect[:encoding])
- end
-
- # encode and clean the bad chars
- message.replace clean(message)
- rescue
- encoding = detect ? detect[:encoding] : "unknown"
- "--broken encoding: #{encoding}"
- end
-
- def encode_utf8(message)
- detect = CharlockHolmes::EncodingDetector.detect(message)
- if detect
- begin
- CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
- rescue ArgumentError => e
- Rails.logger.warn("Ignoring error converting #{detect[:encoding]} into UTF8: #{e.message}")
-
- ''
- end
- else
- clean(message)
- end
- end
-
- private
-
- def clean(message)
- message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
- .encode("UTF-8")
- .gsub("\0".encode("UTF-8"), "")
- end
- end
- end
-end
diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb
index 37ef6836742..ebf7393dc61 100644
--- a/lib/gitlab/git/ref.rb
+++ b/lib/gitlab/git/ref.rb
@@ -1,7 +1,7 @@
module Gitlab
module Git
class Ref
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
# Branch or tag name
# without "refs/tags|heads" prefix
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index b9f1ac144b6..9d6adbdb4ac 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -1006,31 +1006,39 @@ module Gitlab
# Parses the contents of a .gitmodules file and returns a hash of
# submodule information.
def parse_gitmodules(commit, content)
- results = {}
+ modules = {}
- current = ""
- content.split("\n").each do |txt|
- if txt =~ /^\s*\[/
- current = txt.match(/(?<=").*(?=")/)[0]
- results[current] = {}
- else
- next unless results[current]
- match_data = txt.match(/(\w+)\s*=\s*(.*)/)
- next unless match_data
- target = match_data[2].chomp
- results[current][match_data[1]] = target
+ name = nil
+ content.each_line do |line|
+ case line.strip
+ when /\A\[submodule "(?<name>[^"]+)"\]\z/ # Submodule header
+ name = $~[:name]
+ modules[name] = {}
+ when /\A(?<key>\w+)\s*=\s*(?<value>.*)\z/ # Key/value pair
+ key = $~[:key]
+ value = $~[:value].chomp
+
+ next unless name && modules[name]
+
+ modules[name][key] = value
- if match_data[1] == "path"
+ if key == 'path'
begin
- results[current]["id"] = blob_content(commit, target)
+ modules[name]['id'] = blob_content(commit, value)
rescue InvalidBlobName
- results.delete(current)
+ # The current entry is invalid
+ modules.delete(name)
+ name = nil
end
end
+ when /\A#/ # Comment
+ next
+ else # Invalid line
+ name = nil
end
end
- results
+ modules
end
# Returns true if +commit+ introduced changes to +path+, using commit
@@ -1086,7 +1094,12 @@ module Gitlab
elsif tmp_entry.nil?
return nil
else
- tmp_entry = rugged.lookup(tmp_entry[:oid])
+ begin
+ tmp_entry = rugged.lookup(tmp_entry[:oid])
+ rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
+ return nil
+ end
+
return nil unless tmp_entry.type == :tree
tmp_entry = tmp_entry[dir]
end
diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb
index d41256d9a84..b9afa05c819 100644
--- a/lib/gitlab/git/tree.rb
+++ b/lib/gitlab/git/tree.rb
@@ -1,7 +1,7 @@
module Gitlab
module Git
class Tree
- include Gitlab::Git::EncodingHelper
+ include Gitlab::EncodingHelper
attr_accessor :id, :root_id, :name, :path, :type,
:mode, :commit_id, :submodule_url