diff options
author | Robert Speicher <robert@gitlab.com> | 2018-10-15 15:27:15 +0000 |
---|---|---|
committer | Robert Speicher <robert@gitlab.com> | 2018-10-15 15:27:15 +0000 |
commit | f5d088eb11b066dadad06948e158b39e9432573c (patch) | |
tree | 9f1db7d7ba83f1b221bdb88b864e8efca1fd95d8 /lib | |
parent | 201143e9db57ed6f4cd72704387777b4be5ecc34 (diff) | |
parent | 399056ed783e12337a9c47b06b4aae021198f1cd (diff) | |
download | gitlab-ce-f5d088eb11b066dadad06948e158b39e9432573c.tar.gz |
Merge branch 'zj-remove-linguist' into 'master'
Remove dependencies on Linguist
Closes #35450
See merge request gitlab-org/gitlab-ce!21008
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/blob_helper.rb | 145 | ||||
-rw-r--r-- | lib/gitlab/conflict/file.rb | 1 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/git/blob_snippet.rb | 34 | ||||
-rw-r--r-- | lib/gitlab/language_data.rb | 33 |
5 files changed, 182 insertions, 39 deletions
diff --git a/lib/gitlab/blob_helper.rb b/lib/gitlab/blob_helper.rb new file mode 100644 index 00000000000..9b3b383b0c8 --- /dev/null +++ b/lib/gitlab/blob_helper.rb @@ -0,0 +1,145 @@ +# This has been extracted from https://github.com/github/linguist/blob/master/lib/linguist/blob_helper.rb +module Gitlab + module BlobHelper + def extname + File.extname(name.to_s) + end + + def known_extension? + LanguageData.extensions.include?(extname) + end + + def viewable? + !large? && text? + end + + MEGABYTE = 1024 * 1024 + + def large? + size.to_i > MEGABYTE + end + + def binary? + # Large blobs aren't even loaded into memory + if data.nil? + true + + # Treat blank files as text + elsif data == "" + false + + # Charlock doesn't know what to think + elsif encoding.nil? + true + + # If Charlock says its binary + else + detect_encoding[:type] == :binary + end + end + + def text? + !binary? + end + + def image? + ['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase) + end + + # Internal: Lookup mime type for extension. + # + # Returns a MIME::Type + # rubocop:disable Gitlab/ModuleWithInstanceVariables + def _mime_type + if defined? @_mime_type + @_mime_type + else + guesses = ::MIME::Types.type_for(extname.to_s) + + # Prefer text mime types over binary + @_mime_type = guesses.detect { |type| type.ascii? } || guesses.first + end + end + # rubocop:enable Gitlab/ModuleWithInstanceVariables + + # Public: Get the actual blob mime type + # + # Examples + # + # # => 'text/plain' + # # => 'text/html' + # + # Returns a mime type String. + def mime_type + _mime_type ? _mime_type.to_s : 'text/plain' + end + + def binary_mime_type? + _mime_type ? _mime_type.binary? : false + end + + def lines + @lines ||= + if viewable? && data + # `data` is usually encoded as ASCII-8BIT even when the content has + # been detected as a different encoding. However, we are not allowed + # to change the encoding of `data` because we've made the implicit + # guarantee that each entry in `lines` is encoded the same way as + # `data`. + # + # Instead, we re-encode each possible newline sequence as the + # detected encoding, then force them back to the encoding of `data` + # (usually a binary encoding like ASCII-8BIT). This means that the + # byte sequence will match how newlines are likely encoded in the + # file, but we don't have to change the encoding of `data` as far as + # Ruby is concerned. This allows us to correctly parse out each line + # without changing the encoding of `data`, and + # also--importantly--without having to duplicate many (potentially + # large) strings. + begin + data.split(encoded_newlines_re, -1) + rescue Encoding::ConverterNotFoundError + # The data is not splittable in the detected encoding. Assume it's + # one big line. + [data] + end + else + [] + end + end + + def content_type + # rubocop:disable Style/MultilineTernaryOperator + # rubocop:disable Style/NestedTernaryOperator + @content_type ||= binary_mime_type? || binary? ? mime_type : + (encoding ? "text/plain; charset=#{encoding.downcase}" : "text/plain") + # rubocop:enable Style/NestedTernaryOperator + # rubocop:enable Style/MultilineTernaryOperator + end + + def encoded_newlines_re + @encoded_newlines_re ||= + Regexp.union(["\r\n", "\r", "\n"].map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) }) + end + + def ruby_encoding + if hash = detect_encoding + hash[:ruby_encoding] + end + end + + def encoding + if hash = detect_encoding + hash[:encoding] + end + end + + def detect_encoding + @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data # rubocop:disable Gitlab/ModuleWithInstanceVariables + end + + def empty? + data.nil? || data == "" + end + end +end diff --git a/lib/gitlab/conflict/file.rb b/lib/gitlab/conflict/file.rb index 2a0cb640a14..30911b49b18 100644 --- a/lib/gitlab/conflict/file.rb +++ b/lib/gitlab/conflict/file.rb @@ -158,7 +158,6 @@ module Gitlab json_hash.tap do |json_hash| if opts[:full_content] json_hash[:content] = content - json_hash[:blob_ace_mode] = our_blob && our_blob.language.try(:ace_mode) else json_hash[:sections] = sections if type.text? json_hash[:type] = type diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 71857bd2d87..13b0bb930f4 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -3,13 +3,13 @@ module Gitlab module Git class Blob - include Linguist::BlobHelper + include Gitlab::BlobHelper include Gitlab::EncodingHelper # This number is the maximum amount of data that we want to display to - # the user. We load as much as we can for encoding detection - # (Linguist) and LFS pointer parsing. All other cases where we need full - # blob data should use load_all_data!. + # the user. We load as much as we can for encoding detection and LFS + # pointer parsing. All other cases where we need full blob data should + # use load_all_data!. MAX_DATA_DISPLAY_SIZE = 10.megabytes # These limits are used as a heuristic to ignore files which can't be LFS diff --git a/lib/gitlab/git/blob_snippet.rb b/lib/gitlab/git/blob_snippet.rb deleted file mode 100644 index 68116e775c6..00000000000 --- a/lib/gitlab/git/blob_snippet.rb +++ /dev/null @@ -1,34 +0,0 @@ -# Gitaly note: JV: no RPC's here. - -module Gitlab - module Git - class BlobSnippet - include Linguist::BlobHelper - - attr_accessor :ref - attr_accessor :lines - attr_accessor :filename - attr_accessor :startline - - def initialize(ref, lines, startline, filename) - @ref, @lines, @startline, @filename = ref, lines, startline, filename - end - - def data - lines&.join("\n") - end - - def name - filename - end - - def size - data.length - end - - def mode - nil - end - end - end -end diff --git a/lib/gitlab/language_data.rb b/lib/gitlab/language_data.rb new file mode 100644 index 00000000000..bfdd7175198 --- /dev/null +++ b/lib/gitlab/language_data.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module LanguageData + EXTENSION_MUTEX = Mutex.new + + class << self + include Gitlab::Utils::StrongMemoize + + def extensions + EXTENSION_MUTEX.synchronize do + strong_memoize(:extensions) do + Set.new.tap do |set| + YAML.load_file(Rails.root.join('vendor', 'languages.yml')).each do |_name, details| + details['extensions']&.each do |ext| + next unless ext.start_with?('.') + + set << ext.downcase + end + end + end + end + end + end + + def clear_extensions! + EXTENSION_MUTEX.synchronize do + clear_memoization(:extensions) + end + end + end + end +end |