summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Robert Speicher <robert@gitlab.com> 2018-10-15 15:27:15 +0000
committer Robert Speicher <robert@gitlab.com> 2018-10-15 15:27:15 +0000
commit f5d088eb11b066dadad06948e158b39e9432573c (patch)
tree 9f1db7d7ba83f1b221bdb88b864e8efca1fd95d8 /lib
parent 201143e9db57ed6f4cd72704387777b4be5ecc34 (diff)
parent 399056ed783e12337a9c47b06b4aae021198f1cd (diff)
download gitlab-ce-f5d088eb11b066dadad06948e158b39e9432573c.tar.gz
Merge branch 'zj-remove-linguist' into 'master'
Remove dependencies on Linguist
Closes #35450
See merge request gitlab-org/gitlab-ce!21008
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/blob_helper.rb 145
-rw-r--r-- lib/gitlab/conflict/file.rb 1
-rw-r--r-- lib/gitlab/git/blob.rb 8
-rw-r--r-- lib/gitlab/git/blob_snippet.rb 34
-rw-r--r-- lib/gitlab/language_data.rb 33
5 files changed, 182 insertions, 39 deletions
diff --git a/lib/gitlab/blob_helper.rb b/lib/gitlab/blob_helper.rb
new file mode 100644
index 00000000000..9b3b383b0c8
--- /dev/null
+++ b/lib/gitlab/blob_helper.rb
@@ -0,0 +1,145 @@
+# This has been extracted from https://github.com/github/linguist/blob/master/lib/linguist/blob_helper.rb
+module Gitlab
+ module BlobHelper
+ def extname
+ File.extname(name.to_s)
+ end
+
+ def known_extension?
+ LanguageData.extensions.include?(extname)
+ end
+
+ def viewable?
+ !large? && text?
+ end
+
+ MEGABYTE = 1024 * 1024
+
+ def large?
+ size.to_i > MEGABYTE
+ end
+
+ def binary?
+ # Large blobs aren't even loaded into memory
+ if data.nil?
+ true
+
+ # Treat blank files as text
+ elsif data == ""
+ false
+
+ # Charlock doesn't know what to think
+ elsif encoding.nil?
+ true
+
+ # If Charlock says its binary
+ else
+ detect_encoding[:type] == :binary
+ end
+ end
+
+ def text?
+ !binary?
+ end
+
+ def image?
+ ['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase)
+ end
+
+ # Internal: Lookup mime type for extension.
+ #
+ # Returns a MIME::Type
+ # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ def _mime_type
+ if defined? @_mime_type
+ @_mime_type
+ else
+ guesses = ::MIME::Types.type_for(extname.to_s)
+
+ # Prefer text mime types over binary
+ @_mime_type = guesses.detect { |type| type.ascii? } || guesses.first
+ end
+ end
+ # rubocop:enable Gitlab/ModuleWithInstanceVariables
+
+ # Public: Get the actual blob mime type
+ #
+ # Examples
+ #
+ # # => 'text/plain'
+ # # => 'text/html'
+ #
+ # Returns a mime type String.
+ def mime_type
+ _mime_type ? _mime_type.to_s : 'text/plain'
+ end
+
+ def binary_mime_type?
+ _mime_type ? _mime_type.binary? : false
+ end
+
+ def lines
+ @lines ||=
+ if viewable? && data
+ # `data` is usually encoded as ASCII-8BIT even when the content has
+ # been detected as a different encoding. However, we are not allowed
+ # to change the encoding of `data` because we've made the implicit
+ # guarantee that each entry in `lines` is encoded the same way as
+ # `data`.
+ #
+ # Instead, we re-encode each possible newline sequence as the
+ # detected encoding, then force them back to the encoding of `data`
+ # (usually a binary encoding like ASCII-8BIT). This means that the
+ # byte sequence will match how newlines are likely encoded in the
+ # file, but we don't have to change the encoding of `data` as far as
+ # Ruby is concerned. This allows us to correctly parse out each line
+ # without changing the encoding of `data`, and
+ # also--importantly--without having to duplicate many (potentially
+ # large) strings.
+ begin
+ data.split(encoded_newlines_re, -1)
+ rescue Encoding::ConverterNotFoundError
+ # The data is not splittable in the detected encoding. Assume it's
+ # one big line.
+ [data]
+ end
+ else
+ []
+ end
+ end
+
+ def content_type
+ # rubocop:disable Style/MultilineTernaryOperator
+ # rubocop:disable Style/NestedTernaryOperator
+ @content_type ||= binary_mime_type? || binary? ? mime_type :
+ (encoding ? "text/plain; charset=#{encoding.downcase}" : "text/plain")
+ # rubocop:enable Style/NestedTernaryOperator
+ # rubocop:enable Style/MultilineTernaryOperator
+ end
+
+ def encoded_newlines_re
+ @encoded_newlines_re ||=
+ Regexp.union(["\r\n", "\r", "\n"].map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) })
+ end
+
+ def ruby_encoding
+ if hash = detect_encoding
+ hash[:ruby_encoding]
+ end
+ end
+
+ def encoding
+ if hash = detect_encoding
+ hash[:encoding]
+ end
+ end
+
+ def detect_encoding
+ @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ end
+
+ def empty?
+ data.nil? || data == ""
+ end
+ end
+end
diff --git a/lib/gitlab/conflict/file.rb b/lib/gitlab/conflict/file.rb
index 2a0cb640a14..30911b49b18 100644
--- a/lib/gitlab/conflict/file.rb
+++ b/lib/gitlab/conflict/file.rb
@@ -158,7 +158,6 @@ module Gitlab
json_hash.tap do |json_hash|
if opts[:full_content]
json_hash[:content] = content
- json_hash[:blob_ace_mode] = our_blob && our_blob.language.try(:ace_mode)
else
json_hash[:sections] = sections if type.text?
json_hash[:type] = type
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 71857bd2d87..13b0bb930f4 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -3,13 +3,13 @@
module Gitlab
module Git
class Blob
- include Linguist::BlobHelper
+ include Gitlab::BlobHelper
include Gitlab::EncodingHelper
# This number is the maximum amount of data that we want to display to
- # the user. We load as much as we can for encoding detection
- # (Linguist) and LFS pointer parsing. All other cases where we need full
- # blob data should use load_all_data!.
+ # the user. We load as much as we can for encoding detection and LFS
+ # pointer parsing. All other cases where we need full blob data should
+ # use load_all_data!.
MAX_DATA_DISPLAY_SIZE = 10.megabytes
# These limits are used as a heuristic to ignore files which can't be LFS
diff --git a/lib/gitlab/git/blob_snippet.rb b/lib/gitlab/git/blob_snippet.rb
deleted file mode 100644
index 68116e775c6..00000000000
--- a/lib/gitlab/git/blob_snippet.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-# Gitaly note: JV: no RPC's here.
-
-module Gitlab
- module Git
- class BlobSnippet
- include Linguist::BlobHelper
-
- attr_accessor :ref
- attr_accessor :lines
- attr_accessor :filename
- attr_accessor :startline
-
- def initialize(ref, lines, startline, filename)
- @ref, @lines, @startline, @filename = ref, lines, startline, filename
- end
-
- def data
- lines&.join("\n")
- end
-
- def name
- filename
- end
-
- def size
- data.length
- end
-
- def mode
- nil
- end
- end
- end
-end
diff --git a/lib/gitlab/language_data.rb b/lib/gitlab/language_data.rb
new file mode 100644
index 00000000000..bfdd7175198
--- /dev/null
+++ b/lib/gitlab/language_data.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module LanguageData
+ EXTENSION_MUTEX = Mutex.new
+
+ class << self
+ include Gitlab::Utils::StrongMemoize
+
+ def extensions
+ EXTENSION_MUTEX.synchronize do
+ strong_memoize(:extensions) do
+ Set.new.tap do |set|
+ YAML.load_file(Rails.root.join('vendor', 'languages.yml')).each do |_name, details|
+ details['extensions']&.each do |ext|
+ next unless ext.start_with?('.')
+
+ set << ext.downcase
+ end
+ end
+ end
+ end
+ end
+ end
+
+ def clear_extensions!
+ EXTENSION_MUTEX.synchronize do
+ clear_memoization(:extensions)
+ end
+ end
+ end
+ end
+end