summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/api/circuit_breakers.rb2
-rw-r--r--lib/gitlab/git/storage/checker.rb98
-rw-r--r--lib/gitlab/git/storage/circuit_breaker.rb106
-rw-r--r--lib/gitlab/git/storage/circuit_breaker_settings.rb12
-rw-r--r--lib/gitlab/git/storage/failure_info.rb39
-rw-r--r--lib/gitlab/git/storage/null_circuit_breaker.rb22
-rw-r--r--lib/gitlab/storage_check.rb11
-rw-r--r--lib/gitlab/storage_check/cli.rb69
-rw-r--r--lib/gitlab/storage_check/gitlab_caller.rb39
-rw-r--r--lib/gitlab/storage_check/option_parser.rb39
-rw-r--r--lib/gitlab/storage_check/response.rb77
11 files changed, 396 insertions, 118 deletions
diff --git a/lib/api/circuit_breakers.rb b/lib/api/circuit_breakers.rb
index 118883f5ea5..598c76f6168 100644
--- a/lib/api/circuit_breakers.rb
+++ b/lib/api/circuit_breakers.rb
@@ -41,7 +41,7 @@ module API
detail 'This feature was introduced in GitLab 9.5'
end
delete do
- Gitlab::Git::Storage::CircuitBreaker.reset_all!
+ Gitlab::Git::Storage::FailureInfo.reset_all!
end
end
end
diff --git a/lib/gitlab/git/storage/checker.rb b/lib/gitlab/git/storage/checker.rb
new file mode 100644
index 00000000000..de63cb4b40c
--- /dev/null
+++ b/lib/gitlab/git/storage/checker.rb
@@ -0,0 +1,98 @@
+module Gitlab
+ module Git
+ module Storage
+ class Checker
+ include CircuitBreakerSettings
+
+ attr_reader :storage_path, :storage, :hostname, :logger
+
+ def self.check_all(logger = Rails.logger)
+ threads = Gitlab.config.repositories.storages.keys.map do |storage_name|
+ Thread.new do
+ Thread.current[:result] = new(storage_name, logger).check_with_lease
+ end
+ end
+
+ threads.map do |thread|
+ thread.join
+ thread[:result]
+ end
+ end
+
+ def initialize(storage, logger = Rails.logger)
+ @storage = storage
+ config = Gitlab.config.repositories.storages[@storage]
+ @storage_path = config['path']
+ @logger = logger
+
+ @hostname = Gitlab::Environment.hostname
+ end
+
+ def check_with_lease
+ lease_key = "storage_check:#{cache_key}"
+ lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout)
+ result = { storage: storage, success: nil }
+
+ if uuid = lease.try_obtain
+ result[:success] = check
+
+ Gitlab::ExclusiveLease.cancel(lease_key, uuid)
+ else
+ logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running")
+ end
+
+ result
+ end
+
+ def check
+ if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries)
+ track_storage_accessible
+ true
+ else
+ track_storage_inaccessible
+ logger.error("#{hostname}: #{storage}: Not accessible.")
+ false
+ end
+ end
+
+ private
+
+ def track_storage_inaccessible
+ first_failure = current_failure_info.first_failure || Time.now
+ last_failure = Time.now
+
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, first_failure.to_i)
+ redis.hset(cache_key, :last_failure, last_failure.to_i)
+ redis.hincrby(cache_key, :failure_count, 1)
+ redis.expire(cache_key, failure_reset_time)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def track_storage_accessible
+ Gitlab::Git::Storage.redis.with do |redis|
+ redis.pipelined do
+ redis.hset(cache_key, :first_failure, nil)
+ redis.hset(cache_key, :last_failure, nil)
+ redis.hset(cache_key, :failure_count, 0)
+ maintain_known_keys(redis)
+ end
+ end
+ end
+
+ def maintain_known_keys(redis)
+ expire_time = Time.now.to_i + failure_reset_time
+ redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
+ redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
+ end
+
+ def current_failure_info
+ FailureInfo.load(cache_key)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb
index 4328c0ea29b..898bb1b65be 100644
--- a/lib/gitlab/git/storage/circuit_breaker.rb
+++ b/lib/gitlab/git/storage/circuit_breaker.rb
@@ -4,22 +4,11 @@ module Gitlab
class CircuitBreaker
include CircuitBreakerSettings
- FailureInfo = Struct.new(:last_failure, :failure_count)
-
attr_reader :storage,
- :hostname,
- :storage_path
-
- delegate :last_failure, :failure_count, to: :failure_info
-
- def self.reset_all!
- Gitlab::Git::Storage.redis.with do |redis|
- all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
- redis.del(*all_storage_keys) unless all_storage_keys.empty?
- end
+ :hostname
- RequestStore.delete(:circuitbreaker_cache)
- end
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
def self.for_storage(storage)
cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do
@@ -46,9 +35,6 @@ module Gitlab
def initialize(storage, hostname)
@storage = storage
@hostname = hostname
-
- config = Gitlab.config.repositories.storages[@storage]
- @storage_path = config['path']
end
def perform
@@ -65,15 +51,6 @@ module Gitlab
failure_count > failure_count_threshold
end
- def backing_off?
- return false if no_failures?
-
- recent_failure = last_failure > failure_wait_time.seconds.ago
- too_many_failures = failure_count > backoff_threshold
-
- recent_failure && too_many_failures
- end
-
private
# The circuitbreaker can be enabled for the entire fleet using a Feature
@@ -86,88 +63,13 @@ module Gitlab
end
def failure_info
- @failure_info ||= get_failure_info
- end
-
- # Memoizing the `storage_available` call means we only do it once per
- # request when the storage is available.
- #
- # When the storage appears not available, and the memoized value is `false`
- # we might want to try again.
- def storage_available?
- return @storage_available if @storage_available
-
- if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck
- .storage_available?(storage_path, storage_timeout, access_retries)
- track_storage_accessible
- else
- track_storage_inaccessible
- end
-
- @storage_available
+ @failure_info ||= FailureInfo.load(cache_key)
end
def check_storage_accessible!
if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
-
- if backing_off?
- raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
- end
-
- unless storage_available?
- raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time)
- end
- end
-
- def no_failures?
- last_failure.blank? && failure_count == 0
- end
-
- def track_storage_inaccessible
- @failure_info = FailureInfo.new(Time.now, failure_count + 1)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, last_failure.to_i)
- redis.hincrby(cache_key, :failure_count, 1)
- redis.expire(cache_key, failure_reset_time)
- maintain_known_keys(redis)
- end
- end
- end
-
- def track_storage_accessible
- @failure_info = FailureInfo.new(nil, 0)
-
- Gitlab::Git::Storage.redis.with do |redis|
- redis.pipelined do
- redis.hset(cache_key, :last_failure, nil)
- redis.hset(cache_key, :failure_count, 0)
- maintain_known_keys(redis)
- end
- end
- end
-
- def maintain_known_keys(redis)
- expire_time = Time.now.to_i + failure_reset_time
- redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key)
- redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i)
- end
-
- def get_failure_info
- last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
- redis.hmget(cache_key, :last_failure, :failure_count)
- end
-
- last_failure = Time.at(last_failure.to_i) if last_failure.present?
-
- FailureInfo.new(last_failure, failure_count.to_i)
- end
-
- def cache_key
- @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end
end
diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb
index 257fe8cd8f0..c9e225f187d 100644
--- a/lib/gitlab/git/storage/circuit_breaker_settings.rb
+++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb
@@ -6,10 +6,6 @@ module Gitlab
application_settings.circuitbreaker_failure_count_threshold
end
- def failure_wait_time
- application_settings.circuitbreaker_failure_wait_time
- end
-
def failure_reset_time
application_settings.circuitbreaker_failure_reset_time
end
@@ -22,8 +18,12 @@ module Gitlab
application_settings.circuitbreaker_access_retries
end
- def backoff_threshold
- application_settings.circuitbreaker_backoff_threshold
+ def check_interval
+ application_settings.circuitbreaker_check_interval
+ end
+
+ def cache_key
+ @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private
diff --git a/lib/gitlab/git/storage/failure_info.rb b/lib/gitlab/git/storage/failure_info.rb
new file mode 100644
index 00000000000..387279c110d
--- /dev/null
+++ b/lib/gitlab/git/storage/failure_info.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module Git
+ module Storage
+ class FailureInfo
+ attr_accessor :first_failure, :last_failure, :failure_count
+
+ def self.reset_all!
+ Gitlab::Git::Storage.redis.with do |redis|
+ all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
+ redis.del(*all_storage_keys) unless all_storage_keys.empty?
+ end
+
+ RequestStore.delete(:circuitbreaker_cache)
+ end
+
+ def self.load(cache_key)
+ first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
+ redis.hmget(cache_key, :first_failure, :last_failure, :failure_count)
+ end
+
+ last_failure = Time.at(last_failure.to_i) if last_failure.present?
+ first_failure = Time.at(first_failure.to_i) if first_failure.present?
+
+ new(first_failure, last_failure, failure_count.to_i)
+ end
+
+ def initialize(first_failure, last_failure, failure_count)
+ @first_failure = first_failure
+ @last_failure = last_failure
+ @failure_count = failure_count
+ end
+
+ def no_failures?
+ first_failure.blank? && last_failure.blank? && failure_count == 0
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb
index a12d52d295f..261c936c689 100644
--- a/lib/gitlab/git/storage/null_circuit_breaker.rb
+++ b/lib/gitlab/git/storage/null_circuit_breaker.rb
@@ -11,6 +11,9 @@ module Gitlab
# These will always have nil values
attr_reader :storage_path
+ delegate :last_failure, :failure_count, :no_failures?,
+ to: :failure_info
+
def initialize(storage, hostname, error: nil)
@storage = storage
@hostname = hostname
@@ -29,16 +32,17 @@ module Gitlab
false
end
- def last_failure
- circuit_broken? ? Time.now : nil
- end
-
- def failure_count
- circuit_broken? ? failure_count_threshold : 0
- end
-
def failure_info
- Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count)
+ @failure_info ||=
+ if circuit_broken?
+ Gitlab::Git::Storage::FailureInfo.new(Time.now,
+ Time.now,
+ failure_count_threshold)
+ else
+ Gitlab::Git::Storage::FailureInfo.new(nil,
+ nil,
+ 0)
+ end
end
end
end
diff --git a/lib/gitlab/storage_check.rb b/lib/gitlab/storage_check.rb
new file mode 100644
index 00000000000..fe81513c9ec
--- /dev/null
+++ b/lib/gitlab/storage_check.rb
@@ -0,0 +1,11 @@
+require_relative 'storage_check/cli'
+require_relative 'storage_check/gitlab_caller'
+require_relative 'storage_check/option_parser'
+require_relative 'storage_check/response'
+
+module Gitlab
+ module StorageCheck
+ ENDPOINT = '/-/storage_check'.freeze
+ Options = Struct.new(:target, :token, :interval, :dryrun)
+ end
+end
diff --git a/lib/gitlab/storage_check/cli.rb b/lib/gitlab/storage_check/cli.rb
new file mode 100644
index 00000000000..04bf1bf1d26
--- /dev/null
+++ b/lib/gitlab/storage_check/cli.rb
@@ -0,0 +1,69 @@
+module Gitlab
+ module StorageCheck
+ class CLI
+ def self.start!(args)
+ runner = new(Gitlab::StorageCheck::OptionParser.parse!(args))
+ runner.start_loop
+ end
+
+ attr_reader :logger, :options
+
+ def initialize(options)
+ @options = options
+ @logger = Logger.new(STDOUT)
+ end
+
+ def start_loop
+ logger.info "Checking #{options.target} every #{options.interval} seconds"
+
+ if options.dryrun
+ logger.info "Dryrun, exiting..."
+ return
+ end
+
+ begin
+ loop do
+ response = GitlabCaller.new(options).call!
+ log_response(response)
+ update_settings(response)
+
+ sleep options.interval
+ end
+ rescue Interrupt
+ logger.info "Ending storage-check"
+ end
+ end
+
+ def update_settings(response)
+ previous_interval = options.interval
+
+ if response.valid?
+ options.interval = response.check_interval || previous_interval
+ end
+
+ if previous_interval != options.interval
+ logger.info "Interval changed: #{options.interval} seconds"
+ end
+ end
+
+ def log_response(response)
+ unless response.valid?
+ return logger.error("Invalid response checking nfs storage: #{response.http_response.inspect}")
+ end
+
+ if response.responsive_shards.any?
+ logger.debug("Responsive shards: #{response.responsive_shards.join(', ')}")
+ end
+
+ warnings = []
+ if response.skipped_shards.any?
+ warnings << "Skipped shards: #{response.skipped_shards.join(', ')}"
+ end
+ if response.failing_shards.any?
+ warnings << "Failing shards: #{response.failing_shards.join(', ')}"
+ end
+ logger.warn(warnings.join(' - ')) if warnings.any?
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/gitlab_caller.rb b/lib/gitlab/storage_check/gitlab_caller.rb
new file mode 100644
index 00000000000..44952b68844
--- /dev/null
+++ b/lib/gitlab/storage_check/gitlab_caller.rb
@@ -0,0 +1,39 @@
+require 'excon'
+
+module Gitlab
+ module StorageCheck
+ class GitlabCaller
+ def initialize(options)
+ @options = options
+ end
+
+ def call!
+ Gitlab::StorageCheck::Response.new(get_response)
+ rescue Errno::ECONNREFUSED, Excon::Error
+ # Server not ready, treated as invalid response.
+ Gitlab::StorageCheck::Response.new(nil)
+ end
+
+ def get_response
+ scheme, *other_parts = URI.split(@options.target)
+ socket_path = if scheme == 'unix'
+ other_parts.compact.join
+ end
+
+ connection = Excon.new(@options.target, socket: socket_path)
+ connection.post(path: Gitlab::StorageCheck::ENDPOINT,
+ headers: headers)
+ end
+
+ def headers
+ @headers ||= begin
+ headers = {}
+ headers['Content-Type'] = headers['Accept'] = 'application/json'
+ headers['TOKEN'] = @options.token if @options.token
+
+ headers
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/option_parser.rb b/lib/gitlab/storage_check/option_parser.rb
new file mode 100644
index 00000000000..66ed7906f97
--- /dev/null
+++ b/lib/gitlab/storage_check/option_parser.rb
@@ -0,0 +1,39 @@
+module Gitlab
+ module StorageCheck
+ class OptionParser
+ def self.parse!(args)
+ # Start out with some defaults
+ options = Gitlab::StorageCheck::Options.new(nil, nil, 1, false)
+
+ parser = ::OptionParser.new do |opts|
+ opts.banner = "Usage: bin/storage_check [options]"
+
+ opts.on('-t=string', '--target string', 'URL or socket to trigger storage check') do |value|
+ options.target = value
+ end
+
+ opts.on('-T=string', '--token string', 'Health token to use') { |value| options.token = value }
+
+ opts.on('-i=n', '--interval n', ::OptionParser::DecimalInteger, 'Seconds between checks') do |value|
+ options.interval = value
+ end
+
+ opts.on('-d', '--dryrun', "Output what will be performed, but don't start the process") do |value|
+ options.dryrun = value
+ end
+ end
+ parser.parse!(args)
+
+ unless options.target
+ raise ::OptionParser::InvalidArgument.new('Provide a URI to provide checks')
+ end
+
+ if URI.parse(options.target).scheme.nil?
+ raise ::OptionParser::InvalidArgument.new('Add the scheme to the target, `unix://`, `https://` or `http://` are supported')
+ end
+
+ options
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/storage_check/response.rb b/lib/gitlab/storage_check/response.rb
new file mode 100644
index 00000000000..326ab236e3e
--- /dev/null
+++ b/lib/gitlab/storage_check/response.rb
@@ -0,0 +1,77 @@
+require 'json'
+
+module Gitlab
+ module StorageCheck
+ class Response
+ attr_reader :http_response
+
+ def initialize(http_response)
+ @http_response = http_response
+ end
+
+ def valid?
+ @http_response && (200...299).cover?(@http_response.status) &&
+ @http_response.headers['Content-Type'].include?('application/json') &&
+ parsed_response
+ end
+
+ def check_interval
+ return nil unless parsed_response
+
+ parsed_response['check_interval']
+ end
+
+ def responsive_shards
+ divided_results[:responsive_shards]
+ end
+
+ def skipped_shards
+ divided_results[:skipped_shards]
+ end
+
+ def failing_shards
+ divided_results[:failing_shards]
+ end
+
+ private
+
+ def results
+ return [] unless parsed_response
+
+ parsed_response['results']
+ end
+
+ def divided_results
+ return @divided_results if @divided_results
+
+ @divided_results = {}
+ @divided_results[:responsive_shards] = []
+ @divided_results[:skipped_shards] = []
+ @divided_results[:failing_shards] = []
+
+ results.each do |info|
+ name = info['storage']
+
+ case info['success']
+ when true
+ @divided_results[:responsive_shards] << name
+ when false
+ @divided_results[:failing_shards] << name
+ else
+ @divided_results[:skipped_shards] << name
+ end
+ end
+
+ @divided_results
+ end
+
+ def parsed_response
+ return @parsed_response if defined?(@parsed_response)
+
+ @parsed_response = JSON.parse(@http_response.body)
+ rescue JSON::JSONError
+ @parsed_response = nil
+ end
+ end
+ end
+end