diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/api/circuit_breakers.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/git/storage/checker.rb | 98 | ||||
-rw-r--r-- | lib/gitlab/git/storage/circuit_breaker.rb | 106 | ||||
-rw-r--r-- | lib/gitlab/git/storage/circuit_breaker_settings.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/git/storage/failure_info.rb | 39 | ||||
-rw-r--r-- | lib/gitlab/git/storage/null_circuit_breaker.rb | 22 | ||||
-rw-r--r-- | lib/gitlab/storage_check.rb | 11 | ||||
-rw-r--r-- | lib/gitlab/storage_check/cli.rb | 69 | ||||
-rw-r--r-- | lib/gitlab/storage_check/gitlab_caller.rb | 39 | ||||
-rw-r--r-- | lib/gitlab/storage_check/option_parser.rb | 39 | ||||
-rw-r--r-- | lib/gitlab/storage_check/response.rb | 77 |
11 files changed, 396 insertions, 118 deletions
diff --git a/lib/api/circuit_breakers.rb b/lib/api/circuit_breakers.rb index 118883f5ea5..598c76f6168 100644 --- a/lib/api/circuit_breakers.rb +++ b/lib/api/circuit_breakers.rb @@ -41,7 +41,7 @@ module API detail 'This feature was introduced in GitLab 9.5' end delete do - Gitlab::Git::Storage::CircuitBreaker.reset_all! + Gitlab::Git::Storage::FailureInfo.reset_all! end end end diff --git a/lib/gitlab/git/storage/checker.rb b/lib/gitlab/git/storage/checker.rb new file mode 100644 index 00000000000..de63cb4b40c --- /dev/null +++ b/lib/gitlab/git/storage/checker.rb @@ -0,0 +1,98 @@ +module Gitlab + module Git + module Storage + class Checker + include CircuitBreakerSettings + + attr_reader :storage_path, :storage, :hostname, :logger + + def self.check_all(logger = Rails.logger) + threads = Gitlab.config.repositories.storages.keys.map do |storage_name| + Thread.new do + Thread.current[:result] = new(storage_name, logger).check_with_lease + end + end + + threads.map do |thread| + thread.join + thread[:result] + end + end + + def initialize(storage, logger = Rails.logger) + @storage = storage + config = Gitlab.config.repositories.storages[@storage] + @storage_path = config['path'] + @logger = logger + + @hostname = Gitlab::Environment.hostname + end + + def check_with_lease + lease_key = "storage_check:#{cache_key}" + lease = Gitlab::ExclusiveLease.new(lease_key, timeout: storage_timeout) + result = { storage: storage, success: nil } + + if uuid = lease.try_obtain + result[:success] = check + + Gitlab::ExclusiveLease.cancel(lease_key, uuid) + else + logger.warn("#{hostname}: #{storage}: Skipping check, previous check still running") + end + + result + end + + def check + if Gitlab::Git::Storage::ForkedStorageCheck.storage_available?(storage_path, storage_timeout, access_retries) + track_storage_accessible + true + else + track_storage_inaccessible + logger.error("#{hostname}: #{storage}: Not accessible.") + false + end + end + + private + + def track_storage_inaccessible + first_failure = current_failure_info.first_failure || Time.now + last_failure = Time.now + + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :first_failure, first_failure.to_i) + redis.hset(cache_key, :last_failure, last_failure.to_i) + redis.hincrby(cache_key, :failure_count, 1) + redis.expire(cache_key, failure_reset_time) + maintain_known_keys(redis) + end + end + end + + def track_storage_accessible + Gitlab::Git::Storage.redis.with do |redis| + redis.pipelined do + redis.hset(cache_key, :first_failure, nil) + redis.hset(cache_key, :last_failure, nil) + redis.hset(cache_key, :failure_count, 0) + maintain_known_keys(redis) + end + end + end + + def maintain_known_keys(redis) + expire_time = Time.now.to_i + failure_reset_time + redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key) + redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i) + end + + def current_failure_info + FailureInfo.load(cache_key) + end + end + end + end +end diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb index 4328c0ea29b..898bb1b65be 100644 --- a/lib/gitlab/git/storage/circuit_breaker.rb +++ b/lib/gitlab/git/storage/circuit_breaker.rb @@ -4,22 +4,11 @@ module Gitlab class CircuitBreaker include CircuitBreakerSettings - FailureInfo = Struct.new(:last_failure, :failure_count) - attr_reader :storage, - :hostname, - :storage_path - - delegate :last_failure, :failure_count, to: :failure_info - - def self.reset_all! - Gitlab::Git::Storage.redis.with do |redis| - all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1) - redis.del(*all_storage_keys) unless all_storage_keys.empty? - end + :hostname - RequestStore.delete(:circuitbreaker_cache) - end + delegate :last_failure, :failure_count, :no_failures?, + to: :failure_info def self.for_storage(storage) cached_circuitbreakers = RequestStore.fetch(:circuitbreaker_cache) do @@ -46,9 +35,6 @@ module Gitlab def initialize(storage, hostname) @storage = storage @hostname = hostname - - config = Gitlab.config.repositories.storages[@storage] - @storage_path = config['path'] end def perform @@ -65,15 +51,6 @@ module Gitlab failure_count > failure_count_threshold end - def backing_off? - return false if no_failures? - - recent_failure = last_failure > failure_wait_time.seconds.ago - too_many_failures = failure_count > backoff_threshold - - recent_failure && too_many_failures - end - private # The circuitbreaker can be enabled for the entire fleet using a Feature @@ -86,88 +63,13 @@ module Gitlab end def failure_info - @failure_info ||= get_failure_info - end - - # Memoizing the `storage_available` call means we only do it once per - # request when the storage is available. - # - # When the storage appears not available, and the memoized value is `false` - # we might want to try again. - def storage_available? - return @storage_available if @storage_available - - if @storage_available = Gitlab::Git::Storage::ForkedStorageCheck - .storage_available?(storage_path, storage_timeout, access_retries) - track_storage_accessible - else - track_storage_inaccessible - end - - @storage_available + @failure_info ||= FailureInfo.load(cache_key) end def check_storage_accessible! if circuit_broken? raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time) end - - if backing_off? - raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time) - end - - unless storage_available? - raise Gitlab::Git::Storage::Inaccessible.new("#{storage} not accessible", failure_wait_time) - end - end - - def no_failures? - last_failure.blank? && failure_count == 0 - end - - def track_storage_inaccessible - @failure_info = FailureInfo.new(Time.now, failure_count + 1) - - Gitlab::Git::Storage.redis.with do |redis| - redis.pipelined do - redis.hset(cache_key, :last_failure, last_failure.to_i) - redis.hincrby(cache_key, :failure_count, 1) - redis.expire(cache_key, failure_reset_time) - maintain_known_keys(redis) - end - end - end - - def track_storage_accessible - @failure_info = FailureInfo.new(nil, 0) - - Gitlab::Git::Storage.redis.with do |redis| - redis.pipelined do - redis.hset(cache_key, :last_failure, nil) - redis.hset(cache_key, :failure_count, 0) - maintain_known_keys(redis) - end - end - end - - def maintain_known_keys(redis) - expire_time = Time.now.to_i + failure_reset_time - redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, expire_time, cache_key) - redis.zremrangebyscore(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, '-inf', Time.now.to_i) - end - - def get_failure_info - last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| - redis.hmget(cache_key, :last_failure, :failure_count) - end - - last_failure = Time.at(last_failure.to_i) if last_failure.present? - - FailureInfo.new(last_failure, failure_count.to_i) - end - - def cache_key - @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" end end end diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb index 257fe8cd8f0..c9e225f187d 100644 --- a/lib/gitlab/git/storage/circuit_breaker_settings.rb +++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb @@ -6,10 +6,6 @@ module Gitlab application_settings.circuitbreaker_failure_count_threshold end - def failure_wait_time - application_settings.circuitbreaker_failure_wait_time - end - def failure_reset_time application_settings.circuitbreaker_failure_reset_time end @@ -22,8 +18,12 @@ module Gitlab application_settings.circuitbreaker_access_retries end - def backoff_threshold - application_settings.circuitbreaker_backoff_threshold + def check_interval + application_settings.circuitbreaker_check_interval + end + + def cache_key + @cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}" end private diff --git a/lib/gitlab/git/storage/failure_info.rb b/lib/gitlab/git/storage/failure_info.rb new file mode 100644 index 00000000000..387279c110d --- /dev/null +++ b/lib/gitlab/git/storage/failure_info.rb @@ -0,0 +1,39 @@ +module Gitlab + module Git + module Storage + class FailureInfo + attr_accessor :first_failure, :last_failure, :failure_count + + def self.reset_all! + Gitlab::Git::Storage.redis.with do |redis| + all_storage_keys = redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1) + redis.del(*all_storage_keys) unless all_storage_keys.empty? + end + + RequestStore.delete(:circuitbreaker_cache) + end + + def self.load(cache_key) + first_failure, last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| + redis.hmget(cache_key, :first_failure, :last_failure, :failure_count) + end + + last_failure = Time.at(last_failure.to_i) if last_failure.present? + first_failure = Time.at(first_failure.to_i) if first_failure.present? + + new(first_failure, last_failure, failure_count.to_i) + end + + def initialize(first_failure, last_failure, failure_count) + @first_failure = first_failure + @last_failure = last_failure + @failure_count = failure_count + end + + def no_failures? + first_failure.blank? && last_failure.blank? && failure_count == 0 + end + end + end + end +end diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb index a12d52d295f..261c936c689 100644 --- a/lib/gitlab/git/storage/null_circuit_breaker.rb +++ b/lib/gitlab/git/storage/null_circuit_breaker.rb @@ -11,6 +11,9 @@ module Gitlab # These will always have nil values attr_reader :storage_path + delegate :last_failure, :failure_count, :no_failures?, + to: :failure_info + def initialize(storage, hostname, error: nil) @storage = storage @hostname = hostname @@ -29,16 +32,17 @@ module Gitlab false end - def last_failure - circuit_broken? ? Time.now : nil - end - - def failure_count - circuit_broken? ? failure_count_threshold : 0 - end - def failure_info - Gitlab::Git::Storage::CircuitBreaker::FailureInfo.new(last_failure, failure_count) + @failure_info ||= + if circuit_broken? + Gitlab::Git::Storage::FailureInfo.new(Time.now, + Time.now, + failure_count_threshold) + else + Gitlab::Git::Storage::FailureInfo.new(nil, + nil, + 0) + end end end end diff --git a/lib/gitlab/storage_check.rb b/lib/gitlab/storage_check.rb new file mode 100644 index 00000000000..fe81513c9ec --- /dev/null +++ b/lib/gitlab/storage_check.rb @@ -0,0 +1,11 @@ +require_relative 'storage_check/cli' +require_relative 'storage_check/gitlab_caller' +require_relative 'storage_check/option_parser' +require_relative 'storage_check/response' + +module Gitlab + module StorageCheck + ENDPOINT = '/-/storage_check'.freeze + Options = Struct.new(:target, :token, :interval, :dryrun) + end +end diff --git a/lib/gitlab/storage_check/cli.rb b/lib/gitlab/storage_check/cli.rb new file mode 100644 index 00000000000..04bf1bf1d26 --- /dev/null +++ b/lib/gitlab/storage_check/cli.rb @@ -0,0 +1,69 @@ +module Gitlab + module StorageCheck + class CLI + def self.start!(args) + runner = new(Gitlab::StorageCheck::OptionParser.parse!(args)) + runner.start_loop + end + + attr_reader :logger, :options + + def initialize(options) + @options = options + @logger = Logger.new(STDOUT) + end + + def start_loop + logger.info "Checking #{options.target} every #{options.interval} seconds" + + if options.dryrun + logger.info "Dryrun, exiting..." + return + end + + begin + loop do + response = GitlabCaller.new(options).call! + log_response(response) + update_settings(response) + + sleep options.interval + end + rescue Interrupt + logger.info "Ending storage-check" + end + end + + def update_settings(response) + previous_interval = options.interval + + if response.valid? + options.interval = response.check_interval || previous_interval + end + + if previous_interval != options.interval + logger.info "Interval changed: #{options.interval} seconds" + end + end + + def log_response(response) + unless response.valid? + return logger.error("Invalid response checking nfs storage: #{response.http_response.inspect}") + end + + if response.responsive_shards.any? + logger.debug("Responsive shards: #{response.responsive_shards.join(', ')}") + end + + warnings = [] + if response.skipped_shards.any? + warnings << "Skipped shards: #{response.skipped_shards.join(', ')}" + end + if response.failing_shards.any? + warnings << "Failing shards: #{response.failing_shards.join(', ')}" + end + logger.warn(warnings.join(' - ')) if warnings.any? + end + end + end +end diff --git a/lib/gitlab/storage_check/gitlab_caller.rb b/lib/gitlab/storage_check/gitlab_caller.rb new file mode 100644 index 00000000000..44952b68844 --- /dev/null +++ b/lib/gitlab/storage_check/gitlab_caller.rb @@ -0,0 +1,39 @@ +require 'excon' + +module Gitlab + module StorageCheck + class GitlabCaller + def initialize(options) + @options = options + end + + def call! + Gitlab::StorageCheck::Response.new(get_response) + rescue Errno::ECONNREFUSED, Excon::Error + # Server not ready, treated as invalid response. + Gitlab::StorageCheck::Response.new(nil) + end + + def get_response + scheme, *other_parts = URI.split(@options.target) + socket_path = if scheme == 'unix' + other_parts.compact.join + end + + connection = Excon.new(@options.target, socket: socket_path) + connection.post(path: Gitlab::StorageCheck::ENDPOINT, + headers: headers) + end + + def headers + @headers ||= begin + headers = {} + headers['Content-Type'] = headers['Accept'] = 'application/json' + headers['TOKEN'] = @options.token if @options.token + + headers + end + end + end + end +end diff --git a/lib/gitlab/storage_check/option_parser.rb b/lib/gitlab/storage_check/option_parser.rb new file mode 100644 index 00000000000..66ed7906f97 --- /dev/null +++ b/lib/gitlab/storage_check/option_parser.rb @@ -0,0 +1,39 @@ +module Gitlab + module StorageCheck + class OptionParser + def self.parse!(args) + # Start out with some defaults + options = Gitlab::StorageCheck::Options.new(nil, nil, 1, false) + + parser = ::OptionParser.new do |opts| + opts.banner = "Usage: bin/storage_check [options]" + + opts.on('-t=string', '--target string', 'URL or socket to trigger storage check') do |value| + options.target = value + end + + opts.on('-T=string', '--token string', 'Health token to use') { |value| options.token = value } + + opts.on('-i=n', '--interval n', ::OptionParser::DecimalInteger, 'Seconds between checks') do |value| + options.interval = value + end + + opts.on('-d', '--dryrun', "Output what will be performed, but don't start the process") do |value| + options.dryrun = value + end + end + parser.parse!(args) + + unless options.target + raise ::OptionParser::InvalidArgument.new('Provide a URI to provide checks') + end + + if URI.parse(options.target).scheme.nil? + raise ::OptionParser::InvalidArgument.new('Add the scheme to the target, `unix://`, `https://` or `http://` are supported') + end + + options + end + end + end +end diff --git a/lib/gitlab/storage_check/response.rb b/lib/gitlab/storage_check/response.rb new file mode 100644 index 00000000000..326ab236e3e --- /dev/null +++ b/lib/gitlab/storage_check/response.rb @@ -0,0 +1,77 @@ +require 'json' + +module Gitlab + module StorageCheck + class Response + attr_reader :http_response + + def initialize(http_response) + @http_response = http_response + end + + def valid? + @http_response && (200...299).cover?(@http_response.status) && + @http_response.headers['Content-Type'].include?('application/json') && + parsed_response + end + + def check_interval + return nil unless parsed_response + + parsed_response['check_interval'] + end + + def responsive_shards + divided_results[:responsive_shards] + end + + def skipped_shards + divided_results[:skipped_shards] + end + + def failing_shards + divided_results[:failing_shards] + end + + private + + def results + return [] unless parsed_response + + parsed_response['results'] + end + + def divided_results + return @divided_results if @divided_results + + @divided_results = {} + @divided_results[:responsive_shards] = [] + @divided_results[:skipped_shards] = [] + @divided_results[:failing_shards] = [] + + results.each do |info| + name = info['storage'] + + case info['success'] + when true + @divided_results[:responsive_shards] << name + when false + @divided_results[:failing_shards] << name + else + @divided_results[:skipped_shards] << name + end + end + + @divided_results + end + + def parsed_response + return @parsed_response if defined?(@parsed_response) + + @parsed_response = JSON.parse(@http_response.body) + rescue JSON::JSONError + @parsed_response = nil + end + end + end +end |