diff options
21 files changed, 212 insertions, 127 deletions
diff --git a/app/helpers/application_settings_helper.rb b/app/helpers/application_settings_helper.rb index 7bd34df5c95..1ee8911bb1a 100644 --- a/app/helpers/application_settings_helper.rb +++ b/app/helpers/application_settings_helper.rb @@ -108,6 +108,34 @@ module ApplicationSettingsHelper options_for_select(Sidekiq::Queue.all.map(&:name), @application_setting.sidekiq_throttling_queues) end + def circuitbreaker_failure_count_help_text + health_link = link_to(s_('AdminHealthPageLink|health page'), admin_health_check_path) + api_link = link_to(s_('CircuitBreakerApiLink|circuitbreaker api'), help_page_path("api/repository_storage_health")) + message = _("The number of failures of after which GitLab will completely "\ + "prevent access to the storage. The number of failures can be "\ + "reset in the admin interface: %{link_to_health_page} or using "\ + "the %{api_documentation_link}.") + message = message % { link_to_health_page: health_link, api_documentation_link: api_link } + + message.html_safe + end + + def circuitbreaker_failure_wait_time_help_text + _("When access to a storage fails. GitLab will prevent access to the "\ + "storage for the time specified here. This allows the filesystem to "\ + "recover. Repositories on failing shards are temporarly unavailable") + end + + def circuitbreaker_failure_reset_time_help_text + _("The time in seconds GitLab will keep failure information. When no "\ + "failures occur during this time, information about the mount is reset.") + end + + def circuitbreaker_storage_timeout_help_text + _("The time in seconds GitLab will try to access storage. After this time a "\ + "timeout error will be raised.") + end + def visible_attributes [ :admin_notification_email, @@ -116,6 +144,10 @@ module ApplicationSettingsHelper :akismet_api_key, :akismet_enabled, :auto_devops_enabled, + :circuitbreaker_failure_count_threshold, + :circuitbreaker_failure_reset_time, + :circuitbreaker_failure_wait_time, + :circuitbreaker_storage_timeout, :clientside_sentry_dsn, :clientside_sentry_enabled, :container_registry_token_expire_delay, diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index c0cc60d5ebf..a93b777a9bc 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -151,6 +151,13 @@ class ApplicationSetting < ActiveRecord::Base presence: true, numericality: { greater_than_or_equal_to: 0 } + validates :circuitbreaker_failure_count_threshold, + :circuitbreaker_failure_wait_time, + :circuitbreaker_failure_reset_time, + :circuitbreaker_storage_timeout, + presence: true, + numericality: { only_integer: true, greater_than_or_equal_to: 0 } + SUPPORTED_KEY_TYPES.each do |type| validates :"#{type}_key_restriction", presence: true, key_restriction: { type: type } end diff --git a/app/views/admin/application_settings/_form.html.haml b/app/views/admin/application_settings/_form.html.haml index dbaed1d09fb..2b23af9212e 100644 --- a/app/views/admin/application_settings/_form.html.haml +++ b/app/views/admin/application_settings/_form.html.haml @@ -530,6 +530,32 @@ = succeed "." do = link_to "repository storages documentation", help_page_path("administration/repository_storages") + %fieldset + %legend Git Storage Circuitbreaker settings + .form-group + = f.label :circuitbreaker_failure_count_threshold, _('Maximum git storage failures'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_failure_count_threshold, class: 'form-control' + .help-block + = circuitbreaker_failure_count_help_text + .form-group + = f.label :circuitbreaker_failure_wait_time, _('Seconds to wait after a storage failure'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_failure_wait_time, class: 'form-control' + .help-block + = circuitbreaker_failure_wait_time_help_text + .form-group + = f.label :circuitbreaker_failure_reset_time, _('Seconds before reseting failure information'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_failure_reset_time, class: 'form-control' + .help-block + = circuitbreaker_failure_reset_time_help_text + .form-group + = f.label :circuitbreaker_storage_timeout, _('Seconds to wait for a storage access attempt'), class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :circuitbreaker_storage_timeout, class: 'form-control' + .help-block + = circuitbreaker_storage_timeout_help_text %fieldset %legend Repository Checks diff --git a/changelogs/unreleased/bvl-circuitbreaker-improvements.yml b/changelogs/unreleased/bvl-circuitbreaker-improvements.yml new file mode 100644 index 00000000000..15cbd5592e9 --- /dev/null +++ b/changelogs/unreleased/bvl-circuitbreaker-improvements.yml @@ -0,0 +1,5 @@ +--- +title: Store circuitbreaker settings in the database instead of config +merge_request: 14842 +author: +type: changed diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 1069c7be5f0..4bfa5be0136 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -522,11 +522,6 @@ production: &base path: /home/git/repositories/ gitaly_address: unix:/home/git/gitlab/tmp/sockets/private/gitaly.socket # TCP connections are supported too (e.g. tcp://host:port) # gitaly_token: 'special token' # Optional: override global gitaly.token for this storage. - failure_count_threshold: 10 # number of failures before stopping attempts - failure_wait_time: 30 # Seconds after an access failure before allowing access again - failure_reset_time: 1800 # Time in seconds to expire failures - storage_timeout: 30 # Time in seconds to wait before aborting a storage access attempt - ## Backup settings backup: @@ -659,9 +654,6 @@ test: default: path: tmp/tests/repositories/ gitaly_address: unix:tmp/tests/gitaly/gitaly.socket - failure_count_threshold: 999999 - failure_wait_time: 0 - storage_timeout: 30 broken: path: tmp/tests/non-existent-repositories gitaly_address: unix:tmp/tests/gitaly/gitaly.socket diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index b790df565c6..12694f8016f 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -455,17 +455,6 @@ Settings.repositories.storages.each do |key, storage| # Expand relative paths storage['path'] = Settings.absolute(storage['path']) - # Set failure defaults - storage['failure_count_threshold'] ||= 10 - storage['failure_wait_time'] ||= 30 - storage['failure_reset_time'] ||= 1800 - storage['storage_timeout'] ||= 5 - # Set turn strings into numbers - storage['failure_count_threshold'] = storage['failure_count_threshold'].to_i - storage['failure_wait_time'] = storage['failure_wait_time'].to_i - storage['failure_reset_time'] = storage['failure_reset_time'].to_i - # We might want to have a timeout shorter than 1 second. - storage['storage_timeout'] = storage['storage_timeout'].to_f Settings.repositories.storages[key] = storage end diff --git a/db/migrate/20171012101043_add_circuit_breaker_properties_to_application_settings.rb b/db/migrate/20171012101043_add_circuit_breaker_properties_to_application_settings.rb new file mode 100644 index 00000000000..bcf7dbd8e64 --- /dev/null +++ b/db/migrate/20171012101043_add_circuit_breaker_properties_to_application_settings.rb @@ -0,0 +1,27 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddCircuitBreakerPropertiesToApplicationSettings < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :application_settings, + :circuitbreaker_failure_count_threshold, + :integer, + default: 160 + add_column :application_settings, + :circuitbreaker_failure_wait_time, + :integer, + default: 30 + add_column :application_settings, + :circuitbreaker_failure_reset_time, + :integer, + default: 1800 + add_column :application_settings, + :circuitbreaker_storage_timeout, + :integer, + default: 30 + end +end diff --git a/db/schema.rb b/db/schema.rb index 8aadcfeb7d1..c2c04873d4d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171006091000) do +ActiveRecord::Schema.define(version: 20171012101043) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -134,6 +134,10 @@ ActiveRecord::Schema.define(version: 20171006091000) do t.boolean "hashed_storage_enabled", default: false, null: false t.boolean "project_export_enabled", default: true, null: false t.boolean "auto_devops_enabled", default: false, null: false + t.integer "circuitbreaker_failure_count_threshold", default: 160 + t.integer "circuitbreaker_failure_wait_time", default: 30 + t.integer "circuitbreaker_failure_reset_time", default: 1800 + t.integer "circuitbreaker_storage_timeout", default: 30 end create_table "audit_events", force: :cascade do |t| diff --git a/doc/administration/img/circuitbreaker_config.png b/doc/administration/img/circuitbreaker_config.png Binary files differnew file mode 100644 index 00000000000..9250d38297c --- /dev/null +++ b/doc/administration/img/circuitbreaker_config.png diff --git a/doc/administration/repository_storage_paths.md b/doc/administration/repository_storage_paths.md index 624a908b3a3..efcabd69822 100644 --- a/doc/administration/repository_storage_paths.md +++ b/doc/administration/repository_storage_paths.md @@ -105,61 +105,26 @@ When GitLab detects access to the repositories storage fails repeatedly, it can gracefully prevent attempts to access the storage. This might be useful when the repositories are stored somewhere on the network. -The configuration could look as follows: +This can be configured from the admin interface: -**For Omnibus installations** - -1. Edit `/etc/gitlab/gitlab.rb`: - - ```ruby - git_data_dirs({ - "default" => { - "path" => "/mnt/nfs-01/git-data", - "failure_count_threshold" => 10, - "failure_wait_time" => 30, - "failure_reset_time" => 1800, - "storage_timeout" => 5 - } - }) - ``` - -1. Save the file and [reconfigure GitLab][reconfigure-gitlab] for the changes to take effect. - ---- - -**For installations from source** - -1. Edit `config/gitlab.yml`: - - ```yaml - repositories: - storages: # You must have at least a `default` storage path. - default: - path: /home/git/repositories/ - failure_count_threshold: 10 # number of failures before stopping attempts - failure_wait_time: 30 # Seconds after last access failure before trying again - failure_reset_time: 1800 # Time in seconds to expire failures - storage_timeout: 5 # Time in seconds to wait before aborting a storage access attempt - ``` - -1. Save the file and [restart GitLab][restart-gitlab] for the changes to take effect. +![circuitbreaker configuration](img/circuitbreaker_config.png) -**`failure_count_threshold`:** The number of failures of after which GitLab will +**Maximum git storage failures:** The number of failures of after which GitLab will completely prevent access to the storage. The number of failures can be reset in the admin interface: `https://gitlab.example.com/admin/health_check` or using the [api](../api/repository_storage_health.md) to allow access to the storage again. -**`failure_wait_time`:** When access to a storage fails. GitLab will prevent -access to the storage for the time specified here. This allows the filesystem to -recover without. +**Seconds to wait after a storage failure:** When access to a storage fails. GitLab +will prevent access to the storage for the time specified here. This allows the +filesystem to recover. -**`failure_reset_time`:** The time in seconds GitLab will keep failure -information. When no failures occur during this time, information about the +**Seconds before reseting failure information:** The time in seconds GitLab will +keep failure information. When no failures occur during this time, information about the mount is reset. -**`storage_timeout`:** The time in seconds GitLab will try to access storage. -After this time a timeout error will be raised. +**Seconds to wait for a storage access attempt:** The time in seconds GitLab will +try to access storage. After this time a timeout error will be raised. When storage failures occur, this will be visible in the admin interface like this: diff --git a/doc/api/settings.md b/doc/api/settings.md index be75e1b66ba..664f3ef7b77 100644 --- a/doc/api/settings.md +++ b/doc/api/settings.md @@ -69,6 +69,10 @@ PUT /application/settings | `after_sign_up_text` | string | no | Text shown to the user after signing up | | `akismet_api_key` | string | no | API key for akismet spam protection | | `akismet_enabled` | boolean | no | Enable or disable akismet spam protection | +| `circuitbreaker_failure_count_threshold` | integer | no | The number of failures of after which GitLab will completely prevent access to the storage. | +| `circuitbreaker_failure_reset_time` | integer | no | Time in seconds GitLab will keep storage failure information. When no failures occur during this time, the failure information is reset. | +| `circuitbreaker_failure_wait_time` | integer | no | Time in seconds GitLab will block access to a failing storage to allow it to recover. | +| `circuitbreaker_storage_timeout` | integer | no | Seconds to wait for a storage access attempt | | `clientside_sentry_dsn` | string | no | Required if `clientside_sentry_dsn` is enabled | | `clientside_sentry_enabled` | boolean | no | Enable Sentry error reporting for the client side | | `container_registry_token_expire_delay` | integer | no | Container Registry token duration in minutes | diff --git a/lib/gitlab/git/storage/circuit_breaker.rb b/lib/gitlab/git/storage/circuit_breaker.rb index 1eaa2d83fb6..4e2d261f461 100644 --- a/lib/gitlab/git/storage/circuit_breaker.rb +++ b/lib/gitlab/git/storage/circuit_breaker.rb @@ -2,15 +2,13 @@ module Gitlab module Git module Storage class CircuitBreaker + include CircuitBreakerSettings + FailureInfo = Struct.new(:last_failure, :failure_count) attr_reader :storage, :hostname, - :storage_path, - :failure_count_threshold, - :failure_wait_time, - :failure_reset_time, - :storage_timeout + :storage_path delegate :last_failure, :failure_count, to: :failure_info @@ -53,10 +51,6 @@ module Gitlab config = Gitlab.config.repositories.storages[@storage] @storage_path = config['path'] - @failure_count_threshold = config['failure_count_threshold'] - @failure_wait_time = config['failure_wait_time'] - @failure_reset_time = config['failure_reset_time'] - @storage_timeout = config['storage_timeout'] end def perform diff --git a/lib/gitlab/git/storage/circuit_breaker_settings.rb b/lib/gitlab/git/storage/circuit_breaker_settings.rb new file mode 100644 index 00000000000..d2313fe7c1b --- /dev/null +++ b/lib/gitlab/git/storage/circuit_breaker_settings.rb @@ -0,0 +1,29 @@ +module Gitlab + module Git + module Storage + module CircuitBreakerSettings + def failure_count_threshold + application_settings.circuitbreaker_failure_count_threshold + end + + def failure_wait_time + application_settings.circuitbreaker_failure_wait_time + end + + def failure_reset_time + application_settings.circuitbreaker_failure_reset_time + end + + def storage_timeout + application_settings.circuitbreaker_storage_timeout + end + + private + + def application_settings + Gitlab::CurrentSettings.current_application_settings + end + end + end + end +end diff --git a/lib/gitlab/git/storage/null_circuit_breaker.rb b/lib/gitlab/git/storage/null_circuit_breaker.rb index 297c043d054..60c6791a7e4 100644 --- a/lib/gitlab/git/storage/null_circuit_breaker.rb +++ b/lib/gitlab/git/storage/null_circuit_breaker.rb @@ -2,15 +2,14 @@ module Gitlab module Git module Storage class NullCircuitBreaker + include CircuitBreakerSettings + # These will have actual values attr_reader :storage, :hostname # These will always have nil values - attr_reader :storage_path, - :failure_wait_time, - :failure_reset_time, - :storage_timeout + attr_reader :storage_path def initialize(storage, hostname, error: nil) @storage = storage @@ -26,16 +25,12 @@ module Gitlab !!@error end - def failure_count_threshold - 1 - end - def last_failure circuit_broken? ? Time.now : nil end def failure_count - circuit_broken? ? 1 : 0 + circuit_broken? ? failure_count_threshold : 0 end def failure_info diff --git a/spec/features/admin/admin_health_check_spec.rb b/spec/features/admin/admin_health_check_spec.rb index 09e6965849a..4430fc15501 100644 --- a/spec/features/admin/admin_health_check_spec.rb +++ b/spec/features/admin/admin_health_check_spec.rb @@ -65,9 +65,11 @@ feature "Admin Health Check", :feature, :broken_storage do it 'shows storage failure information' do hostname = Gitlab::Environment.hostname + maximum_failures = Gitlab::CurrentSettings.current_application_settings + .circuitbreaker_failure_count_threshold expect(page).to have_content('broken: failed storage access attempt on host:') - expect(page).to have_content("#{hostname}: 1 of 10 failures.") + expect(page).to have_content("#{hostname}: 1 of #{maximum_failures} failures.") end it 'allows resetting storage failures' do diff --git a/spec/initializers/settings_spec.rb b/spec/initializers/settings_spec.rb index 9a974e70e8c..a11824d0ac5 100644 --- a/spec/initializers/settings_spec.rb +++ b/spec/initializers/settings_spec.rb @@ -18,26 +18,6 @@ describe Settings do end end - describe '#repositories' do - it 'assigns the default failure attributes' do - repository_settings = Gitlab.config.repositories.storages['broken'] - - expect(repository_settings['failure_count_threshold']).to eq(10) - expect(repository_settings['failure_wait_time']).to eq(30) - expect(repository_settings['failure_reset_time']).to eq(1800) - expect(repository_settings['storage_timeout']).to eq(5) - end - - it 'can be accessed with dot syntax all the way down' do - expect(Gitlab.config.repositories.storages.broken.failure_count_threshold).to eq(10) - end - - it 'can be accessed in a very specific way that breaks without reassigning each element with Settingslogic' do - storage_settings = Gitlab.config.repositories.storages['broken'] - expect(storage_settings.failure_count_threshold).to eq(10) - end - end - describe '#host_without_www' do context 'URL with protocol' do it 'returns the host' do diff --git a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb index 98cf7966dad..68eb93eb9f9 100644 --- a/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb +++ b/spec/lib/gitlab/git/storage/circuit_breaker_spec.rb @@ -10,18 +10,10 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: # Override test-settings for the circuitbreaker with something more realistic # for these specs. stub_storage_settings('default' => { - 'path' => TestEnv.repos_path, - 'failure_count_threshold' => 10, - 'failure_wait_time' => 30, - 'failure_reset_time' => 1800, - 'storage_timeout' => 5 + 'path' => TestEnv.repos_path }, 'broken' => { - 'path' => 'tmp/tests/non-existent-repositories', - 'failure_count_threshold' => 10, - 'failure_wait_time' => 30, - 'failure_reset_time' => 1800, - 'storage_timeout' => 5 + 'path' => 'tmp/tests/non-existent-repositories' }, 'nopath' => { 'path' => nil } ) @@ -75,10 +67,39 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: expect(circuit_breaker.hostname).to eq(hostname) expect(circuit_breaker.storage).to eq('default') expect(circuit_breaker.storage_path).to eq(TestEnv.repos_path) - expect(circuit_breaker.failure_count_threshold).to eq(10) - expect(circuit_breaker.failure_wait_time).to eq(30) - expect(circuit_breaker.failure_reset_time).to eq(1800) - expect(circuit_breaker.storage_timeout).to eq(5) + end + end + + context 'circuitbreaker settings' do + before do + stub_application_setting(circuitbreaker_failure_count_threshold: 0, + circuitbreaker_failure_wait_time: 1, + circuitbreaker_failure_reset_time: 2, + circuitbreaker_storage_timeout: 3) + end + + describe '#failure_count_threshold' do + it 'reads the value from settings' do + expect(circuit_breaker.failure_count_threshold).to eq(0) + end + end + + describe '#failure_wait_time' do + it 'reads the value from settings' do + expect(circuit_breaker.failure_wait_time).to eq(1) + end + end + + describe '#failure_reset_time' do + it 'reads the value from settings' do + expect(circuit_breaker.failure_reset_time).to eq(2) + end + end + + describe '#storage_timeout' do + it 'reads the value from settings' do + expect(circuit_breaker.storage_timeout).to eq(3) + end end end @@ -151,10 +172,7 @@ describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: context 'the `failure_wait_time` is set to 0' do before do - stub_storage_settings('default' => { - 'failure_wait_time' => 0, - 'path' => TestEnv.repos_path - }) + stub_application_setting(circuitbreaker_failure_wait_time: 0) end it 'is working even when there is a recent failure' do diff --git a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb index 0e645008c88..7ee6d2f3709 100644 --- a/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb +++ b/spec/lib/gitlab/git/storage/null_circuit_breaker_spec.rb @@ -54,6 +54,10 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do end describe '#failure_count_threshold' do + before do + stub_application_setting(circuitbreaker_failure_count_threshold: 1) + end + it { expect(breaker.failure_count_threshold).to eq(1) } end diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb index 78cacf9ff5d..cf192691507 100644 --- a/spec/models/application_setting_spec.rb +++ b/spec/models/application_setting_spec.rb @@ -114,6 +114,19 @@ describe ApplicationSetting do it { expect(setting.repository_storages).to eq(['default']) } end + context 'circuitbreaker settings' do + [:circuitbreaker_failure_count_threshold, + :circuitbreaker_failure_wait_time, + :circuitbreaker_failure_reset_time, + :circuitbreaker_storage_timeout].each do |field| + it "Validates #{field} as number" do + is_expected.to validate_numericality_of(field) + .only_integer + .is_greater_than_or_equal_to(0) + end + end + end + context 'repository storages' do before do storages = { diff --git a/spec/requests/api/settings_spec.rb b/spec/requests/api/settings_spec.rb index 0b9a4b5c3db..c24de58ee9d 100644 --- a/spec/requests/api/settings_spec.rb +++ b/spec/requests/api/settings_spec.rb @@ -23,6 +23,7 @@ describe API::Settings, 'Settings' do expect(json_response['dsa_key_restriction']).to eq(0) expect(json_response['ecdsa_key_restriction']).to eq(0) expect(json_response['ed25519_key_restriction']).to eq(0) + expect(json_response['circuitbreaker_failure_count_threshold']).not_to be_nil end end @@ -52,7 +53,8 @@ describe API::Settings, 'Settings' do rsa_key_restriction: ApplicationSetting::FORBIDDEN_KEY_VALUE, dsa_key_restriction: 2048, ecdsa_key_restriction: 384, - ed25519_key_restriction: 256 + ed25519_key_restriction: 256, + circuitbreaker_failure_wait_time: 2 expect(response).to have_http_status(200) expect(json_response['default_projects_limit']).to eq(3) @@ -73,6 +75,7 @@ describe API::Settings, 'Settings' do expect(json_response['dsa_key_restriction']).to eq(2048) expect(json_response['ecdsa_key_restriction']).to eq(384) expect(json_response['ed25519_key_restriction']).to eq(256) + expect(json_response['circuitbreaker_failure_wait_time']).to eq(2) end end diff --git a/spec/support/stub_configuration.rb b/spec/support/stub_configuration.rb index 2dfb4d4a07f..4d448a55978 100644 --- a/spec/support/stub_configuration.rb +++ b/spec/support/stub_configuration.rb @@ -43,10 +43,6 @@ module StubConfiguration messages['default'] ||= Gitlab.config.repositories.storages.default messages.each do |storage_name, storage_settings| storage_settings['path'] = TestEnv.repos_path unless storage_settings.key?('path') - storage_settings['failure_count_threshold'] ||= 10 - storage_settings['failure_wait_time'] ||= 30 - storage_settings['failure_reset_time'] ||= 1800 - storage_settings['storage_timeout'] ||= 5 end allow(Gitlab.config.repositories).to receive(:storages).and_return(Settingslogic.new(messages)) |