diff options
author | Ben Kochie <superq@gmail.com> | 2017-07-24 13:50:12 +0200 |
---|---|---|
committer | Simon Knox <psimyn@gmail.com> | 2017-07-27 11:44:59 +1000 |
commit | b9bdca220ab43eda44030c5e8695949e9db7261d (patch) | |
tree | 3c1ecf41aaa8ca8d9364e41cd3b7482d96f7434b | |
parent | f8ccd895468f3f36cb064457aba2ce676c24c42e (diff) | |
download | gitlab-ce-b9bdca220ab43eda44030c5e8695949e9db7261d.tar.gz |
Add patch for Prometheus metric updates.
Include patch of https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/12812
-rw-r--r-- | app/controllers/sessions_controller.rb | 2 | ||||
-rw-r--r-- | config/initializers/8_metrics.rb | 2 | ||||
-rw-r--r-- | doc/administration/monitoring/prometheus/gitlab_metrics.md | 33 | ||||
-rw-r--r-- | lib/gitlab/health_checks/fs_shards_check.rb | 6 | ||||
-rw-r--r-- | lib/gitlab/health_checks/simple_abstract_check.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/metrics/connection_rack_middleware.rb | 45 | ||||
-rw-r--r-- | lib/gitlab/metrics/requests_rack_middleware.rb | 39 | ||||
-rw-r--r-- | spec/controllers/metrics_controller_spec.rb | 10 | ||||
-rw-r--r-- | spec/lib/gitlab/health_checks/fs_shards_check_spec.rb | 18 | ||||
-rw-r--r-- | spec/lib/gitlab/health_checks/simple_check_shared.rb | 6 | ||||
-rw-r--r-- | spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb (renamed from spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb) | 35 |
11 files changed, 89 insertions, 109 deletions
diff --git a/app/controllers/sessions_controller.rb b/app/controllers/sessions_controller.rb index 8118a32aeef..11826e07163 100644 --- a/app/controllers/sessions_controller.rb +++ b/app/controllers/sessions_controller.rb @@ -56,7 +56,7 @@ class SessionsController < Devise::SessionsController private def login_counter - @login_counter ||= Gitlab::Metrics.counter(:user_session_logins, 'User sign in count') + @login_counter ||= Gitlab::Metrics.counter(:user_session_logins_total, 'User sign in count') end # Handle an "initial setup" state, where there's only one user, it's an admin, diff --git a/config/initializers/8_metrics.rb b/config/initializers/8_metrics.rb index 5ab15f45ed2..2aeb94d47cd 100644 --- a/config/initializers/8_metrics.rb +++ b/config/initializers/8_metrics.rb @@ -126,7 +126,7 @@ Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_ Gitlab::Application.configure do |config| # 0 should be Sentry to catch errors in this middleware - config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware) + config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware) end if Gitlab::Metrics.enabled? diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 7b5ee3dfe05..e3684263208 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -26,21 +26,24 @@ server, because the embedded server configuration is overwritten once every In this experimental phase, only a few metrics are available: -| Metric | Type | Description | -| ------ | ---- | ----------- | -| db_ping_timeout | Gauge | Whether or not the last database ping timed out | -| db_ping_success | Gauge | Whether or not the last database ping succeeded | -| db_ping_latency | Gauge | Round trip time of the database ping | -| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out | -| redis_ping_success | Gauge | Whether or not the last redis ping succeeded | -| redis_ping_latency | Gauge | Round trip time of the redis ping | -| filesystem_access_latency | gauge | Latency in accessing a specific filesystem | -| filesystem_accessible | gauge | Whether or not a specific filesystem is accessible | -| filesystem_write_latency | gauge | Write latency of a specific filesystem | -| filesystem_writable | gauge | Whether or not the filesystem is writable | -| filesystem_read_latency | gauge | Read latency of a specific filesystem | -| filesystem_readable | gauge | Whether or not the filesystem is readable | -| user_sessions_logins | Counter | Counter of how many users have logged in | +| Metric | Type | Description | +| --------------------------------- | --------- | ----------- | +| db_ping_timeout | Gauge | Whether or not the last database ping timed out | +| db_ping_success | Gauge | Whether or not the last database ping succeeded | +| db_ping_latency_seconds | Gauge | Round trip time of the database ping | +| filesystem_access_latency_seconds | Gauge | Latency in accessing a specific filesystem | +| filesystem_accessible | Gauge | Whether or not a specific filesystem is accessible | +| filesystem_write_latency_seconds | Gauge | Write latency of a specific filesystem | +| filesystem_writable | Gauge | Whether or not the filesystem is writable | +| filesystem_read_latency_seconds | Gauge | Read latency of a specific filesystem | +| filesystem_readable | Gauge | Whether or not the filesystem is readable | +| http_requests_total | Counter | Rack request count | +| http_request_duration_seconds | Histogram | HTTP response time from rack middleware | +| rack_uncaught_errors_total | Counter | Rack connections handling uncaught errors count | +| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out | +| redis_ping_success | Gauge | Whether or not the last redis ping succeeded | +| redis_ping_latency_seconds | Gauge | Round trip time of the redis ping | +| user_session_logins_total | Counter | Counter of how many users have logged in | ## Metrics shared directory diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index 70da4080cae..bebde857b16 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -35,9 +35,9 @@ module Gitlab repository_storages.flat_map do |storage_name| tmp_file_path = tmp_file_path(storage_name) [ - operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name), - operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name), - operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name) + operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, -> { storage_stat_test(storage_name) }, shard: storage_name), + operation_metrics(:filesystem_writable, :filesystem_write_latency_seconds, -> { storage_write_test(tmp_file_path) }, shard: storage_name), + operation_metrics(:filesystem_readable, :filesystem_read_latency_seconds, -> { storage_read_test(tmp_file_path) }, shard: storage_name) ].flatten end end diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb index fbe1645c1b1..3dcb28a193c 100644 --- a/lib/gitlab/health_checks/simple_abstract_check.rb +++ b/lib/gitlab/health_checks/simple_abstract_check.rb @@ -20,7 +20,7 @@ module Gitlab [ metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0), metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0), - metric("#{metric_prefix}_latency", elapsed) + metric("#{metric_prefix}_latency_seconds", elapsed) ] end end diff --git a/lib/gitlab/metrics/connection_rack_middleware.rb b/lib/gitlab/metrics/connection_rack_middleware.rb deleted file mode 100644 index b3da360be8f..00000000000 --- a/lib/gitlab/metrics/connection_rack_middleware.rb +++ /dev/null @@ -1,45 +0,0 @@ -module Gitlab - module Metrics - class ConnectionRackMiddleware - def initialize(app) - @app = app - end - - def self.rack_request_count - @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count') - end - - def self.rack_response_count - @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count') - end - - def self.rack_uncaught_errors_count - @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count') - end - - def self.rack_execution_time - @rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time', - {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]) - end - - def call(env) - method = env['REQUEST_METHOD'].downcase - started = Time.now.to_f - begin - ConnectionRackMiddleware.rack_request_count.increment(method: method) - - status, headers, body = @app.call(env) - - ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status) - [status, headers, body] - rescue - ConnectionRackMiddleware.rack_uncaught_errors_count.increment - raise - ensure - elapsed = Time.now.to_f - started - ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed) - end - end - end - end -end diff --git a/lib/gitlab/metrics/requests_rack_middleware.rb b/lib/gitlab/metrics/requests_rack_middleware.rb new file mode 100644 index 00000000000..0902336e00f --- /dev/null +++ b/lib/gitlab/metrics/requests_rack_middleware.rb @@ -0,0 +1,39 @@ +module Gitlab + module Metrics + class RequestsRackMiddleware + def initialize(app) + @app = app + end + + def self.http_request_total + @http_request_total ||= Gitlab::Metrics.counter(:http_requests_total, 'Request count') + end + + def self.rack_uncaught_errors_count + @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors_total, 'Request handling uncaught errors count') + end + + def self.http_request_duration_seconds + @http_request_duration_seconds ||= Gitlab::Metrics.histogram(:http_request_duration_seconds, 'Request handling execution time', + {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 2.5, 5, 10, 25]) + end + + def call(env) + method = env['REQUEST_METHOD'].downcase + started = Time.now.to_f + begin + RequestsRackMiddleware.http_request_total.increment(method: method) + + status, headers, body = @app.call(env) + + elapsed = Time.now.to_f - started + RequestsRackMiddleware.http_request_duration_seconds.observe({ method: method, status: status }, elapsed) + [status, headers, body] + rescue + RequestsRackMiddleware.rack_uncaught_errors_count.increment + raise + end + end + end + end +end diff --git a/spec/controllers/metrics_controller_spec.rb b/spec/controllers/metrics_controller_spec.rb index 265efb84cf2..d604cd02df8 100644 --- a/spec/controllers/metrics_controller_spec.rb +++ b/spec/controllers/metrics_controller_spec.rb @@ -24,7 +24,7 @@ describe MetricsController do expect(response.body).to match(/^db_ping_timeout 0$/) expect(response.body).to match(/^db_ping_success 1$/) - expect(response.body).to match(/^db_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^db_ping_latency_seconds [0-9\.]+$/) end it 'returns Redis ping metrics' do @@ -32,17 +32,17 @@ describe MetricsController do expect(response.body).to match(/^redis_ping_timeout 0$/) expect(response.body).to match(/^redis_ping_success 1$/) - expect(response.body).to match(/^redis_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^redis_ping_latency_seconds [0-9\.]+$/) end it 'returns file system check metrics' do get :index - expect(response.body).to match(/^filesystem_access_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_access_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/) - expect(response.body).to match(/^filesystem_write_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_write_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/) - expect(response.body).to match(/^filesystem_read_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_read_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/) end diff --git a/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb b/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb index b333e162909..3de73a9ff65 100644 --- a/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb @@ -109,9 +109,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end @@ -127,9 +127,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 1)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 1)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end end @@ -159,9 +159,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end end diff --git a/spec/lib/gitlab/health_checks/simple_check_shared.rb b/spec/lib/gitlab/health_checks/simple_check_shared.rb index 3f871d66034..67b19f2cefa 100644 --- a/spec/lib/gitlab/health_checks/simple_check_shared.rb +++ b/spec/lib/gitlab/health_checks/simple_check_shared.rb @@ -8,7 +8,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end context 'Check is misbehaving' do @@ -18,7 +18,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end context 'Check is timeouting' do @@ -28,7 +28,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end end diff --git a/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb b/spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb index 94251af305f..461b1e4182a 100644 --- a/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb +++ b/spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Gitlab::Metrics::ConnectionRackMiddleware do +describe Gitlab::Metrics::RequestsRackMiddleware do let(:app) { double('app') } subject { described_class.new(app) } @@ -22,14 +22,8 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do allow(app).to receive(:call).and_return([200, nil, nil]) end - it 'increments response count with status label' do - expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get')) - - subject.call(env) - end - it 'increments requests count' do - expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') + expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get') subject.call(env) end @@ -38,20 +32,21 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do execution_time = 10 allow(app).to receive(:call) do |*args| Timecop.freeze(execution_time.seconds) + [200, nil, nil] end - expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ status: 200, method: 'get' }, execution_time) subject.call(env) end end context '@app.call throws exception' do - let(:rack_response_count) { double('rack_response_count') } + let(:http_request_duration_seconds) { double('http_request_duration_seconds') } before do allow(app).to receive(:call).and_raise(StandardError) - allow(described_class).to receive(:rack_response_count).and_return(rack_response_count) + allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds) end it 'increments exceptions count' do @@ -61,25 +56,13 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do end it 'increments requests count' do - expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') - - expect { subject.call(env) }.to raise_error(StandardError) - end - - it "does't increment response count" do - expect(described_class.rack_response_count).not_to receive(:increment) + expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get') expect { subject.call(env) }.to raise_error(StandardError) end - it 'measures execution time' do - execution_time = 10 - allow(app).to receive(:call) do |*args| - Timecop.freeze(execution_time.seconds) - raise StandardError - end - - expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + it "does't measure request execution time" do + expect(described_class.http_request_duration_seconds).not_to receive(:increment) expect { subject.call(env) }.to raise_error(StandardError) end |