diff options
19 files changed, 622 insertions, 373 deletions
diff --git a/changelogs/unreleased/41771-reduce-cardinality-of-metrics.yml b/changelogs/unreleased/41771-reduce-cardinality-of-metrics.yml new file mode 100644 index 00000000000..f64fd66ef79 --- /dev/null +++ b/changelogs/unreleased/41771-reduce-cardinality-of-metrics.yml @@ -0,0 +1,5 @@ +--- +title: Reduce the number of Prometheus metrics +merge_request: 16443 +author: +type: performance diff --git a/lib/gitlab/gitaly_client.rb b/lib/gitlab/gitaly_client.rb index 6bd256f57c7..c5d3e944f7d 100644 --- a/lib/gitlab/gitaly_client.rb +++ b/lib/gitlab/gitaly_client.rb @@ -6,6 +6,7 @@ require 'grpc/health/v1/health_services_pb' module Gitlab module GitalyClient + include Gitlab::Metrics::Methods module MigrationStatus DISABLED = 1 OPT_IN = 2 @@ -33,8 +34,6 @@ module Gitlab CLIENT_NAME = (Sidekiq.server? ? 'gitlab-sidekiq' : 'gitlab-web').freeze MUTEX = Mutex.new - METRICS_MUTEX = Mutex.new - private_constant :MUTEX, :METRICS_MUTEX class << self attr_accessor :query_time @@ -42,28 +41,14 @@ module Gitlab self.query_time = 0 - def self.migrate_histogram - @migrate_histogram ||= - METRICS_MUTEX.synchronize do - # If a thread was blocked on the mutex, the value was set already - return @migrate_histogram if @migrate_histogram - - Gitlab::Metrics.histogram(:gitaly_migrate_call_duration_seconds, - "Gitaly migration call execution timings", - gitaly_enabled: nil, feature: nil) - end + define_histogram :gitaly_migrate_call_duration_seconds do + docstring "Gitaly migration call execution timings" + base_labels gitaly_enabled: nil, feature: nil end - def self.gitaly_call_histogram - @gitaly_call_histogram ||= - METRICS_MUTEX.synchronize do - # If a thread was blocked on the mutex, the value was set already - return @gitaly_call_histogram if @gitaly_call_histogram - - Gitlab::Metrics.histogram(:gitaly_controller_action_duration_seconds, - "Gitaly endpoint histogram by controller and action combination", - Gitlab::Metrics::Transaction::BASE_LABELS.merge(gitaly_service: nil, rpc: nil)) - end + define_histogram :gitaly_controller_action_duration_seconds do + docstring "Gitaly endpoint histogram by controller and action combination" + base_labels Gitlab::Metrics::Transaction::BASE_LABELS.merge(gitaly_service: nil, rpc: nil) end def self.stub(name, storage) @@ -145,7 +130,7 @@ module Gitlab # Keep track, seperately, for the performance bar self.query_time += duration - gitaly_call_histogram.observe( + gitaly_controller_action_duration_seconds.observe( current_transaction_labels.merge(gitaly_service: service.to_s, rpc: rpc.to_s), duration) end @@ -247,7 +232,7 @@ module Gitlab yield is_enabled ensure total_time = Gitlab::Metrics::System.monotonic_time - start - migrate_histogram.observe({ gitaly_enabled: is_enabled, feature: feature }, total_time) + gitaly_migrate_call_duration_seconds.observe({ gitaly_enabled: is_enabled, feature: feature }, total_time) feature_stack.shift Thread.current[:gitaly_feature_stack] = nil if feature_stack.empty? end diff --git a/lib/gitlab/metrics.rb b/lib/gitlab/metrics.rb index 4779755bb22..7d63ca5627d 100644 --- a/lib/gitlab/metrics.rb +++ b/lib/gitlab/metrics.rb @@ -1,7 +1,7 @@ module Gitlab module Metrics - extend Gitlab::Metrics::InfluxDb - extend Gitlab::Metrics::Prometheus + include Gitlab::Metrics::InfluxDb + include Gitlab::Metrics::Prometheus def self.enabled? influx_metrics_enabled? || prometheus_metrics_enabled? diff --git a/lib/gitlab/metrics/influx_db.rb b/lib/gitlab/metrics/influx_db.rb index ef44a13df51..66f30e3b397 100644 --- a/lib/gitlab/metrics/influx_db.rb +++ b/lib/gitlab/metrics/influx_db.rb @@ -1,179 +1,187 @@ module Gitlab module Metrics module InfluxDb - include Gitlab::CurrentSettings - extend self + extend ActiveSupport::Concern + include Gitlab::Metrics::Methods + + EXECUTION_MEASUREMENT_BUCKETS = [0.001, 0.01, 0.1, 1].freeze MUTEX = Mutex.new private_constant :MUTEX - def influx_metrics_enabled? - settings[:enabled] || false - end + class_methods do + def influx_metrics_enabled? + settings[:enabled] || false + end - # Prometheus histogram buckets used for arbitrary code measurements - EXECUTION_MEASUREMENT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1].freeze - RAILS_ROOT = Rails.root.to_s - METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s - PATH_REGEX = /^#{RAILS_ROOT}\/?/ - - def settings - @settings ||= { - enabled: current_application_settings[:metrics_enabled], - pool_size: current_application_settings[:metrics_pool_size], - timeout: current_application_settings[:metrics_timeout], - method_call_threshold: current_application_settings[:metrics_method_call_threshold], - host: current_application_settings[:metrics_host], - port: current_application_settings[:metrics_port], - sample_interval: current_application_settings[:metrics_sample_interval] || 15, - packet_size: current_application_settings[:metrics_packet_size] || 1 - } - end + # Prometheus histogram buckets used for arbitrary code measurements + + def settings + @settings ||= begin + current_settings = Gitlab::CurrentSettings.current_application_settings + + { + enabled: current_settings[:metrics_enabled], + pool_size: current_settings[:metrics_pool_size], + timeout: current_settings[:metrics_timeout], + method_call_threshold: current_settings[:metrics_method_call_threshold], + host: current_settings[:metrics_host], + port: current_settings[:metrics_port], + sample_interval: current_settings[:metrics_sample_interval] || 15, + packet_size: current_settings[:metrics_packet_size] || 1 + } + end + end - def mri? - RUBY_ENGINE == 'ruby' - end + def mri? + RUBY_ENGINE == 'ruby' + end - def method_call_threshold - # This is memoized since this method is called for every instrumented - # method. Loading data from an external cache on every method call slows - # things down too much. - # in milliseconds - @method_call_threshold ||= settings[:method_call_threshold] - end + def method_call_threshold + # This is memoized since this method is called for every instrumented + # method. Loading data from an external cache on every method call slows + # things down too much. + # in milliseconds + @method_call_threshold ||= settings[:method_call_threshold] + end - def submit_metrics(metrics) - prepared = prepare_metrics(metrics) + def submit_metrics(metrics) + prepared = prepare_metrics(metrics) - pool&.with do |connection| - prepared.each_slice(settings[:packet_size]) do |slice| - begin - connection.write_points(slice) - rescue StandardError + pool&.with do |connection| + prepared.each_slice(settings[:packet_size]) do |slice| + begin + connection.write_points(slice) + rescue StandardError + end end end + rescue Errno::EADDRNOTAVAIL, SocketError => ex + Gitlab::EnvironmentLogger.error('Cannot resolve InfluxDB address. GitLab Performance Monitoring will not work.') + Gitlab::EnvironmentLogger.error(ex) end - rescue Errno::EADDRNOTAVAIL, SocketError => ex - Gitlab::EnvironmentLogger.error('Cannot resolve InfluxDB address. GitLab Performance Monitoring will not work.') - Gitlab::EnvironmentLogger.error(ex) - end - def prepare_metrics(metrics) - metrics.map do |hash| - new_hash = hash.symbolize_keys + def prepare_metrics(metrics) + metrics.map do |hash| + new_hash = hash.symbolize_keys - new_hash[:tags].each do |key, value| - if value.blank? - new_hash[:tags].delete(key) - else - new_hash[:tags][key] = escape_value(value) + new_hash[:tags].each do |key, value| + if value.blank? + new_hash[:tags].delete(key) + else + new_hash[:tags][key] = escape_value(value) + end end + + new_hash end + end - new_hash + def escape_value(value) + value.to_s.gsub('=', '\\=') end - end - def escape_value(value) - value.to_s.gsub('=', '\\=') - end + # Measures the execution time of a block. + # + # Example: + # + # Gitlab::Metrics.measure(:find_by_username_duration) do + # User.find_by_username(some_username) + # end + # + # name - The name of the field to store the execution time in. + # + # Returns the value yielded by the supplied block. + def measure(name) + trans = current_transaction + + return yield unless trans + + real_start = Time.now.to_f + cpu_start = System.cpu_time + + retval = yield + + cpu_stop = System.cpu_time + real_stop = Time.now.to_f + + real_time = (real_stop - real_start) + cpu_time = cpu_stop - cpu_start + + real_duration_seconds = fetch_histogram("gitlab_#{name}_real_duration_seconds".to_sym) do + docstring "Measure #{name}" + base_labels Transaction::BASE_LABELS + buckets EXECUTION_MEASUREMENT_BUCKETS + end - # Measures the execution time of a block. - # - # Example: - # - # Gitlab::Metrics.measure(:find_by_username_duration) do - # User.find_by_username(some_username) - # end - # - # name - The name of the field to store the execution time in. - # - # Returns the value yielded by the supplied block. - def measure(name) - trans = current_transaction - - return yield unless trans - - real_start = Time.now.to_f - cpu_start = System.cpu_time - - retval = yield - - cpu_stop = System.cpu_time - real_stop = Time.now.to_f - - real_time = (real_stop - real_start) - cpu_time = cpu_stop - cpu_start - - Gitlab::Metrics.histogram("gitlab_#{name}_real_duration_seconds".to_sym, - "Measure #{name}", - Transaction::BASE_LABELS, - EXECUTION_MEASUREMENT_BUCKETS) - .observe(trans.labels, real_time) - - Gitlab::Metrics.histogram("gitlab_#{name}_cpu_duration_seconds".to_sym, - "Measure #{name}", - Transaction::BASE_LABELS, - EXECUTION_MEASUREMENT_BUCKETS) - .observe(trans.labels, cpu_time / 1000.0) - - # InfluxDB stores the _real_time time values as milliseconds - trans.increment("#{name}_real_time", real_time * 1000, false) - trans.increment("#{name}_cpu_time", cpu_time, false) - trans.increment("#{name}_call_count", 1, false) - - retval - end + real_duration_seconds.observe(trans.labels, real_time) - # Sets the action of the current transaction (if any) - # - # action - The name of the action. - def action=(action) - trans = current_transaction + cpu_duration_seconds = fetch_histogram("gitlab_#{name}_cpu_duration_seconds".to_sym) do + docstring "Measure #{name}" + base_labels Transaction::BASE_LABELS + buckets EXECUTION_MEASUREMENT_BUCKETS + with_feature "prometheus_metrics_measure_#{name}_cpu_duration" + end + cpu_duration_seconds.observe(trans.labels, cpu_time) - trans&.action = action - end + # InfluxDB stores the _real_time and _cpu_time time values as milliseconds + trans.increment("#{name}_real_time", real_time.in_milliseconds, false) + trans.increment("#{name}_cpu_time", cpu_time.in_milliseconds, false) + trans.increment("#{name}_call_count", 1, false) - # Tracks an event. - # - # See `Gitlab::Metrics::Transaction#add_event` for more details. - def add_event(*args) - trans = current_transaction + retval + end - trans&.add_event(*args) - end + # Sets the action of the current transaction (if any) + # + # action - The name of the action. + def action=(action) + trans = current_transaction - # Returns the prefix to use for the name of a series. - def series_prefix - @series_prefix ||= Sidekiq.server? ? 'sidekiq_' : 'rails_' - end + trans&.action = action + end - # Allow access from other metrics related middlewares - def current_transaction - Transaction.current - end + # Tracks an event. + # + # See `Gitlab::Metrics::Transaction#add_event` for more details. + def add_event(*args) + trans = current_transaction - # When enabled this should be set before being used as the usual pattern - # "@foo ||= bar" is _not_ thread-safe. - # rubocop:disable Gitlab/ModuleWithInstanceVariables - def pool - if influx_metrics_enabled? - if @pool.nil? - MUTEX.synchronize do - @pool ||= ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do - host = settings[:host] - port = settings[:port] - - InfluxDB::Client - .new(udp: { host: host, port: port }) + trans&.add_event(*args) + end + + # Returns the prefix to use for the name of a series. + def series_prefix + @series_prefix ||= Sidekiq.server? ? 'sidekiq_' : 'rails_' + end + + # Allow access from other metrics related middlewares + def current_transaction + Transaction.current + end + + # When enabled this should be set before being used as the usual pattern + # "@foo ||= bar" is _not_ thread-safe. + # rubocop:disable Gitlab/ModuleWithInstanceVariables + def pool + if influx_metrics_enabled? + if @pool.nil? + MUTEX.synchronize do + @pool ||= ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do + host = settings[:host] + port = settings[:port] + + InfluxDB::Client + .new(udp: { host: host, port: port }) + end end end - end - @pool + @pool + end end + # rubocop:enable Gitlab/ModuleWithInstanceVariables end - # rubocop:enable Gitlab/ModuleWithInstanceVariables end end end diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index c2f9db56824..b11520a79bb 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -4,26 +4,15 @@ module Gitlab module Metrics # Class for tracking timing information about method calls class MethodCall - @@measurement_enabled_cache = Concurrent::AtomicBoolean.new(false) - @@measurement_enabled_cache_expires_at = Concurrent::AtomicReference.new(Time.now.to_i) - MUTEX = Mutex.new + include Gitlab::Metrics::Methods BASE_LABELS = { module: nil, method: nil }.freeze attr_reader :real_time, :cpu_time, :call_count, :labels - def self.call_duration_histogram - return @call_duration_histogram if @call_duration_histogram - - MUTEX.synchronize do - @call_duration_histogram ||= Gitlab::Metrics.histogram( - :gitlab_method_call_duration_seconds, - 'Method calls real duration', - Transaction::BASE_LABELS.merge(BASE_LABELS), - [0.01, 0.05, 0.1, 0.5, 1]) - end - end - - def self.measurement_enabled_cache_expires_at - @@measurement_enabled_cache_expires_at + define_histogram :gitlab_method_call_duration_seconds do + docstring 'Method calls real duration' + base_labels Transaction::BASE_LABELS.merge(BASE_LABELS) + buckets [0.01, 0.05, 0.1, 0.5, 1] + with_feature :prometheus_metrics_method_instrumentation end # name - The full name of the method (including namespace) such as @@ -53,8 +42,8 @@ module Gitlab @cpu_time += cpu_time @call_count += 1 - if call_measurement_enabled? && above_threshold? - self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time) + if above_threshold? + self.class.gitlab_method_call_duration_seconds.observe(@transaction.labels.merge(labels), real_time) end retval @@ -78,17 +67,6 @@ module Gitlab def above_threshold? real_time.in_milliseconds >= Metrics.method_call_threshold end - - def call_measurement_enabled? - expires_at = @@measurement_enabled_cache_expires_at.value - if expires_at < Time.now.to_i - if @@measurement_enabled_cache_expires_at.compare_and_set(expires_at, 1.minute.from_now.to_i) - @@measurement_enabled_cache.value = Feature.get(:prometheus_metrics_method_instrumentation).enabled? - end - end - - @@measurement_enabled_cache.value - end end end end diff --git a/lib/gitlab/metrics/methods.rb b/lib/gitlab/metrics/methods.rb new file mode 100644 index 00000000000..cd7c1e507f7 --- /dev/null +++ b/lib/gitlab/metrics/methods.rb @@ -0,0 +1,129 @@ +# rubocop:disable Style/ClassVars + +module Gitlab + module Metrics + module Methods + extend ActiveSupport::Concern + + included do + @@_metric_provider_mutex ||= Mutex.new + @@_metrics_provider_cache = {} + end + + class_methods do + def reload_metric!(name) + @@_metrics_provider_cache.delete(name) + end + + private + + def define_metric(type, name, opts = {}, &block) + if respond_to?(name) + raise ArgumentError, "method #{name} already exists" + end + + define_singleton_method(name) do + # inlining fetch_metric method to avoid method call overhead when instrumenting hot spots + @@_metrics_provider_cache[name] || init_metric(type, name, opts, &block) + end + end + + def fetch_metric(type, name, opts = {}, &block) + @@_metrics_provider_cache[name] || init_metric(type, name, opts, &block) + end + + def init_metric(type, name, opts = {}, &block) + options = MetricOptions.new(opts) + options.evaluate(&block) + + if disabled_by_feature(options) + synchronized_cache_fill(name) { NullMetric.instance } + else + synchronized_cache_fill(name) { build_metric!(type, name, options) } + end + end + + def synchronized_cache_fill(key) + @@_metric_provider_mutex.synchronize do + @@_metrics_provider_cache[key] ||= yield + end + end + + def disabled_by_feature(options) + options.with_feature && !Feature.get(options.with_feature).enabled? + end + + def build_metric!(type, name, options) + case type + when :gauge + Gitlab::Metrics.gauge(name, options.docstring, options.base_labels, options.multiprocess_mode) + when :counter + Gitlab::Metrics.counter(name, options.docstring, options.base_labels) + when :histogram + Gitlab::Metrics.histogram(name, options.docstring, options.base_labels, options.buckets) + when :summary + raise NotImplementedError, "summary metrics are not currently supported" + else + raise ArgumentError, "uknown metric type #{type}" + end + end + + # Fetch and/or initialize counter metric + # @param [Symbol] name + # @param [Hash] opts + def fetch_counter(name, opts = {}, &block) + fetch_metric(:counter, name, opts, &block) + end + + # Fetch and/or initialize gauge metric + # @param [Symbol] name + # @param [Hash] opts + def fetch_gauge(name, opts = {}, &block) + fetch_metric(:gauge, name, opts, &block) + end + + # Fetch and/or initialize histogram metric + # @param [Symbol] name + # @param [Hash] opts + def fetch_histogram(name, opts = {}, &block) + fetch_metric(:histogram, name, opts, &block) + end + + # Fetch and/or initialize summary metric + # @param [Symbol] name + # @param [Hash] opts + def fetch_summary(name, opts = {}, &block) + fetch_metric(:summary, name, opts, &block) + end + + # Define metric accessor method for a Counter + # @param [Symbol] name + # @param [Hash] opts + def define_counter(name, opts = {}, &block) + define_metric(:counter, name, opts, &block) + end + + # Define metric accessor method for a Gauge + # @param [Symbol] name + # @param [Hash] opts + def define_gauge(name, opts = {}, &block) + define_metric(:gauge, name, opts, &block) + end + + # Define metric accessor method for a Histogram + # @param [Symbol] name + # @param [Hash] opts + def define_histogram(name, opts = {}, &block) + define_metric(:histogram, name, opts, &block) + end + + # Define metric accessor method for a Summary + # @param [Symbol] name + # @param [Hash] opts + def define_summary(name, opts = {}, &block) + define_metric(:summary, name, opts, &block) + end + end + end + end +end diff --git a/lib/gitlab/metrics/methods/metric_options.rb b/lib/gitlab/metrics/methods/metric_options.rb new file mode 100644 index 00000000000..70e122d4e15 --- /dev/null +++ b/lib/gitlab/metrics/methods/metric_options.rb @@ -0,0 +1,61 @@ +module Gitlab + module Metrics + module Methods + class MetricOptions + SMALL_NETWORK_BUCKETS = [0.005, 0.01, 0.1, 1, 10].freeze + + def initialize(options = {}) + @multiprocess_mode = options[:multiprocess_mode] || :all + @buckets = options[:buckets] || SMALL_NETWORK_BUCKETS + @base_labels = options[:base_labels] || {} + @docstring = options[:docstring] + @with_feature = options[:with_feature] + end + + # Documentation describing metric in metrics endpoint '/-/metrics' + def docstring(docstring = nil) + @docstring = docstring unless docstring.nil? + + @docstring + end + + # Gauge aggregation mode for multiprocess metrics + # - :all (default) returns each gauge for every process + # - :livesum all process'es gauges summed up + # - :max maximum value of per process gauges + # - :min minimum value of per process gauges + def multiprocess_mode(mode = nil) + @multiprocess_mode = mode unless mode.nil? + + @multiprocess_mode + end + + # Measurement buckets for histograms + def buckets(buckets = nil) + @buckets = buckets unless buckets.nil? + + @buckets + end + + # Base labels are merged with per metric labels + def base_labels(base_labels = nil) + @base_labels = base_labels unless base_labels.nil? + + @base_labels + end + + # Use feature toggle to control whether certain metric is enabled/disabled + def with_feature(name = nil) + @with_feature = name unless name.nil? + + @with_feature + end + + def evaluate(&block) + instance_eval(&block) if block_given? + self + end + end + end + end +end diff --git a/lib/gitlab/metrics/null_metric.rb b/lib/gitlab/metrics/null_metric.rb index 3b5a2907195..aabada5c21a 100644 --- a/lib/gitlab/metrics/null_metric.rb +++ b/lib/gitlab/metrics/null_metric.rb @@ -2,6 +2,8 @@ module Gitlab module Metrics # Mocks ::Prometheus::Client::Metric and all derived metrics class NullMetric + include Singleton + def method_missing(name, *args, &block) nil end diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index b0b8e8436db..f07ea3560ff 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -3,73 +3,77 @@ require 'prometheus/client' module Gitlab module Metrics module Prometheus - include Gitlab::CurrentSettings - include Gitlab::Utils::StrongMemoize + extend ActiveSupport::Concern REGISTRY_MUTEX = Mutex.new PROVIDER_MUTEX = Mutex.new - def metrics_folder_present? - multiprocess_files_dir = ::Prometheus::Client.configuration.multiprocess_files_dir + class_methods do + include Gitlab::Utils::StrongMemoize - multiprocess_files_dir && - ::Dir.exist?(multiprocess_files_dir) && - ::File.writable?(multiprocess_files_dir) - end + def metrics_folder_present? + multiprocess_files_dir = ::Prometheus::Client.configuration.multiprocess_files_dir - def prometheus_metrics_enabled? - strong_memoize(:prometheus_metrics_enabled) do - prometheus_metrics_enabled_unmemoized + multiprocess_files_dir && + ::Dir.exist?(multiprocess_files_dir) && + ::File.writable?(multiprocess_files_dir) + end + + def prometheus_metrics_enabled? + strong_memoize(:prometheus_metrics_enabled) do + prometheus_metrics_enabled_unmemoized + end end - end - def registry - strong_memoize(:registry) do - REGISTRY_MUTEX.synchronize do - strong_memoize(:registry) do - ::Prometheus::Client.registry + def registry + strong_memoize(:registry) do + REGISTRY_MUTEX.synchronize do + strong_memoize(:registry) do + ::Prometheus::Client.registry + end end end end - end - def counter(name, docstring, base_labels = {}) - safe_provide_metric(:counter, name, docstring, base_labels) - end + def counter(name, docstring, base_labels = {}) + safe_provide_metric(:counter, name, docstring, base_labels) + end - def summary(name, docstring, base_labels = {}) - safe_provide_metric(:summary, name, docstring, base_labels) - end + def summary(name, docstring, base_labels = {}) + safe_provide_metric(:summary, name, docstring, base_labels) + end - def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all) - safe_provide_metric(:gauge, name, docstring, base_labels, multiprocess_mode) - end + def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all) + safe_provide_metric(:gauge, name, docstring, base_labels, multiprocess_mode) + end - def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) - safe_provide_metric(:histogram, name, docstring, base_labels, buckets) - end + def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) + safe_provide_metric(:histogram, name, docstring, base_labels, buckets) + end - private + private - def safe_provide_metric(method, name, *args) - metric = provide_metric(name) - return metric if metric + def safe_provide_metric(method, name, *args) + metric = provide_metric(name) + return metric if metric - PROVIDER_MUTEX.synchronize do - provide_metric(name) || registry.method(method).call(name, *args) + PROVIDER_MUTEX.synchronize do + provide_metric(name) || registry.method(method).call(name, *args) + end end - end - def provide_metric(name) - if prometheus_metrics_enabled? - registry.get(name) - else - NullMetric.new + def provide_metric(name) + if prometheus_metrics_enabled? + registry.get(name) + else + NullMetric.instance + end end - end - def prometheus_metrics_enabled_unmemoized - metrics_folder_present? && current_application_settings[:prometheus_metrics_enabled] || false + def prometheus_metrics_enabled_unmemoized + metrics_folder_present? && + Gitlab::CurrentSettings.current_application_settings[:prometheus_metrics_enabled] || false + end end end end diff --git a/lib/gitlab/metrics/subscribers/action_view.rb b/lib/gitlab/metrics/subscribers/action_view.rb index 3da474fc1ec..274436ca2b4 100644 --- a/lib/gitlab/metrics/subscribers/action_view.rb +++ b/lib/gitlab/metrics/subscribers/action_view.rb @@ -3,6 +3,14 @@ module Gitlab module Subscribers # Class for tracking the rendering timings of views. class ActionView < ActiveSupport::Subscriber + include Gitlab::Metrics::Methods + define_histogram :gitlab_view_rendering_duration_seconds do + docstring 'View rendering time' + base_labels Transaction::BASE_LABELS.merge({ path: nil }) + buckets [0.001, 0.01, 0.1, 1, 10.0] + with_feature :prometheus_metrics_view_instrumentation + end + attach_to :action_view SERIES = 'views'.freeze @@ -15,23 +23,11 @@ module Gitlab private - def metric_view_rendering_duration_seconds - @metric_view_rendering_duration_seconds ||= Gitlab::Metrics.histogram( - :gitlab_view_rendering_duration_seconds, - 'View rendering time', - Transaction::BASE_LABELS.merge({ path: nil }), - [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] - ) - end - def track(event) values = values_for(event) tags = tags_for(event) - metric_view_rendering_duration_seconds.observe( - current_transaction.labels.merge(tags), - event.duration - ) + self.class.gitlab_view_rendering_duration_seconds.observe(current_transaction.labels.merge(tags), event.duration) current_transaction.increment(:view_duration, event.duration) current_transaction.add_metric(SERIES, values, tags) diff --git a/lib/gitlab/metrics/subscribers/active_record.rb b/lib/gitlab/metrics/subscribers/active_record.rb index ead1acb8d44..4b3e8d0a6a0 100644 --- a/lib/gitlab/metrics/subscribers/active_record.rb +++ b/lib/gitlab/metrics/subscribers/active_record.rb @@ -3,12 +3,13 @@ module Gitlab module Subscribers # Class for tracking the total query duration of a transaction. class ActiveRecord < ActiveSupport::Subscriber + include Gitlab::Metrics::Methods attach_to :active_record def sql(event) return unless current_transaction - metric_sql_duration_seconds.observe(current_transaction.labels, event.duration / 1000.0) + self.class.gitlab_sql_duration_seconds.observe(current_transaction.labels, event.duration / 1000.0) current_transaction.increment(:sql_duration, event.duration, false) current_transaction.increment(:sql_count, 1, false) @@ -16,17 +17,14 @@ module Gitlab private - def current_transaction - Transaction.current + define_histogram :gitlab_sql_duration_seconds do + docstring 'SQL time' + base_labels Transaction::BASE_LABELS + buckets [0.001, 0.01, 0.1, 1.0, 10.0] end - def metric_sql_duration_seconds - @metric_sql_duration_seconds ||= Gitlab::Metrics.histogram( - :gitlab_sql_duration_seconds, - 'SQL time', - Transaction::BASE_LABELS, - [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] - ) + def current_transaction + Transaction.current end end end diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb index e7975c023a9..45b9e14ba55 100644 --- a/lib/gitlab/metrics/transaction.rb +++ b/lib/gitlab/metrics/transaction.rb @@ -2,11 +2,12 @@ module Gitlab module Metrics # Class for storing metrics information of a single transaction. class Transaction + include Gitlab::Metrics::Methods + # base labels shared among all transactions BASE_LABELS = { controller: nil, action: nil }.freeze THREAD_KEY = :_gitlab_metrics_transaction - METRICS_MUTEX = Mutex.new # The series to store events (e.g. Git pushes) in. EVENT_SERIES = 'events'.freeze @@ -54,8 +55,8 @@ module Gitlab @memory_after = System.memory_usage @finished_at = System.monotonic_time - self.class.metric_transaction_duration_seconds.observe(labels, duration) - self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) + self.class.gitlab_transaction_duration_seconds.observe(labels, duration) + self.class.gitlab_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) Thread.current[THREAD_KEY] = nil end @@ -72,7 +73,7 @@ module Gitlab # event_name - The name of the event (e.g. "git_push"). # tags - A set of tags to attach to the event. def add_event(event_name, tags = {}) - self.class.metric_event_counter(event_name, tags).increment(tags.merge(labels)) + self.class.transaction_metric(event_name, :counter, prefix: 'event_', tags: tags).increment(tags.merge(labels)) @metrics << Metric.new(EVENT_SERIES, { count: 1 }, tags.merge(event: event_name), :event) end @@ -86,12 +87,12 @@ module Gitlab end def increment(name, value, use_prometheus = true) - self.class.metric_transaction_counter(name).increment(labels, value) if use_prometheus + self.class.transaction_metric(name, :counter).increment(labels, value) if use_prometheus @values[name] += value end def set(name, value, use_prometheus = true) - self.class.metric_transaction_gauge(name).set(labels, value) if use_prometheus + self.class.transaction_metric(name, :gauge).set(labels, value) if use_prometheus @values[name] = value end @@ -136,64 +137,28 @@ module Gitlab "#{labels[:controller]}##{labels[:action]}" if labels && !labels.empty? end - def self.metric_transaction_duration_seconds - return @metric_transaction_duration_seconds if @metric_transaction_duration_seconds - - METRICS_MUTEX.synchronize do - @metric_transaction_duration_seconds ||= Gitlab::Metrics.histogram( - :gitlab_transaction_duration_seconds, - 'Transaction duration', - BASE_LABELS, - [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] - ) - end - end - - def self.metric_transaction_allocated_memory_bytes - return @metric_transaction_allocated_memory_bytes if @metric_transaction_allocated_memory_bytes - - METRICS_MUTEX.synchronize do - @metric_transaction_allocated_memory_bytes ||= Gitlab::Metrics.histogram( - :gitlab_transaction_allocated_memory_bytes, - 'Transaction allocated memory bytes', - BASE_LABELS, - [1000, 10000, 20000, 500000, 1000000, 2000000, 5000000, 10000000, 20000000, 100000000] - ) - end + define_histogram :gitlab_transaction_duration_seconds do + docstring 'Transaction duration' + base_labels BASE_LABELS + buckets [0.001, 0.01, 0.1, 1.0, 10.0] end - def self.metric_event_counter(event_name, tags) - return @metric_event_counters[event_name] if @metric_event_counters&.has_key?(event_name) - - METRICS_MUTEX.synchronize do - @metric_event_counters ||= {} - @metric_event_counters[event_name] ||= Gitlab::Metrics.counter( - "gitlab_transaction_event_#{event_name}_total".to_sym, - "Transaction event #{event_name} counter", - tags.merge(BASE_LABELS) - ) - end - end - - def self.metric_transaction_counter(name) - return @metric_transaction_counters[name] if @metric_transaction_counters&.has_key?(name) - - METRICS_MUTEX.synchronize do - @metric_transaction_counters ||= {} - @metric_transaction_counters[name] ||= Gitlab::Metrics.counter( - "gitlab_transaction_#{name}_total".to_sym, "Transaction #{name} counter", BASE_LABELS - ) - end + define_histogram :gitlab_transaction_allocated_memory_bytes do + docstring 'Transaction allocated memory bytes' + base_labels BASE_LABELS + buckets [100, 1000, 10000, 100000, 1000000, 10000000] + with_feature :prometheus_metrics_transaction_allocated_memory end - def self.metric_transaction_gauge(name) - return @metric_transaction_gauges[name] if @metric_transaction_gauges&.has_key?(name) + def self.transaction_metric(name, type, prefix: nil, tags: {}) + metric_name = "gitlab_transaction_#{prefix}#{name}_total".to_sym + fetch_metric(type, metric_name) do + docstring "Transaction #{prefix}#{name} #{type}" + base_labels tags.merge(BASE_LABELS) - METRICS_MUTEX.synchronize do - @metric_transaction_gauges ||= {} - @metric_transaction_gauges[name] ||= Gitlab::Metrics.gauge( - "gitlab_transaction_#{name}".to_sym, "Transaction gauge #{name}", BASE_LABELS, :livesum - ) + if type == :gauge + multiprocess_mode :livesum + end end end end diff --git a/spec/lib/gitlab/metrics/method_call_spec.rb b/spec/lib/gitlab/metrics/method_call_spec.rb index 41a9d1d9c90..d9379cfe674 100644 --- a/spec/lib/gitlab/metrics/method_call_spec.rb +++ b/spec/lib/gitlab/metrics/method_call_spec.rb @@ -5,6 +5,10 @@ describe Gitlab::Metrics::MethodCall do let(:method_call) { described_class.new('Foo#bar', :Foo, '#bar', transaction) } describe '#measure' do + after do + described_class.reload_metric!(:gitlab_method_call_duration_seconds) + end + it 'measures the performance of the supplied block' do method_call.measure { 'foo' } @@ -20,8 +24,6 @@ describe Gitlab::Metrics::MethodCall do context 'prometheus instrumentation is enabled' do before do - allow(Feature.get(:prometheus_metrics_method_instrumentation)).to receive(:enabled?).and_call_original - described_class.measurement_enabled_cache_expires_at.value = Time.now.to_i - 1 Feature.get(:prometheus_metrics_method_instrumentation).enable end @@ -31,30 +33,12 @@ describe Gitlab::Metrics::MethodCall do end end - it 'caches subsequent invocations of feature check' do - 10.times do - method_call.measure { 'foo' } - end - - expect(Feature.get(:prometheus_metrics_method_instrumentation)).to have_received(:enabled?).once - end - - it 'expires feature check cache after 1 minute' do - method_call.measure { 'foo' } - - Timecop.travel(1.minute.from_now) do - method_call.measure { 'foo' } - end - - Timecop.travel(1.minute.from_now + 1.second) do - method_call.measure { 'foo' } - end - - expect(Feature.get(:prometheus_metrics_method_instrumentation)).to have_received(:enabled?).twice + it 'metric is not a NullMetric' do + expect(described_class).not_to be_instance_of(Gitlab::Metrics::NullMetric) end it 'observes the performance of the supplied block' do - expect(described_class.call_duration_histogram) + expect(described_class.gitlab_method_call_duration_seconds) .to receive(:observe) .with({ module: :Foo, method: '#bar' }, be_a_kind_of(Numeric)) @@ -64,14 +48,12 @@ describe Gitlab::Metrics::MethodCall do context 'prometheus instrumentation is disabled' do before do - described_class.measurement_enabled_cache_expires_at.value = Time.now.to_i - 1 - Feature.get(:prometheus_metrics_method_instrumentation).disable end - it 'does not observe the performance' do - expect(described_class.call_duration_histogram) - .not_to receive(:observe) + it 'observes using NullMetric' do + expect(described_class.gitlab_method_call_duration_seconds).to be_instance_of(Gitlab::Metrics::NullMetric) + expect(described_class.gitlab_method_call_duration_seconds).to receive(:observe) method_call.measure { 'foo' } end @@ -81,12 +63,10 @@ describe Gitlab::Metrics::MethodCall do context 'when measurement is below threshold' do before do allow(method_call).to receive(:above_threshold?).and_return(false) - - Feature.get(:prometheus_metrics_method_instrumentation).enable end it 'does not observe the performance' do - expect(described_class.call_duration_histogram) + expect(described_class.gitlab_method_call_duration_seconds) .not_to receive(:observe) method_call.measure { 'foo' } @@ -96,7 +76,7 @@ describe Gitlab::Metrics::MethodCall do describe '#to_metric' do it 'returns a Metric instance' do - expect(method_call).to receive(:real_time).and_return(4.0001) + expect(method_call).to receive(:real_time).and_return(4.0001).twice expect(method_call).to receive(:cpu_time).and_return(3.0001) method_call.measure { 'foo' } diff --git a/spec/lib/gitlab/metrics/methods_spec.rb b/spec/lib/gitlab/metrics/methods_spec.rb new file mode 100644 index 00000000000..9d41ed2442b --- /dev/null +++ b/spec/lib/gitlab/metrics/methods_spec.rb @@ -0,0 +1,137 @@ +require 'spec_helper' + +describe Gitlab::Metrics::Methods do + subject { Class.new { include Gitlab::Metrics::Methods } } + + shared_context 'metric' do |metric_type, *args| + let(:docstring) { 'description' } + let(:metric_name) { :sample_metric } + + describe "#define_#{metric_type}" do + define_method(:call_define_metric_method) do |**args| + subject.__send__("define_#{metric_type}", metric_name, **args) + end + + context 'metrics access method not defined' do + it "defines metrics accessing method" do + expect(subject).not_to respond_to(metric_name) + + call_define_metric_method(docstring: docstring) + + expect(subject).to respond_to(metric_name) + end + end + + context 'metrics access method defined' do + before do + call_define_metric_method(docstring: docstring) + end + + it 'raises error when trying to redefine method' do + expect { call_define_metric_method(docstring: docstring) }.to raise_error(ArgumentError) + end + + context 'metric is not cached' do + it 'calls fetch_metric' do + expect(subject).to receive(:init_metric).with(metric_type, metric_name, docstring: docstring) + + subject.public_send(metric_name) + end + end + + context 'metric is cached' do + before do + subject.public_send(metric_name) + end + + it 'returns cached metric' do + expect(subject).not_to receive(:init_metric) + + subject.public_send(metric_name) + end + end + end + end + + describe "#fetch_#{metric_type}" do + let(:null_metric) { Gitlab::Metrics::NullMetric.instance } + + define_method(:call_fetch_metric_method) do |**args| + subject.__send__("fetch_#{metric_type}", metric_name, **args) + end + + context "when #{metric_type} is not cached" do + it 'initializes counter metric' do + allow(Gitlab::Metrics).to receive(metric_type).and_return(null_metric) + + call_fetch_metric_method(docstring: docstring) + + expect(Gitlab::Metrics).to have_received(metric_type).with(metric_name, docstring, *args) + end + end + + context "when #{metric_type} is cached" do + before do + call_fetch_metric_method(docstring: docstring) + end + + it 'uses class metric cache' do + expect(Gitlab::Metrics).not_to receive(metric_type) + + call_fetch_metric_method(docstring: docstring) + end + + context 'when metric is reloaded' do + before do + subject.reload_metric!(metric_name) + end + + it "initializes #{metric_type} metric" do + allow(Gitlab::Metrics).to receive(metric_type).and_return(null_metric) + + call_fetch_metric_method(docstring: docstring) + + expect(Gitlab::Metrics).to have_received(metric_type).with(metric_name, docstring, *args) + end + end + end + + context 'when metric is configured with feature' do + let(:feature_name) { :some_metric_feature } + let(:metric) { call_fetch_metric_method(docstring: docstring, with_feature: feature_name) } + + context 'when feature is enabled' do + before do + Feature.get(feature_name).enable + end + + it "initializes #{metric_type} metric" do + allow(Gitlab::Metrics).to receive(metric_type).and_return(null_metric) + + metric + + expect(Gitlab::Metrics).to have_received(metric_type).with(metric_name, docstring, *args) + end + end + + context 'when feature is disabled' do + before do + Feature.get(feature_name).disable + end + + it "returns NullMetric" do + allow(Gitlab::Metrics).to receive(metric_type) + + expect(metric).to be_instance_of(Gitlab::Metrics::NullMetric) + + expect(Gitlab::Metrics).not_to have_received(metric_type) + end + end + end + end + end + + include_examples 'metric', :counter, {} + include_examples 'metric', :gauge, {}, :all + include_examples 'metric', :histogram, {}, [0.005, 0.01, 0.1, 1, 10] +end diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb index 375cbf8a9ca..54781dd52fc 100644 --- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb +++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb @@ -2,6 +2,11 @@ require 'spec_helper' describe Gitlab::Metrics::Samplers::RubySampler do let(:sampler) { described_class.new(5) } + let(:null_metric) { double('null_metric', set: nil, observe: nil) } + + before do + allow(Gitlab::Metrics::NullMetric).to receive(:instance).and_return(null_metric) + end after do Allocations.stop if Gitlab::Metrics.mri? @@ -17,12 +22,9 @@ describe Gitlab::Metrics::Samplers::RubySampler do end it 'adds a metric containing the memory usage' do - expect(Gitlab::Metrics::System).to receive(:memory_usage) - .and_return(9000) + expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) - expect(sampler.metrics[:memory_usage]).to receive(:set) - .with({}, 9000) - .and_call_original + expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000) sampler.sample end @@ -31,9 +33,7 @@ describe Gitlab::Metrics::Samplers::RubySampler do expect(Gitlab::Metrics::System).to receive(:file_descriptor_count) .and_return(4) - expect(sampler.metrics[:file_descriptors]).to receive(:set) - .with({}, 4) - .and_call_original + expect(sampler.metrics[:file_descriptors]).to receive(:set).with({}, 4) sampler.sample end @@ -49,16 +49,14 @@ describe Gitlab::Metrics::Samplers::RubySampler do it 'adds a metric containing garbage collection time statistics' do expect(GC::Profiler).to receive(:total_time).and_return(0.24) - expect(sampler.metrics[:total_time]).to receive(:set) - .with({}, 240) - .and_call_original + expect(sampler.metrics[:total_time]).to receive(:set).with({}, 240) sampler.sample end it 'adds a metric containing garbage collection statistics' do GC.stat.keys.each do |key| - expect(sampler.metrics[key]).to receive(:set).with({}, anything).and_call_original + expect(sampler.metrics[key]).to receive(:set).with({}, anything) end sampler.sample diff --git a/spec/lib/gitlab/metrics/subscribers/action_view_spec.rb b/spec/lib/gitlab/metrics/subscribers/action_view_spec.rb index eca75a4fac1..9f3af1acef7 100644 --- a/spec/lib/gitlab/metrics/subscribers/action_view_spec.rb +++ b/spec/lib/gitlab/metrics/subscribers/action_view_spec.rb @@ -32,7 +32,7 @@ describe Gitlab::Metrics::Subscribers::ActionView do end it 'observes view rendering time' do - expect(subscriber.send(:metric_view_rendering_duration_seconds)) + expect(described_class.gitlab_view_rendering_duration_seconds) .to receive(:observe) .with({ view: 'app/views/x.html.haml' }, 2.1) diff --git a/spec/lib/gitlab/metrics/subscribers/active_record_spec.rb b/spec/lib/gitlab/metrics/subscribers/active_record_spec.rb index 9b3698fb4a8..4e7bd433a9c 100644 --- a/spec/lib/gitlab/metrics/subscribers/active_record_spec.rb +++ b/spec/lib/gitlab/metrics/subscribers/active_record_spec.rb @@ -25,7 +25,7 @@ describe Gitlab::Metrics::Subscribers::ActiveRecord do expect(subscriber).to receive(:current_transaction) .at_least(:once) .and_return(transaction) - expect(subscriber.send(:metric_sql_duration_seconds)).to receive(:observe).with({}, 0.002) + expect(described_class.send(:gitlab_sql_duration_seconds)).to receive(:observe).with({}, 0.002) subscriber.sql(event) end diff --git a/spec/lib/gitlab/metrics/subscribers/rails_cache_spec.rb b/spec/lib/gitlab/metrics/subscribers/rails_cache_spec.rb index 58e28592cf9..6795c1ab56b 100644 --- a/spec/lib/gitlab/metrics/subscribers/rails_cache_spec.rb +++ b/spec/lib/gitlab/metrics/subscribers/rails_cache_spec.rb @@ -144,7 +144,10 @@ describe Gitlab::Metrics::Subscribers::RailsCache do end context 'with a transaction' do + let(:metric_cache_misses_total) { double('metric_cache_misses_total', increment: nil) } + before do + allow(subscriber).to receive(:metric_cache_misses_total).and_return(metric_cache_misses_total) allow(subscriber).to receive(:current_transaction) .and_return(transaction) end @@ -157,9 +160,9 @@ describe Gitlab::Metrics::Subscribers::RailsCache do end it 'increments the cache_read_miss total' do - expect(subscriber.send(:metric_cache_misses_total)).to receive(:increment).with({}) - subscriber.cache_generate(event) + + expect(metric_cache_misses_total).to have_received(:increment).with({}) end end end diff --git a/spec/lib/gitlab/metrics_spec.rb b/spec/lib/gitlab/metrics_spec.rb index 1619fbd88b1..9e405e9f736 100644 --- a/spec/lib/gitlab/metrics_spec.rb +++ b/spec/lib/gitlab/metrics_spec.rb @@ -20,7 +20,7 @@ describe Gitlab::Metrics do context 'prometheus metrics enabled in config' do before do - allow(described_class).to receive(:current_application_settings).and_return(prometheus_metrics_enabled: true) + allow(Gitlab::CurrentSettings).to receive(:current_application_settings).and_return(prometheus_metrics_enabled: true) end context 'when metrics folder is present' do |