summaryrefslogtreecommitdiff
path: root/kafka/metrics/metrics.py
blob: d02f48d0991664a6d8cc21e0d5280e35981ce5ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import logging
import sys
import time
import threading

from kafka.metrics import AnonMeasurable, KafkaMetric, MetricConfig, MetricName
from kafka.metrics.stats import Sensor

logger = logging.getLogger(__name__)


class Metrics(object):
    """
    A registry of sensors and metrics.

    A metric is a named, numerical measurement. A sensor is a handle to
    record numerical measurements as they occur. Each Sensor has zero or
    more associated metrics. For example a Sensor might represent message
    sizes and we might associate with this sensor a metric for the average,
    maximum, or other statistics computed off the sequence of message sizes
    that are recorded by the sensor.

    Usage looks something like this:
        # set up metrics:
        metrics = Metrics() # the global repository of metrics and sensors
        sensor = metrics.sensor('message-sizes')
        metric_name = MetricName('message-size-avg', 'producer-metrics')
        sensor.add(metric_name, Avg())
        metric_name = MetricName('message-size-max', 'producer-metrics')
        sensor.add(metric_name, Max())

        # as messages are sent we record the sizes
        sensor.record(message_size);
    """
    def __init__(self, default_config=None, reporters=None,
                 enable_expiration=False):
        """
        Create a metrics repository with a default config, given metric
        reporters and the ability to expire eligible sensors

        Arguments:
            default_config (MetricConfig, optional): The default config
            reporters (list of AbstractMetricsReporter, optional):
                The metrics reporters
            enable_expiration (bool, optional): true if the metrics instance
                can garbage collect inactive sensors, false otherwise
        """
        self._lock = threading.RLock()
        self._config = default_config or MetricConfig()
        self._sensors = {}
        self._metrics = {}
        self._children_sensors = {}
        self._reporters = reporters or []
        for reporter in self._reporters:
            reporter.init([])

        if enable_expiration:
            def expire_loop():
                while True:
                    # delay 30 seconds
                    time.sleep(30)
                    self.ExpireSensorTask.run(self)
            metrics_scheduler = threading.Thread(target=expire_loop)
            # Creating a daemon thread to not block shutdown
            metrics_scheduler.daemon = True
            metrics_scheduler.start()

        self.add_metric(self.metric_name('count', 'kafka-metrics-count',
                                         'total number of registered metrics'),
                        AnonMeasurable(lambda config, now: len(self._metrics)))

    @property
    def config(self):
        return self._config

    @property
    def metrics(self):
        """
        Get all the metrics currently maintained and indexed by metricName
        """
        return self._metrics

    def metric_name(self, name, group, description='', tags=None):
        """
        Create a MetricName with the given name, group, description and tags,
        plus default tags specified in the metric configuration.
        Tag in tags takes precedence if the same tag key is specified in
        the default metric configuration.

        Arguments:
            name (str): The name of the metric
            group (str): logical group name of the metrics to which this
                metric belongs
            description (str, optional): A human-readable description to
                include in the metric
            tags (dict, optionals): additional key/value attributes of
                the metric
        """
        combined_tags = dict(self.config.tags)
        combined_tags.update(tags or {})
        return MetricName(name, group, description, combined_tags)

    def get_sensor(self, name):
        """
        Get the sensor with the given name if it exists

        Arguments:
            name (str): The name of the sensor

        Returns:
            Sensor: The sensor or None if no such sensor exists
        """
        if not name:
            raise ValueError('name must be non-empty')
        return self._sensors.get(name, None)

    def sensor(self, name, config=None,
               inactive_sensor_expiration_time_seconds=sys.maxsize,
               parents=None):
        """
        Get or create a sensor with the given unique name and zero or
        more parent sensors. All parent sensors will receive every value
        recorded with this sensor.

        Arguments:
            name (str): The name of the sensor
            config (MetricConfig, optional): A default configuration to use
                for this sensor for metrics that don't have their own config
            inactive_sensor_expiration_time_seconds (int, optional):
                If no value if recorded on the Sensor for this duration of
                time, it is eligible for removal
            parents (list of Sensor): The parent sensors

        Returns:
            Sensor: The sensor that is created
        """
        sensor = self.get_sensor(name)
        if sensor:
            return sensor

        with self._lock:
            sensor = self.get_sensor(name)
            if not sensor:
                sensor = Sensor(self, name, parents, config or self.config,
                                inactive_sensor_expiration_time_seconds)
                self._sensors[name] = sensor
                if parents:
                    for parent in parents:
                        children = self._children_sensors.get(parent)
                        if not children:
                            children = []
                            self._children_sensors[parent] = children
                        children.append(sensor)
                logger.debug('Added sensor with name %s', name)
            return sensor

    def remove_sensor(self, name):
        """
        Remove a sensor (if it exists), associated metrics and its children.

        Arguments:
            name (str): The name of the sensor to be removed
        """
        sensor = self._sensors.get(name)
        if sensor:
            child_sensors = None
            with sensor._lock:
                with self._lock:
                    val = self._sensors.pop(name, None)
                    if val and val == sensor:
                        for metric in sensor.metrics:
                            self.remove_metric(metric.metric_name)
                        logger.debug('Removed sensor with name %s', name)
                        child_sensors = self._children_sensors.pop(sensor, None)
            if child_sensors:
                for child_sensor in child_sensors:
                    self.remove_sensor(child_sensor.name)

    def add_metric(self, metric_name, measurable, config=None):
        """
        Add a metric to monitor an object that implements measurable.
        This metric won't be associated with any sensor.
        This is a way to expose existing values as metrics.

        Arguments:
            metricName (MetricName): The name of the metric
            measurable (AbstractMeasurable): The measurable that will be
                measured by this metric
            config (MetricConfig, optional): The configuration to use when
                measuring this measurable
        """
        # NOTE there was a lock here, but i don't think it's needed
        metric = KafkaMetric(metric_name, measurable, config or self.config)
        self.register_metric(metric)

    def remove_metric(self, metric_name):
        """
        Remove a metric if it exists and return it. Return None otherwise.
        If a metric is removed, `metric_removal` will be invoked
        for each reporter.

        Arguments:
            metric_name (MetricName): The name of the metric

        Returns:
            KafkaMetric: the removed `KafkaMetric` or None if no such
                metric exists
        """
        with self._lock:
            metric = self._metrics.pop(metric_name, None)
            if metric:
                for reporter in self._reporters:
                    reporter.metric_removal(metric)
            return metric

    def add_reporter(self, reporter):
        """Add a MetricReporter"""
        with self._lock:
            reporter.init(list(self.metrics.values()))
            self._reporters.append(reporter)

    def register_metric(self, metric):
        with self._lock:
            if metric.metric_name in self.metrics:
                raise ValueError('A metric named "%s" already exists, cannot'
                                 ' register another one.' % metric.metric_name)
            self.metrics[metric.metric_name] = metric
            for reporter in self._reporters:
                reporter.metric_change(metric)

    class ExpireSensorTask(object):
        """
        This iterates over every Sensor and triggers a remove_sensor
        if it has expired. Package private for testing
        """
        @staticmethod
        def run(metrics):
            items = list(metrics._sensors.items())
            for name, sensor in items:
                # remove_sensor also locks the sensor object. This is fine
                # because synchronized is reentrant. There is however a minor
                # race condition here. Assume we have a parent sensor P and
                # child sensor C. Calling record on C would cause a record on
                # P as well. So expiration time for P == expiration time for C.
                # If the record on P happens via C just after P is removed,
                # that will cause C to also get removed. Since the expiration
                # time is typically high it is not expected to be a significant
                # concern and thus not necessary to optimize
                with sensor._lock:
                    if sensor.has_expired():
                        logger.debug('Removing expired sensor %s', name)
                        metrics.remove_sensor(name)

    def close(self):
        """Close this metrics repository."""
        for reporter in self._reporters:
            reporter.close()