summaryrefslogtreecommitdiff
path: root/chromium/base/metrics/histogram.h
blob: 0f059457aaa1a66657fd79b991529020b4523d24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Histogram is an object that aggregates statistics, and can summarize them in
// various forms, including ASCII graphical, HTML, and numerically (as a
// vector of numbers corresponding to each of the aggregating buckets).

// It supports calls to accumulate either time intervals (which are processed
// as integral number of milliseconds), or arbitrary integral units.

// For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
// the minimum for a declared range is 1 (instead of 0), while the maximum is
// (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
// with ranges exceeding those limits (e.g. 0 as the minimum or
// HistogramBase::kSampleType_MAX as the maximum), but those excesses will be
// silently clamped to those limits (for backwards compatibility with existing
// code). Best practice is to not exceed the limits.

// Each use of a histogram with the same name will reference the same underlying
// data, so it is safe to record to the same histogram from multiple locations
// in the code. It is a runtime error if all uses of the same histogram do not
// agree exactly in type, bucket size and range.

// For Histogram and LinearHistogram, the maximum for a declared range should
// always be strictly larger than the minimum. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However, CustomHistogram can have a
// bucket count of 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).

// The buckets layout of class Histogram is exponential. For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket).  The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor.  As another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately somewhere around the 50th
// root of 10000.  This approach provides very fine grain (narrow) buckets
// at the low end of the histogram scale, but allows the histogram to cover a
// gigantic range with the addition of very few buckets.

// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram.  This static is explicitly initialized on any thread
// that detects an uninitialized (NULL) pointer.  The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value).  FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.  To avoid races at shutdown, the static
// pointer is NOT deleted, and we leak the histograms at process termination.

#ifndef BASE_METRICS_HISTOGRAM_H_
#define BASE_METRICS_HISTOGRAM_H_

#include <stddef.h>
#include <stdint.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/metrics/bucket_ranges.h"
#include "base/metrics/histogram_base.h"
// TODO(asvitkine): Migrate callers to include this directly and remove this.
#include "base/metrics/histogram_macros.h"
#include "base/metrics/histogram_samples.h"
#include "base/time/time.h"

namespace base {

// Forward declarations. The four histogram classes are defined later in this
// header. Pickle, PickleIterator and SampleVector are only referenced below
// through pointers, references or std::unique_ptr, so their full definitions
// are not needed here.
class BooleanHistogram;
class CustomHistogram;
class Histogram;
class LinearHistogram;
class Pickle;
class PickleIterator;
class SampleVector;

// Histogram with an exponential bucket layout (see the layout example in the
// comments at the top of this file). The constructors are protected; callers
// obtain instances through the static FactoryGet / FactoryTimeGet /
// PersistentCreate functions declared below. LinearHistogram and
// CustomHistogram in this header derive from this class.
class BASE_EXPORT Histogram : public HistogramBase {
 public:
  // Upper limit on the number of buckets in a histogram (documented as
  // 16,384). Only declared here; the value is defined out of line.
  static const uint32_t kBucketCount_MAX;

  // Container type for a list of per-bucket counts.
  typedef std::vector<Count> Counts;

  ~Histogram() override;

  //----------------------------------------------------------------------------
  // For a valid histogram, input should follow these restrictions:
  // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
  //              normalized up to 1)
  // maximum > minimum
  // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
  // Additionally,
  // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
  // more buckets than the range of numbers; having more buckets than 1 per
  // value in the range would be nonsensical.
  //
  // The FactoryTimeGet variants take the range as base::TimeDelta values
  // instead of raw Sample values.
  static HistogramBase* FactoryGet(const std::string& name,
                                   Sample minimum,
                                   Sample maximum,
                                   uint32_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const std::string& name,
                                       base::TimeDelta minimum,
                                       base::TimeDelta maximum,
                                       uint32_t bucket_count,
                                       int32_t flags);

  // Overloads of the above two functions that take a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   Sample minimum,
                                   Sample maximum,
                                   uint32_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const char* name,
                                       base::TimeDelta minimum,
                                       base::TimeDelta maximum,
                                       uint32_t bucket_count,
                                       int32_t flags);

  // Create a histogram using data in persistent storage. The parameters mirror
  // those of the protected "shared memory" constructor below; unlike the
  // FactoryGet functions, the result is returned as an owning
  // std::unique_ptr.
  static std::unique_ptr<HistogramBase> PersistentCreate(
      const std::string& name,
      Sample minimum,
      Sample maximum,
      const BucketRanges* ranges,
      HistogramBase::AtomicCount* counts,
      HistogramBase::AtomicCount* logged_counts,
      uint32_t counts_size,
      HistogramSamples::Metadata* meta,
      HistogramSamples::Metadata* logged_meta);

  // Fills in |ranges| for the declared |minimum| and |maximum|.
  // NOTE(review): presumably produces the exponential layout described at the
  // top of this file; the definition is not visible in this header.
  static void InitializeBucketRanges(Sample minimum,
                                     Sample maximum,
                                     BucketRanges* ranges);

  // This constant is for FindCorruption. Since snapshots of histograms are
  // taken asynchronously relative to sampling, and our counting code currently
  // does not prevent race conditions, it is pretty likely that we'll catch a
  // redundant count that doesn't match the sample count.  We allow for a
  // certain amount of slop before flagging this as an inconsistency. Even with
  // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
  // so we'll eventually get the data, if it was not the result of a corruption.
  static const int kCommonRaceBasedCountMismatch;

  // Check to see if bucket ranges, counts and tallies in the snapshot are
  // consistent with the bucket ranges and checksums in our histogram.  This can
  // produce a false-alarm if a race occurred in the reading of the data during
  // a SnapShot process, but should otherwise be false at all times (unless we
  // have memory over-writes, or DRAM failures). Flag definitions are located
  // under "enum Inconsistency" in base/metrics/histogram_base.h.
  uint32_t FindCorruption(const HistogramSamples& samples) const override;

  //----------------------------------------------------------------------------
  // Accessors for factory construction, serialization and testing.
  //----------------------------------------------------------------------------
  Sample declared_min() const { return declared_min_; }
  Sample declared_max() const { return declared_max_; }
  virtual Sample ranges(uint32_t i) const;
  virtual uint32_t bucket_count() const;
  // Returns the non-owned BucketRanges pointer held by this histogram.
  const BucketRanges* bucket_ranges() const { return bucket_ranges_; }

  // This function validates histogram construction arguments. It returns false
  // if some of the arguments are totally bad.
  // Note: the arguments are passed by pointer because the function may modify
  // them. Currently it allows some bad input, e.g. 0 as minimum, but silently
  // converts it to good input: 1.
  // TODO(kaiwang): Be more strict and return false for any bad input, and
  // make this a readonly validating function.
  static bool InspectConstructionArguments(const std::string& name,
                                           Sample* minimum,
                                           Sample* maximum,
                                           uint32_t* bucket_count);

  // HistogramBase implementation:
  uint64_t name_hash() const override;
  HistogramType GetHistogramType() const override;
  bool HasConstructionArguments(Sample expected_minimum,
                                Sample expected_maximum,
                                uint32_t expected_bucket_count) const override;
  void Add(Sample value) override;
  void AddCount(Sample value, int count) override;
  std::unique_ptr<HistogramSamples> SnapshotSamples() const override;
  std::unique_ptr<HistogramSamples> SnapshotDelta() override;
  std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override;
  void AddSamples(const HistogramSamples& samples) override;
  bool AddSamplesFromPickle(base::PickleIterator* iter) override;
  void WriteHTMLGraph(std::string* output) const override;
  void WriteAscii(std::string* output) const override;

 protected:
  // This class, defined entirely within the .cc file, contains all the
  // common logic for building a Histogram and can be overridden by more
  // specific types to alter details of how the creation is done. It is
  // defined as an embedded class (rather than an anonymous one) so it
  // can access the protected constructors.
  class Factory;

  // |ranges| should contain the underflow and overflow buckets. See top
  // comments for example.
  Histogram(const std::string& name,
            Sample minimum,
            Sample maximum,
            const BucketRanges* ranges);

  // Traditionally, histograms allocate their own memory for the bucket
  // vector but "shared" histograms use memory regions allocated from a
  // special memory segment that is passed in here.  It is assumed that
  // the life of this memory is managed externally and exceeds the lifetime
  // of this object. Practically, this memory is never released until the
  // process exits and the OS cleans it up.
  Histogram(const std::string& name,
            Sample minimum,
            Sample maximum,
            const BucketRanges* ranges,
            HistogramBase::AtomicCount* counts,
            HistogramBase::AtomicCount* logged_counts,
            uint32_t counts_size,
            HistogramSamples::Metadata* meta,
            HistogramSamples::Metadata* logged_meta);

  // HistogramBase implementation:
  bool SerializeInfoImpl(base::Pickle* pickle) const override;

  // Method to override to skip the display of the i'th bucket if it's empty.
  virtual bool PrintEmptyBucket(uint32_t index) const;

  // Get normalized size, relative to the ranges(i).
  virtual double GetBucketSize(Count current, uint32_t i) const;

  // Return a string description of what goes in a given bucket.
  // Most commonly this is the numeric value, but in derived classes it may
  // be a name (or string description) given to the bucket.
  // |it| identifies the bucket; the LinearHistogram override in this file names
  // the same parameter |i|.
  virtual const std::string GetAsciiBucketRange(uint32_t it) const;

 private:
  // Allow tests to corrupt our innards for testing purposes.
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);

  friend class StatisticsRecorder;  // To allow it to delete duplicates.
  friend class StatisticsRecorderTest;

  // Deserialization entry points: the free function is befriended so that it
  // can reach the private static DeserializeInfoImpl() below.
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  // Implementation of SnapshotSamples function.
  std::unique_ptr<SampleVector> SnapshotSampleVector() const;

  //----------------------------------------------------------------------------
  // Helpers for emitting Ascii graphic.  Each method appends data to output.

  void WriteAsciiImpl(bool graph_it,
                      const std::string& newline,
                      std::string* output) const;

  // Find out how large (graphically) the largest bucket will appear to be.
  double GetPeakBucketSize(const SampleVector& samples) const;

  // Write a common header message describing this histogram.
  void WriteAsciiHeader(const SampleVector& samples,
                        Count sample_count,
                        std::string* output) const;

  // Write information about previous, current, and next buckets.
  // Information such as cumulative percentage, etc.
  void WriteAsciiBucketContext(const int64_t past,
                               const Count current,
                               const int64_t remaining,
                               const uint32_t i,
                               std::string* output) const;

  // WriteJSON calls these.
  void GetParameters(DictionaryValue* params) const override;

  void GetCountAndBucketData(Count* count,
                             int64_t* sum,
                             ListValue* buckets) const override;

  // Does not own this object. Should get from StatisticsRecorder.
  const BucketRanges* bucket_ranges_;

  Sample declared_min_;  // Less than this goes into the first bucket.
  Sample declared_max_;  // Over this goes into the last bucket.

  // Finally, provide the state that changes with the addition of each new
  // sample.
  std::unique_ptr<SampleVector> samples_;

  // Also keep a previous uploaded state for calculating deltas.
  std::unique_ptr<HistogramSamples> logged_samples_;

  // Flag to indicate if PrepareFinalDelta has been previously called. It is
  // used to DCHECK that a final delta is not created multiple times.
  // NOTE(review): no PrepareFinalDelta is declared in this class; the flag is
  // mutable and SnapshotFinalDelta() is const, so this likely refers to
  // SnapshotFinalDelta() - confirm against the .cc file.
  mutable bool final_delta_created_ = false;

  DISALLOW_COPY_AND_ASSIGN(Histogram);
};

//------------------------------------------------------------------------------

// LinearHistogram is a more traditional histogram, with evenly spaced
// buckets. It reuses the Histogram interface and additionally supports
// attaching a textual description to individual buckets (see
// FactoryGetWithRangeDescription() and |bucket_description_|).
class BASE_EXPORT LinearHistogram : public Histogram {
 public:
  ~LinearHistogram() override;

  // |minimum| should start from 1; 0 as minimum is invalid because 0 is an
  // implicit default underflow bucket.
  static HistogramBase* FactoryGet(const std::string& name,
                                   Sample minimum,
                                   Sample maximum,
                                   uint32_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const std::string& name,
                                       TimeDelta minimum,
                                       TimeDelta maximum,
                                       uint32_t bucket_count,
                                       int32_t flags);

  // Overloads of the above two functions that take a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   Sample minimum,
                                   Sample maximum,
                                   uint32_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const char* name,
                                       TimeDelta minimum,
                                       TimeDelta maximum,
                                       uint32_t bucket_count,
                                       int32_t flags);

  // Create a histogram using data in persistent storage. The parameter list
  // matches Histogram::PersistentCreate().
  static std::unique_ptr<HistogramBase> PersistentCreate(
      const std::string& name,
      Sample minimum,
      Sample maximum,
      const BucketRanges* ranges,
      HistogramBase::AtomicCount* counts,
      HistogramBase::AtomicCount* logged_counts,
      uint32_t counts_size,
      HistogramSamples::Metadata* meta,
      HistogramSamples::Metadata* logged_meta);

  // One entry of a bucket-description table passed to
  // FactoryGetWithRangeDescription(): associates a sample value with a text
  // label.
  struct DescriptionPair {
    Sample sample;
    const char* description;  // Null means end of a list of pairs.
  };

  // Create a LinearHistogram and store a list of number/text values for use in
  // writing the histogram graph.
  // |descriptions| can be NULL, which means no special descriptions to set. If
  // it's not NULL, the last element in the array must have a NULL in its
  // "description" field.
  static HistogramBase* FactoryGetWithRangeDescription(
      const std::string& name,
      Sample minimum,
      Sample maximum,
      uint32_t bucket_count,
      int32_t flags,
      const DescriptionPair descriptions[]);

  // Fills in |ranges| for the declared |minimum| and |maximum|. Same signature
  // as (and hides) Histogram::InitializeBucketRanges().
  // NOTE(review): presumably produces evenly spaced buckets; the definition is
  // not visible in this header.
  static void InitializeBucketRanges(Sample minimum,
                                     Sample maximum,
                                     BucketRanges* ranges);

  // Overridden from Histogram:
  HistogramType GetHistogramType() const override;

 protected:
  // Creation helper; only forward-declared here (compare the comment on
  // Histogram::Factory).
  class Factory;

  // Constructors with the same parameter lists as the two protected Histogram
  // constructors.
  LinearHistogram(const std::string& name,
                  Sample minimum,
                  Sample maximum,
                  const BucketRanges* ranges);

  LinearHistogram(const std::string& name,
                  Sample minimum,
                  Sample maximum,
                  const BucketRanges* ranges,
                  HistogramBase::AtomicCount* counts,
                  HistogramBase::AtomicCount* logged_counts,
                  uint32_t counts_size,
                  HistogramSamples::Metadata* meta,
                  HistogramSamples::Metadata* logged_meta);

  double GetBucketSize(Count current, uint32_t i) const override;

  // If we have a description for a bucket, then return that.  Otherwise
  // let parent class provide a (numeric) description.
  const std::string GetAsciiBucketRange(uint32_t i) const override;

  // Skip printing of name for numeric range if we have a name (and if this is
  // an empty bucket).
  bool PrintEmptyBucket(uint32_t index) const override;

 private:
  // Deserialization entry points: the free function is befriended so that it
  // can reach the private static DeserializeInfoImpl() below.
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  // For some ranges, we store a printable description of a bucket range.
  // If there is no description, then GetAsciiBucketRange() uses parent class
  // to provide a description.
  typedef std::map<Sample, std::string> BucketDescriptionMap;
  BucketDescriptionMap bucket_description_;

  DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
};

//------------------------------------------------------------------------------

// BooleanHistogram is a histogram for booleans. It derives from
// LinearHistogram; unlike the parent's factories, its factory functions take
// no minimum, maximum or bucket count, so the bucket layout is not
// caller-configurable.
class BASE_EXPORT BooleanHistogram : public LinearHistogram {
 public:
  static HistogramBase* FactoryGet(const std::string& name, int32_t flags);

  // Overload of the above function that takes a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name, int32_t flags);

  // Create a histogram using data in persistent storage. Compared with
  // LinearHistogram::PersistentCreate() there are no |minimum|, |maximum| or
  // |counts_size| parameters.
  static std::unique_ptr<HistogramBase> PersistentCreate(
      const std::string& name,
      const BucketRanges* ranges,
      HistogramBase::AtomicCount* counts,
      HistogramBase::AtomicCount* logged_counts,
      HistogramSamples::Metadata* meta,
      HistogramSamples::Metadata* logged_meta);

  // Overridden from Histogram:
  HistogramType GetHistogramType() const override;

 protected:
  // Creation helper; only forward-declared here (compare the comment on
  // Histogram::Factory).
  class Factory;

 private:
  // Constructors are private (they are protected in the parent classes), so
  // instances can only be created from inside this class or by its nested
  // Factory.
  BooleanHistogram(const std::string& name, const BucketRanges* ranges);
  BooleanHistogram(const std::string& name,
                   const BucketRanges* ranges,
                   HistogramBase::AtomicCount* counts,
                   HistogramBase::AtomicCount* logged_counts,
                   HistogramSamples::Metadata* meta,
                   HistogramSamples::Metadata* logged_meta);

  // Deserialization entry points: the free function is befriended so that it
  // can reach the private static DeserializeInfoImpl() below.
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
};

//------------------------------------------------------------------------------

// CustomHistogram is a histogram for a set of custom integers. The bucket
// boundaries are supplied by the caller as a vector of range limits rather
// than being derived from a minimum, maximum and bucket count.
class BASE_EXPORT CustomHistogram : public Histogram {
 public:
  // |custom_ranges| contains a vector of limits on ranges. Each limit should be
  // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
  // compatibility). The limits can be unordered or contain duplication, but
  // client should not depend on this.
  static HistogramBase* FactoryGet(const std::string& name,
                                   const std::vector<Sample>& custom_ranges,
                                   int32_t flags);

  // Overload of the above function that takes a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   const std::vector<Sample>& custom_ranges,
                                   int32_t flags);

  // Create a histogram using data in persistent storage. Compared with
  // Histogram::PersistentCreate() there are no |minimum| and |maximum|
  // parameters.
  static std::unique_ptr<HistogramBase> PersistentCreate(
      const std::string& name,
      const BucketRanges* ranges,
      HistogramBase::AtomicCount* counts,
      HistogramBase::AtomicCount* logged_counts,
      uint32_t counts_size,
      HistogramSamples::Metadata* meta,
      HistogramSamples::Metadata* logged_meta);

  // Overridden from Histogram:
  HistogramType GetHistogramType() const override;

  // Helper method for transforming an array of valid enumeration values
  // to the vector of ranges (returned as std::vector<Sample>) expected by
  // UMA_HISTOGRAM_CUSTOM_ENUMERATION.
  // This function ensures that a guard bucket exists right after any
  // valid sample value (unless the next higher sample is also a valid value),
  // so that invalid samples never fall into the same bucket as valid samples.
  // |values| points to the array of valid values and |num_values| is its
  // element count.
  // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
  static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
                                                 uint32_t num_values);
 protected:
  // Creation helper; only forward-declared here (compare the comment on
  // Histogram::Factory).
  class Factory;

  // Constructors. Unlike the Histogram constructors they take no |minimum| and
  // |maximum| arguments.
  CustomHistogram(const std::string& name,
                  const BucketRanges* ranges);

  CustomHistogram(const std::string& name,
                  const BucketRanges* ranges,
                  HistogramBase::AtomicCount* counts,
                  HistogramBase::AtomicCount* logged_counts,
                  uint32_t counts_size,
                  HistogramSamples::Metadata* meta,
                  HistogramSamples::Metadata* logged_meta);

  // HistogramBase implementation:
  bool SerializeInfoImpl(base::Pickle* pickle) const override;

  // Overridden from Histogram:
  double GetBucketSize(Count current, uint32_t i) const override;

 private:
  // Deserialization entry points: the free function is befriended so that it
  // can reach the private static DeserializeInfoImpl() below.
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  // Returns whether |custom_ranges| is acceptable for building a
  // CustomHistogram. NOTE(review): the exact rules live in the .cc file;
  // presumably they match the limits documented on FactoryGet() - confirm.
  static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);

  DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
};

}  // namespace base

#endif  // BASE_METRICS_HISTOGRAM_H_