// Copyright 2017 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/ukm/ukm_recorder_impl.h" #include #include #include #include #include "base/component_export.h" #include "base/feature_list.h" #include "base/metrics/crc32.h" #include "base/metrics/field_trial.h" #include "base/metrics/field_trial_params.h" #include "base/metrics/histogram_functions.h" #include "base/metrics/histogram_macros.h" #include "base/metrics/metrics_hashes.h" #include "base/rand_util.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_split.h" #include "components/ukm/scheme_constants.h" #include "components/variations/variations_associated_data.h" #include "services/metrics/public/cpp/ukm_builders.h" #include "services/metrics/public/cpp/ukm_decode.h" #include "services/metrics/public/cpp/ukm_source.h" #include "services/metrics/public/cpp/ukm_source_id.h" #include "services/metrics/public/mojom/ukm_interface.mojom.h" #include "third_party/metrics_proto/ukm/entry.pb.h" #include "third_party/metrics_proto/ukm/report.pb.h" #include "third_party/metrics_proto/ukm/source.pb.h" #include "url/gurl.h" namespace ukm { COMPONENT_EXPORT(UKM_RECORDER) const base::Feature kUkmSamplingRateFeature{"UkmSamplingRate", base::FEATURE_DISABLED_BY_DEFAULT}; namespace { // Gets the list of whitelisted Entries as string. Format is a comma separated // list of Entry names (as strings). std::string GetWhitelistEntries() { return base::GetFieldTrialParamValueByFeature(kUkmFeature, "WhitelistEntries"); } bool IsWhitelistedSourceId(SourceId source_id) { SourceIdType type = GetSourceIdType(source_id); return type == SourceIdType::NAVIGATION_ID || type == SourceIdType::APP_ID || type == SourceIdType::HISTORY_ID || type == SourceIdType::WEBAPK_ID || type == SourceIdType::PAYMENT_APP_ID || type == SourceIdType::NO_URL_ID; } // Returns whether |url| has one of the schemes supported for logging to UKM. // URLs with other schemes will not be logged. bool HasSupportedScheme(const GURL& url) { return url.SchemeIsHTTPOrHTTPS() || url.SchemeIs(url::kFtpScheme) || url.SchemeIs(url::kAboutScheme) || url.SchemeIs(kChromeUIScheme) || url.SchemeIs(kExtensionScheme) || url.SchemeIs(kAppScheme); } void LogEventHashAsUmaHistogram(const std::string& histogram_name, uint64_t event_hash) { // The enum for this histogram gets populated by the PopulateEnumWithUkmEvents // function in populate_enums.py when producing the merged XML. base::UmaHistogramSparse(histogram_name, // Truncate the unsigned 64-bit hash to 31 bits, to // make it a suitable histogram sample. event_hash & 0x7fffffff); } enum class DroppedDataReason { NOT_DROPPED = 0, RECORDING_DISABLED = 1, MAX_HIT = 2, NOT_WHITELISTED = 3, UNSUPPORTED_URL_SCHEME = 4, SAMPLED_OUT = 5, EXTENSION_URLS_DISABLED = 6, EXTENSION_NOT_SYNCED = 7, NOT_MATCHED = 8, EMPTY_URL = 9, REJECTED_BY_FILTER = 10, NUM_DROPPED_DATA_REASONS }; void RecordDroppedSource(DroppedDataReason reason) { UMA_HISTOGRAM_ENUMERATION( "UKM.Sources.Dropped", static_cast(reason), static_cast(DroppedDataReason::NUM_DROPPED_DATA_REASONS)); } void RecordDroppedEntry(uint64_t event_hash, DroppedDataReason reason) { LogEventHashAsUmaHistogram("UKM.Entries.Dropped.ByEntryHash", event_hash); UMA_HISTOGRAM_ENUMERATION( "UKM.Entries.Dropped", static_cast(reason), static_cast(DroppedDataReason::NUM_DROPPED_DATA_REASONS)); } void StoreEntryProto(const mojom::UkmEntry& in, Entry* out) { DCHECK(!out->has_source_id()); DCHECK(!out->has_event_hash()); out->set_source_id(in.source_id); out->set_event_hash(in.event_hash); for (const auto& metric : in.metrics) { Entry::Metric* proto_metric = out->add_metrics(); proto_metric->set_metric_hash(metric.first); proto_metric->set_value(metric.second); } } GURL SanitizeURL(const GURL& url) { GURL::Replacements remove_params; remove_params.ClearUsername(); remove_params.ClearPassword(); // chrome:// and about: URLs params are never used for navigation, only to // prepopulate data on the page, so don't include their params. if (url.SchemeIs(url::kAboutScheme) || url.SchemeIs("chrome")) { remove_params.ClearQuery(); } if (url.SchemeIs(kExtensionScheme)) { remove_params.ClearPath(); remove_params.ClearQuery(); remove_params.ClearRef(); } return url.ReplaceComponents(remove_params); } void AppendWhitelistedUrls( const std::map>& sources, std::unordered_set* urls) { for (const auto& kv : sources) { if (IsWhitelistedSourceId(kv.first)) { urls->insert(kv.second->url().spec()); // Some non-navigation sources only record origin as a URL. // Add the origin from the navigation source to match those too. urls->insert(kv.second->url().GetOrigin().spec()); } } } // Returns true if the event corresponding to |event_hash| has a comprehensive // decode map that includes all valid metrics. bool HasComprehensiveDecodeMap(int64_t event_hash) { // All events other than "Identifiability" conforms to its decode map. // TODO(asanka): It is technically an abstraction violation for // //components/ukm to know this fact. return event_hash != builders::Identifiability::kEntryNameHash; } bool HasUnknownMetrics(const builders::DecodeMap& decode_map, const mojom::UkmEntry& entry) { const auto it = decode_map.find(entry.event_hash); if (it == decode_map.end()) return true; if (!HasComprehensiveDecodeMap(entry.event_hash)) return false; const auto& metric_map = it->second.metric_map; for (const auto& metric : entry.metrics) { if (metric_map.count(metric.first) == 0) return true; } return false; } } // namespace UkmRecorderImpl::UkmRecorderImpl() : sampling_seed_(static_cast(base::RandUint64())) { max_sources_ = static_cast(base::GetFieldTrialParamByFeatureAsInt( kUkmFeature, "MaxSources", max_sources_)); max_kept_sources_ = static_cast(base::GetFieldTrialParamByFeatureAsInt( kUkmFeature, "MaxKeptSources", max_kept_sources_)); max_entries_ = static_cast(base::GetFieldTrialParamByFeatureAsInt( kUkmFeature, "MaxEntries", max_entries_)); } UkmRecorderImpl::~UkmRecorderImpl() = default; // static void UkmRecorderImpl::CreateFallbackSamplingTrial( bool is_stable_channel, base::FeatureList* feature_list) { static const char kSampledGroup_Stable[] = "Sampled_NoSeed_Stable"; static const char kSampledGroup_Other[] = "Sampled_NoSeed_Other"; const char* sampled_group = kSampledGroup_Other; int default_sampling = 1; // Sampling is 1-in-N; this is N. // Nothing is sampled out except for "stable" which omits almost everything // in this configuration. This is done so that clients that fail to receive // a configuration from the server do not bias aggregated results because // of a relatively large number of records from them. if (is_stable_channel) { sampled_group = kSampledGroup_Stable; default_sampling = 1000000; } scoped_refptr trial( base::FieldTrialList::FactoryGetFieldTrial( kUkmSamplingRateFeature.name, 100, sampled_group, base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr)); // Everybody (100%) should have a sampling configuration. std::map params = { {"_default_sampling", base::NumberToString(default_sampling)}}; variations::AssociateVariationParams(trial->trial_name(), sampled_group, params); trial->AppendGroup(sampled_group, 100); // Setup the feature. feature_list->RegisterFieldTrialOverride( kUkmSamplingRateFeature.name, base::FeatureList::OVERRIDE_ENABLE_FEATURE, trial.get()); } UkmRecorderImpl::EventAggregate::EventAggregate() = default; UkmRecorderImpl::EventAggregate::~EventAggregate() = default; UkmRecorderImpl::Recordings::Recordings() = default; UkmRecorderImpl::Recordings& UkmRecorderImpl::Recordings::operator=( Recordings&&) = default; UkmRecorderImpl::Recordings::~Recordings() = default; void UkmRecorderImpl::Recordings::Reset() { *this = Recordings(); } void UkmRecorderImpl::Recordings::SourceCounts::Reset() { *this = SourceCounts(); } void UkmRecorderImpl::EnableRecording(bool extensions) { DVLOG(1) << "UkmRecorderImpl::EnableRecording, extensions=" << extensions; recording_enabled_ = true; extensions_enabled_ = extensions; } void UkmRecorderImpl::DisableRecording() { DVLOG(1) << "UkmRecorderImpl::DisableRecording"; if (recording_enabled_) recording_is_continuous_ = false; recording_enabled_ = false; extensions_enabled_ = false; } void UkmRecorderImpl::DisableSamplingForTesting() { sampling_enabled_ = false; } bool UkmRecorderImpl::IsSamplingEnabled() const { return sampling_enabled_ && base::FeatureList::IsEnabled(kUkmSamplingRateFeature); } void UkmRecorderImpl::Purge() { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); recordings_.Reset(); recording_is_continuous_ = false; } void UkmRecorderImpl::PurgeExtensionRecordings() { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); // Discard all sources that have an extension URL as well as all the entries // related to any of these sources. std::unordered_set extension_source_ids; for (const auto& kv : recordings_.sources) { if (kv.second->url().SchemeIs(kExtensionScheme)) { extension_source_ids.insert(kv.first); } } for (const auto source_id : extension_source_ids) { recordings_.sources.erase(source_id); } std::vector& events = recordings_.entries; events.erase( std::remove_if(events.begin(), events.end(), [&](const auto& event) { return extension_source_ids.count(event->source_id); }), events.end()); recording_is_continuous_ = false; } void UkmRecorderImpl::MarkSourceForDeletion(SourceId source_id) { if (source_id == kInvalidSourceId) return; recordings_.obsolete_source_ids.insert(source_id); } void UkmRecorderImpl::SetIsWebstoreExtensionCallback( const IsWebstoreExtensionCallback& callback) { is_webstore_extension_callback_ = callback; } void UkmRecorderImpl::SetEntryFilter( std::unique_ptr entry_filter) { DCHECK(!entry_filter_ || !entry_filter); entry_filter_ = std::move(entry_filter); } // TODO(rkaplow): This should be refactored. void UkmRecorderImpl::StoreRecordingsInReport(Report* report) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); // Set of source ids seen by entries in recordings_. std::set source_ids_seen; for (const auto& entry : recordings_.entries) { Entry* proto_entry = report->add_entries(); StoreEntryProto(*entry, proto_entry); source_ids_seen.insert(entry->source_id); } // Number of sources excluded from this report because no entries referred to // them. const int num_sources_unsent = recordings_.sources.size() - source_ids_seen.size(); // Construct set of whitelisted URLs by merging those carried over from the // previous report cycle and those from sources recorded in this cycle. std::unordered_set url_whitelist; recordings_.carryover_urls_whitelist.swap(url_whitelist); AppendWhitelistedUrls(recordings_.sources, &url_whitelist); // Number of sources discarded due to not matching a navigation URL. int num_sources_unmatched = 0; std::unordered_map serialized_source_type_counts; for (const auto& kv : recordings_.sources) { // Don't keep sources of these types after current report because their // entries are logged only at source creation time. SourceIdType type = GetSourceIdType(kv.first); if (type == ukm::SourceIdObj::Type::APP_ID || type == ukm::SourceIdObj::Type::HISTORY_ID || type == ukm::SourceIdObj::Type::WEBAPK_ID || type == SourceIdType::PAYMENT_APP_ID || type == SourceIdType::NO_URL_ID) { MarkSourceForDeletion(kv.first); } // If the source id is not whitelisted, don't send it unless it has // associated entries and the URL matches that of a whitelisted source. // Note: If ShouldRestrictToWhitelistedSourceIds() is true, this logic will // not be hit as the source would have already been filtered in // UpdateSourceURL(). if (!IsWhitelistedSourceId(kv.first)) { // UkmSource should not keep initial_url for non-navigation source IDs. DCHECK_EQ(1u, kv.second->urls().size()); if (!url_whitelist.count(kv.second->url().spec())) { RecordDroppedSource(DroppedDataReason::NOT_MATCHED); MarkSourceForDeletion(kv.first); num_sources_unmatched++; continue; } // Omit entryless sources from the report. if (!base::Contains(source_ids_seen, kv.first)) { continue; } else { // Source of ukm::SourceIdObj::Type::DEFAULT type will not be kept // after entries are logged. MarkSourceForDeletion(kv.first); } } Source* proto_source = report->add_sources(); kv.second->PopulateProto(proto_source); serialized_source_type_counts[GetSourceIdType(kv.first)]++; } for (const auto& event_and_aggregate : recordings_.event_aggregations) { const EventAggregate& event_aggregate = event_and_aggregate.second; Aggregate* proto_aggregate = report->add_aggregates(); proto_aggregate->set_source_id(0); // Across all sources. proto_aggregate->set_event_hash(event_and_aggregate.first); proto_aggregate->set_total_count(event_aggregate.total_count); proto_aggregate->set_dropped_due_to_limits( event_aggregate.dropped_due_to_limits); proto_aggregate->set_dropped_due_to_sampling( event_aggregate.dropped_due_to_sampling); proto_aggregate->set_dropped_due_to_whitelist( event_aggregate.dropped_due_to_whitelist); proto_aggregate->set_dropped_due_to_filter( event_aggregate.dropped_due_to_filter); for (const auto& metric_and_aggregate : event_aggregate.metrics) { const MetricAggregate& aggregate = metric_and_aggregate.second; Aggregate::Metric* proto_metric = proto_aggregate->add_metrics(); proto_metric->set_metric_hash(metric_and_aggregate.first); proto_metric->set_value_sum(aggregate.value_sum); proto_metric->set_value_square_sum(aggregate.value_square_sum); if (aggregate.total_count != event_aggregate.total_count) { proto_metric->set_total_count(aggregate.total_count); } if (aggregate.dropped_due_to_limits != event_aggregate.dropped_due_to_limits) { proto_metric->set_dropped_due_to_limits( aggregate.dropped_due_to_limits); } if (aggregate.dropped_due_to_sampling != event_aggregate.dropped_due_to_sampling) { proto_metric->set_dropped_due_to_sampling( aggregate.dropped_due_to_sampling); } if (aggregate.dropped_due_to_whitelist != event_aggregate.dropped_due_to_whitelist) { proto_metric->set_dropped_due_to_whitelist( aggregate.dropped_due_to_whitelist); } if (aggregate.dropped_due_to_filter != event_aggregate.dropped_due_to_filter) { proto_metric->set_dropped_due_to_filter( aggregate.dropped_due_to_filter); } } } int num_serialized_sources = 0; for (const auto& entry : serialized_source_type_counts) { num_serialized_sources += entry.second; } UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2", num_serialized_sources); UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCount2", recordings_.entries.size()); UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount", num_sources_unsent); UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnmatchedSourcesCount", num_sources_unmatched); UMA_HISTOGRAM_COUNTS_1000( "UKM.Sources.SerializedCount2.Default", serialized_source_type_counts[SourceIdType::DEFAULT]); UMA_HISTOGRAM_COUNTS_1000( "UKM.Sources.SerializedCount2.Navigation", serialized_source_type_counts[SourceIdType::NAVIGATION_ID]); UMA_HISTOGRAM_COUNTS_1000( "UKM.Sources.SerializedCount2.App", serialized_source_type_counts[SourceIdType::APP_ID]); // We record a UMA metric specifically for the number of serialized events // with the FCP metric. This is for data quality verification. const uint64_t pageload_hash = base::HashMetricName(ukm::builders::PageLoad::kEntryName); const uint64_t fcp_hash = base::HashMetricName( ukm::builders::PageLoad:: kPaintTiming_NavigationToFirstContentfulPaintName); int num_recorded_fcp = 0; for (const auto& entry : recordings_.entries) { if (entry->event_hash == pageload_hash) { if (entry->metrics.find(fcp_hash) != entry->metrics.end()) { num_recorded_fcp++; } } } UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCountFCP", num_recorded_fcp); // For each matching id in obsolete_source_ids, remove the Source from // recordings_.sources. The remaining sources form the deferred sources for // the next report. for (const SourceId& source_id : recordings_.obsolete_source_ids) { recordings_.sources.erase(source_id); } recordings_.obsolete_source_ids.clear(); // Populate SourceCounts field on the report then clear the recordings. Report::SourceCounts* source_counts_proto = report->mutable_source_counts(); source_counts_proto->set_observed(recordings_.source_counts.observed); source_counts_proto->set_navigation_sources( recordings_.source_counts.navigation_sources); source_counts_proto->set_unmatched_sources(num_sources_unmatched); source_counts_proto->set_carryover_sources( recordings_.source_counts.carryover_sources); recordings_.source_counts.Reset(); recordings_.entries.clear(); recordings_.event_aggregations.clear(); report->set_is_continuous(recording_is_continuous_); recording_is_continuous_ = true; // Defer at most GetMaxKeptSources() sources to the next report, // prioritizing most recently created ones. int pruned_sources_age = PruneOldSources(max_kept_sources_); // Record how old the newest truncated source is. source_counts_proto->set_pruned_sources_age_seconds(pruned_sources_age); // Set deferred sources count after pruning. source_counts_proto->set_deferred_sources(recordings_.sources.size()); // Same value as the deferred source count, for setting the carryover count in // the next reporting cycle. recordings_.source_counts.carryover_sources = recordings_.sources.size(); // We already matched these deferred sources against the URL whitelist. // Re-whitelist them for the next report. for (const auto& kv : recordings_.sources) { recordings_.carryover_urls_whitelist.insert(kv.second->url().spec()); } UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount", recordings_.sources.size()); // Record number of sources after pruning that were carried over due to not // having any events in this reporting cycle. int num_sources_entryless = 0; for (const auto& kv : recordings_.sources) { if (!base::Contains(source_ids_seen, kv.first)) { num_sources_entryless++; } } source_counts_proto->set_entryless_sources(num_sources_entryless); // Notify observers that a report was generated. if (entry_filter_) { entry_filter_->OnStoreRecordingsInReport(); } } bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const { return base::GetFieldTrialParamByFeatureAsBool( kUkmFeature, "RestrictToWhitelistedSourceIds", false); } bool UkmRecorderImpl::ShouldRestrictToWhitelistedEntries() const { return true; } bool UkmRecorderImpl::ApplyEntryFilter(mojom::UkmEntry* entry) { base::flat_set dropped_metric_hashes; if (!entry_filter_) return true; bool keep_entry = entry_filter_->FilterEntry(entry, &dropped_metric_hashes); for (auto metric : dropped_metric_hashes) { recordings_.event_aggregations[entry->event_hash] .metrics[metric] .dropped_due_to_filter++; } if (!keep_entry) { recordings_.event_aggregations[entry->event_hash].dropped_due_to_filter++; return false; } return true; } int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources) { if (recordings_.sources.size() <= max_kept_sources) return 0; std::vector> timestamp_source_id_pairs; for (const auto& kv : recordings_.sources) { timestamp_source_id_pairs.push_back( std::make_pair(kv.second->creation_time(), kv.first)); } // Partially sort so that the last |max_kept_sources| elements are the // newest. std::nth_element(timestamp_source_id_pairs.begin(), timestamp_source_id_pairs.end() - max_kept_sources, timestamp_source_id_pairs.end()); for (auto kv = timestamp_source_id_pairs.begin(); kv != timestamp_source_id_pairs.end() - max_kept_sources; ++kv) { recordings_.sources.erase(kv->second); } base::TimeDelta pruned_sources_age = base::TimeTicks::Now() - (timestamp_source_id_pairs.end() - (max_kept_sources + 1))->first; return pruned_sources_age.InSeconds(); } void UkmRecorderImpl::UpdateSourceURL(SourceId source_id, const GURL& unsanitized_url) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); DCHECK(GetSourceIdType(source_id) != SourceIdType::NO_URL_ID); if (base::Contains(recordings_.sources, source_id)) return; const GURL sanitized_url = SanitizeURL(unsanitized_url); if (!ShouldRecordUrl(source_id, sanitized_url)) return; RecordSource(std::make_unique(source_id, sanitized_url)); } void UkmRecorderImpl::UpdateAppURL(SourceId source_id, const GURL& url, const AppType app_type) { if (app_type != AppType::kPWA && !extensions_enabled_) { RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED); return; } UpdateSourceURL(source_id, url); } void UkmRecorderImpl::RecordNavigation( SourceId source_id, const UkmSource::NavigationData& unsanitized_navigation_data) { DCHECK(GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID); DCHECK(!base::Contains(recordings_.sources, source_id)); // TODO(csharrison): Consider changing this behavior so the Source isn't even // recorded at all if the final URL in |unsanitized_navigation_data| should // not be recorded. std::vector urls; for (const GURL& url : unsanitized_navigation_data.urls) { const GURL sanitized_url = SanitizeURL(url); if (ShouldRecordUrl(source_id, sanitized_url)) urls.push_back(std::move(sanitized_url)); } // None of the URLs passed the ShouldRecordUrl check, so do not create a new // Source for them. if (urls.empty()) return; UkmSource::NavigationData sanitized_navigation_data = unsanitized_navigation_data.CopyWithSanitizedUrls(urls); RecordSource( std::make_unique(source_id, sanitized_navigation_data)); } bool UkmRecorderImpl::ShouldRecordUrl(SourceId source_id, const GURL& sanitized_url) const { if (!recording_enabled_) { RecordDroppedSource(DroppedDataReason::RECORDING_DISABLED); return false; } if (recordings_.sources.size() >= max_sources_) { RecordDroppedSource(DroppedDataReason::MAX_HIT); return false; } if (ShouldRestrictToWhitelistedSourceIds() && !IsWhitelistedSourceId(source_id)) { RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED); return false; } if (sanitized_url.is_empty()) { RecordDroppedSource(DroppedDataReason::EMPTY_URL); return false; } if (!HasSupportedScheme(sanitized_url)) { RecordDroppedSource(DroppedDataReason::UNSUPPORTED_URL_SCHEME); DVLOG(2) << "Dropped Unsupported UKM URL:" << source_id << ":" << sanitized_url.spec(); return false; } // Extension URLs need to be specifically enabled and the extension synced. if (sanitized_url.SchemeIs(kExtensionScheme)) { DCHECK_EQ(sanitized_url.GetWithEmptyPath(), sanitized_url); if (!extensions_enabled_) { RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED); return false; } if (!is_webstore_extension_callback_ || !is_webstore_extension_callback_.Run(sanitized_url.host_piece())) { RecordDroppedSource(DroppedDataReason::EXTENSION_NOT_SYNCED); return false; } } return true; } void UkmRecorderImpl::RecordSource(std::unique_ptr source) { SourceId source_id = source->id(); if (GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID) recordings_.source_counts.navigation_sources++; recordings_.source_counts.observed++; recordings_.sources.emplace(source_id, std::move(source)); } void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); DCHECK(!HasUnknownMetrics(decode_map_, *entry)); if (!recording_enabled_) { RecordDroppedEntry(entry->event_hash, DroppedDataReason::RECORDING_DISABLED); return; } if (!ApplyEntryFilter(entry.get())) { RecordDroppedEntry(entry->event_hash, DroppedDataReason::REJECTED_BY_FILTER); return; } EventAggregate& event_aggregate = recordings_.event_aggregations[entry->event_hash]; event_aggregate.total_count++; for (const auto& metric : entry->metrics) { MetricAggregate& aggregate = event_aggregate.metrics[metric.first]; double value = metric.second; aggregate.total_count++; aggregate.value_sum += value; aggregate.value_square_sum += value * value; } if (ShouldRestrictToWhitelistedEntries() && !base::Contains(whitelisted_entry_hashes_, entry->event_hash)) { RecordDroppedEntry(entry->event_hash, DroppedDataReason::NOT_WHITELISTED); event_aggregate.dropped_due_to_whitelist++; for (auto& metric : entry->metrics) event_aggregate.metrics[metric.first].dropped_due_to_whitelist++; return; } if (IsSamplingEnabled()) { if (default_sampling_rate_ < 0) { LoadExperimentSamplingInfo(); } bool sampled_in = IsSampledIn(entry->source_id, entry->event_hash); if (!sampled_in) { RecordDroppedEntry(entry->event_hash, DroppedDataReason::SAMPLED_OUT); event_aggregate.dropped_due_to_sampling++; for (auto& metric : entry->metrics) event_aggregate.metrics[metric.first].dropped_due_to_sampling++; return; } } if (recordings_.entries.size() >= max_entries_) { RecordDroppedEntry(entry->event_hash, DroppedDataReason::MAX_HIT); event_aggregate.dropped_due_to_limits++; for (auto& metric : entry->metrics) event_aggregate.metrics[metric.first].dropped_due_to_limits++; return; } // Log a corresponding entry to UMA so we get a per-metric breakdown of UKM // entry counts. LogEventHashAsUmaHistogram("UKM.Entries.Recorded.ByEntryHash", entry->event_hash); recordings_.entries.push_back(std::move(entry)); } void UkmRecorderImpl::LoadExperimentSamplingInfo() { // This should be called only if a sampling rate hasn't been loaded. DCHECK_LT(default_sampling_rate_, 0); // Default rate must be >= 0 to indicate that load is complete. default_sampling_rate_ = 1; // If we don't have the feature, no parameters to load. if (!base::FeatureList::IsEnabled(kUkmSamplingRateFeature)) { return; } // Check the parameters for sampling controls. std::map params; if (base::GetFieldTrialParamsByFeature(kUkmSamplingRateFeature, ¶ms)) { LoadExperimentSamplingParams(params); } } void UkmRecorderImpl::LoadExperimentSamplingParams( const std::map& params) { for (const auto& kv : params) { const std::string& key = kv.first; if (key.length() == 0) continue; // Keys starting with an underscore are global configuration. if (key.at(0) == '_') { if (key == "_default_sampling") { int sampling; // We only load non-negative global sampling rates. if (base::StringToInt(kv.second, &sampling) && sampling >= 0) default_sampling_rate_ = sampling; } continue; } // Anything else is an event name. int sampling; auto hash = base::HashMetricName(key); if (base::StringToInt(kv.second, &sampling)) { // If the parameter is a number then that's the sampling rate. if (sampling >= 0) event_sampling_rates_[hash] = sampling; } else { // If the parameter is a string then it's the name of another metric // to which it should be slaved. This allows different metrics to be // sampled in or out together. event_sampling_master_[hash] = base::HashMetricName(kv.second); } } } bool UkmRecorderImpl::IsSampledIn(int64_t source_id, uint64_t event_id) { // Determine the sampling rate. It's one of: // - the default // - an explicit sampling rate // - a group sampling rate int sampling_rate = default_sampling_rate_; uint64_t sampling_hash = event_id; auto master_found = event_sampling_master_.find(sampling_hash); if (master_found != event_sampling_master_.end()) { sampling_hash = master_found->second; } auto rate_found = event_sampling_rates_.find(sampling_hash); if (rate_found != event_sampling_rates_.end()) { sampling_rate = rate_found->second; } return IsSampledIn(source_id, sampling_hash, sampling_rate); } bool UkmRecorderImpl::IsSampledIn(int64_t source_id, uint64_t event_id, int sampling_rate) { // A sampling rate of 0 is "never"; everything else is 1-in-N but calculated // deterministically based on a seed, the source-id, and the event-id. Skip // the calculation, though, if N==1 because it will always be true. A negative // rate means "unset"; treat it like "never". if (sampling_rate <= 0) return false; if (sampling_rate == 1) return true; // Mutate the "sampling seed" number in a predictable manner based on the // source and event IDs. This makes the result of this function be always // the same for the same input parameters (since the seed is fixed during // construction of this object) which is important for proper sampling // behavior. CRC32 is fast and statistically random enough for these // purposes. uint32_t sampled_num = sampling_seed_; sampled_num = base::Crc32(sampled_num, &source_id, sizeof(source_id)); sampled_num = base::Crc32(sampled_num, &event_id, sizeof(event_id)); return sampled_num % sampling_rate == 0; } void UkmRecorderImpl::StoreWhitelistedEntries() { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); const auto entries = base::SplitString(GetWhitelistEntries(), ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); for (const auto& entry_string : entries) whitelisted_entry_hashes_.insert(base::HashMetricName(entry_string)); decode_map_ = builders::CreateDecodeMap(); } } // namespace ukm