summaryrefslogtreecommitdiff
path: root/chromium/components/ukm/ukm_recorder_impl.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/ukm/ukm_recorder_impl.cc')
-rw-r--r--chromium/components/ukm/ukm_recorder_impl.cc341
1 files changed, 299 insertions, 42 deletions
diff --git a/chromium/components/ukm/ukm_recorder_impl.cc b/chromium/components/ukm/ukm_recorder_impl.cc
index c24b930c61b..70f51137e3d 100644
--- a/chromium/components/ukm/ukm_recorder_impl.cc
+++ b/chromium/components/ukm/ukm_recorder_impl.cc
@@ -22,6 +22,7 @@
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "components/ukm/scheme_constants.h"
+#include "components/ukm/ukm_recorder_observer.h"
#include "components/variations/variations_associated_data.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_decode.h"
@@ -49,6 +50,11 @@ bool IsWhitelistedSourceId(SourceId source_id) {
type == SourceIdType::NO_URL_ID;
}
+bool IsAppIdType(SourceId source_id) {
+ SourceIdType type = GetSourceIdType(source_id);
+ return type == SourceIdType::APP_ID;
+}
+
// Returns whether |url| has one of the schemes supported for logging to UKM.
// URLs with other schemes will not be logged.
bool HasSupportedScheme(const GURL& url) {
@@ -89,6 +95,12 @@ void RecordDroppedSource(DroppedDataReason reason) {
static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS));
}
+void RecordDroppedSource(bool already_recorded_another_reason,
+ DroppedDataReason reason) {
+ if (!already_recorded_another_reason)
+ RecordDroppedSource(reason);
+}
+
void RecordDroppedEntry(uint64_t event_hash, DroppedDataReason reason) {
LogEventHashAsUmaHistogram("UKM.Entries.Dropped.ByEntryHash", event_hash);
@@ -256,6 +268,8 @@ void UkmRecorderImpl::Purge() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
recordings_.Reset();
recording_is_continuous_ = false;
+
+ NotifyAllObservers(&UkmRecorderObserver::OnPurge);
}
void UkmRecorderImpl::PurgeRecordingsWithUrlScheme(
@@ -273,6 +287,9 @@ void UkmRecorderImpl::PurgeRecordingsWithUrlScheme(
PurgeSourcesAndEventsBySourceIds(relevant_source_ids);
recording_is_continuous_ = false;
+
+ NotifyAllObservers(&UkmRecorderObserver::OnPurgeRecordingsWithUrlScheme,
+ url_scheme);
}
void UkmRecorderImpl::PurgeRecordingsWithSourceIdType(
@@ -322,6 +339,32 @@ void UkmRecorderImpl::SetEntryFilter(
entry_filter_ = std::move(entry_filter);
}
+void UkmRecorderImpl::AddUkmRecorderObserver(
+ const base::flat_set<uint64_t>& event_hashes,
+ UkmRecorderObserver* observer) {
+ DCHECK(observer);
+ base::AutoLock auto_lock(lock_);
+ scoped_refptr<UkmRecorderObserverList> observers;
+ if (observers_.find(event_hashes) == observers_.end()) {
+ observers_.insert(
+ {event_hashes, base::MakeRefCounted<UkmRecorderObserverList>()});
+ }
+
+ observers_[event_hashes]->AddObserver(observer);
+}
+
+void UkmRecorderImpl::RemoveUkmRecorderObserver(UkmRecorderObserver* observer) {
+ base::AutoLock auto_lock(lock_);
+ for (auto it = observers_.begin(); it != observers_.end();) {
+ if (it->second->RemoveObserver(observer) ==
+ UkmRecorderObserverList::RemoveObserverResult::kWasOrBecameEmpty) {
+ it = observers_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+}
+
// TODO(rkaplow): This should be refactored.
void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
@@ -333,6 +376,7 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
StoreEntryProto(*entry, proto_entry);
source_ids_seen.insert(entry->source_id);
}
+
// Number of sources excluded from this report because no entries referred to
// them.
const int num_sources_unsent =
@@ -346,6 +390,7 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
// Number of sources discarded due to not matching a navigation URL.
int num_sources_unmatched = 0;
+
std::unordered_map<SourceIdType, int> serialized_source_type_counts;
for (const auto& kv : recordings_.sources) {
@@ -375,11 +420,10 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
// Omit entryless sources from the report.
if (!base::Contains(source_ids_seen, kv.first)) {
continue;
- } else {
- // Source of ukm::SourceIdObj::Type::DEFAULT type will not be kept
- // after entries are logged.
- MarkSourceForDeletion(kv.first);
}
+
+ // Non-whitelisted Source types will not be kept after entries are logged.
+ MarkSourceForDeletion(kv.first);
}
// Minimal validations before serializing into a proto message.
// See crbug/1274876.
@@ -437,8 +481,8 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
}
}
int num_serialized_sources = 0;
- for (const auto& entry : serialized_source_type_counts) {
- num_serialized_sources += entry.second;
+ for (const auto& source_type_and_count : serialized_source_type_counts) {
+ num_serialized_sources += source_type_and_count.second;
}
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2",
@@ -502,16 +546,151 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
report->set_is_continuous(recording_is_continuous_);
recording_is_continuous_ = true;
- // Defer at most GetMaxKeptSources() sources to the next report,
- // prioritizing most recently created ones.
- int pruned_sources_age = PruneOldSources(max_kept_sources_);
+ // Modify the set source_ids_seen by removing sources that aren't in
+ // recordings_. We do this here as there is a few places for
+ // recordings_.sources to be modified. The resulting set will be currently
+ // existing sources that were seen in this report.
+ auto it = source_ids_seen.begin();
+ while (it != source_ids_seen.end()) {
+ if (!base::Contains(recordings_.sources, *it)) {
+ it = source_ids_seen.erase(it);
+ } else {
+ it++;
+ }
+ }
+
+ // Build the set of sources that exist in recordings_.sources that were not
+ // seen in this report.
+ std::set<SourceId> source_ids_unseen;
+ for (const auto& kv : recordings_.sources) {
+ if (!base::Contains(source_ids_seen, kv.first)) {
+ source_ids_unseen.insert(kv.first);
+ }
+ }
+
+ // Special case APP_IDs. Ideally this is not going to exist for too long, as
+ // it would be preferable to have a more general purpose solution.
+ std::set<SourceId> source_ids_app_id;
+
+ // Only done if we are in the experiment that will leave APP_ID metrics for
+ // last when pruning. This block extracts out all source_ids from the
+ // seen/unseen lists and stores them in |source_ids_app_id|.
+ if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
+ false)) {
+ it = source_ids_seen.begin();
+ while (it != source_ids_seen.end()) {
+ if (IsAppIdType(*it)) {
+ source_ids_app_id.insert(*it);
+ it = source_ids_seen.erase(it);
+ } else {
+ it++;
+ }
+ }
+
+ it = source_ids_unseen.begin();
+ while (it != source_ids_unseen.end()) {
+ if (IsAppIdType(*it)) {
+ source_ids_app_id.insert(*it);
+ it = source_ids_unseen.erase(it);
+ } else {
+ it++;
+ }
+ }
+ }
+
+ int pruned_sources_age_sec = 0;
+ int num_sources = recordings_.sources.size();
+ // Setup an experiment to test what will occur if we prune unseen sources
+ // first.
+ if (base::GetFieldTrialParamByFeatureAsBool(
+ kUkmFeature, "PruneUnseenSourcesFirst", false)) {
+ int pruned_sources_age_from_unseen_sec =
+ PruneOldSources(max_kept_sources_, source_ids_unseen);
+
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumUnseen",
+ num_sources - recordings_.sources.size());
+ num_sources = recordings_.sources.size();
+
+ // Prune again from seen sources. Note that if we've already pruned enough
+ // from the unseen sources, this will be a noop.
+ int pruned_sources_age_from_seen_sec =
+ PruneOldSources(max_kept_sources_, source_ids_seen);
+
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumSeen",
+ num_sources - recordings_.sources.size());
+ num_sources = recordings_.sources.size();
+
+ int pruned_sources_age_from_app_id_sec = 0;
+
+ // Technically this should be fine without the feature, since the group
+ // will be empty, but might as well add the feature check.
+ // Still prune the APP_ID entries. We don't want it to be unbounded, but
+ // providing a higher default here in case.
+ if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
+ false)) {
+ pruned_sources_age_from_app_id_sec =
+ PruneOldSources(500, source_ids_app_id);
+
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NumAppId",
+ num_sources - recordings_.sources.size());
+ }
+
+ // We're looking for the newest age, which will be the largest between the
+ // two sets we pruned from.
+ pruned_sources_age_sec = std::max({pruned_sources_age_from_unseen_sec,
+ pruned_sources_age_from_seen_sec,
+ pruned_sources_age_from_app_id_sec});
+
+ } else {
+ // In this case, we prune all sources without caring if they were seen or
+ // not. Make a set of all existing sources so we can use the same
+ // PruneOldSources method.
+ std::set<SourceId> all_sources;
+ for (const auto& kv : recordings_.sources) {
+ all_sources.insert(kv.first);
+ }
+ if (base::GetFieldTrialParamByFeatureAsBool(kUkmFeature, "PruneAppIdLast",
+ false)) {
+ std::set<SourceId> all_sources_without_app_id;
+
+ // This will put into |all_sources_without_app_id| the set of
+ // |all_sources| - |source_ids_app_id|.
+ std::set_difference(all_sources.begin(), all_sources.end(),
+ source_ids_app_id.begin(), source_ids_app_id.end(),
+ std::inserter(all_sources_without_app_id,
+ all_sources_without_app_id.end()));
+
+ // Now, prune the non-APP_ID, then the APP_ID.
+ int pruned_sources_age_sec_non_app_id =
+ PruneOldSources(max_kept_sources_, all_sources_without_app_id);
+
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.AppExpNumNonAppId",
+ num_sources - recordings_.sources.size());
+ num_sources = recordings_.sources.size();
+
+ int pruned_sources_age_sec_app_id =
+ PruneOldSources(500, source_ids_app_id);
+
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.AppExpNumAppId",
+ num_sources - recordings_.sources.size());
+
+ pruned_sources_age_sec = std::max(pruned_sources_age_sec_non_app_id,
+ pruned_sources_age_sec_app_id);
+
+ } else {
+ pruned_sources_age_sec = PruneOldSources(max_kept_sources_, all_sources);
+ UMA_HISTOGRAM_COUNTS_10000("UKM.PrunedSources.NoExp",
+ num_sources - recordings_.sources.size());
+ }
+ }
+
// Record how old the newest truncated source is.
- source_counts_proto->set_pruned_sources_age_seconds(pruned_sources_age);
+ source_counts_proto->set_pruned_sources_age_seconds(pruned_sources_age_sec);
// Set deferred sources count after pruning.
source_counts_proto->set_deferred_sources(recordings_.sources.size());
- // Same value as the deferred source count, for setting the carryover count in
- // the next reporting cycle.
+ // Same value as the deferred source count, for setting the carryover count
+ // in the next reporting cycle.
recordings_.source_counts.carryover_sources = recordings_.sources.size();
// We already matched these deferred sources against the URL whitelist.
@@ -565,29 +744,62 @@ bool UkmRecorderImpl::ApplyEntryFilter(mojom::UkmEntry* entry) {
return true;
}
-int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources) {
- if (recordings_.sources.size() <= max_kept_sources)
+int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources,
+ const std::set<SourceId>& pruning_set) {
+ long num_prune_required = recordings_.sources.size() - max_kept_sources;
+ // In either case here, nothing to be done.
+ if (num_prune_required <= 0 || pruning_set.size() == 0)
return 0;
+ // We can prune everything, so let's do that directly.
+ if (static_cast<unsigned long>(num_prune_required) >= pruning_set.size()) {
+ base::TimeTicks pruned_sources_age = base::TimeTicks();
+ for (const auto& source_id : pruning_set) {
+ auto creation_time = recordings_.sources[source_id]->creation_time();
+ if (creation_time > pruned_sources_age)
+ pruned_sources_age = creation_time;
+
+ recordings_.sources.erase(source_id);
+ }
+ base::TimeDelta age_delta = base::TimeTicks::Now() - pruned_sources_age;
+ // Technically the age we return here isn't quite right, this is the age of
+ // the newest element of the pruned set, while we actually want the age of
+ // the last one kept. However it's very unlikely to make a difference in
+ // practice as if all are pruned here, it is very likely we'll need to prune
+ // from the seen set next. Since it would be logically quite a bit more
+ // complex to get this exactly right, it's ok for this to be very slightly
+ // off in an edge case just to keep complexity down.
+ return age_delta.InSeconds();
+ }
+
+ // In this case we cannot prune everything, so we will select only the oldest
+ // sources to prune.
+
+ // Build a list of timestamp->source pairs for all source we consider for
+ // pruning.
std::vector<std::pair<base::TimeTicks, SourceId>> timestamp_source_id_pairs;
- for (const auto& kv : recordings_.sources) {
- timestamp_source_id_pairs.push_back(
- std::make_pair(kv.second->creation_time(), kv.first));
+ for (const auto& source_id : pruning_set) {
+ auto creation_time = recordings_.sources[source_id]->creation_time();
+ timestamp_source_id_pairs.emplace_back(
+ std::make_pair(creation_time, source_id));
}
- // Partially sort so that the last |max_kept_sources| elements are the
+
+ // Partially sort so that the last |num_prune_required| elements are the
// newest.
std::nth_element(timestamp_source_id_pairs.begin(),
- timestamp_source_id_pairs.end() - max_kept_sources,
+ timestamp_source_id_pairs.end() - num_prune_required,
timestamp_source_id_pairs.end());
- for (auto kv = timestamp_source_id_pairs.begin();
- kv != timestamp_source_id_pairs.end() - max_kept_sources; ++kv) {
- recordings_.sources.erase(kv->second);
+ // Actually prune |num_prune_required| sources.
+ for (int i = 0; i < num_prune_required; i++) {
+ auto source_id = timestamp_source_id_pairs[i].second;
+ recordings_.sources.erase(source_id);
}
base::TimeDelta pruned_sources_age =
base::TimeTicks::Now() -
- (timestamp_source_id_pairs.end() - (max_kept_sources + 1))->first;
+ (timestamp_source_id_pairs.end() - (num_prune_required + 1))->first;
+
return pruned_sources_age.InSeconds();
}
@@ -600,9 +812,10 @@ void UkmRecorderImpl::UpdateSourceURL(SourceId source_id,
return;
const GURL sanitized_url = SanitizeURL(unsanitized_url);
- if (!ShouldRecordUrl(source_id, sanitized_url))
+ if (ShouldRecordUrl(source_id, sanitized_url) ==
+ ShouldRecordUrlResult::kDropped) {
return;
-
+ }
RecordSource(std::make_unique<UkmSource>(source_id, sanitized_url));
}
@@ -627,8 +840,10 @@ void UkmRecorderImpl::RecordNavigation(
std::vector<GURL> urls;
for (const GURL& url : unsanitized_navigation_data.urls) {
const GURL sanitized_url = SanitizeURL(url);
- if (ShouldRecordUrl(source_id, sanitized_url))
+ if (ShouldRecordUrl(source_id, sanitized_url) !=
+ ShouldRecordUrlResult::kDropped) {
urls.push_back(std::move(sanitized_url));
+ }
}
// None of the URLs passed the ShouldRecordUrl check, so do not create a new
@@ -642,54 +857,73 @@ void UkmRecorderImpl::RecordNavigation(
std::make_unique<UkmSource>(source_id, sanitized_navigation_data));
}
-bool UkmRecorderImpl::ShouldRecordUrl(SourceId source_id,
- const GURL& sanitized_url) const {
+UkmRecorderImpl::ShouldRecordUrlResult UkmRecorderImpl::ShouldRecordUrl(
+ SourceId source_id,
+ const GURL& sanitized_url) const {
+ ShouldRecordUrlResult result = ShouldRecordUrlResult::kOk;
+ bool has_recorded_reason = false;
if (!recording_enabled_) {
RecordDroppedSource(DroppedDataReason::RECORDING_DISABLED);
- return false;
+ // Don't return the result yet. Check if the we are allowed to notify
+ // observers, as they may rely on the not uploaded metrics to determine
+ // how some features should work.
+ result = ShouldRecordUrlResult::kObserverOnly;
+ has_recorded_reason = true;
}
if (recordings_.sources.size() >= max_sources_) {
- RecordDroppedSource(DroppedDataReason::MAX_HIT);
- return false;
+ RecordDroppedSource(has_recorded_reason, DroppedDataReason::MAX_HIT);
+ return ShouldRecordUrlResult::kDropped;
}
if (ShouldRestrictToWhitelistedSourceIds() &&
!IsWhitelistedSourceId(source_id)) {
- RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED);
- return false;
+ RecordDroppedSource(has_recorded_reason,
+ DroppedDataReason::NOT_WHITELISTED);
+ return ShouldRecordUrlResult::kDropped;
}
if (sanitized_url.is_empty()) {
- RecordDroppedSource(DroppedDataReason::EMPTY_URL);
- return false;
+ RecordDroppedSource(has_recorded_reason, DroppedDataReason::EMPTY_URL);
+ return ShouldRecordUrlResult::kDropped;
}
if (!HasSupportedScheme(sanitized_url)) {
- RecordDroppedSource(DroppedDataReason::UNSUPPORTED_URL_SCHEME);
+ RecordDroppedSource(has_recorded_reason,
+ DroppedDataReason::UNSUPPORTED_URL_SCHEME);
DVLOG(2) << "Dropped Unsupported UKM URL:" << source_id << ":"
<< sanitized_url.spec();
- return false;
+ return ShouldRecordUrlResult::kDropped;
}
// Extension URLs need to be specifically enabled and the extension synced.
if (sanitized_url.SchemeIs(kExtensionScheme)) {
DCHECK_EQ(sanitized_url.GetWithEmptyPath(), sanitized_url);
if (!extensions_enabled_) {
- RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED);
- return false;
+ RecordDroppedSource(has_recorded_reason,
+ DroppedDataReason::EXTENSION_URLS_DISABLED);
+ return ShouldRecordUrlResult::kDropped;
}
if (!is_webstore_extension_callback_ ||
!is_webstore_extension_callback_.Run(sanitized_url.host_piece())) {
- RecordDroppedSource(DroppedDataReason::EXTENSION_NOT_SYNCED);
- return false;
+ RecordDroppedSource(has_recorded_reason,
+ DroppedDataReason::EXTENSION_NOT_SYNCED);
+ return ShouldRecordUrlResult::kDropped;
}
}
- return true;
+ return result;
}
void UkmRecorderImpl::RecordSource(std::unique_ptr<UkmSource> source) {
SourceId source_id = source->id();
+ // If UKM recording is disabled due to |recording_enabled_|, still notify
+ // observers as they might be interested in it.
+ NotifyAllObservers(&UkmRecorderObserver::OnUpdateSourceURL, source_id,
+ source->urls());
+ if (!recording_enabled_) {
+ return;
+ }
+
if (GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID)
recordings_.source_counts.navigation_sources++;
recordings_.source_counts.observed++;
@@ -700,6 +934,8 @@ void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(!HasUnknownMetrics(decode_map_, *entry));
+ NotifyObserversWithNewEntry(*entry);
+
if (!recording_enabled_) {
RecordDroppedEntry(entry->event_hash,
DroppedDataReason::RECORDING_DISABLED);
@@ -864,4 +1100,25 @@ void UkmRecorderImpl::InitDecodeMap() {
decode_map_ = builders::CreateDecodeMap();
}
+void UkmRecorderImpl::NotifyObserversWithNewEntry(
+ const mojom::UkmEntry& entry) {
+ base::AutoLock auto_lock(lock_);
+
+ for (const auto& observer : observers_) {
+ if (observer.first.contains(entry.event_hash)) {
+ mojom::UkmEntryPtr cloned = entry.Clone();
+ observer.second->Notify(FROM_HERE, &UkmRecorderObserver::OnEntryAdded,
+ base::Passed(&cloned));
+ }
+ }
+}
+
+template <typename Method, typename... Params>
+void UkmRecorderImpl::NotifyAllObservers(Method m, Params&&... params) {
+ base::AutoLock auto_lock(lock_);
+ for (const auto& observer : observers_) {
+ observer.second->Notify(FROM_HERE, m, std::forward<Params>(params)...);
+ }
+}
+
} // namespace ukm