summaryrefslogtreecommitdiff
path: root/chromium/components/ukm
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/ukm')
-rw-r--r--chromium/components/ukm/test_ukm_recorder.cc6
-rw-r--r--chromium/components/ukm/test_ukm_recorder.h2
-rw-r--r--chromium/components/ukm/ukm_recorder_impl.cc72
-rw-r--r--chromium/components/ukm/ukm_recorder_impl.h8
-rw-r--r--chromium/components/ukm/ukm_service_unittest.cc172
-rw-r--r--chromium/components/ukm/ukm_source.cc4
-rw-r--r--chromium/components/ukm/ukm_source.h7
7 files changed, 247 insertions, 24 deletions
diff --git a/chromium/components/ukm/test_ukm_recorder.cc b/chromium/components/ukm/test_ukm_recorder.cc
index 0a3fcd16a6e..5f9e9e60bb6 100644
--- a/chromium/components/ukm/test_ukm_recorder.cc
+++ b/chromium/components/ukm/test_ukm_recorder.cc
@@ -58,6 +58,12 @@ TestUkmRecorder::TestUkmRecorder() {
TestUkmRecorder::~TestUkmRecorder() {
};
+bool TestUkmRecorder::ShouldRestrictToWhitelistedSourceIds() const {
+ // In tests, we want to record all source ids (not just hose that are
+ // whitelisted).
+ return false;
+}
+
std::set<ukm::SourceId> TestUkmRecorder::GetSourceIds() const {
std::set<ukm::SourceId> result;
for (const auto& kv : sources()) {
diff --git a/chromium/components/ukm/test_ukm_recorder.h b/chromium/components/ukm/test_ukm_recorder.h
index b14f9d79cdb..5bce2e87c39 100644
--- a/chromium/components/ukm/test_ukm_recorder.h
+++ b/chromium/components/ukm/test_ukm_recorder.h
@@ -27,6 +27,8 @@ class TestUkmRecorder : public UkmRecorderImpl {
TestUkmRecorder();
~TestUkmRecorder() override;
+ bool ShouldRestrictToWhitelistedSourceIds() const override;
+
size_t sources_count() const { return sources().size(); }
// Get all SourceIds with any data associated with them.
diff --git a/chromium/components/ukm/ukm_recorder_impl.cc b/chromium/components/ukm/ukm_recorder_impl.cc
index cb670812ebf..43adec45b40 100644
--- a/chromium/components/ukm/ukm_recorder_impl.cc
+++ b/chromium/components/ukm/ukm_recorder_impl.cc
@@ -13,6 +13,7 @@
#include "components/metrics/proto/ukm/report.pb.h"
#include "components/metrics/proto/ukm/source.pb.h"
#include "components/ukm/ukm_source.h"
+#include "services/metrics/public/cpp/ukm_source_id.h"
namespace ukm {
@@ -25,6 +26,11 @@ std::string GetWhitelistEntries() {
"WhitelistEntries");
}
+bool IsWhitelistedSourceId(SourceId source_id) {
+ return (static_cast<int64_t>(source_id) &
+ static_cast<int64_t>(SourceIdType::NAVIGATION_ID)) != 0;
+}
+
// Gets the maximum number of Sources we'll keep in memory before discarding any
// new ones being added.
size_t GetMaxSources() {
@@ -33,6 +39,14 @@ size_t GetMaxSources() {
kUkmFeature, "MaxSources", kDefaultMaxSources));
}
+// Gets the maximum number of unferenced Sources kept after purging sources
+// that were added to the log.
+size_t GetMaxKeptSources() {
+ constexpr size_t kDefaultMaxKeptSources = 100;
+ return static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt(
+ kUkmFeature, "MaxKeptSources", kDefaultMaxKeptSources));
+}
+
// Gets the maximum number of Entries we'll keep in memory before discarding any
// new ones being added.
size_t GetMaxEntries() {
@@ -101,21 +115,61 @@ void UkmRecorderImpl::Purge() {
}
void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
- for (const auto& kv : sources_) {
+ std::set<SourceId> ids_seen;
+ for (const auto& entry : entries_) {
+ Entry* proto_entry = report->add_entries();
+ StoreEntryProto(*entry, proto_entry);
+ ids_seen.insert(entry->source_id);
+ }
+
+ std::vector<std::unique_ptr<UkmSource>> unsent_sources;
+ for (auto& kv : sources_) {
+ // If the source id is not whitelisted, don't send it unless it has
+ // associated entries. Note: If ShouldRestrictToWhitelistedSourceIds() is
+ // true, this logic will not be hit as the source would have already been
+ // filtered in UpdateSourceURL().
+ if (!IsWhitelistedSourceId(kv.first) &&
+ !base::ContainsKey(ids_seen, kv.first)) {
+ unsent_sources.push_back(std::move(kv.second));
+ continue;
+ }
Source* proto_source = report->add_sources();
kv.second->PopulateProto(proto_source);
if (!ShouldRecordInitialUrl())
proto_source->clear_initial_url();
}
- for (const auto& entry : entries_) {
- Entry* proto_entry = report->add_entries();
- StoreEntryProto(*entry, proto_entry);
- }
- UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount", sources_.size());
+ UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount",
+ sources_.size() - unsent_sources.size());
UMA_HISTOGRAM_COUNTS_1000("UKM.Entries.SerializedCount", entries_.size());
+ UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount",
+ unsent_sources.size());
sources_.clear();
entries_.clear();
+
+ // Keep at most |max_kept_sources|, prioritizing most-recent entries (by
+ // creation time).
+ const size_t max_kept_sources = GetMaxKeptSources();
+ if (unsent_sources.size() > max_kept_sources) {
+ std::nth_element(unsent_sources.begin(),
+ unsent_sources.begin() + max_kept_sources,
+ unsent_sources.end(),
+ [](const std::unique_ptr<ukm::UkmSource>& lhs,
+ const std::unique_ptr<ukm::UkmSource>& rhs) {
+ return lhs->creation_time() > rhs->creation_time();
+ });
+ unsent_sources.resize(max_kept_sources);
+ }
+
+ for (auto& source : unsent_sources) {
+ sources_.emplace(source->id(), std::move(source));
+ }
+ UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount", sources_.size());
+}
+
+bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const {
+ return base::GetFieldTrialParamByFeatureAsBool(
+ kUkmFeature, "RestrictToWhitelistedSourceIds", true);
}
void UkmRecorderImpl::UpdateSourceURL(ukm::SourceId source_id,
@@ -127,6 +181,12 @@ void UkmRecorderImpl::UpdateSourceURL(ukm::SourceId source_id,
return;
}
+ if (ShouldRestrictToWhitelistedSourceIds() &&
+ !IsWhitelistedSourceId(source_id)) {
+ RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED);
+ return;
+ }
+
// Update the pre-existing source if there is any. This happens when the
// initial URL is different from the committed URL for the same source, e.g.,
// when there is redirection.
diff --git a/chromium/components/ukm/ukm_recorder_impl.h b/chromium/components/ukm/ukm_recorder_impl.h
index 1a20ddab4d1..c3cdc8d254b 100644
--- a/chromium/components/ukm/ukm_recorder_impl.h
+++ b/chromium/components/ukm/ukm_recorder_impl.h
@@ -45,12 +45,14 @@ class UkmRecorderImpl : public UkmRecorder {
// Writes recordings into a report proto, and clears recordings.
void StoreRecordingsInReport(Report* report);
- const std::map<ukm::SourceId, std::unique_ptr<UkmSource>>& sources() const {
+ const std::map<SourceId, std::unique_ptr<UkmSource>>& sources() const {
return sources_;
}
const std::vector<mojom::UkmEntryPtr>& entries() const { return entries_; }
+ virtual bool ShouldRestrictToWhitelistedSourceIds() const;
+
private:
friend ::metrics::UkmBrowserTest;
friend ::ukm::debug::DebugPage;
@@ -63,8 +65,8 @@ class UkmRecorderImpl : public UkmRecorder {
bool recording_enabled_;
// Contains newly added sources and entries of UKM metrics which periodically
- // get serialized and cleared by BuildAndStoreLog().
- std::map<ukm::SourceId, std::unique_ptr<UkmSource>> sources_;
+ // get serialized and cleared by StoreRecordingsInReport().
+ std::map<SourceId, std::unique_ptr<UkmSource>> sources_;
std::vector<mojom::UkmEntryPtr> entries_;
// Whitelisted Entry hashes, only the ones in this set will be recorded.
diff --git a/chromium/components/ukm/ukm_service_unittest.cc b/chromium/components/ukm/ukm_service_unittest.cc
index 75b383ab3a9..4c5b80e401c 100644
--- a/chromium/components/ukm/ukm_service_unittest.cc
+++ b/chromium/components/ukm/ukm_service_unittest.cc
@@ -10,9 +10,13 @@
#include "base/hash.h"
#include "base/metrics/metrics_hashes.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_util.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/test_simple_task_runner.h"
+#include "base/threading/platform_thread.h"
#include "base/threading/thread_task_runner_handle.h"
+#include "base/time/time.h"
#include "components/metrics/proto/ukm/report.pb.h"
#include "components/metrics/proto/ukm/source.pb.h"
#include "components/metrics/test_metrics_provider.h"
@@ -136,6 +140,14 @@ class UkmServiceTest : public testing::Test {
return report;
}
+ static SourceId GetWhitelistedSourceId(int64_t id) {
+ return ConvertToSourceId(id, SourceIdType::NAVIGATION_ID);
+ }
+
+ static SourceId GetNonWhitelistedSourceId(int64_t id) {
+ return ConvertToSourceId(id, SourceIdType::UKM);
+ }
+
protected:
TestingPrefServiceSimple prefs_;
metrics::TestMetricsServiceClient client_;
@@ -173,7 +185,7 @@ TEST_F(UkmServiceTest, PersistAndPurge) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
// Should init, generate a log, and start an upload for source.
task_runner_->RunPendingTasks();
@@ -199,7 +211,7 @@ TEST_F(UkmServiceTest, SourceSerialization) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/initial"));
recorder.UpdateSourceURL(id, GURL("https://google.com/intermediate"));
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
@@ -226,7 +238,7 @@ TEST_F(UkmServiceTest, EntryBuilderAndSerialization) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
{
std::unique_ptr<UkmEntryBuilder> foo_builder =
@@ -290,7 +302,7 @@ TEST_F(UkmServiceTest, AddEntryWithEmptyMetrics) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
{ ::ukm::builders::PageLoad(id).Record(&service); }
@@ -317,7 +329,7 @@ TEST_F(UkmServiceTest, MetricsProviderTest) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
{
::ukm::builders::PageLoad(id)
@@ -350,7 +362,7 @@ TEST_F(UkmServiceTest, LogsUploadedOnlyWhenHavingSourcesOrEntries) {
service.Flush();
EXPECT_EQ(GetPersistedLogCount(), 0);
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
// Includes a Source, so will persist.
service.Flush();
@@ -405,7 +417,7 @@ TEST_F(UkmServiceTest, RecordInitialUrl) {
service.EnableRecording();
service.EnableReporting();
- ukm::SourceId id = UkmRecorder::GetNewSourceID();
+ ukm::SourceId id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/initial"));
recorder.UpdateSourceURL(id, GURL("https://google.com/intermediate"));
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
@@ -427,6 +439,55 @@ TEST_F(UkmServiceTest, RecordInitialUrl) {
}
}
+TEST_F(UkmServiceTest, RestrictToWhitelistedSourceIds) {
+ for (bool restrict_to_whitelisted_source_ids : {true, false}) {
+ base::FieldTrialList field_trial_list(nullptr /* entropy_provider */);
+ ScopedUkmFeatureParams params(
+ base::FeatureList::OVERRIDE_ENABLE_FEATURE,
+ {{"RestrictToWhitelistedSourceIds",
+ restrict_to_whitelisted_source_ids ? "true" : "false"}});
+
+ ClearPrefs();
+ UkmService service(&prefs_, &client_);
+ TestRecordingHelper recorder(&service);
+ EXPECT_EQ(GetPersistedLogCount(), 0);
+ service.Initialize();
+ task_runner_->RunUntilIdle();
+ service.EnableRecording();
+ service.EnableReporting();
+
+ ukm::SourceId id1 = GetWhitelistedSourceId(0);
+ recorder.UpdateSourceURL(id1, GURL("https://other.com/"));
+ recorder.GetEntryBuilder(id1, "FakeEntry");
+
+ // Create a non-navigation-based sourceid, which should not be whitelisted.
+ ukm::SourceId id2 = UkmRecorder::GetNewSourceID();
+ recorder.UpdateSourceURL(id2, GURL("https://example.com/"));
+ recorder.GetEntryBuilder(id2, "FakeEntry");
+
+ service.Flush();
+ EXPECT_EQ(GetPersistedLogCount(), 1);
+ Report proto_report = GetPersistedReport();
+ EXPECT_GE(proto_report.sources_size(), 1);
+
+ // The whitelisted source should always be recorded.
+ const Source& proto_source1 = proto_report.sources(0);
+ EXPECT_EQ(id1, proto_source1.id());
+ EXPECT_EQ(GURL("https://other.com/").spec(), proto_source1.url());
+
+ // The non-whitelisted source should only be recorded if we aren't
+ // restricted to whitelisted source ids.
+ if (restrict_to_whitelisted_source_ids) {
+ EXPECT_EQ(1, proto_report.sources_size());
+ } else {
+ EXPECT_EQ(2, proto_report.sources_size());
+ const Source& proto_source2 = proto_report.sources(1);
+ EXPECT_EQ(id2, proto_source2.id());
+ EXPECT_EQ(GURL("https://example.com/").spec(), proto_source2.url());
+ }
+ }
+}
+
TEST_F(UkmServiceTest, RecordSessionId) {
ClearPrefs();
UkmService service(&prefs_, &client_);
@@ -437,7 +498,7 @@ TEST_F(UkmServiceTest, RecordSessionId) {
service.EnableRecording();
service.EnableReporting();
- auto id = UkmRecorder::GetNewSourceID();
+ auto id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar"));
service.Flush();
@@ -463,11 +524,11 @@ TEST_F(UkmServiceTest, SourceSize) {
service.EnableRecording();
service.EnableReporting();
- auto id = UkmRecorder::GetNewSourceID();
+ auto id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1"));
- id = UkmRecorder::GetNewSourceID();
+ id = GetWhitelistedSourceId(1);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar2"));
- id = UkmRecorder::GetNewSourceID();
+ id = GetWhitelistedSourceId(2);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar3"));
service.Flush();
@@ -487,7 +548,7 @@ TEST_F(UkmServiceTest, PurgeMidUpload) {
task_runner_->RunUntilIdle();
service.EnableRecording();
service.EnableReporting();
- auto id = UkmRecorder::GetNewSourceID();
+ auto id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1"));
// Should init, generate a log, and start an upload.
task_runner_->RunPendingTasks();
@@ -515,7 +576,7 @@ TEST_F(UkmServiceTest, WhitelistEntryTest) {
service.EnableRecording();
service.EnableReporting();
- auto id = UkmRecorder::GetNewSourceID();
+ auto id = GetWhitelistedSourceId(0);
recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1"));
{
@@ -561,7 +622,7 @@ TEST_F(UkmServiceTest, SourceURLLength) {
service.EnableRecording();
service.EnableReporting();
- auto id = UkmRecorder::GetNewSourceID();
+ auto id = GetWhitelistedSourceId(0);
// This URL is too long to be recorded fully.
const std::string long_string = "https://" + std::string(10000, 'a');
@@ -576,4 +637,87 @@ TEST_F(UkmServiceTest, SourceURLLength) {
EXPECT_EQ("URLTooLong", proto_source.url());
}
+TEST_F(UkmServiceTest, UnreferencedNonWhitelistedSources) {
+ for (bool restrict_to_whitelisted_source_ids : {true, false}) {
+ base::FieldTrialList field_trial_list(nullptr /* entropy_provider */);
+ // Set a threshold of number of Sources via Feature Params.
+ ScopedUkmFeatureParams params(
+ base::FeatureList::OVERRIDE_ENABLE_FEATURE,
+ {{"MaxKeptSources", "3"},
+ {"WhitelistEntries", "EntryA,EntryB"},
+ {"RestrictToWhitelistedSourceIds",
+ restrict_to_whitelisted_source_ids ? "true" : "false"}});
+
+ ClearPrefs();
+ UkmService service(&prefs_, &client_);
+ TestRecordingHelper recorder(&service);
+ EXPECT_EQ(0, GetPersistedLogCount());
+ service.Initialize();
+ task_runner_->RunUntilIdle();
+ service.EnableRecording();
+ service.EnableReporting();
+
+ std::vector<SourceId> ids;
+ base::TimeTicks last_time = base::TimeTicks::Now();
+ for (int i = 0; i < 6; ++i) {
+ // Wait until base::TimeTicks::Now() no longer equals |last_time|. This
+ // ensures each source has a unique timestamp to avoid flakes. Should take
+ // between 1-15ms per documented resolution of base::TimeTicks.
+ while (base::TimeTicks::Now() == last_time) {
+ base::PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(1));
+ }
+
+ ids.push_back(GetNonWhitelistedSourceId(i));
+ recorder.UpdateSourceURL(
+ ids.back(), GURL("https://google.com/foobar" + base::IntToString(i)));
+ last_time = base::TimeTicks::Now();
+ }
+
+ // Add whitelisted entries for 0, 2 and non-whitelisted entries for 2, 3.
+ recorder.GetEntryBuilder(ids[0], "EntryA")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[2], "EntryB")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[2], "EntryC")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[3], "EntryC")->AddMetric("Metric", 500);
+
+ service.Flush();
+ EXPECT_EQ(1, GetPersistedLogCount());
+ auto proto_report = GetPersistedReport();
+
+ if (restrict_to_whitelisted_source_ids) {
+ ASSERT_EQ(0, proto_report.sources_size());
+ } else {
+ ASSERT_EQ(2, proto_report.sources_size());
+ EXPECT_EQ(ids[0], proto_report.sources(0).id());
+ EXPECT_EQ("https://google.com/foobar0", proto_report.sources(0).url());
+ EXPECT_EQ(ids[2], proto_report.sources(1).id());
+ EXPECT_EQ("https://google.com/foobar2", proto_report.sources(1).url());
+ }
+
+ // Since MaxKeptSources is 3, only Sources 5, 4, 3 should be retained.
+ // Log entries under 0, 1, 3 and 4. Log them in reverse order - which
+ // shouldn't affect source ordering in the output.
+ // - Source 0 should not be re-transmitted since it was sent before.
+ // - Source 1 should not be transmitted due to MaxKeptSources param.
+ // - Sources 3 and 4 should be transmitted since they were not sent before.
+ recorder.GetEntryBuilder(ids[4], "EntryA")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[3], "EntryA")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[1], "EntryA")->AddMetric("Metric", 500);
+ recorder.GetEntryBuilder(ids[0], "EntryA")->AddMetric("Metric", 500);
+
+ service.Flush();
+ EXPECT_EQ(2, GetPersistedLogCount());
+ proto_report = GetPersistedReport();
+
+ if (restrict_to_whitelisted_source_ids) {
+ ASSERT_EQ(0, proto_report.sources_size());
+ } else {
+ ASSERT_EQ(2, proto_report.sources_size());
+ EXPECT_EQ(ids[3], proto_report.sources(0).id());
+ EXPECT_EQ("https://google.com/foobar3", proto_report.sources(0).url());
+ EXPECT_EQ(ids[4], proto_report.sources(1).id());
+ EXPECT_EQ("https://google.com/foobar4", proto_report.sources(1).url());
+ }
+ }
+}
+
} // namespace ukm
diff --git a/chromium/components/ukm/ukm_source.cc b/chromium/components/ukm/ukm_source.cc
index 5324048fc28..4a701495ecb 100644
--- a/chromium/components/ukm/ukm_source.cc
+++ b/chromium/components/ukm/ukm_source.cc
@@ -39,7 +39,9 @@ void UkmSource::SetCustomTabVisible(bool visible) {
g_custom_tab_state = visible ? kCustomTabTrue : kCustomTabFalse;
}
-UkmSource::UkmSource() : custom_tab_state_(g_custom_tab_state) {}
+UkmSource::UkmSource()
+ : custom_tab_state_(g_custom_tab_state),
+ creation_time_(base::TimeTicks::Now()) {}
UkmSource::~UkmSource() = default;
diff --git a/chromium/components/ukm/ukm_source.h b/chromium/components/ukm/ukm_source.h
index 6492cd1cf5b..e053a1f22c7 100644
--- a/chromium/components/ukm/ukm_source.h
+++ b/chromium/components/ukm/ukm_source.h
@@ -35,6 +35,10 @@ class UkmSource {
const GURL& initial_url() const { return initial_url_; }
const GURL& url() const { return url_; }
+ // The object creation time. This is for internal purposes only and is not
+ // intended to be anything useful for UKM clients.
+ const base::TimeTicks creation_time() const { return creation_time_; }
+
// Sets the URL for this source. Should be invoked when a source is
// initialized.
void set_url(const GURL& url) { url_ = url; }
@@ -66,6 +70,9 @@ class UkmSource {
// the metric was created.
const CustomTabState custom_tab_state_;
+ // When this object was created.
+ const base::TimeTicks creation_time_;
+
DISALLOW_COPY_AND_ASSIGN(UkmSource);
};