diff options
Diffstat (limited to 'chromium/components/ukm')
-rw-r--r-- | chromium/components/ukm/test_ukm_recorder.cc | 6 | ||||
-rw-r--r-- | chromium/components/ukm/test_ukm_recorder.h | 2 | ||||
-rw-r--r-- | chromium/components/ukm/ukm_recorder_impl.cc | 72 | ||||
-rw-r--r-- | chromium/components/ukm/ukm_recorder_impl.h | 8 | ||||
-rw-r--r-- | chromium/components/ukm/ukm_service_unittest.cc | 172 | ||||
-rw-r--r-- | chromium/components/ukm/ukm_source.cc | 4 | ||||
-rw-r--r-- | chromium/components/ukm/ukm_source.h | 7 |
7 files changed, 247 insertions, 24 deletions
diff --git a/chromium/components/ukm/test_ukm_recorder.cc b/chromium/components/ukm/test_ukm_recorder.cc index 0a3fcd16a6e..5f9e9e60bb6 100644 --- a/chromium/components/ukm/test_ukm_recorder.cc +++ b/chromium/components/ukm/test_ukm_recorder.cc @@ -58,6 +58,12 @@ TestUkmRecorder::TestUkmRecorder() { TestUkmRecorder::~TestUkmRecorder() { }; +bool TestUkmRecorder::ShouldRestrictToWhitelistedSourceIds() const { + // In tests, we want to record all source ids (not just hose that are + // whitelisted). + return false; +} + std::set<ukm::SourceId> TestUkmRecorder::GetSourceIds() const { std::set<ukm::SourceId> result; for (const auto& kv : sources()) { diff --git a/chromium/components/ukm/test_ukm_recorder.h b/chromium/components/ukm/test_ukm_recorder.h index b14f9d79cdb..5bce2e87c39 100644 --- a/chromium/components/ukm/test_ukm_recorder.h +++ b/chromium/components/ukm/test_ukm_recorder.h @@ -27,6 +27,8 @@ class TestUkmRecorder : public UkmRecorderImpl { TestUkmRecorder(); ~TestUkmRecorder() override; + bool ShouldRestrictToWhitelistedSourceIds() const override; + size_t sources_count() const { return sources().size(); } // Get all SourceIds with any data associated with them. diff --git a/chromium/components/ukm/ukm_recorder_impl.cc b/chromium/components/ukm/ukm_recorder_impl.cc index cb670812ebf..43adec45b40 100644 --- a/chromium/components/ukm/ukm_recorder_impl.cc +++ b/chromium/components/ukm/ukm_recorder_impl.cc @@ -13,6 +13,7 @@ #include "components/metrics/proto/ukm/report.pb.h" #include "components/metrics/proto/ukm/source.pb.h" #include "components/ukm/ukm_source.h" +#include "services/metrics/public/cpp/ukm_source_id.h" namespace ukm { @@ -25,6 +26,11 @@ std::string GetWhitelistEntries() { "WhitelistEntries"); } +bool IsWhitelistedSourceId(SourceId source_id) { + return (static_cast<int64_t>(source_id) & + static_cast<int64_t>(SourceIdType::NAVIGATION_ID)) != 0; +} + // Gets the maximum number of Sources we'll keep in memory before discarding any // new ones being added. size_t GetMaxSources() { @@ -33,6 +39,14 @@ size_t GetMaxSources() { kUkmFeature, "MaxSources", kDefaultMaxSources)); } +// Gets the maximum number of unferenced Sources kept after purging sources +// that were added to the log. +size_t GetMaxKeptSources() { + constexpr size_t kDefaultMaxKeptSources = 100; + return static_cast<size_t>(base::GetFieldTrialParamByFeatureAsInt( + kUkmFeature, "MaxKeptSources", kDefaultMaxKeptSources)); +} + // Gets the maximum number of Entries we'll keep in memory before discarding any // new ones being added. size_t GetMaxEntries() { @@ -101,21 +115,61 @@ void UkmRecorderImpl::Purge() { } void UkmRecorderImpl::StoreRecordingsInReport(Report* report) { - for (const auto& kv : sources_) { + std::set<SourceId> ids_seen; + for (const auto& entry : entries_) { + Entry* proto_entry = report->add_entries(); + StoreEntryProto(*entry, proto_entry); + ids_seen.insert(entry->source_id); + } + + std::vector<std::unique_ptr<UkmSource>> unsent_sources; + for (auto& kv : sources_) { + // If the source id is not whitelisted, don't send it unless it has + // associated entries. Note: If ShouldRestrictToWhitelistedSourceIds() is + // true, this logic will not be hit as the source would have already been + // filtered in UpdateSourceURL(). + if (!IsWhitelistedSourceId(kv.first) && + !base::ContainsKey(ids_seen, kv.first)) { + unsent_sources.push_back(std::move(kv.second)); + continue; + } Source* proto_source = report->add_sources(); kv.second->PopulateProto(proto_source); if (!ShouldRecordInitialUrl()) proto_source->clear_initial_url(); } - for (const auto& entry : entries_) { - Entry* proto_entry = report->add_entries(); - StoreEntryProto(*entry, proto_entry); - } - UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount", sources_.size()); + UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount", + sources_.size() - unsent_sources.size()); UMA_HISTOGRAM_COUNTS_1000("UKM.Entries.SerializedCount", entries_.size()); + UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount", + unsent_sources.size()); sources_.clear(); entries_.clear(); + + // Keep at most |max_kept_sources|, prioritizing most-recent entries (by + // creation time). + const size_t max_kept_sources = GetMaxKeptSources(); + if (unsent_sources.size() > max_kept_sources) { + std::nth_element(unsent_sources.begin(), + unsent_sources.begin() + max_kept_sources, + unsent_sources.end(), + [](const std::unique_ptr<ukm::UkmSource>& lhs, + const std::unique_ptr<ukm::UkmSource>& rhs) { + return lhs->creation_time() > rhs->creation_time(); + }); + unsent_sources.resize(max_kept_sources); + } + + for (auto& source : unsent_sources) { + sources_.emplace(source->id(), std::move(source)); + } + UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount", sources_.size()); +} + +bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const { + return base::GetFieldTrialParamByFeatureAsBool( + kUkmFeature, "RestrictToWhitelistedSourceIds", true); } void UkmRecorderImpl::UpdateSourceURL(ukm::SourceId source_id, @@ -127,6 +181,12 @@ void UkmRecorderImpl::UpdateSourceURL(ukm::SourceId source_id, return; } + if (ShouldRestrictToWhitelistedSourceIds() && + !IsWhitelistedSourceId(source_id)) { + RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED); + return; + } + // Update the pre-existing source if there is any. This happens when the // initial URL is different from the committed URL for the same source, e.g., // when there is redirection. diff --git a/chromium/components/ukm/ukm_recorder_impl.h b/chromium/components/ukm/ukm_recorder_impl.h index 1a20ddab4d1..c3cdc8d254b 100644 --- a/chromium/components/ukm/ukm_recorder_impl.h +++ b/chromium/components/ukm/ukm_recorder_impl.h @@ -45,12 +45,14 @@ class UkmRecorderImpl : public UkmRecorder { // Writes recordings into a report proto, and clears recordings. void StoreRecordingsInReport(Report* report); - const std::map<ukm::SourceId, std::unique_ptr<UkmSource>>& sources() const { + const std::map<SourceId, std::unique_ptr<UkmSource>>& sources() const { return sources_; } const std::vector<mojom::UkmEntryPtr>& entries() const { return entries_; } + virtual bool ShouldRestrictToWhitelistedSourceIds() const; + private: friend ::metrics::UkmBrowserTest; friend ::ukm::debug::DebugPage; @@ -63,8 +65,8 @@ class UkmRecorderImpl : public UkmRecorder { bool recording_enabled_; // Contains newly added sources and entries of UKM metrics which periodically - // get serialized and cleared by BuildAndStoreLog(). - std::map<ukm::SourceId, std::unique_ptr<UkmSource>> sources_; + // get serialized and cleared by StoreRecordingsInReport(). + std::map<SourceId, std::unique_ptr<UkmSource>> sources_; std::vector<mojom::UkmEntryPtr> entries_; // Whitelisted Entry hashes, only the ones in this set will be recorded. diff --git a/chromium/components/ukm/ukm_service_unittest.cc b/chromium/components/ukm/ukm_service_unittest.cc index 75b383ab3a9..4c5b80e401c 100644 --- a/chromium/components/ukm/ukm_service_unittest.cc +++ b/chromium/components/ukm/ukm_service_unittest.cc @@ -10,9 +10,13 @@ #include "base/hash.h" #include "base/metrics/metrics_hashes.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" #include "base/test/scoped_feature_list.h" #include "base/test/test_simple_task_runner.h" +#include "base/threading/platform_thread.h" #include "base/threading/thread_task_runner_handle.h" +#include "base/time/time.h" #include "components/metrics/proto/ukm/report.pb.h" #include "components/metrics/proto/ukm/source.pb.h" #include "components/metrics/test_metrics_provider.h" @@ -136,6 +140,14 @@ class UkmServiceTest : public testing::Test { return report; } + static SourceId GetWhitelistedSourceId(int64_t id) { + return ConvertToSourceId(id, SourceIdType::NAVIGATION_ID); + } + + static SourceId GetNonWhitelistedSourceId(int64_t id) { + return ConvertToSourceId(id, SourceIdType::UKM); + } + protected: TestingPrefServiceSimple prefs_; metrics::TestMetricsServiceClient client_; @@ -173,7 +185,7 @@ TEST_F(UkmServiceTest, PersistAndPurge) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); // Should init, generate a log, and start an upload for source. task_runner_->RunPendingTasks(); @@ -199,7 +211,7 @@ TEST_F(UkmServiceTest, SourceSerialization) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/initial")); recorder.UpdateSourceURL(id, GURL("https://google.com/intermediate")); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); @@ -226,7 +238,7 @@ TEST_F(UkmServiceTest, EntryBuilderAndSerialization) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); { std::unique_ptr<UkmEntryBuilder> foo_builder = @@ -290,7 +302,7 @@ TEST_F(UkmServiceTest, AddEntryWithEmptyMetrics) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); { ::ukm::builders::PageLoad(id).Record(&service); } @@ -317,7 +329,7 @@ TEST_F(UkmServiceTest, MetricsProviderTest) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); { ::ukm::builders::PageLoad(id) @@ -350,7 +362,7 @@ TEST_F(UkmServiceTest, LogsUploadedOnlyWhenHavingSourcesOrEntries) { service.Flush(); EXPECT_EQ(GetPersistedLogCount(), 0); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); // Includes a Source, so will persist. service.Flush(); @@ -405,7 +417,7 @@ TEST_F(UkmServiceTest, RecordInitialUrl) { service.EnableRecording(); service.EnableReporting(); - ukm::SourceId id = UkmRecorder::GetNewSourceID(); + ukm::SourceId id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/initial")); recorder.UpdateSourceURL(id, GURL("https://google.com/intermediate")); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); @@ -427,6 +439,55 @@ TEST_F(UkmServiceTest, RecordInitialUrl) { } } +TEST_F(UkmServiceTest, RestrictToWhitelistedSourceIds) { + for (bool restrict_to_whitelisted_source_ids : {true, false}) { + base::FieldTrialList field_trial_list(nullptr /* entropy_provider */); + ScopedUkmFeatureParams params( + base::FeatureList::OVERRIDE_ENABLE_FEATURE, + {{"RestrictToWhitelistedSourceIds", + restrict_to_whitelisted_source_ids ? "true" : "false"}}); + + ClearPrefs(); + UkmService service(&prefs_, &client_); + TestRecordingHelper recorder(&service); + EXPECT_EQ(GetPersistedLogCount(), 0); + service.Initialize(); + task_runner_->RunUntilIdle(); + service.EnableRecording(); + service.EnableReporting(); + + ukm::SourceId id1 = GetWhitelistedSourceId(0); + recorder.UpdateSourceURL(id1, GURL("https://other.com/")); + recorder.GetEntryBuilder(id1, "FakeEntry"); + + // Create a non-navigation-based sourceid, which should not be whitelisted. + ukm::SourceId id2 = UkmRecorder::GetNewSourceID(); + recorder.UpdateSourceURL(id2, GURL("https://example.com/")); + recorder.GetEntryBuilder(id2, "FakeEntry"); + + service.Flush(); + EXPECT_EQ(GetPersistedLogCount(), 1); + Report proto_report = GetPersistedReport(); + EXPECT_GE(proto_report.sources_size(), 1); + + // The whitelisted source should always be recorded. + const Source& proto_source1 = proto_report.sources(0); + EXPECT_EQ(id1, proto_source1.id()); + EXPECT_EQ(GURL("https://other.com/").spec(), proto_source1.url()); + + // The non-whitelisted source should only be recorded if we aren't + // restricted to whitelisted source ids. + if (restrict_to_whitelisted_source_ids) { + EXPECT_EQ(1, proto_report.sources_size()); + } else { + EXPECT_EQ(2, proto_report.sources_size()); + const Source& proto_source2 = proto_report.sources(1); + EXPECT_EQ(id2, proto_source2.id()); + EXPECT_EQ(GURL("https://example.com/").spec(), proto_source2.url()); + } + } +} + TEST_F(UkmServiceTest, RecordSessionId) { ClearPrefs(); UkmService service(&prefs_, &client_); @@ -437,7 +498,7 @@ TEST_F(UkmServiceTest, RecordSessionId) { service.EnableRecording(); service.EnableReporting(); - auto id = UkmRecorder::GetNewSourceID(); + auto id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar")); service.Flush(); @@ -463,11 +524,11 @@ TEST_F(UkmServiceTest, SourceSize) { service.EnableRecording(); service.EnableReporting(); - auto id = UkmRecorder::GetNewSourceID(); + auto id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1")); - id = UkmRecorder::GetNewSourceID(); + id = GetWhitelistedSourceId(1); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar2")); - id = UkmRecorder::GetNewSourceID(); + id = GetWhitelistedSourceId(2); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar3")); service.Flush(); @@ -487,7 +548,7 @@ TEST_F(UkmServiceTest, PurgeMidUpload) { task_runner_->RunUntilIdle(); service.EnableRecording(); service.EnableReporting(); - auto id = UkmRecorder::GetNewSourceID(); + auto id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1")); // Should init, generate a log, and start an upload. task_runner_->RunPendingTasks(); @@ -515,7 +576,7 @@ TEST_F(UkmServiceTest, WhitelistEntryTest) { service.EnableRecording(); service.EnableReporting(); - auto id = UkmRecorder::GetNewSourceID(); + auto id = GetWhitelistedSourceId(0); recorder.UpdateSourceURL(id, GURL("https://google.com/foobar1")); { @@ -561,7 +622,7 @@ TEST_F(UkmServiceTest, SourceURLLength) { service.EnableRecording(); service.EnableReporting(); - auto id = UkmRecorder::GetNewSourceID(); + auto id = GetWhitelistedSourceId(0); // This URL is too long to be recorded fully. const std::string long_string = "https://" + std::string(10000, 'a'); @@ -576,4 +637,87 @@ TEST_F(UkmServiceTest, SourceURLLength) { EXPECT_EQ("URLTooLong", proto_source.url()); } +TEST_F(UkmServiceTest, UnreferencedNonWhitelistedSources) { + for (bool restrict_to_whitelisted_source_ids : {true, false}) { + base::FieldTrialList field_trial_list(nullptr /* entropy_provider */); + // Set a threshold of number of Sources via Feature Params. + ScopedUkmFeatureParams params( + base::FeatureList::OVERRIDE_ENABLE_FEATURE, + {{"MaxKeptSources", "3"}, + {"WhitelistEntries", "EntryA,EntryB"}, + {"RestrictToWhitelistedSourceIds", + restrict_to_whitelisted_source_ids ? "true" : "false"}}); + + ClearPrefs(); + UkmService service(&prefs_, &client_); + TestRecordingHelper recorder(&service); + EXPECT_EQ(0, GetPersistedLogCount()); + service.Initialize(); + task_runner_->RunUntilIdle(); + service.EnableRecording(); + service.EnableReporting(); + + std::vector<SourceId> ids; + base::TimeTicks last_time = base::TimeTicks::Now(); + for (int i = 0; i < 6; ++i) { + // Wait until base::TimeTicks::Now() no longer equals |last_time|. This + // ensures each source has a unique timestamp to avoid flakes. Should take + // between 1-15ms per documented resolution of base::TimeTicks. + while (base::TimeTicks::Now() == last_time) { + base::PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(1)); + } + + ids.push_back(GetNonWhitelistedSourceId(i)); + recorder.UpdateSourceURL( + ids.back(), GURL("https://google.com/foobar" + base::IntToString(i))); + last_time = base::TimeTicks::Now(); + } + + // Add whitelisted entries for 0, 2 and non-whitelisted entries for 2, 3. + recorder.GetEntryBuilder(ids[0], "EntryA")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[2], "EntryB")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[2], "EntryC")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[3], "EntryC")->AddMetric("Metric", 500); + + service.Flush(); + EXPECT_EQ(1, GetPersistedLogCount()); + auto proto_report = GetPersistedReport(); + + if (restrict_to_whitelisted_source_ids) { + ASSERT_EQ(0, proto_report.sources_size()); + } else { + ASSERT_EQ(2, proto_report.sources_size()); + EXPECT_EQ(ids[0], proto_report.sources(0).id()); + EXPECT_EQ("https://google.com/foobar0", proto_report.sources(0).url()); + EXPECT_EQ(ids[2], proto_report.sources(1).id()); + EXPECT_EQ("https://google.com/foobar2", proto_report.sources(1).url()); + } + + // Since MaxKeptSources is 3, only Sources 5, 4, 3 should be retained. + // Log entries under 0, 1, 3 and 4. Log them in reverse order - which + // shouldn't affect source ordering in the output. + // - Source 0 should not be re-transmitted since it was sent before. + // - Source 1 should not be transmitted due to MaxKeptSources param. + // - Sources 3 and 4 should be transmitted since they were not sent before. + recorder.GetEntryBuilder(ids[4], "EntryA")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[3], "EntryA")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[1], "EntryA")->AddMetric("Metric", 500); + recorder.GetEntryBuilder(ids[0], "EntryA")->AddMetric("Metric", 500); + + service.Flush(); + EXPECT_EQ(2, GetPersistedLogCount()); + proto_report = GetPersistedReport(); + + if (restrict_to_whitelisted_source_ids) { + ASSERT_EQ(0, proto_report.sources_size()); + } else { + ASSERT_EQ(2, proto_report.sources_size()); + EXPECT_EQ(ids[3], proto_report.sources(0).id()); + EXPECT_EQ("https://google.com/foobar3", proto_report.sources(0).url()); + EXPECT_EQ(ids[4], proto_report.sources(1).id()); + EXPECT_EQ("https://google.com/foobar4", proto_report.sources(1).url()); + } + } +} + } // namespace ukm diff --git a/chromium/components/ukm/ukm_source.cc b/chromium/components/ukm/ukm_source.cc index 5324048fc28..4a701495ecb 100644 --- a/chromium/components/ukm/ukm_source.cc +++ b/chromium/components/ukm/ukm_source.cc @@ -39,7 +39,9 @@ void UkmSource::SetCustomTabVisible(bool visible) { g_custom_tab_state = visible ? kCustomTabTrue : kCustomTabFalse; } -UkmSource::UkmSource() : custom_tab_state_(g_custom_tab_state) {} +UkmSource::UkmSource() + : custom_tab_state_(g_custom_tab_state), + creation_time_(base::TimeTicks::Now()) {} UkmSource::~UkmSource() = default; diff --git a/chromium/components/ukm/ukm_source.h b/chromium/components/ukm/ukm_source.h index 6492cd1cf5b..e053a1f22c7 100644 --- a/chromium/components/ukm/ukm_source.h +++ b/chromium/components/ukm/ukm_source.h @@ -35,6 +35,10 @@ class UkmSource { const GURL& initial_url() const { return initial_url_; } const GURL& url() const { return url_; } + // The object creation time. This is for internal purposes only and is not + // intended to be anything useful for UKM clients. + const base::TimeTicks creation_time() const { return creation_time_; } + // Sets the URL for this source. Should be invoked when a source is // initialized. void set_url(const GURL& url) { url_ = url; } @@ -66,6 +70,9 @@ class UkmSource { // the metric was created. const CustomTabState custom_tab_state_; + // When this object was created. + const base::TimeTicks creation_time_; + DISALLOW_COPY_AND_ASSIGN(UkmSource); }; |