summary | refs | log | tree | commit | diff
path: root/chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc')
-rw-r--r-- chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc | 333
1 files changed, 202 insertions, 131 deletions
diff --git a/chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc b/chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc
index cd7fd07cc7e..92ae6edc33f 100644
--- a/chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc
+++ b/chromium/components/optimization_guide/content/browser/page_content_annotations_service.cc
@@ -4,17 +4,16 @@
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
+#include "base/barrier_closure.h"
#include "base/callback_helpers.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros_local.h"
#include "base/rand_util.h"
-#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
-#include "base/time/default_tick_clock.h"
-#include "base/timer/timer.h"
#include "components/history/core/browser/history_service.h"
#include "components/leveldb_proto/public/proto_database_provider.h"
+#include "components/optimization_guide/content/browser/page_content_annotations_validator.h"
#include "components/optimization_guide/core/local_page_entities_metadata_provider.h"
#include "components/optimization_guide/core/noisy_metrics_recorder.h"
#include "components/optimization_guide/core/optimization_guide_enums.h"
@@ -37,13 +36,39 @@ namespace optimization_guide {
namespace {
+// Keep this in sync with the PageContentAnnotationsStorageType variant in
+// ../optimization/histograms.xml.
+std::string PageContentAnnotationsTypeToString(
+ PageContentAnnotationsType annotation_type) {
+ switch (annotation_type) {
+ case PageContentAnnotationsType::kUnknown:
+ return "Unknown";
+ case PageContentAnnotationsType::kModelAnnotations:
+ return "ModelAnnotations";
+ case PageContentAnnotationsType::kRelatedSearches:
+ return "RelatedSearches";
+ case PageContentAnnotationsType::kSearchMetadata:
+ return "SearchMetadata";
+ case PageContentAnnotationsType::kRemoteMetdata:
+ return "RemoteMetadata";
+ }
+}
+
void LogPageContentAnnotationsStorageStatus(
- PageContentAnnotationsStorageStatus status) {
+ PageContentAnnotationsStorageStatus status,
+ PageContentAnnotationsType annotation_type) {
DCHECK_NE(status, PageContentAnnotationsStorageStatus::kUnknown);
+ DCHECK_NE(annotation_type, PageContentAnnotationsType::kUnknown);
base::UmaHistogramEnumeration(
"OptimizationGuide.PageContentAnnotationsService."
"ContentAnnotationsStorageStatus",
status);
+
+ base::UmaHistogramEnumeration(
+ "OptimizationGuide.PageContentAnnotationsService."
+ "ContentAnnotationsStorageStatus." +
+ PageContentAnnotationsTypeToString(annotation_type),
+ status);
}
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
@@ -80,14 +105,6 @@ void MaybeRecordVisibilityUKM(
}
#endif /* BUILDFLAG(BUILD_WITH_TFLITE_LIB) */
-const char* kRandomWords[] = {
- "interesting", "chunky", "maniacal", "tickle", "lettuce",
- "obsequious", "stir", "bless", "colossal", "squealing",
- "elegant", "ambitious", "eight", "frighten", "descriptive",
- "pretty", "curly", "regular", "uneven", "heap",
-};
-const size_t kCountRandomWords = 20;
-
} // namespace
PageContentAnnotationsService::PageContentAnnotationsService(
@@ -105,8 +122,21 @@ PageContentAnnotationsService::PageContentAnnotationsService(
history_service_ = history_service;
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
model_manager_ = std::make_unique<PageContentAnnotationsModelManager>(
- application_locale, optimization_guide_model_provider);
+ optimization_guide_model_provider);
annotator_ = model_manager_.get();
+
+ if (features::ShouldExecutePageVisibilityModelOnPageContent(
+ application_locale)) {
+ model_manager_->RequestAndNotifyWhenModelAvailable(
+ AnnotationType::kContentVisibility, base::DoNothing());
+ annotation_types_to_execute_.push_back(AnnotationType::kContentVisibility);
+ }
+ if (features::ShouldExecutePageEntitiesModelOnPageContent(
+ application_locale)) {
+ model_manager_->RequestAndNotifyWhenModelAvailable(
+ AnnotationType::kPageEntities, base::DoNothing());
+ annotation_types_to_execute_.push_back(AnnotationType::kPageEntities);
+ }
#endif
if (features::UseLocalPageEntitiesMetadataProvider()) {
@@ -116,22 +146,8 @@ PageContentAnnotationsService::PageContentAnnotationsService(
database_provider, database_dir, background_task_runner);
}
- if (features::BatchAnnotationsValidationEnabled()) {
- // Normally the caller would do this, but we are our own caller.
- RequestAndNotifyWhenModelAvailable(
- features::BatchAnnotationsValidationUsePageTopics()
- ? AnnotationType::kPageTopics
- : AnnotationType::kContentVisibility,
- base::DoNothing());
-
- validation_timer_ = std::make_unique<base::OneShotTimer>(
- base::DefaultTickClock::GetInstance());
- validation_timer_->Start(
- FROM_HERE, features::BatchAnnotationValidationStartupDelay(),
- base::BindRepeating(
- &PageContentAnnotationsService::RunBatchAnnotationValidation,
- weak_ptr_factory_.GetWeakPtr()));
- }
+ validator_ =
+ PageContentAnnotationsValidator::MaybeCreateAndStartTimer(annotator_);
}
PageContentAnnotationsService::~PageContentAnnotationsService() = default;
@@ -171,18 +187,19 @@ void PageContentAnnotationsService::Annotate(const HistoryVisit& visit) {
<< "Text: " << visit.text_to_annotate.value_or(std::string());
}
visits_to_annotate_.emplace_back(visit);
+
base::UmaHistogramBoolean(
"OptimizationGuide.PageContentAnnotations.AnnotateVisitResultCached",
false);
- if (visits_to_annotate_.size() >= features::AnnotateVisitBatchSize()) {
- if (current_visit_annotation_batch_.empty()) {
- // Used for testing.
- LOCAL_HISTOGRAM_BOOLEAN(
- "PageContentAnnotations.AnnotateVisit.BatchAnnotationStarted", true);
- current_visit_annotation_batch_ = std::move(visits_to_annotate_);
- AnnotateVisitBatch();
- return;
- }
+
+ if (MaybeStartAnnotateVisitBatch())
+ return;
+
+ // Used for testing.
+ LOCAL_HISTOGRAM_BOOLEAN(
+ "PageContentAnnotations.AnnotateVisit.AnnotationRequestQueued", true);
+
+ if (visits_to_annotate_.size() > features::AnnotateVisitBatchSize()) {
// The queue is full and a batch annotation is actively being done so
// we will remove the "oldest" visit.
visits_to_annotate_.erase(visits_to_annotate_.begin());
@@ -190,96 +207,135 @@ void PageContentAnnotationsService::Annotate(const HistoryVisit& visit) {
LOCAL_HISTOGRAM_BOOLEAN(
"PageContentAnnotations.AnnotateVisit.QueueFullVisitDropped", true);
}
- // Used for testing.
- LOCAL_HISTOGRAM_BOOLEAN(
- "PageContentAnnotations.AnnotateVisit.AnnotationRequestQueued", true);
#endif
}
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
+bool PageContentAnnotationsService::MaybeStartAnnotateVisitBatch() {
+ bool is_full_batch_available =
+ visits_to_annotate_.size() >= features::AnnotateVisitBatchSize();
+ bool batch_already_running = !current_visit_annotation_batch_.empty();
+
+ if (is_full_batch_available && !batch_already_running) {
+ // Used for testing.
+ LOCAL_HISTOGRAM_BOOLEAN(
+ "PageContentAnnotations.AnnotateVisit.BatchAnnotationStarted", true);
+ current_visit_annotation_batch_ = std::move(visits_to_annotate_);
+ AnnotateVisitBatch();
+
+ return true;
+ }
+ return false;
+}
+
void PageContentAnnotationsService::AnnotateVisitBatch() {
DCHECK(!current_visit_annotation_batch_.empty());
- if (switches::StopHistoryVisitBatchAnnotateForTesting()) {
- // Code beyond this is tested in multiple places. This just ensures the
- // calls up to this point can be more easily configured.
- return;
+ std::vector<std::string> inputs;
+ for (const HistoryVisit& visit : current_visit_annotation_batch_) {
+ DCHECK(visit.text_to_annotate);
+ inputs.push_back(*visit.text_to_annotate);
}
- if (current_visit_annotation_batch_.empty()) {
- return;
+ std::unique_ptr<
+ std::vector<absl::optional<history::VisitContentModelAnnotations>>>
+ merged_annotation_outputs = std::make_unique<
+ std::vector<absl::optional<history::VisitContentModelAnnotations>>>();
+ merged_annotation_outputs->reserve(inputs.size());
+ for (size_t i = 0; i < inputs.size(); i++) {
+ merged_annotation_outputs->push_back(absl::nullopt);
}
- auto visit = current_visit_annotation_batch_.back();
- DCHECK(visit.text_to_annotate);
- if (visit.text_to_annotate) {
- model_manager_->Annotate(
- *(visit.text_to_annotate),
- base::BindOnce(&PageContentAnnotationsService::OnBatchVisitAnnotated,
- weak_ptr_factory_.GetWeakPtr(), visit));
- }
-}
-void PageContentAnnotationsService::OnBatchVisitAnnotated(
- const HistoryVisit& visit,
- const absl::optional<history::VisitContentModelAnnotations>&
- content_annotations) {
- OnPageContentAnnotated(visit, content_annotations);
- DCHECK_EQ(visit.navigation_id,
- current_visit_annotation_batch_.back().navigation_id);
- current_visit_annotation_batch_.pop_back();
- if (!current_visit_annotation_batch_.empty()) {
- AnnotateVisitBatch();
+ std::vector<absl::optional<history::VisitContentModelAnnotations>>*
+ merged_annotation_outputs_ptr = merged_annotation_outputs.get();
+
+ base::RepeatingClosure barrier_closure = base::BarrierClosure(
+ annotation_types_to_execute_.size(),
+ base::BindOnce(&PageContentAnnotationsService::OnBatchVisitsAnnotated,
+ weak_ptr_factory_.GetWeakPtr(),
+ std::move(merged_annotation_outputs)));
+
+ for (AnnotationType type : annotation_types_to_execute_) {
+ annotator_->Annotate(
+ base::BindOnce(
+ &PageContentAnnotationsService::OnAnnotationBatchComplete, type,
+ merged_annotation_outputs_ptr, barrier_closure),
+ inputs, type);
}
}
-#endif
-
-void PageContentAnnotationsService::OverridePageContentAnnotatorForTesting(
- PageContentAnnotator* annotator) {
- annotator_ = annotator;
-}
// static
-std::string PageContentAnnotationsService::StringInputForPageTopicsHost(
- const std::string& host) {
- std::string output = base::ToLowerASCII(host);
+void PageContentAnnotationsService::OnAnnotationBatchComplete(
+ AnnotationType type,
+ std::vector<absl::optional<history::VisitContentModelAnnotations>>*
+ merge_to_output,
+ base::OnceClosure signal_merge_complete_callback,
+ const std::vector<BatchAnnotationResult>& batch_result) {
+ DCHECK_EQ(merge_to_output->size(), batch_result.size());
+ for (size_t i = 0; i < batch_result.size(); i++) {
+ const BatchAnnotationResult result = batch_result[i];
+ DCHECK_EQ(type, result.type());
+
+ if (!result.HasOutputForType())
+ continue;
- // Strip the 'www.' if it exists.
- if (base::StartsWith(output, "www.")) {
- output = output.substr(4);
- }
+ history::VisitContentModelAnnotations current_annotations;
+
+ if (type == AnnotationType::kContentVisibility) {
+ DCHECK(result.visibility_score());
+ current_annotations.visibility_score = *result.visibility_score();
+ }
+
+ if (type == AnnotationType::kPageEntities) {
+ DCHECK(result.entities());
+ for (const ScoredEntityMetadata& scored_md : *result.entities()) {
+ DCHECK(scored_md.score >= 0.0 && scored_md.score <= 1.0);
+ history::VisitContentModelAnnotations::Category category(
+ scored_md.metadata.entity_id,
+ static_cast<int>(100 * scored_md.score));
+ history::VisitContentModelAnnotations::MergeCategoryIntoVector(
+ category, &current_annotations.entities);
+ }
+ }
- const char kCharsToReplaceWithSpace[] = {'-', '_', '.', '+'};
- for (char c : kCharsToReplaceWithSpace) {
- std::replace(output.begin(), output.end(), c, ' ');
+ history::VisitContentModelAnnotations previous_annotations =
+ merge_to_output->at(i).value_or(
+ history::VisitContentModelAnnotations());
+ current_annotations.MergeFrom(previous_annotations);
+
+ merge_to_output->at(i) = current_annotations;
}
- return output;
+ // This needs to be run last because |merge_to_output| may be deleted when
+ // the callback runs.
+ std::move(signal_merge_complete_callback).Run();
}
-void PageContentAnnotationsService::BatchAnnotatePageTopics(
- BatchAnnotationCallback callback,
- const std::vector<std::string>& hosts) {
- std::vector<std::string> tokenized_hosts;
- for (const std::string& host : hosts) {
- tokenized_hosts.emplace_back(StringInputForPageTopicsHost(host));
+void PageContentAnnotationsService::OnBatchVisitsAnnotated(
+ std::unique_ptr<
+ std::vector<absl::optional<history::VisitContentModelAnnotations>>>
+ merged_annotation_outputs) {
+ DCHECK_EQ(merged_annotation_outputs->size(),
+ current_visit_annotation_batch_.size());
+ for (size_t i = 0; i < merged_annotation_outputs->size(); i++) {
+ OnPageContentAnnotated(current_visit_annotation_batch_[i],
+ merged_annotation_outputs->at(i));
}
- if (!annotator_) {
- std::move(callback).Run(CreateEmptyBatchAnnotationResults(tokenized_hosts));
- return;
- }
+ current_visit_annotation_batch_.clear();
+ MaybeStartAnnotateVisitBatch();
+}
+#endif
- annotator_->Annotate(std::move(callback), tokenized_hosts,
- AnnotationType::kPageTopics);
+void PageContentAnnotationsService::OverridePageContentAnnotatorForTesting(
+ PageContentAnnotator* annotator) {
+ annotator_ = annotator;
}
void PageContentAnnotationsService::BatchAnnotate(
BatchAnnotationCallback callback,
const std::vector<std::string>& inputs,
AnnotationType annotation_type) {
- DCHECK_NE(annotation_type, AnnotationType::kPageTopics)
- << "Please use |BatchAnnotatePageTopics| instead";
-
if (!annotator_) {
std::move(callback).Run(CreateEmptyBatchAnnotationResults(inputs));
return;
@@ -315,7 +371,8 @@ void PageContentAnnotationsService::PersistSearchMetadata(
base::BindOnce(&history::HistoryService::AddSearchMetadataForVisit,
history_service_->AsWeakPtr(),
search_metadata.normalized_url,
- search_metadata.search_terms));
+ search_metadata.search_terms),
+ PageContentAnnotationsType::kSearchMetadata);
}
void PageContentAnnotationsService::ExtractRelatedSearches(
@@ -350,7 +407,8 @@ void PageContentAnnotationsService::OnPageContentAnnotated(
QueryURL(visit,
base::BindOnce(
&history::HistoryService::AddContentModelAnnotationsForVisit,
- history_service_->AsWeakPtr(), *content_annotations));
+ history_service_->AsWeakPtr(), *content_annotations),
+ PageContentAnnotationsType::kModelAnnotations);
}
#endif
@@ -393,34 +451,44 @@ void PageContentAnnotationsService::OnRelatedSearchesExtracted(
QueryURL(visit,
base::BindOnce(&history::HistoryService::AddRelatedSearchesForVisit,
- history_service_->AsWeakPtr(), related_searches));
+ history_service_->AsWeakPtr(), related_searches),
+ PageContentAnnotationsType::kRelatedSearches);
}
void PageContentAnnotationsService::QueryURL(
const HistoryVisit& visit,
- PersistAnnotationsCallback callback) {
+ PersistAnnotationsCallback callback,
+ PageContentAnnotationsType annotation_type) {
history_service_->QueryURL(
visit.url, /*want_visits=*/true,
base::BindOnce(&PageContentAnnotationsService::OnURLQueried,
- weak_ptr_factory_.GetWeakPtr(), visit,
- std::move(callback)),
+ weak_ptr_factory_.GetWeakPtr(), visit, std::move(callback),
+ annotation_type),
&history_service_task_tracker_);
}
void PageContentAnnotationsService::OnURLQueried(
const HistoryVisit& visit,
PersistAnnotationsCallback callback,
+ PageContentAnnotationsType annotation_type,
history::QueryURLResult url_result) {
if (!url_result.success) {
LogPageContentAnnotationsStorageStatus(
- PageContentAnnotationsStorageStatus::kNoVisitsForUrl);
+ PageContentAnnotationsStorageStatus::kNoVisitsForUrl, annotation_type);
return;
}
+ base::TimeDelta min_magnitude_between_visits = base::TimeDelta::Max();
bool did_store_content_annotations = false;
for (const auto& visit_for_url : url_result.visits) {
- if (visit.nav_entry_timestamp != visit_for_url.visit_time)
+ if (visit.nav_entry_timestamp != visit_for_url.visit_time) {
+ base::TimeDelta magnitude_between_visits =
+ (visit.nav_entry_timestamp - visit_for_url.visit_time).magnitude();
+ if (magnitude_between_visits < min_magnitude_between_visits) {
+ min_magnitude_between_visits = magnitude_between_visits;
+ }
continue;
+ }
std::move(callback).Run(visit_for_url.visit_id);
@@ -428,7 +496,21 @@ void PageContentAnnotationsService::OnURLQueried(
break;
}
LogPageContentAnnotationsStorageStatus(
- did_store_content_annotations ? kSuccess : kSpecificVisitForUrlNotFound);
+ did_store_content_annotations ? kSuccess : kSpecificVisitForUrlNotFound,
+ annotation_type);
+ if (!did_store_content_annotations) {
+ DCHECK_NE(min_magnitude_between_visits, base::TimeDelta::Max());
+ base::UmaHistogramTimes(
+ "OptimizationGuide.PageContentAnnotationsService."
+ "ContentAnnotationsStorageMinMagnitudeForVisitNotFound",
+ min_magnitude_between_visits);
+
+ base::UmaHistogramTimes(
+ "OptimizationGuide.PageContentAnnotationsService."
+ "ContentAnnotationsStorageMinMagnitudeForVisitNotFound." +
+ PageContentAnnotationsTypeToString(annotation_type),
+ min_magnitude_between_visits);
+ }
}
void PageContentAnnotationsService::GetMetadataForEntityId(
@@ -457,33 +539,22 @@ void PageContentAnnotationsService::PersistRemotePageEntities(
QueryURL(history_visit,
base::BindOnce(
&history::HistoryService::AddContentModelAnnotationsForVisit,
- history_service_->AsWeakPtr(), annotations));
+ history_service_->AsWeakPtr(), annotations),
+ // Even though we are persisting remote page entities, we store
+ // these as an override to the model annotations.
+ PageContentAnnotationsType::kModelAnnotations);
}
-void PageContentAnnotationsService::RunBatchAnnotationValidation() {
- DCHECK(features::BatchAnnotationsValidationEnabled());
- DCHECK(validation_timer_);
- validation_timer_.reset();
-
- std::vector<std::string> dummy_inputs;
- dummy_inputs.reserve(features::BatchAnnotationsValidationBatchSize());
- for (size_t i = 0; i < features::BatchAnnotationsValidationBatchSize(); i++) {
- const char* word1 = kRandomWords[base::RandGenerator(kCountRandomWords)];
- const char* word2 = kRandomWords[base::RandGenerator(kCountRandomWords)];
- dummy_inputs.emplace_back(base::StringPrintf("%s-%s.com", word1, word2));
- }
-
- LOCAL_HISTOGRAM_COUNTS_100(
- "OptimizationGuide.PageContentAnnotationsService.ValidationRun",
- dummy_inputs.size());
-
- if (!features::BatchAnnotationsValidationUsePageTopics()) {
- BatchAnnotate(base::DoNothing(), dummy_inputs,
- AnnotationType::kContentVisibility);
+void PageContentAnnotationsService::PersistRemotePageMetadata(
+ const HistoryVisit& visit,
+ const proto::PageEntitiesMetadata& page_metadata) {
+ if (!page_metadata.has_alternative_title())
return;
- }
-
- BatchAnnotatePageTopics(base::DoNothing(), dummy_inputs);
+ QueryURL(visit,
+ base::BindOnce(&history::HistoryService::AddPageMetadataForVisit,
+ history_service_->AsWeakPtr(),
+ page_metadata.alternative_title()),
+ PageContentAnnotationsType::kRemoteMetdata);
}
// static