diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-05-20 09:47:09 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-06-07 11:15:42 +0000 |
commit | 189d4fd8fad9e3c776873be51938cd31a42b6177 (patch) | |
tree | 6497caeff5e383937996768766ab3bb2081a40b2 /chromium/components/dom_distiller | |
parent | 8bc75099d364490b22f43a7ce366b366c08f4164 (diff) | |
download | qtwebengine-chromium-189d4fd8fad9e3c776873be51938cd31a42b6177.tar.gz |
BASELINE: Update Chromium to 90.0.4430.221
Change-Id: Iff4d9d18d2fcf1a576f3b1f453010f744a232920
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/dom_distiller')
18 files changed, 257 insertions, 171 deletions
diff --git a/chromium/components/dom_distiller/DIR_METADATA b/chromium/components/dom_distiller/DIR_METADATA new file mode 100644 index 00000000000..e7456596308 --- /dev/null +++ b/chromium/components/dom_distiller/DIR_METADATA @@ -0,0 +1,5 @@ +monorail { + component: "UI>Browser>ReaderMode" +} + +team_email: "dom-distiller-eng@google.com" diff --git a/chromium/components/dom_distiller/OWNERS b/chromium/components/dom_distiller/OWNERS index f500ab3893e..37dc60f3247 100644 --- a/chromium/components/dom_distiller/OWNERS +++ b/chromium/components/dom_distiller/OWNERS @@ -1,7 +1,3 @@ -bengr@chromium.org mdjones@chromium.org nyquist@chromium.org wychen@chromium.org - -# COMPONENT: UI>Browser>ReaderMode -# TEAM: dom-distiller-eng@google.com diff --git a/chromium/components/dom_distiller/content/browser/dom_distiller_viewer_source.cc b/chromium/components/dom_distiller/content/browser/dom_distiller_viewer_source.cc index 6fd8226fdc9..47810610485 100644 --- a/chromium/components/dom_distiller/content/browser/dom_distiller_viewer_source.cc +++ b/chromium/components/dom_distiller/content/browser/dom_distiller_viewer_source.cc @@ -255,9 +255,12 @@ void DomDistillerViewerSource::StartDataRequest( web_contents->GetContainerBounds().size()); GURL current_url(url_utils::GetOriginalUrlFromDistillerUrl(request_url)); + + // Pass an empty nonce value as the CSP is only inlined on the iOS build. std::string unsafe_page_html = viewer::GetArticleTemplateHtml( dom_distiller_service_->GetDistilledPagePrefs()->GetTheme(), - dom_distiller_service_->GetDistilledPagePrefs()->GetFontFamily()); + dom_distiller_service_->GetDistilledPagePrefs()->GetFontFamily(), + std::string()); if (viewer_handle) { // The service returned a |ViewerHandle| and guarantees it will call diff --git a/chromium/components/dom_distiller/content/renderer/distillability_agent.cc b/chromium/components/dom_distiller/content/renderer/distillability_agent.cc index 9447d5e3c7e..478c658fb25 100644 --- a/chromium/components/dom_distiller/content/renderer/distillability_agent.cc +++ b/chromium/components/dom_distiller/content/renderer/distillability_agent.cc @@ -25,12 +25,13 @@ namespace dom_distiller { namespace { -const char* const kBlacklist[] = {"www.reddit.com", "tools.usps.com"}; +const char* const kFilterlist[] = {"www.reddit.com", "tools.usps.com", + "old.reddit.com"}; enum RejectionBuckets { NOT_ARTICLE = 0, MOBILE_FRIENDLY, - BLACKLISTED, + FILTERED, TOO_SHORT, NOT_REJECTED, REJECTION_BUCKET_BOUNDARY @@ -66,9 +67,9 @@ bool IsLast(bool is_loaded) { return true; } -bool IsBlacklisted(const GURL& url) { - for (size_t i = 0; i < base::size(kBlacklist); ++i) { - if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { +bool IsFiltered(const GURL& url) { + for (auto* filter : kFilterlist) { + if (base::LowerCaseEqualsASCII(url.host(), filter)) { return true; } } @@ -82,7 +83,7 @@ void DumpDistillability(content::RenderFrame* render_frame, bool distillable, double long_score, bool long_page, - bool blacklisted) { + bool filtered) { base::DictionaryValue dict; std::string msg; @@ -114,7 +115,7 @@ void DumpDistillability(content::RenderFrame* render_frame, dict.SetInteger("distillable", distillable); dict.SetDouble("long_score", long_score); dict.SetInteger("long_page", long_page); - dict.SetInteger("blacklisted", blacklisted); + dict.SetInteger("filtered", filtered); base::JSONWriter::WriteWithOptions( dict, base::JSONWriter::OPTIONS_PRETTY_PRINT, &msg); msg = "adaboost_classification = " + msg; @@ -144,11 +145,11 @@ bool IsDistillablePageAdaboost(blink::WebDocument& doc, double long_score = long_page->Score(derived) - long_page->GetThreshold(); bool distillable = score > 0; bool long_article = long_score > 0; - bool blacklisted = IsBlacklisted(parsed_url); + bool filtered = IsFiltered(parsed_url); if (dump_info) { DumpDistillability(render_frame, features, derived, score, distillable, - long_score, long_article, blacklisted); + long_score, long_article, filtered); } if (!features.is_mobile_friendly) { @@ -189,9 +190,9 @@ bool IsDistillablePageAdaboost(blink::WebDocument& doc, } else if (features.is_mobile_friendly) { UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", MOBILE_FRIENDLY, REJECTION_BUCKET_BOUNDARY); - } else if (blacklisted) { + } else if (filtered) { UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", - BLACKLISTED, REJECTION_BUCKET_BOUNDARY); + FILTERED, REJECTION_BUCKET_BOUNDARY); } else if (!long_article) { UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", TOO_SHORT, REJECTION_BUCKET_BOUNDARY); @@ -201,7 +202,7 @@ bool IsDistillablePageAdaboost(blink::WebDocument& doc, } } - if (blacklisted) { + if (filtered) { return false; } return distillable && long_article; diff --git a/chromium/components/dom_distiller/core/BUILD.gn b/chromium/components/dom_distiller/core/BUILD.gn index 3c60b0d6132..c03dbc91618 100644 --- a/chromium/components/dom_distiller/core/BUILD.gn +++ b/chromium/components/dom_distiller/core/BUILD.gn @@ -76,7 +76,10 @@ static_library("core") { "dom_distiller_service_android.h", "url_utils_android.cc", ] - deps += [ ":jni_headers" ] + deps += [ + ":jni_headers", + "//url:gurl_android", + ] } } diff --git a/chromium/components/dom_distiller/core/android/BUILD.gn b/chromium/components/dom_distiller/core/android/BUILD.gn index d8a624f37c7..56f40eb83ff 100644 --- a/chromium/components/dom_distiller/core/android/BUILD.gn +++ b/chromium/components/dom_distiller/core/android/BUILD.gn @@ -9,7 +9,8 @@ android_library("dom_distiller_core_java") { "//base:base_java", "//base:jni_java", "//components/dom_distiller/core/mojom:mojom_java", - "//third_party/android_deps:androidx_annotation_annotation_java", + "//third_party/androidx:androidx_annotation_annotation_java", + "//url:gurl_java", ] annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] sources = [ diff --git a/chromium/components/dom_distiller/core/distiller.cc b/chromium/components/dom_distiller/core/distiller.cc index 7bdd649c6a9..d742b90a3dd 100644 --- a/chromium/components/dom_distiller/core/distiller.cc +++ b/chromium/components/dom_distiller/core/distiller.cc @@ -253,20 +253,22 @@ void DistillerImpl::OnPageDistillationFinished( GURL next_page_url(pagination_info.next_page()); if (next_page_url.is_valid()) { // The pages should be in same origin. - DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); - AddToDistillationQueue(page_num + 1, next_page_url); - page_data->distilled_page_proto->data.mutable_pagination_info() - ->set_next_page(next_page_url.spec()); + if (next_page_url.GetOrigin() == page_url.GetOrigin()) { + AddToDistillationQueue(page_num + 1, next_page_url); + page_data->distilled_page_proto->data.mutable_pagination_info() + ->set_next_page(next_page_url.spec()); + } } } if (pagination_info.has_prev_page()) { GURL prev_page_url(pagination_info.prev_page()); if (prev_page_url.is_valid()) { - DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin()); - AddToDistillationQueue(page_num - 1, prev_page_url); - page_data->distilled_page_proto->data.mutable_pagination_info() - ->set_prev_page(prev_page_url.spec()); + if (prev_page_url.GetOrigin() == page_url.GetOrigin()) { + AddToDistillationQueue(page_num - 1, prev_page_url); + page_data->distilled_page_proto->data.mutable_pagination_info() + ->set_prev_page(prev_page_url.spec()); + } } } diff --git a/chromium/components/dom_distiller/core/distiller_page.cc b/chromium/components/dom_distiller/core/distiller_page.cc index 46f07ab0cd6..81230d8bc66 100644 --- a/chromium/components/dom_distiller/core/distiller_page.cc +++ b/chromium/components/dom_distiller/core/distiller_page.cc @@ -41,10 +41,10 @@ std::string GetDistillerScriptWithOptions( return ""; } - std::unique_ptr<base::Value> options_value( - dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options)); + base::Value options_value = + dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options); std::string options_json; - if (!base::JSONWriter::Write(*options_value, &options_json)) { + if (!base::JSONWriter::Write(options_value, &options_json)) { NOTREACHED(); } size_t options_offset = script.find(kOptionsPlaceholder); @@ -100,7 +100,7 @@ void DistillerPage::OnDistillationDone(const GURL& page_url, } else { found_content = dom_distiller::proto::json::DomDistillerResult::ReadFromValue( - value, distiller_result.get()); + *value, distiller_result.get()); if (!found_content) { DVLOG(1) << "Unable to parse DomDistillerResult."; } else { diff --git a/chromium/components/dom_distiller/core/distiller_unittest.cc b/chromium/components/dom_distiller/core/distiller_unittest.cc index 1aed94ad9e9..d41f9789414 100644 --- a/chromium/components/dom_distiller/core/distiller_unittest.cc +++ b/chromium/components/dom_distiller/core/distiller_unittest.cc @@ -47,6 +47,7 @@ namespace { const char kTitle[] = "Title"; const char kContent[] = "Content"; const char kURL[] = "http://a.com/"; +const char kOtherURL[] = "http://b.com/"; const size_t kTotalGoodImages = 2; const size_t kTotalImages = 3; // Good images need to be in the front. @@ -59,7 +60,7 @@ const std::string GetImageName(int page_num, int image_num) { return base::NumberToString(page_num) + "_" + base::NumberToString(image_num); } -std::unique_ptr<base::Value> CreateDistilledValueReturnedFromJS( +base::Value CreateDistilledValueReturnedFromJS( const std::string& title, const std::string& content, const std::vector<int>& image_indices, @@ -174,11 +175,11 @@ std::unique_ptr<MultipageDistillerData> CreateMultipageDistillerDataWithImages( std::string next_page_url = GenerateNextPageUrl(url_prefix, page_num, pages_size); std::string prev_page_url = GeneratePrevPageUrl(url_prefix, page_num); - std::unique_ptr<base::Value> distilled_value = - CreateDistilledValueReturnedFromJS(kTitle, result->content[page_num], - image_ids[page_num], next_page_url, - prev_page_url); - result->distilled_values.push_back(std::move(distilled_value)); + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, result->content[page_num], image_ids[page_num], next_page_url, + prev_page_url); + result->distilled_values.push_back( + std::make_unique<base::Value>(std::move(distilled_value))); } return result; } @@ -193,14 +194,16 @@ void VerifyArticleProtoMatchesMultipageData( const dom_distiller::DistilledArticleProto* article_proto, const MultipageDistillerData* distiller_data, size_t distilled_pages_size, - size_t total_pages_size) { + size_t total_pages_size, + size_t start_page_offset = 0) { ASSERT_EQ(distilled_pages_size, static_cast<size_t>(article_proto->pages_size())); EXPECT_EQ(kTitle, article_proto->title()); std::string url_prefix = kURL; - for (size_t page_num = 0; page_num < distilled_pages_size; ++page_num) { + for (size_t page_num = start_page_offset; page_num < distilled_pages_size; + ++page_num) { const dom_distiller::DistilledPageProto& page = - article_proto->pages(page_num); + article_proto->pages(page_num - start_page_offset); EXPECT_EQ(distiller_data->content[page_num], page.html()); EXPECT_EQ(distiller_data->page_urls[page_num], page.url()); EXPECT_EQ(distiller_data->image_ids[page_num].size(), @@ -217,10 +220,16 @@ void VerifyArticleProtoMatchesMultipageData( EXPECT_EQ(GetImageName(page_num + 1, img_num), page.image(img_num).name()); } + std::string expected_next_page_url = GenerateNextPageUrl(url_prefix, page_num, total_pages_size); - std::string expected_prev_page_url = - GeneratePrevPageUrl(url_prefix, page_num); + + std::string expected_prev_page_url; + + if (page_num > start_page_offset) { + expected_prev_page_url = GeneratePrevPageUrl(url_prefix, page_num); + } + EXPECT_EQ(expected_next_page_url, page.pagination_info().next_page()); EXPECT_EQ(expected_prev_page_url, page.pagination_info().prev_page()); EXPECT_FALSE(page.pagination_info().has_canonical_page()); @@ -337,16 +346,13 @@ std::unique_ptr<DistillerPage> CreateMockDistillerPageWithPendingJSCallback( return std::unique_ptr<DistillerPage>(distiller_page); } -std::unique_ptr<DistillerPage> CreateMockDistillerPages( +std::unique_ptr<DistillerPage> CreateMockDistillerPagesWithSequence( MultipageDistillerData* distiller_data, - size_t pages_size, - int start_page_num) { + const std::vector<int>& page_num_sequence) { MockDistillerPage* distiller_page = new MockDistillerPage(); { testing::InSequence s; - std::vector<int> page_nums = GetPagesInSequence(start_page_num, pages_size); - for (size_t page_num = 0; page_num < pages_size; ++page_num) { - int page = page_nums[page_num]; + for (int page : page_num_sequence) { GURL url = GURL(distiller_data->page_urls[page]); EXPECT_CALL(*distiller_page, DistillPageImpl(url, _)) .WillOnce(DistillerPageOnDistillationDone( @@ -357,12 +363,20 @@ std::unique_ptr<DistillerPage> CreateMockDistillerPages( return std::unique_ptr<DistillerPage>(distiller_page); } +std::unique_ptr<DistillerPage> CreateMockDistillerPages( + MultipageDistillerData* distiller_data, + size_t pages_size, + int start_page_num) { + std::vector<int> page_nums = GetPagesInSequence(start_page_num, pages_size); + return CreateMockDistillerPagesWithSequence(distiller_data, page_nums); +} + TEST_F(DistillerTest, DistillPage) { - std::unique_ptr<base::Value> result = CreateDistilledValueReturnedFromJS( + base::Value result = CreateDistilledValueReturnedFromJS( kTitle, kContent, std::vector<int>(), ""); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&result, GURL(kURL))); base::RunLoop().RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); ASSERT_EQ(article_proto_->pages_size(), 1); @@ -374,11 +388,11 @@ TEST_F(DistillerTest, DistillPage) { TEST_F(DistillerTest, DistillPageWithDebugInfo) { DomDistillerResult dd_result; dd_result.mutable_debug_info()->set_log(kDebugLog); - std::unique_ptr<base::Value> result = + base::Value result = dom_distiller::proto::json::DomDistillerResult::WriteToValue(dd_result); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&result, GURL(kURL))); base::RunLoop().RunUntilIdle(); const DistilledPageProto& first_page = article_proto_->pages(0); EXPECT_EQ(kDebugLog, first_page.debug_info().log()); @@ -400,11 +414,11 @@ TEST_F(DistillerTest, DistillPageWithTimingInfo) { 6.0); SetTimingEntry(dd_result.mutable_timing_info()->add_other_times(), "time1", 7.0); - std::unique_ptr<base::Value> result = + base::Value result = dom_distiller::proto::json::DomDistillerResult::WriteToValue(dd_result); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&result, GURL(kURL))); base::RunLoop().RunUntilIdle(); const DistilledPageProto& first_page = article_proto_->pages(0); std::map<std::string, double> timings; @@ -427,11 +441,11 @@ TEST_F(DistillerTest, DistillPageWithImages) { image_indices.push_back(0); image_indices.push_back(1); image_indices.push_back(2); - std::unique_ptr<base::Value> result = + base::Value result = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&result, GURL(kURL))); base::RunLoop().RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); ASSERT_EQ(article_proto_->pages_size(), 1); @@ -489,11 +503,11 @@ TEST_F(DistillerTest, DistillMultiplePages) { TEST_F(DistillerTest, DistillLinkLoop) { // Create a loop, the next page is same as the current page. This could // happen if javascript misparses a next page link. - std::unique_ptr<base::Value> result = CreateDistilledValueReturnedFromJS( + base::Value result = CreateDistilledValueReturnedFromJS( kTitle, kContent, std::vector<int>(), kURL); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(result.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&result, GURL(kURL))); base::RunLoop().RunUntilIdle(); EXPECT_EQ(kTitle, article_proto_->title()); EXPECT_EQ(article_proto_->pages_size(), 1); @@ -507,14 +521,13 @@ TEST_F(DistillerTest, CheckMaxPageLimitExtraPage) { // Note: Next page url of the last page of article is set. So distiller will // try to do kMaxPagesInArticle + 1 calls if the max article limit does not // work. - std::unique_ptr<base::Value> last_page_data = - CreateDistilledValueReturnedFromJS( - kTitle, distiller_data->content[kMaxPagesInArticle - 1], - std::vector<int>(), "", - distiller_data->page_urls[kMaxPagesInArticle - 2]); + base::Value last_page_data = CreateDistilledValueReturnedFromJS( + kTitle, distiller_data->content[kMaxPagesInArticle - 1], + std::vector<int>(), "", + distiller_data->page_urls[kMaxPagesInArticle - 2]); - distiller_data->distilled_values.pop_back(); - distiller_data->distilled_values.push_back(std::move(last_page_data)); + distiller_data->distilled_values.back() = + std::make_unique<base::Value>(std::move(last_page_data)); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); @@ -552,10 +565,10 @@ TEST_F(DistillerTest, CheckMaxPageLimitExactLimit) { TEST_F(DistillerTest, SinglePageDistillationFailure) { // To simulate failure return a null value. - auto null_value = std::make_unique<base::Value>(); + base::Value null_value; distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(null_value.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&null_value, GURL(kURL))); base::RunLoop().RunUntilIdle(); EXPECT_EQ("", article_proto_->title()); EXPECT_EQ(0, article_proto_->pages_size()); @@ -569,11 +582,8 @@ TEST_F(DistillerTest, MultiplePagesDistillationFailure) { // The page number of the failed page. size_t failed_page_num = 3; // reset distilled data of the failed page. - distiller_data->distilled_values.erase( - distiller_data->distilled_values.begin() + failed_page_num); - distiller_data->distilled_values.insert( - distiller_data->distilled_values.begin() + failed_page_num, - std::make_unique<base::Value>()); + distiller_data->distilled_values[failed_page_num] = + std::make_unique<base::Value>(); // Expect only calls till the failed page number. distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); @@ -594,17 +604,13 @@ TEST_F(DistillerTest, DistillMultiplePagesFirstEmpty) { // The first page has no content. const size_t empty_page_num = 0; distiller_data->content[empty_page_num] = ""; - std::unique_ptr<base::Value> distilled_value = - CreateDistilledValueReturnedFromJS( - kTitle, "", std::vector<int>(), - GenerateNextPageUrl(kURL, empty_page_num, kNumPages), - GeneratePrevPageUrl(kURL, empty_page_num)); + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, "", std::vector<int>(), + GenerateNextPageUrl(kURL, empty_page_num, kNumPages), + GeneratePrevPageUrl(kURL, empty_page_num)); // Reset distilled data of the first page. - distiller_data->distilled_values.erase( - distiller_data->distilled_values.begin() + empty_page_num); - distiller_data->distilled_values.insert( - distiller_data->distilled_values.begin() + empty_page_num, - std::move(distilled_value)); + distiller_data->distilled_values[empty_page_num] = + std::make_unique<base::Value>(std::move(distilled_value)); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); @@ -625,17 +631,13 @@ TEST_F(DistillerTest, DistillMultiplePagesSecondEmpty) { // The second page has no content. const size_t empty_page_num = 1; distiller_data->content[empty_page_num] = ""; - std::unique_ptr<base::Value> distilled_value = - CreateDistilledValueReturnedFromJS( - kTitle, "", std::vector<int>(), - GenerateNextPageUrl(kURL, empty_page_num, kNumPages), - GeneratePrevPageUrl(kURL, empty_page_num)); + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, "", std::vector<int>(), + GenerateNextPageUrl(kURL, empty_page_num, kNumPages), + GeneratePrevPageUrl(kURL, empty_page_num)); // Reset distilled data of the second page. - distiller_data->distilled_values.erase( - distiller_data->distilled_values.begin() + empty_page_num); - distiller_data->distilled_values.insert( - distiller_data->distilled_values.begin() + empty_page_num, - std::move(distilled_value)); + distiller_data->distilled_values[empty_page_num] = + std::make_unique<base::Value>(std::move(distilled_value)); distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); @@ -647,6 +649,66 @@ TEST_F(DistillerTest, DistillMultiplePagesSecondEmpty) { article_proto_.get(), distiller_data.get(), kNumPages, kNumPages); } +TEST_F(DistillerTest, DistillMultiplePagesNextDifferingOrigin) { + const size_t kNumPages = 8; + const size_t kActualPages = 4; + std::unique_ptr<MultipageDistillerData> distiller_data = + CreateMultipageDistillerDataWithoutImages(kNumPages); + + // The next page came from a different origin. All pages after + // it will be dropped as well. + const size_t target_page_num = 3; + distiller_data->content[target_page_num] = kContent; + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, kContent, std::vector<int>(), + GenerateNextPageUrl(kOtherURL, target_page_num, kNumPages), + GeneratePrevPageUrl(kURL, target_page_num)); + // Reset distilled data of the second page. + distiller_data->distilled_values[target_page_num] = + std::make_unique<base::Value>(std::move(distilled_value)); + + distiller_ = std::make_unique<DistillerImpl>(url_fetcher_factory_, + DomDistillerOptions()); + + DistillPage(distiller_data->page_urls[0], + CreateMockDistillerPages(distiller_data.get(), kActualPages, 0)); + base::RunLoop().RunUntilIdle(); + + VerifyArticleProtoMatchesMultipageData( + article_proto_.get(), distiller_data.get(), kActualPages, kActualPages); +} + +TEST_F(DistillerTest, DistillMultiplePagesPrevDifferingOrigin) { + const size_t kNumPages = 8; + const size_t kActualPages = 6; + std::vector<int> page_num_seq{3, 2, 4, 5, 6, 7}; + std::unique_ptr<MultipageDistillerData> distiller_data = + CreateMultipageDistillerDataWithoutImages(kNumPages); + + // The prev page came from a different origin. All pages before + // it will be dropped. + const size_t target_page_num = 2; + distiller_data->content[target_page_num] = kContent; + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, kContent, std::vector<int>(), + GenerateNextPageUrl(kURL, target_page_num, kNumPages), + GeneratePrevPageUrl(kOtherURL, target_page_num)); + // Reset distilled data of the second page. + distiller_data->distilled_values[target_page_num] = + std::make_unique<base::Value>(std::move(distilled_value)); + + distiller_ = std::make_unique<DistillerImpl>(url_fetcher_factory_, + DomDistillerOptions()); + DistillPage( + distiller_data->page_urls[target_page_num + 1], + CreateMockDistillerPagesWithSequence(distiller_data.get(), page_num_seq)); + base::RunLoop().RunUntilIdle(); + + VerifyArticleProtoMatchesMultipageData(article_proto_.get(), + distiller_data.get(), kActualPages, + kNumPages, target_page_num); +} + TEST_F(DistillerTest, DistillPreviousPage) { const size_t kNumPages = 8; @@ -737,7 +799,7 @@ TEST_F(DistillerTest, CancelWithDelayedImageFetchCallback) { std::vector<int> image_indices; image_indices.push_back(0); - std::unique_ptr<base::Value> distilled_value = + base::Value distilled_value = CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, ""); TestDistillerURLFetcher* delayed_fetcher = new TestDistillerURLFetcher(true); MockDistillerURLFetcherFactory mock_url_fetcher_factory; @@ -745,7 +807,7 @@ TEST_F(DistillerTest, CancelWithDelayedImageFetchCallback) { .WillOnce(Return(delayed_fetcher)); distiller_.reset( new DistillerImpl(mock_url_fetcher_factory, DomDistillerOptions())); - DistillPage(kURL, CreateMockDistillerPage(distilled_value.get(), GURL(kURL))); + DistillPage(kURL, CreateMockDistillerPage(&distilled_value, GURL(kURL))); base::RunLoop().RunUntilIdle(); // Post callback from the url fetcher and then delete the distiller. @@ -756,9 +818,8 @@ TEST_F(DistillerTest, CancelWithDelayedImageFetchCallback) { } TEST_F(DistillerTest, CancelWithDelayedJSCallback) { - std::unique_ptr<base::Value> distilled_value = - CreateDistilledValueReturnedFromJS(kTitle, kContent, std::vector<int>(), - ""); + base::Value distilled_value = CreateDistilledValueReturnedFromJS( + kTitle, kContent, std::vector<int>(), ""); MockDistillerPage* distiller_page = nullptr; distiller_.reset( new DistillerImpl(url_fetcher_factory_, DomDistillerOptions())); @@ -768,7 +829,7 @@ TEST_F(DistillerTest, CancelWithDelayedJSCallback) { ASSERT_TRUE(distiller_page); // Post the task to execute javascript and then delete the distiller. - distiller_page->OnDistillationDone(GURL(kURL), distilled_value.get()); + distiller_page->OnDistillationDone(GURL(kURL), &distilled_value); distiller_.reset(); base::RunLoop().RunUntilIdle(); diff --git a/chromium/components/dom_distiller/core/html/dom_distiller_viewer.html b/chromium/components/dom_distiller/core/html/dom_distiller_viewer.html index b791d0ebc0c..6d681a08f00 100644 --- a/chromium/components/dom_distiller/core/html/dom_distiller_viewer.html +++ b/chromium/components/dom_distiller/core/html/dom_distiller_viewer.html @@ -9,13 +9,15 @@ found in the LICENSE file. <meta charset="utf-8"> <meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"> <meta name="theme-color" id="theme-color"> + <!-- Placeholder for CSP. --> + $1 <title>$i18n{title}</title> <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet"> <!-- Placeholder for CSS. --> - $1 + $2 </head> -<body class="$2"> +<body class="$3"> <div id="content-wrap"> <div id="main-content"> <div id="settings-container" class="desktop-only"> @@ -98,11 +100,11 @@ found in the LICENSE file. </div> </div> </header> - <div id="content"><noscript>$3</noscript></div> + <div id="content"><noscript>$4</noscript></div> </article> <div id="loading-indicator" class="visible"> <!-- SVG material loading spinner. --> - $4 + $5 </div> </div> </div> diff --git a/chromium/components/dom_distiller/core/task_tracker.cc b/chromium/components/dom_distiller/core/task_tracker.cc index e66a62c4091..f22c88967bc 100644 --- a/chromium/components/dom_distiller/core/task_tracker.cc +++ b/chromium/components/dom_distiller/core/task_tracker.cc @@ -85,7 +85,7 @@ void TaskTracker::AddSaveCallback(SaveCallback callback) { std::unique_ptr<ViewerHandle> TaskTracker::AddViewer( ViewRequestDelegate* delegate) { - viewers_.push_back(delegate); + viewers_.AddObserver(delegate); if (content_ready_) { // Distillation for this task has already completed, and so the delegate can // be immediately told of the result. @@ -115,7 +115,7 @@ bool TaskTracker::HasUrl(const GURL& url) const { } void TaskTracker::RemoveViewer(ViewRequestDelegate* delegate) { - base::Erase(viewers_, delegate); + viewers_.RemoveObserver(delegate); if (viewers_.empty()) { MaybeCancel(); } @@ -219,8 +219,8 @@ void TaskTracker::DistilledArticleReady( } void TaskTracker::NotifyViewersAndCallbacks() { - for (auto* viewer : viewers_) { - NotifyViewer(viewer); + for (auto& viewer : viewers_) { + NotifyViewer(&viewer); } // Already inside a callback run SaveCallbacks directly. @@ -242,8 +242,8 @@ void TaskTracker::DoSaveCallbacks(bool success) { void TaskTracker::OnArticleDistillationUpdated( const ArticleDistillationUpdate& article_update) { - for (auto* viewer : viewers_) { - viewer->OnArticleUpdated(article_update); + for (auto& viewer : viewers_) { + viewer.OnArticleUpdated(article_update); } } diff --git a/chromium/components/dom_distiller/core/task_tracker.h b/chromium/components/dom_distiller/core/task_tracker.h index 484145cf7d1..cc13e727292 100644 --- a/chromium/components/dom_distiller/core/task_tracker.h +++ b/chromium/components/dom_distiller/core/task_tracker.h @@ -11,6 +11,7 @@ #include "base/bind.h" #include "base/callback.h" #include "base/memory/weak_ptr.h" +#include "base/observer_list.h" #include "components/dom_distiller/core/article_distillation_update.h" #include "components/dom_distiller/core/article_entry.h" #include "components/dom_distiller/core/distiller.h" @@ -40,9 +41,9 @@ class ViewerHandle { // Interface for a DOM distiller entry viewer. Implement this to make a view // request and receive the data for an entry when it becomes available. -class ViewRequestDelegate { +class ViewRequestDelegate : public base::CheckedObserver { public: - virtual ~ViewRequestDelegate() = default; + ~ViewRequestDelegate() override = default; // Called when the distilled article contents are available. The // DistilledArticleProto is owned by a TaskTracker instance and is invalidated @@ -140,7 +141,7 @@ class TaskTracker { std::vector<SaveCallback> save_callbacks_; // A ViewRequestDelegate will be added to this list when a view request is // made and removed when the corresponding ViewerHandle is destroyed. - std::vector<ViewRequestDelegate*> viewers_; + base::ObserverList<ViewRequestDelegate> viewers_; std::unique_ptr<Distiller> distiller_; bool blob_fetcher_running_; diff --git a/chromium/components/dom_distiller/core/url_utils_android.cc b/chromium/components/dom_distiller/core/url_utils_android.cc index f51db0fff53..35dafdef9d8 100644 --- a/chromium/components/dom_distiller/core/url_utils_android.cc +++ b/chromium/components/dom_distiller/core/url_utils_android.cc @@ -9,6 +9,7 @@ #include "components/dom_distiller/core/url_constants.h" #include "components/dom_distiller/core/url_utils.h" #include "net/base/url_util.h" +#include "url/android/gurl_android.h" #include "url/gurl.h" using base::android::JavaParamRef; @@ -39,7 +40,7 @@ ScopedJavaLocalRef<jstring> JNI_DomDistillerUrlUtils_GetDistillerViewUrlFromUrl( return base::android::ConvertUTF8ToJavaString(env, view_url.spec()); } -ScopedJavaLocalRef<jstring> +ScopedJavaLocalRef<jobject> JNI_DomDistillerUrlUtils_GetOriginalUrlFromDistillerUrl( JNIEnv* env, const JavaParamRef<jstring>& j_url) { @@ -50,9 +51,9 @@ JNI_DomDistillerUrlUtils_GetOriginalUrlFromDistillerUrl( GURL original_url = dom_distiller::url_utils::GetOriginalUrlFromDistillerUrl(url); if (!original_url.is_valid()) - return ScopedJavaLocalRef<jstring>(); + return url::GURLAndroid::EmptyGURL(env); - return base::android::ConvertUTF8ToJavaString(env, original_url.spec()); + return url::GURLAndroid::FromNativeGURL(env, original_url); } jboolean JNI_DomDistillerUrlUtils_IsDistilledPage( diff --git a/chromium/components/dom_distiller/core/viewer.cc b/chromium/components/dom_distiller/core/viewer.cc index 69285f58ca5..dffef376602 100644 --- a/chromium/components/dom_distiller/core/viewer.cc +++ b/chromium/components/dom_distiller/core/viewer.cc @@ -109,7 +109,8 @@ void EnsureNonEmptyContent(std::string* content) { } std::string ReplaceHtmlTemplateValues(const mojom::Theme theme, - const mojom::FontFamily font_family) { + const mojom::FontFamily font_family, + const std::string& csp_nonce) { std::string html_template = ui::ResourceBundle::GetSharedInstance().LoadDataResourceString( IDR_DOM_DISTILLER_VIEWER_HTML); @@ -156,6 +157,7 @@ std::string ReplaceHtmlTemplateValues(const mojom::Theme theme, // Now do other non-i18n string replacements. std::vector<std::string> substitutions; + std::ostringstream csp; std::ostringstream css; std::ostringstream svg; #if defined(OS_IOS) @@ -163,19 +165,37 @@ std::string ReplaceHtmlTemplateValues(const mojom::Theme theme, // and return the local data once a page is loaded. css << "<style>" << viewer::GetCss() << "</style>"; svg << viewer::GetLoadingImage(); + + // iOS specific CSP policy to mitigate leaking of data from different + // origins. + csp << "<meta http-equiv=\"Content-Security-Policy\" content=\""; + csp << "default-src 'none'; "; + csp << "script-src 'nonce-" << csp_nonce << "'; "; + // YouTube videos are embedded as an iframe. + csp << "frame-src http://www.youtube.com; "; + csp << "style-src 'unsafe-inline' https://fonts.googleapis.com; "; + // Allows the fallback font-face from the main stylesheet. + csp << "font-src https://fonts.gstatic.com; "; + // Images will be inlined as data-uri if they are valid. + csp << "img-src data:; "; + csp << "form-action 'none'; "; + csp << "base-uri 'none'; "; + csp << "\">"; + #else css << "<link rel=\"stylesheet\" href=\"/" << kViewerCssPath << "\">"; svg << "<img src=\"/" << kViewerLoadingImagePath << "\">"; #endif // defined(OS_IOS) - substitutions.push_back(css.str()); // $1 + substitutions.push_back(csp.str()); // $1 + substitutions.push_back(css.str()); // $2 substitutions.push_back(GetThemeCssClass(theme) + " " + - GetFontCssClass(font_family)); // $2 + GetFontCssClass(font_family)); // $3 substitutions.push_back(l10n_util::GetStringUTF8( - IDS_DOM_DISTILLER_JAVASCRIPT_DISABLED_CONTENT)); // $3 + IDS_DOM_DISTILLER_JAVASCRIPT_DISABLED_CONTENT)); // $4 - substitutions.push_back(svg.str()); // $4 + substitutions.push_back(svg.str()); // $5 return base::ReplaceStringPlaceholders(html_template, substitutions, nullptr); } @@ -239,8 +259,9 @@ const std::string GetToggleLoadingIndicatorJs(bool is_last_page) { } const std::string GetArticleTemplateHtml(mojom::Theme theme, - mojom::FontFamily font_family) { - return ReplaceHtmlTemplateValues(theme, font_family); + mojom::FontFamily font_family, + const std::string& csp_nonce) { + return ReplaceHtmlTemplateValues(theme, font_family, csp_nonce); } const std::string GetUnsafeArticleContentJs( diff --git a/chromium/components/dom_distiller/core/viewer.h b/chromium/components/dom_distiller/core/viewer.h index 9cfbff27356..fd74d9485d1 100644 --- a/chromium/components/dom_distiller/core/viewer.h +++ b/chromium/components/dom_distiller/core/viewer.h @@ -28,7 +28,8 @@ namespace viewer { // parameters. Information from the original article has not yet been inserted, // so the returned HTML should be safe. const std::string GetArticleTemplateHtml(mojom::Theme theme, - mojom::FontFamily font_family); + mojom::FontFamily font_family, + const std::string& csp_nonce); // Returns the JavaScript to place a full article's HTML on the page. The // returned HTML should be considered unsafe, so callers must ensure diff --git a/chromium/components/dom_distiller/ios/BUILD.gn b/chromium/components/dom_distiller/ios/BUILD.gn index 0bfc6a6c3c3..29c300ef966 100644 --- a/chromium/components/dom_distiller/ios/BUILD.gn +++ b/chromium/components/dom_distiller/ios/BUILD.gn @@ -17,7 +17,6 @@ source_set("ios") { "//components/dom_distiller/core/proto", "//components/favicon/ios", "//ios/web/public", - "//ios/web/public/deprecated", "//url", ] } diff --git a/chromium/components/dom_distiller/ios/distiller_page_ios.h b/chromium/components/dom_distiller/ios/distiller_page_ios.h index c7cfa775c98..2e28b366e05 100644 --- a/chromium/components/dom_distiller/ios/distiller_page_ios.h +++ b/chromium/components/dom_distiller/ios/distiller_page_ios.h @@ -5,7 +5,6 @@ #ifndef COMPONENTS_DOM_DISTILLER_IOS_DISTILLER_PAGE_IOS_H_ #define COMPONENTS_DOM_DISTILLER_IOS_DISTILLER_PAGE_IOS_H_ -#include <objc/objc.h> #include <memory> #include <string> @@ -52,7 +51,7 @@ class DistillerPageIOS : public DistillerPage, public web::WebStateObserver { private: // Called once the |script_| has been evaluated on the page. - void HandleJavaScriptResult(id result); + void HandleJavaScriptResult(const base::Value* result); // web::WebStateObserver implementation. void PageLoaded( diff --git a/chromium/components/dom_distiller/ios/distiller_page_ios.mm b/chromium/components/dom_distiller/ios/distiller_page_ios.mm index b53503a5765..4c58f7f434d 100644 --- a/chromium/components/dom_distiller/ios/distiller_page_ios.mm +++ b/chromium/components/dom_distiller/ios/distiller_page_ios.mm @@ -13,12 +13,9 @@ #include "base/logging.h" #include "base/mac/foundation_util.h" #include "base/strings/string_split.h" -#include "base/strings/sys_string_conversions.h" #include "base/strings/utf_string_conversions.h" #include "base/values.h" #include "ios/web/public/browser_state.h" -#import "ios/web/public/deprecated/crw_js_injection_manager.h" -#import "ios/web/public/deprecated/crw_js_injection_receiver.h" #import "ios/web/public/navigation/navigation_manager.h" #import "ios/web/public/navigation/web_state_policy_decider.h" #import "ios/web/public/web_state.h" @@ -40,11 +37,13 @@ namespace { int const kMaximumParsingRecursionDepth = 6; -// Converts result of WKWebView script evaluation to base::Value, parsing -// |wk_result| up to a depth of |max_depth|. -base::Value ValueResultFromScriptResult(id wk_result, int max_depth) { +// Returns a clone of |value| where double values are converted to integers if +// the numbers has no fraction. |value| is only processed up to |max_depth|. +base::Value ConvertedResultFromScriptResult(const base::Value* value, + int max_depth) { base::Value result; - if (!wk_result) { + if (!value || value->is_none()) { + DCHECK_EQ(result.type(), base::Value::Type::NONE); return result; } @@ -53,52 +52,48 @@ base::Value ValueResultFromScriptResult(id wk_result, int max_depth) { return result; } - CFTypeID result_type = CFGetTypeID(reinterpret_cast<CFTypeRef>(wk_result)); - if (result_type == CFStringGetTypeID()) { - result = base::Value(base::SysNSStringToUTF8(wk_result)); + if (value->is_string()) { + result = base::Value(value->GetString()); DCHECK_EQ(result.type(), base::Value::Type::STRING); - } else if (result_type == CFNumberGetTypeID()) { + } else if (value->is_double()) { // Different implementation is here. - if ([wk_result intValue] != [wk_result doubleValue]) { - result = base::Value([wk_result doubleValue]); - DCHECK_EQ(result.type(), base::Value::Type::DOUBLE); - } else { - result = base::Value([wk_result intValue]); + double double_value = value->GetDouble(); + int int_value = round(double_value); + if (double_value == int_value) { + result = base::Value(int_value); DCHECK_EQ(result.type(), base::Value::Type::INTEGER); + } else { + result = base::Value(double_value); + DCHECK_EQ(result.type(), base::Value::Type::DOUBLE); } // End of different implementation. - } else if (result_type == CFBooleanGetTypeID()) { - result = base::Value(static_cast<bool>([wk_result boolValue])); + } else if (value->is_bool()) { + result = base::Value(value); DCHECK_EQ(result.type(), base::Value::Type::BOOLEAN); - } else if (result_type == CFNullGetTypeID()) { - DCHECK_EQ(result.type(), base::Value::Type::NONE); - } else if (result_type == CFDictionaryGetTypeID()) { + } else if (value->is_dict()) { base::Value dictionary(base::Value::Type::DICTIONARY); - for (id key in wk_result) { - NSString* obj_c_string = base::mac::ObjCCast<NSString>(key); - base::Value value = - ValueResultFromScriptResult(wk_result[obj_c_string], max_depth - 1); + for (const auto kv : value->DictItems()) { + base::Value item_value = + ConvertedResultFromScriptResult(&kv.second, max_depth - 1); - if (value.type() == base::Value::Type::NONE) { + if (item_value.type() == base::Value::Type::NONE) { return result; } - - std::string combined_path = base::SysNSStringToUTF8(obj_c_string); - std::vector<base::StringPiece> path = base::SplitStringPiece( - combined_path, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); - dictionary.SetPath(path, std::move(value)); + dictionary.SetPath(kv.first, std::move(item_value)); } result = std::move(dictionary); DCHECK_EQ(result.type(), base::Value::Type::DICTIONARY); - } else if (result_type == CFArrayGetTypeID()) { + + } else if (value->is_list()) { std::vector<base::Value> list; - for (id list_item in wk_result) { - base::Value value = ValueResultFromScriptResult(list_item, max_depth - 1); - if (value.type() == base::Value::Type::NONE) { + for (const base::Value& list_item : value->GetList()) { + base::Value converted_item = + ConvertedResultFromScriptResult(&list_item, max_depth - 1); + if (converted_item.type() == base::Value::Type::NONE) { return result; } - list.push_back(std::move(value)); + list.push_back(std::move(converted_item)); } result = base::Value(list); DCHECK_EQ(result.type(), base::Value::Type::LIST); @@ -214,19 +209,14 @@ void DistillerPageIOS::OnLoadURLDone( } // Inject the script. base::WeakPtr<DistillerPageIOS> weak_this = weak_ptr_factory_.GetWeakPtr(); - [[web_state_->GetJSInjectionReceiver() - instanceOfClass:[CRWJSInjectionManager class]] - executeJavaScript:base::SysUTF8ToNSString(script_) - completionHandler:^(id result, NSError* error) { - DistillerPageIOS* distiller_page = weak_this.get(); - if (distiller_page) - distiller_page->HandleJavaScriptResult(result); - }]; + web_state_->ExecuteJavaScript( + base::UTF8ToUTF16(script_), + base::BindOnce(&DistillerPageIOS::HandleJavaScriptResult, weak_this)); } -void DistillerPageIOS::HandleJavaScriptResult(id result) { +void DistillerPageIOS::HandleJavaScriptResult(const base::Value* result) { base::Value result_as_value = - ValueResultFromScriptResult(result, kMaximumParsingRecursionDepth); + ConvertedResultFromScriptResult(result, kMaximumParsingRecursionDepth); OnDistillationDone(url_, &result_as_value); } |