BASELINE: Update Chromium to 90.0.4430.221

Change-Id: Iff4d9d18d2fcf1a576f3b1f453010f744a232920 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-05-20 09:47:09 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-06-07 11:15:42 +0000
commit: 189d4fd8fad9e3c776873be51938cd31a42b6177 (patch)
tree: 6497caeff5e383937996768766ab3bb2081a40b2 /chromium/components/lookalikes
parent: 8bc75099d364490b22f43a7ce366b366c08f4164 (diff)
download: qtwebengine-chromium-189d4fd8fad9e3c776873be51938cd31a42b6177.tar.gz
8 files changed, 120 insertions, 22 deletions
diff --git a/chromium/components/lookalikes/DIR_METADATA b/chromium/components/lookalikes/DIR_METADATA
new file mode 100644
index 00000000000..b3022156e2a
--- /dev/null
+++ b/chromium/components/lookalikes/DIR_METADATA
@@ -0,0 +1,3 @@
+monorail {
+  component: "UI>Browser>Interstitials"
+}
diff --git a/chromium/components/lookalikes/OWNERS b/chromium/components/lookalikes/OWNERS
index 1d70c9fef31..f050018fa29 100644
--- a/chromium/components/lookalikes/OWNERS
+++ b/chromium/components/lookalikes/OWNERS
@@ -1,3 +1 @@
 file://chrome/browser/lookalikes/OWNERS
-
-# COMPONENT: UI>Browser>Interstitials
diff --git a/chromium/components/lookalikes/core/BUILD.gn b/chromium/components/lookalikes/core/BUILD.gn
index 30aab561767..fccb48046c8 100644
--- a/chromium/components/lookalikes/core/BUILD.gn
+++ b/chromium/components/lookalikes/core/BUILD.gn
@@ -18,6 +18,7 @@ static_library("core") {
     "//components/security_state/core:features",
     "//components/strings",
     "//components/url_formatter",
+    "//components/url_formatter/spoof_checks/common_words:common",
     "//components/url_formatter/spoof_checks/top_domains:common",
     "//components/url_formatter/spoof_checks/top_domains:top500_domains",
     "//components/url_formatter/spoof_checks/top_domains:top500_domains_header",
diff --git a/chromium/components/lookalikes/core/features.cc b/chromium/components/lookalikes/core/features.cc
index dc362a97a1b..d9e4926452d 100644
--- a/chromium/components/lookalikes/core/features.cc
+++ b/chromium/components/lookalikes/core/features.cc
@@ -13,5 +13,10 @@ const base::Feature kDetectTargetEmbeddingLookalikes{
 const base::Feature kLookalikeInterstitialForPunycode{
     "LookalikeInterstitialForPunycode", base::FEATURE_ENABLED_BY_DEFAULT};
 
+const base::Feature kLookalikeDigitalAssetLinks{
+    "LookalikeDigitalAssetLinks", base::FEATURE_DISABLED_BY_DEFAULT};
+
+const char kLookalikeDigitalAssetLinksTimeoutParameter[] = "timeout";
+
 }  // namespace features
 }  // namespace lookalikes
diff --git a/chromium/components/lookalikes/core/features.h b/chromium/components/lookalikes/core/features.h
index 453e1146082..d85eb6089c2 100644
--- a/chromium/components/lookalikes/core/features.h
+++ b/chromium/components/lookalikes/core/features.h
@@ -19,6 +19,16 @@ extern const base::Feature kDetectTargetEmbeddingLookalikes;
 COMPONENT_EXPORT(LOOKALIKES_FEATURES)
 extern const base::Feature kLookalikeInterstitialForPunycode;
 
+// This feature enables Digital Asset Link validations for lookalikes.
+COMPONENT_EXPORT(LOOKALIKES_FEATURES)
+extern const base::Feature kLookalikeDigitalAssetLinks;
+
+// Timeout before giving up on Digital Asset Link manifest fetches. The feature
+// fetches manifests from both the lookalike and the target URLs. If it fails to
+// fetch either manifest within this period, the validation is assumed to fail.
+COMPONENT_EXPORT(LOOKALIKES_FEATURES)
+extern const char kLookalikeDigitalAssetLinksTimeoutParameter[];
+
 }  // namespace features
 }  // namespace lookalikes
 
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.cc b/chromium/components/lookalikes/core/lookalike_url_util.cc
index 3e8fcddc395..2daa1378f8c 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util.cc
@@ -23,10 +23,12 @@
 #include "base/task/post_task.h"
 #include "base/task/thread_pool.h"
 #include "base/time/default_clock.h"
+#include "base/trace_event/trace_event.h"
 #include "base/values.h"
 #include "components/lookalikes/core/features.h"
 #include "components/security_interstitials/core/pref_names.h"
 #include "components/security_state/core/features.h"
+#include "components/url_formatter/spoof_checks/common_words/common_words_util.h"
 #include "components/url_formatter/spoof_checks/top_domains/top500_domains.h"
 #include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h"
 #include "components/url_formatter/url_formatter.h"
@@ -56,18 +58,15 @@ const size_t kMinE2LDLengthForTargetEmbedding = 4;
 
 // This list will be added to the static list of common words so common words
 // could be added to the list using a flag if needed.
-const base::FeatureParam<std::string> kAdditionalCommonWords{
+const base::FeatureParam<std::string> kRemoveAdditionalCommonWords{
     &lookalikes::features::kDetectTargetEmbeddingLookalikes,
     "additional_common_words", ""};
 
 // We might not protect a domain whose e2LD is a common word in target embedding
-// based on the TLD that is paired with it.
-const char* kCommonWords[] = {
-    "shop",      "jobs",      "live",       "info",    "study",   "asahi",
-    "weather",   "health",    "forum",      "radio",   "ideal",   "research",
-    "france",    "free",      "mobile",     "sky",     "ask",     "booking",
-    "canada",    "dating",    "dictionary", "express", "hoteles", "hotels",
-    "investing", "jharkhand", "nifty"};
+// based on the TLD that is paired with it. This list supplements words from
+// url_formatter::common_words::IsCommonWord().
+const char* kLocalAdditionalCommonWords[] = {"asahi", "hoteles", "jharkhand",
+                                             "nifty"};
 
 // These domains are plausible lookalike targets, but they also use common words
 // in their names. Selectively prevent flagging embeddings where the embedder
@@ -241,6 +240,13 @@ std::string GetMatchingTopDomainWithoutSeparators(
   return std::string();
 }
 
+// True when |host| is a bare eTLD+1 (e.g. 'google.com') or merely puts the
+// trivial 'www.' label in front of one (e.g. 'www.google.com').
+bool IsETLDPlusOneOrTrivialSubdomain(const DomainInfo& host) {
+  const auto& etld1 = host.domain_and_registry;
+  return etld1 == host.hostname || "www." + etld1 == host.hostname;
+}
+
 // Returns if |etld_plus_one| shares the skeleton of an eTLD+1 with an engaged
 // site or a top 500 domain. |embedded_target| is set to matching eTLD+1.
 bool DoesETLDPlus1MatchTopDomainOrEngagedSite(
@@ -249,7 +255,11 @@ bool DoesETLDPlus1MatchTopDomainOrEngagedSite(
     std::string* embedded_target) {
   for (const auto& skeleton : domain.skeletons) {
     for (const auto& engaged_site : engaged_sites) {
-      if (base::Contains(engaged_site.skeletons, skeleton)) {
+      // Skeleton matching only calculates skeletons of the eTLD+1, so only
+      // consider engaged sites that are bare eTLD+1s (or a trivial subdomain)
+      // and are a skeleton match.
+      if (IsETLDPlusOneOrTrivialSubdomain(engaged_site) &&
+          base::Contains(engaged_site.skeletons, skeleton)) {
         *embedded_target = engaged_site.domain_and_registry;
         return true;
       }
@@ -271,17 +281,33 @@ bool DoesETLDPlus1MatchTopDomainOrEngagedSite(
 // weather.com, ask.com). Target embeddings of these domains are often false
 // positives (e.g. "super-best-fancy-hotels.com" isn't spoofing "hotels.com").
 bool UsesCommonWord(const DomainInfo& domain) {
-  std::vector<std::string> additional_common_words =
-      base::SplitString(kAdditionalCommonWords.Get(), ",",
-                        base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
-  if (base::Contains(additional_common_words, domain.domain_without_registry)) {
+  // kDomainsPermittedInEndEmbeddings are based on domains with common words,
+  // but they should not be excluded here (and instead are checked later).
+  for (auto* permitted_ending : kDomainsPermittedInEndEmbeddings) {
+    if (domain.domain_and_registry == permitted_ending) {
+      return false;
+    }
+  }
+
+  // Search for words in the big common word list.
+  if (url_formatter::common_words::IsCommonWord(
+          domain.domain_without_registry)) {
     return true;
   }
-  for (auto* common_word : kCommonWords) {
+
+  // Also check the local lists.
+  for (auto* common_word : kLocalAdditionalCommonWords) {
     if (domain.domain_without_registry == common_word) {
       return true;
     }
   }
+  std::vector<std::string> additional_common_words =
+      base::SplitString(kRemoveAdditionalCommonWords.Get(), ",",
+                        base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+  if (base::Contains(additional_common_words, domain.domain_without_registry)) {
+    return true;
+  }
+
   return false;
 }
 
@@ -317,9 +343,8 @@ bool IsCrossTLDMatch(const DomainInfo& embedded_target,
 }
 
 // Returns whether |embedded_target| is one of kDomainsPermittedInEndEmbeddings
-// and that |embedding_domain| ends with that domain (e.g. is of the form
-// "*-outlook.com" for each example.com in kDomainsPermittedInEndEmbeddings).
-// (e.g. will return true if |embedded_target| matches "evil-office.com"). Only
+// and that |embedding_domain| ends with that domain, e.g. "evil-office.com" is
+// permitted, as "office.com" is in kDomainsPermittedInEndEmbeddings.  Only
 // impacts Target Embedding matches.
 bool EndsWithPermittedDomains(const DomainInfo& embedded_target,
                               const std::string& embedding_domain) {
@@ -348,6 +373,22 @@ bool IsAllowedToBeEmbedded(
          EndsWithPermittedDomains(embedded_target, embedding_domain);
 }
 
+// Returns the first character of |str1| that differs from the character at the
+// same position in |str2|, or that lies past the end of |str2|.
+char GetFirstDifferentChar(const std::string& str1, const std::string& str2) {
+  // Skip the shared prefix. If |str2| ends first, the next character of |str1|
+  // (e.g. the trailing '-' in "abcde-" vs "abcde") is the first difference.
+  size_t i = 0;
+  while (i < str1.size() && i < str2.size() && str1[i] == str2[i]) {
+    ++i;
+  }
+  if (i < str1.size()) {
+    return str1[i];
+  }
+  NOTREACHED();  // |str1| is equal to, or a proper prefix of, |str2|.
+  return 0;
+}
+
 }  // namespace
 
 DomainInfo::DomainInfo(const std::string& arg_hostname,
@@ -366,6 +407,7 @@ DomainInfo::~DomainInfo() = default;
 DomainInfo::DomainInfo(const DomainInfo&) = default;
 
 DomainInfo GetDomainInfo(const std::string& hostname) {
+  TRACE_EVENT0("navigation", "GetDomainInfo");
   if (net::HostStringIsLocalhost(hostname) ||
       net::IsHostnameNonUnique(hostname)) {
     return DomainInfo(std::string(), std::string(), std::string(),
@@ -498,6 +540,17 @@ bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain,
     }
   }
 
+  // Ignore domains that only differ by an insertion of a "-".
+  if (nav_dom_len != matched_dom_len) {
+    if (nav_dom_len < matched_dom_len &&
+        GetFirstDifferentChar(matched_dom, nav_dom) == '-') {
+      return true;
+    } else if (nav_dom_len > matched_dom_len &&
+               GetFirstDifferentChar(nav_dom, matched_dom) == '-') {
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.h b/chromium/components/lookalikes/core/lookalike_url_util.h
index 73d2d8afb0c..c5a27718cef 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util.h
+++ b/chromium/components/lookalikes/core/lookalike_url_util.h
@@ -138,7 +138,7 @@ bool IsEditDistanceAtMostOne(const base::string16& str1,
 // Returns whether |navigated_domain| and |matched_domain| are likely to be edit
 // distance false positives, and thus the user should *not* be warned.
 //
-// Assumes |navigated_domain| and |matched_domain| are edit distance matches.
+// Assumes |navigated_domain| and |matched_domain| have an edit distance of 1.
 bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain,
                                        const DomainInfo& matched_domain);
 
diff --git a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
index c0f049a1528..25f30d6ad8d 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
@@ -105,6 +105,14 @@ TEST(LookalikeUrlUtilTest, EditDistanceExcludesCommonFalsePositives) {
       {"abcde.com", "axbcde.com", false},   // Deletion
       {"axbcde.com", "abcde.com", false},   // Insertion
       {"axbcde.com", "aybcde.com", false},  // Substitution
+
+      // We permit matches that only differ due to a single "-".
+      {"-abcde.com", "abcde.com", true},
+      {"ab-cde.com", "abcde.com", true},
+      {"abcde-.com", "abcde.com", true},
+      {"abcde.com", "-abcde.com", true},
+      {"abcde.com", "ab-cde.com", true},
+      {"abcde.com", "abcde-.com", true},
   };
   for (const TestCase& test_case : kTestCases) {
     auto navigated =
@@ -134,8 +142,10 @@ struct TargetEmbeddingHeuristicTestCase {
 TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
   const std::vector<DomainInfo> kEngagedSites = {
       GetDomainInfo(GURL("https://highengagement.com")),
+      GetDomainInfo(GURL("https://highengagement.inthesubdomain.com")),
       GetDomainInfo(GURL("https://highengagement.co.uk")),
       GetDomainInfo(GURL("https://subdomain.highengagement.com")),
+      GetDomainInfo(GURL("https://www.highengagementwithwww.com")),
       GetDomainInfo(GURL("https://subdomain.google.com")),
   };
   const std::vector<TargetEmbeddingHeuristicTestCase> kTestCases = {
@@ -207,6 +217,9 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
        TargetEmbeddingType::kInterstitial},
       {"foo.jobs.org-foo.com", "", TargetEmbeddingType::kNone},
       {"foo.office.org-foo.com", "", TargetEmbeddingType::kNone},
+      // Common words (like 'jobs') are in the big common word list. Ensure the
+      // supplemental kLocalAdditionalCommonWords list is also checked.
+      {"foo.hoteles.com-foo.com", "", TargetEmbeddingType::kNone},
 
       // Targets could be embedded without their dots and dashes.
       {"googlecom-foo.com", "google.com", TargetEmbeddingType::kInterstitial},
@@ -242,9 +255,19 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
       {"foo.subdomain.google.com.foo.com", "subdomain.google.com",
        TargetEmbeddingType::kInterstitial},
 
-      // Skeleton matching should work against engaged sites at the eTLD level.
+      // Skeleton matching should work against engaged sites at an eTLD+1 level,
+      {"highengagement.inthesubdomain.com-foo.com",
+       "highengagement.inthesubdomain.com", TargetEmbeddingType::kInterstitial},
+      // but only if the bare eTLD+1, or www.[eTLD+1] has been engaged.
       {"subdomain.highéngagement.com-foo.com", "highengagement.com",
        TargetEmbeddingType::kInterstitial},
+      {"subdomain.highéngagementwithwww.com-foo.com",
+       "highengagementwithwww.com", TargetEmbeddingType::kInterstitial},
+      {"other.inthésubdomain.com-foo.com", "", TargetEmbeddingType::kNone},
+      // Ideally, we'd be able to combine subdomains and skeleton matching, but
+      // our current algorithm can't detect that precisely.
+      {"highengagement.inthésubdomain.com-foo.com", "",
+       TargetEmbeddingType::kNone},
 
       // Domains should be allowed to embed themselves.
       {"highengagement.com.highengagement.com", "", TargetEmbeddingType::kNone},
@@ -291,7 +314,12 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
           << test_case.expected_safe_host << ", but "
           << (safe_hostname.empty() ? "it didn't trigger at all."
                                     : "triggered on " + safe_hostname);
-      EXPECT_EQ(embedding_type, test_case.expected_type);
+      EXPECT_EQ(embedding_type, test_case.expected_type)
+          << test_case.hostname << " should trigger on "
+          << test_case.expected_safe_host << " but it returned "
+          << (embedding_type == TargetEmbeddingType::kNone
+                  ? "kNone."
+                  : "something unexpected");
     } else {
       EXPECT_EQ(embedding_type, TargetEmbeddingType::kNone)
           << test_case.hostname << " unexpectedly triggered on "
author	Allan Sandfeld Jensen <allan.jensen@qt.io>	2021-05-20 09:47:09 +0200
committer	Allan Sandfeld Jensen <allan.jensen@qt.io>	2021-06-07 11:15:42 +0000
commit	189d4fd8fad9e3c776873be51938cd31a42b6177 (patch)
tree	6497caeff5e383937996768766ab3bb2081a40b2 /chromium/components/lookalikes
parent	8bc75099d364490b22f43a7ce366b366c08f4164 (diff)
download	qtwebengine-chromium-189d4fd8fad9e3c776873be51938cd31a42b6177.tar.gz