diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-01-08 13:11:51 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-01-12 14:24:45 +0000 |
commit | fa98118a45f7e169f8846086dc2c22c49a8ba310 (patch) | |
tree | 3d21874df649136e2df0d6fc16da117d1484d93f /chromium/components/lookalikes | |
parent | 42165222878a38f10aaedf3a123ae7200a85a091 (diff) | |
download | qtwebengine-chromium-fa98118a45f7e169f8846086dc2c22c49a8ba310.tar.gz |
BASELINE: Update Chromium to 87.0.4280.144
Change-Id: I9c1b2ad99474c7252ee250024961d8ed86464e32
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/components/lookalikes')
-rw-r--r-- | chromium/components/lookalikes/core/lookalike_url_util.cc | 56 | ||||
-rw-r--r-- | chromium/components/lookalikes/core/lookalike_url_util_unittest.cc | 11 |
2 files changed, 59 insertions, 8 deletions
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.cc b/chromium/components/lookalikes/core/lookalike_url_util.cc index 15d92edc78b..6092d94ccfe 100644 --- a/chromium/components/lookalikes/core/lookalike_url_util.cc +++ b/chromium/components/lookalikes/core/lookalike_url_util.cc @@ -15,6 +15,7 @@ #include "base/memory/singleton.h" #include "base/metrics/field_trial_params.h" #include "base/metrics/histogram_macros.h" +#include "base/strings/strcat.h" #include "base/strings/string_piece.h" #include "base/strings/string_split.h" #include "base/strings/string_util.h" @@ -61,10 +62,18 @@ const base::FeatureParam<std::string> kAdditionalCommonWords{ // We might not protect a domain whose e2LD is a common word in target embedding // based on the TLD that is paired with it. -const char* kCommonWords[] = {"shop", "jobs", "live", "info", "study", - "asahi", "weather", "health", "forum", "radio", - "ideal", "research", "france", "free", "mobile", - "sky", "ask"}; +const char* kCommonWords[] = { + "shop", "jobs", "live", "info", "study", "asahi", + "weather", "health", "forum", "radio", "ideal", "research", + "france", "free", "mobile", "sky", "ask", "booking", + "canada", "dating", "dictionary", "express", "hoteles", "hotels", + "investing", "jharkhand", "nifty"}; + +// These domains are plausible lookalike targets, but they also use common words +// in their names. Selectively prevent flagging embeddings where the embedder +// ends in "-DOMAIN.TLD", since these tend to have higher false positive rates. +const char* kDomainsPermittedInEndEmbeddings[] = {"office.com", "medium.com", + "orange.fr"}; // What separators can be used to separate tokens in target embedding spoofs? // e.g. www-google.com.example.com uses "-" (www-google) and "." (google.com). @@ -258,8 +267,9 @@ bool DoesETLDPlus1MatchTopDomainOrEngagedSite( return false; } -// Returns whether the provided token includes a common word, which is a common -// indication of a likely false positive. 
+// Returns whether the e2LD of the provided domain is a common word (e.g. +// weather.com, ask.com). Target embeddings of these domains are often false +// positives (e.g. "super-best-fancy-hotels.com" isn't spoofing "hotels.com"). bool UsesCommonWord(const DomainInfo& domain) { std::vector<std::string> additional_common_words = base::SplitString(kAdditionalCommonWords.Get(), ",", @@ -296,8 +306,36 @@ bool IsEmbeddingItself(const base::span<const base::StringPiece>& domain_labels, return false; } +// Returns whether |embedded_target| and |embedding_domain| share the same e2LD, +// (as in, e.g., google.com and google.org, or airbnb.com.br and airbnb.com). +// Assumes |embedding_domain| is an eTLD+1. +bool IsCrossTLDMatch(const DomainInfo& embedded_target, + const std::string& embedding_domain) { + return ( + embedded_target.domain_without_registry == + url_formatter::top_domains::HostnameWithoutRegistry(embedding_domain)); +} + +// Returns whether |embedded_target| is one of kDomainsPermittedInEndEmbeddings +// and that |embedding_domain| ends with that domain (e.g. is of the form +// "*-outlook.com" for each example.com in kDomainsPermittedInEndEmbeddings). +// (e.g. will return true if |embedded_target| matches "evil-office.com"). Only +// impacts Target Embedding matches. +bool EndsWithPermittedDomains(const DomainInfo& embedded_target, + const std::string& embedding_domain) { + for (auto* permitted_ending : kDomainsPermittedInEndEmbeddings) { + if (embedded_target.domain_and_registry == permitted_ending && + base::EndsWith(embedding_domain, + base::StrCat({"-", permitted_ending}))) { + return true; + } + } + return false; +} + // A domain is allowed to be embedded if is embedding itself, if its e2LD is a -// common word or any valid partial subdomain is allowlisted. +// common word, any valid partial subdomain is allowlisted, or if it's a +// cross-TLD match (e.g. google.com vs google.com.mx). 
bool IsAllowedToBeEmbedded( const DomainInfo& embedded_target, const base::span<const base::StringPiece>& subdomain_span, @@ -305,7 +343,9 @@ bool IsAllowedToBeEmbedded( const std::string& embedding_domain) { return UsesCommonWord(embedded_target) || ASubdomainIsAllowlisted(subdomain_span, in_target_allowlist) || - IsEmbeddingItself(subdomain_span, embedding_domain); + IsEmbeddingItself(subdomain_span, embedding_domain) || + IsCrossTLDMatch(embedded_target, embedding_domain) || + EndsWithPermittedDomains(embedded_target, embedding_domain); } } // namespace diff --git a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc index 6c324296c73..4b951b7d58e 100644 --- a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc +++ b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc @@ -268,6 +268,17 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) { TargetEmbeddingType::kInterstitial}, {"google.com-google.com-google.com", "google.com", TargetEmbeddingType::kInterstitial}, + + // Ignore end-of-domain embeddings when they're also cross-TLD matches. + {"google.com.mx", "", TargetEmbeddingType::kNone}, + + // For a small set of high-value domains that are also common words (see + // kDomainsPermittedInEndEmbeddings), we block all embeddings except those + // at the very end of the domain (e.g. foo-{domain.com}). Ensure this + // works for domains on the list, but not for others. + {"office.com-foo.com", "office.com", TargetEmbeddingType::kInterstitial}, + {"example-office.com", "", TargetEmbeddingType::kNone}, + {"example-google.com", "google.com", TargetEmbeddingType::kInterstitial}, }; for (auto& test_case : kTestCases) { |