summaryrefslogtreecommitdiff
path: root/chromium/components/lookalikes
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-01-08 13:11:51 +0100
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-01-12 14:24:45 +0000
commit: fa98118a45f7e169f8846086dc2c22c49a8ba310 (patch)
tree: 3d21874df649136e2df0d6fc16da117d1484d93f /chromium/components/lookalikes
parent: 42165222878a38f10aaedf3a123ae7200a85a091 (diff)
download: qtwebengine-chromium-fa98118a45f7e169f8846086dc2c22c49a8ba310.tar.gz
BASELINE: Update Chromium to 87.0.4280.144
Change-Id: I9c1b2ad99474c7252ee250024961d8ed86464e32 Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/components/lookalikes')
-rw-r--r-- chromium/components/lookalikes/core/lookalike_url_util.cc 56
-rw-r--r-- chromium/components/lookalikes/core/lookalike_url_util_unittest.cc 11
2 files changed, 59 insertions, 8 deletions
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.cc b/chromium/components/lookalikes/core/lookalike_url_util.cc
index 15d92edc78b..6092d94ccfe 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util.cc
@@ -15,6 +15,7 @@
#include "base/memory/singleton.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
+#include "base/strings/strcat.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
@@ -61,10 +62,18 @@ const base::FeatureParam<std::string> kAdditionalCommonWords{
// We might not protect a domain whose e2LD is a common word in target embedding
// based on the TLD that is paired with it.
-const char* kCommonWords[] = {"shop", "jobs", "live", "info", "study",
- "asahi", "weather", "health", "forum", "radio",
- "ideal", "research", "france", "free", "mobile",
- "sky", "ask"};
+const char* kCommonWords[] = {
+ "shop", "jobs", "live", "info", "study", "asahi",
+ "weather", "health", "forum", "radio", "ideal", "research",
+ "france", "free", "mobile", "sky", "ask", "booking",
+ "canada", "dating", "dictionary", "express", "hoteles", "hotels",
+ "investing", "jharkhand", "nifty"};
+
+// These domains are plausible lookalike targets, but they also use common words
+// in their names. Selectively prevent flagging embeddings where the embedder
+// ends in "-DOMAIN.TLD", since these tend to have higher false positive rates.
+const char* kDomainsPermittedInEndEmbeddings[] = {"office.com", "medium.com",
+ "orange.fr"};
// What separators can be used to separate tokens in target embedding spoofs?
// e.g. www-google.com.example.com uses "-" (www-google) and "." (google.com).
@@ -258,8 +267,9 @@ bool DoesETLDPlus1MatchTopDomainOrEngagedSite(
return false;
}
-// Returns whether the provided token includes a common word, which is a common
-// indication of a likely false positive.
+// Returns whether the e2LD of the provided domain is a common word (e.g.
+// weather.com, ask.com). Target embeddings of these domains are often false
+// positives (e.g. "super-best-fancy-hotels.com" isn't spoofing "hotels.com").
bool UsesCommonWord(const DomainInfo& domain) {
std::vector<std::string> additional_common_words =
base::SplitString(kAdditionalCommonWords.Get(), ",",
@@ -296,8 +306,36 @@ bool IsEmbeddingItself(const base::span<const base::StringPiece>& domain_labels,
return false;
}
+// Returns whether |embedded_target| and |embedding_domain| share the same e2LD,
+// (as in, e.g., google.com and google.org, or airbnb.com.br and airbnb.com).
+// Assumes |embedding_domain| is an eTLD+1.
+bool IsCrossTLDMatch(const DomainInfo& embedded_target,
+ const std::string& embedding_domain) {
+ return (
+ embedded_target.domain_without_registry ==
+ url_formatter::top_domains::HostnameWithoutRegistry(embedding_domain));
+}
+
+// Returns whether |embedded_target| is one of kDomainsPermittedInEndEmbeddings
+// and |embedding_domain| ends with "-" followed by that domain (i.e. is of the
+// form "*-example.com" for some example.com in that list). For instance, this
+// returns true when |embedded_target| is office.com and |embedding_domain| is
+// "evil-office.com". Only impacts Target Embedding matches.
+bool EndsWithPermittedDomains(const DomainInfo& embedded_target,
+ const std::string& embedding_domain) {
+ for (auto* permitted_ending : kDomainsPermittedInEndEmbeddings) {
+ if (embedded_target.domain_and_registry == permitted_ending &&
+ base::EndsWith(embedding_domain,
+ base::StrCat({"-", permitted_ending}))) {
+ return true;
+ }
+ }
+ return false;
+}
+
// A domain is allowed to be embedded if is embedding itself, if its e2LD is a
-// common word or any valid partial subdomain is allowlisted.
+// common word, any valid partial subdomain is allowlisted, or if it's a
+// cross-TLD match (e.g. google.com vs google.com.mx).
bool IsAllowedToBeEmbedded(
const DomainInfo& embedded_target,
const base::span<const base::StringPiece>& subdomain_span,
@@ -305,7 +343,9 @@ bool IsAllowedToBeEmbedded(
const std::string& embedding_domain) {
return UsesCommonWord(embedded_target) ||
ASubdomainIsAllowlisted(subdomain_span, in_target_allowlist) ||
- IsEmbeddingItself(subdomain_span, embedding_domain);
+ IsEmbeddingItself(subdomain_span, embedding_domain) ||
+ IsCrossTLDMatch(embedded_target, embedding_domain) ||
+ EndsWithPermittedDomains(embedded_target, embedding_domain);
}
} // namespace
diff --git a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
index 6c324296c73..4b951b7d58e 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
@@ -268,6 +268,17 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
TargetEmbeddingType::kInterstitial},
{"google.com-google.com-google.com", "google.com",
TargetEmbeddingType::kInterstitial},
+
+ // Ignore end-of-domain embeddings when they're also cross-TLD matches.
+ {"google.com.mx", "", TargetEmbeddingType::kNone},
+
+ // For a small set of high-value domains that are also common words (see
+ // kDomainsPermittedInEndEmbeddings), we block all embeddings except those
+ // at the very end of the domain (e.g. foo-{domain.com}). Ensure this
+ // works for domains on the list, but not for others.
+ {"office.com-foo.com", "office.com", TargetEmbeddingType::kInterstitial},
+ {"example-office.com", "", TargetEmbeddingType::kNone},
+ {"example-google.com", "google.com", TargetEmbeddingType::kInterstitial},
};
for (auto& test_case : kTestCases) {