summaryrefslogtreecommitdiff
path: root/chromium/components/lookalikes
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-10-29 10:46:47 +0100
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-11-02 12:02:10 +0000
commit 99677208ff3b216fdfec551fbe548da5520cd6fb (patch)
tree 476a4865c10320249360e859d8fdd3e01833b03a /chromium/components/lookalikes
parent c30a6232df03e1efbd9f3b226777b07e087a1122 (diff)
download qtwebengine-chromium-99677208ff3b216fdfec551fbe548da5520cd6fb.tar.gz
BASELINE: Update Chromium to 86.0.4240.124
Change-Id: Ide0ff151e94cd665ae6521a446995d34a9d1d644 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/lookalikes')
-rw-r--r--chromium/components/lookalikes/DEPS6
-rw-r--r--chromium/components/lookalikes/core/BUILD.gn15
-rw-r--r--chromium/components/lookalikes/core/lookalike_url_ui_util.cc113
-rw-r--r--chromium/components/lookalikes/core/lookalike_url_ui_util.h36
-rw-r--r--chromium/components/lookalikes/core/lookalike_url_util.cc129
-rw-r--r--chromium/components/lookalikes/core/lookalike_url_util.h17
-rw-r--r--chromium/components/lookalikes/core/lookalike_url_util_unittest.cc9
7 files changed, 314 insertions, 11 deletions
diff --git a/chromium/components/lookalikes/DEPS b/chromium/components/lookalikes/DEPS
index 563bb1e8d24..d725daeca95 100644
--- a/chromium/components/lookalikes/DEPS
+++ b/chromium/components/lookalikes/DEPS
@@ -1,5 +1,11 @@
include_rules = [
+ "+components/pref_registry",
+ "+components/prefs",
+ "+components/security_interstitials/core",
"+components/security_state",
+ "+components/strings/grit/components_strings.h",
"+components/url_formatter",
"+net/base",
+ "+services/metrics/public/cpp",
+ "+ui/base",
]
diff --git a/chromium/components/lookalikes/core/BUILD.gn b/chromium/components/lookalikes/core/BUILD.gn
index 65e89c84808..30aab561767 100644
--- a/chromium/components/lookalikes/core/BUILD.gn
+++ b/chromium/components/lookalikes/core/BUILD.gn
@@ -2,26 +2,33 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-import("//build/config/jumbo.gni")
-
-jumbo_static_library("core") {
+static_library("core") {
sources = [
+ "lookalike_url_ui_util.cc",
+ "lookalike_url_ui_util.h",
"lookalike_url_util.cc",
"lookalike_url_util.h",
]
deps = [
":features",
"//base",
+ "//components/pref_registry",
+ "//components/prefs:prefs",
+ "//components/security_interstitials/core",
"//components/security_state/core:features",
+ "//components/strings",
"//components/url_formatter",
"//components/url_formatter/spoof_checks/top_domains:common",
"//components/url_formatter/spoof_checks/top_domains:top500_domains",
"//components/url_formatter/spoof_checks/top_domains:top500_domains_header",
"//net",
+ "//services/metrics/public/cpp:metrics_cpp",
+ "//services/metrics/public/cpp:ukm_builders",
+ "//ui/base",
]
}
-jumbo_source_set("unit_tests") {
+source_set("unit_tests") {
testonly = true
sources = [ "lookalike_url_util_unittest.cc" ]
diff --git a/chromium/components/lookalikes/core/lookalike_url_ui_util.cc b/chromium/components/lookalikes/core/lookalike_url_ui_util.cc
new file mode 100644
index 00000000000..7f90a141f29
--- /dev/null
+++ b/chromium/components/lookalikes/core/lookalike_url_ui_util.cc
@@ -0,0 +1,113 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/lookalikes/core/lookalike_url_ui_util.h"
+
+#include "build/build_config.h"
+#include "components/lookalikes/core/lookalike_url_util.h"
+#include "components/security_interstitials/core/common_string_util.h"
+#include "components/strings/grit/components_strings.h"
+#include "services/metrics/public/cpp/ukm_builders.h"
+#include "services/metrics/public/cpp/ukm_recorder.h"
+#include "ui/base/l10n/l10n_util.h"
+
+void RecordUkmForLookalikeUrlBlockingPage(
+ ukm::SourceId source_id,
+ LookalikeUrlMatchType match_type,
+ LookalikeUrlBlockingPageUserAction user_action) {
+ ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
+ CHECK(ukm_recorder);
+
+ ukm::builders::LookalikeUrl_NavigationSuggestion(source_id)
+ .SetMatchType(static_cast<int>(match_type))
+ .SetUserAction(static_cast<int>(user_action))
+ .Record(ukm_recorder);
+}
+
+void ReportUkmForLookalikeUrlBlockingPageIfNeeded(
+ ukm::SourceId& source_id,
+ LookalikeUrlMatchType match_type,
+ LookalikeUrlBlockingPageUserAction action) {
+ // Rely on the saved SourceId because destruction happens after the next
+ // navigation occurs, so web contents points to the new destination.
+ if (source_id != ukm::kInvalidSourceId) {
+ RecordUkmForLookalikeUrlBlockingPage(source_id, match_type, action);
+ source_id = ukm::kInvalidSourceId;
+ }
+}
+
+void PopulateLookalikeUrlBlockingPageStrings(
+ base::DictionaryValue* load_time_data,
+ const GURL& safe_url,
+ const GURL& request_url) {
+ CHECK(load_time_data);
+
+ PopulateStringsForSharedHTML(load_time_data);
+ load_time_data->SetString("tabTitle",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_TITLE));
+ load_time_data->SetString(
+ "optInLink",
+ l10n_util::GetStringUTF16(IDS_SAFE_BROWSING_SCOUT_REPORTING_AGREE));
+
+ if (safe_url.is_valid()) {
+ const base::string16 hostname =
+ security_interstitials::common_string_util::GetFormattedHostName(
+ safe_url);
+ load_time_data->SetString(
+ "heading",
+ l10n_util::GetStringFUTF16(IDS_LOOKALIKE_URL_HEADING, hostname));
+ load_time_data->SetString(
+ "primaryParagraph",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_PRIMARY_PARAGRAPH));
+ load_time_data->SetString(
+ "proceedButtonText",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_IGNORE));
+ load_time_data->SetString(
+ "primaryButtonText",
+ l10n_util::GetStringFUTF16(IDS_LOOKALIKE_URL_CONTINUE, hostname));
+ } else {
+ // No safe URL available to suggest. This can happen when the navigated
+ // domain fails IDN spoof checks but isn't a lookalike of a known domain.
+ // TODO: Change to actual strings.
+ load_time_data->SetString(
+ "heading",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_HEADING_NO_SUGGESTED_URL));
+ load_time_data->SetString(
+ "primaryParagraph",
+ l10n_util::GetStringUTF16(
+ IDS_LOOKALIKE_URL_PRIMARY_PARAGRAPH_NO_SUGGESTED_URL));
+ load_time_data->SetString(
+ "proceedButtonText",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_IGNORE));
+ load_time_data->SetString(
+ "primaryButtonText",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_BACK_TO_SAFETY));
+#if defined(OS_IOS)
+ // On iOS, offer to close the page instead of navigating to NTP when the
+ // safe URL is empty or invalid and the page is unable to go back.
+ bool show_close_page = false;
+ load_time_data->GetBoolean("cant_go_back", &show_close_page);
+ if (show_close_page) {
+ load_time_data->SetString(
+ "primaryButtonText",
+ l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_CLOSE_PAGE));
+ }
+#endif
+ }
+ load_time_data->SetString("lookalikeRequestHostname", request_url.host());
+}
+
+void PopulateStringsForSharedHTML(base::DictionaryValue* load_time_data) {
+ load_time_data->SetBoolean("lookalike_url", true);
+ load_time_data->SetBoolean("overridable", false);
+ load_time_data->SetBoolean("hide_primary_button", false);
+ load_time_data->SetBoolean("show_recurrent_error_paragraph", false);
+
+ load_time_data->SetString("recurrentErrorParagraph", "");
+ load_time_data->SetString("openDetails", "");
+ load_time_data->SetString("explanationParagraph", "");
+ load_time_data->SetString("finalParagraph", "");
+
+ load_time_data->SetString("type", "LOOKALIKE");
+}
diff --git a/chromium/components/lookalikes/core/lookalike_url_ui_util.h b/chromium/components/lookalikes/core/lookalike_url_ui_util.h
new file mode 100644
index 00000000000..c15ea9a3186
--- /dev/null
+++ b/chromium/components/lookalikes/core/lookalike_url_ui_util.h
@@ -0,0 +1,36 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_
+#define COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_
+
+#include "components/lookalikes/core/lookalike_url_util.h"
+#include "services/metrics/public/cpp/ukm_source_id.h"
+
+namespace base {
+class DictionaryValue;
+} // namespace base
+
+// Allow easier reporting of UKM when no interstitial is shown.
+void RecordUkmForLookalikeUrlBlockingPage(
+ ukm::SourceId source_id,
+ LookalikeUrlMatchType match_type,
+ LookalikeUrlBlockingPageUserAction user_action);
+
+// Record UKM if not already reported for this page.
+void ReportUkmForLookalikeUrlBlockingPageIfNeeded(
+ ukm::SourceId& source_id,
+ LookalikeUrlMatchType match_type,
+ LookalikeUrlBlockingPageUserAction action);
+
+// Populates |load_time_data| for interstitial HTML.
+void PopulateLookalikeUrlBlockingPageStrings(
+ base::DictionaryValue* load_time_data,
+ const GURL& safe_url,
+ const GURL& request_url);
+
+// Values added to get shared interstitial HTML to play nice.
+void PopulateStringsForSharedHTML(base::DictionaryValue* load_time_data);
+
+#endif // COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.cc b/chromium/components/lookalikes/core/lookalike_url_util.cc
index 4a350ed122e..65c3b27b477 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util.cc
@@ -9,6 +9,7 @@
#include "base/bind.h"
#include "base/callback.h"
#include "base/feature_list.h"
+#include "base/i18n/char_iterator.h"
#include "base/macros.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/singleton.h"
@@ -21,7 +22,9 @@
#include "base/task/post_task.h"
#include "base/task/thread_pool.h"
#include "base/time/default_clock.h"
+#include "base/values.h"
#include "components/lookalikes/core/features.h"
+#include "components/security_interstitials/core/pref_names.h"
#include "components/security_state/core/features.h"
#include "components/url_formatter/spoof_checks/top_domains/top500_domains.h"
#include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h"
@@ -33,6 +36,10 @@ namespace lookalikes {
const char kHistogramName[] = "NavigationSuggestion.Event";
+void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) {
+ registry->RegisterListPref(prefs::kLookalikeWarningAllowlistDomains);
+}
+
} // namespace lookalikes
namespace {
@@ -59,6 +66,10 @@ const char* kCommonWords[] = {"shop", "jobs", "live", "info", "study",
"ideal", "research", "france", "free", "mobile",
"sky", "ask"};
+// What separators can be used to separate tokens in target embedding spoofs?
+// e.g. www-google.com.example.com uses "-" (www-google) and "." (google.com).
+const char kTargetEmbeddingSeparators[] = "-.";
+
bool SkeletonsMatch(const url_formatter::Skeletons& skeletons1,
const url_formatter::Skeletons& skeletons2) {
DCHECK(!skeletons1.empty());
@@ -175,7 +186,8 @@ void RecordEvent(NavigationSuggestionEvent event) {
// StringPieces.
std::vector<base::StringPiece> SplitDomainWithouteTLDIntoTokens(
const std::string& host_without_etld) {
- return base::SplitStringPiece(host_without_etld, "-.", base::TRIM_WHITESPACE,
+ return base::SplitStringPiece(host_without_etld, kTargetEmbeddingSeparators,
+ base::TRIM_WHITESPACE,
base::SPLIT_WANT_NONEMPTY);
}
@@ -588,11 +600,29 @@ TargetEmbeddingType GetTargetEmbeddingType(
// This check happens first so that we can exclude invalid eTLD+1s next.
std::string embedded_target = GetMatchingTopDomainWithoutSeparators(
hostname_tokens_without_etld[end - 1]);
- if (!embedded_target.empty() &&
- !IsAllowedToBeEmbedded(etld_check_dominfo, etld_check_span,
- in_target_allowlist)) {
- *safe_hostname = embedded_target;
- return TargetEmbeddingType::kInterstitial;
+ if (!embedded_target.empty()) {
+ // Extract the full possibly-spoofed domain. To get this, we take the
+ // hostname up until this point, strip off the no-separator bit (e.g.
+ // googlecom) and then re-add the separated version (e.g. google.com).
+ auto spoofed_domain =
+ etld_check_host.substr(
+ 0, etld_check_host.length() -
+ hostname_tokens_without_etld[end - 1].length()) +
+ embedded_target;
+ const auto no_separator_tokens = base::SplitStringPiece(
+ spoofed_domain, kTargetEmbeddingSeparators, base::TRIM_WHITESPACE,
+ base::SPLIT_WANT_NONEMPTY);
+ auto no_separator_dominfo = GetDomainInfo(embedded_target);
+
+ // Only flag on domains that are long enough, don't use common words, and
+ // aren't target-allowlisted.
+ if (no_separator_dominfo.domain_without_registry.length() >
+ kMinE2LDLengthForTargetEmbedding &&
+ !IsAllowedToBeEmbedded(no_separator_dominfo, no_separator_tokens,
+ in_target_allowlist)) {
+ *safe_hostname = embedded_target;
+ return TargetEmbeddingType::kInterstitial;
+ }
}
// Exclude otherwise-invalid eTLDs.
@@ -636,3 +666,90 @@ TargetEmbeddingType GetTargetEmbeddingType(
}
return TargetEmbeddingType::kNone;
}
+
+bool IsASCII(UChar32 codepoint) {
+ return !(codepoint & ~0x7F);
+}
+
+// Returns true if |codepoint| has emoji related properties.
+bool IsEmojiRelatedCodepoint(UChar32 codepoint) {
+ return u_hasBinaryProperty(codepoint, UCHAR_EMOJI) ||
+ // Characters that have emoji presentation by default (e.g. hourglass)
+ u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION) ||
+ // Characters displayed as country flags when used as a valid pair.
+ // E.g. Regional Indicator Symbol Letter B used once in a string
+ // is rendered as 🇧, used twice is rendered as the flag of Barbados
+ // (with country code BB). It's therefore possible to come up with
+ // a spoof using regional indicator characters as text, but these
+ // domain names will be readily punycoded and detecting pairs isn't
+ // easy so we keep the code simple here.
+ u_hasBinaryProperty(codepoint, UCHAR_REGIONAL_INDICATOR) ||
+ // Pictographs such as Black Cross On Shield (U+26E8).
+ u_hasBinaryProperty(codepoint, UCHAR_EXTENDED_PICTOGRAPHIC);
+}
+
+// Returns true if |text| contains only ASCII characters, pictographs
+// or emojis. This check is only used to determine if a domain that already
+// failed spoof checks should be blocked by an interstitial. Ideally, we would
+// check this for non-ASCII scripts as well (e.g. Cyrillic + emoji), but such
+// usage isn't common.
+bool IsASCIIAndEmojiOnly(const base::StringPiece16& text) {
+ base::i18n::UTF16CharIterator iter(text.data(), text.length());
+ while (!iter.end()) {
+ const UChar32 codepoint = iter.get();
+ if (!IsASCII(codepoint) && !IsEmojiRelatedCodepoint(codepoint)) {
+ return false;
+ }
+ iter.Advance();
+ }
+ return true;
+}
+
+bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain) {
+ // Here, only a subset of spoof checks that cause an IDN to fallback to
+ // punycode are configured to show an interstitial.
+ switch (navigated_domain.idn_result.spoof_check_result) {
+ case url_formatter::IDNSpoofChecker::Result::kNone:
+ case url_formatter::IDNSpoofChecker::Result::kSafe:
+ return false;
+
+ case url_formatter::IDNSpoofChecker::Result::kICUSpoofChecks:
+ // If the eTLD+1 contains only a mix of ASCII + Emoji, allow.
+ return !IsASCIIAndEmojiOnly(navigated_domain.idn_result.result);
+
+ case url_formatter::IDNSpoofChecker::Result::kDeviationCharacters:
+ // Failures because of deviation characters, especially ß, are common.
+ return false;
+
+ case url_formatter::IDNSpoofChecker::Result::kTLDSpecificCharacters:
+ case url_formatter::IDNSpoofChecker::Result::kUnsafeMiddleDot:
+ case url_formatter::IDNSpoofChecker::Result::kWholeScriptConfusable:
+ case url_formatter::IDNSpoofChecker::Result::kDigitLookalikes:
+ case url_formatter::IDNSpoofChecker::Result::
+ kNonAsciiLatinCharMixedWithNonLatin:
+ case url_formatter::IDNSpoofChecker::Result::kDangerousPattern:
+ return true;
+ }
+}
+
+bool IsAllowedByEnterprisePolicy(const PrefService* pref_service,
+ const GURL& url) {
+ const auto* list =
+ pref_service->GetList(prefs::kLookalikeWarningAllowlistDomains);
+ for (const auto& domain_val : *list) {
+ auto domain = domain_val.GetString();
+ if (url.DomainIs(domain)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void SetEnterpriseAllowlistForTesting(PrefService* pref_service,
+ const std::vector<std::string>& hosts) {
+ base::Value list(base::Value::Type::LIST);
+ for (const auto& host : hosts) {
+ list.Append(host);
+ }
+ pref_service->Set(prefs::kLookalikeWarningAllowlistDomains, std::move(list));
+}
diff --git a/chromium/components/lookalikes/core/lookalike_url_util.h b/chromium/components/lookalikes/core/lookalike_url_util.h
index 00946f6d909..cfac43cd0ae 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util.h
+++ b/chromium/components/lookalikes/core/lookalike_url_util.h
@@ -10,6 +10,8 @@
#include "base/callback.h"
#include "base/time/time.h"
+#include "components/pref_registry/pref_registry_syncable.h"
+#include "components/prefs/pref_service.h"
#include "components/url_formatter/url_formatter.h"
#include "url/gurl.h"
@@ -17,6 +19,9 @@ class GURL;
namespace lookalikes {
extern const char kHistogramName[];
+
+// Register applicable preferences with the provided registry.
+void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry);
}
using LookalikeTargetAllowlistChecker =
@@ -186,4 +191,16 @@ TargetEmbeddingType GetTargetEmbeddingType(
const LookalikeTargetAllowlistChecker& in_target_allowlist,
std::string* safe_hostname);
+// Returns true if a navigation to an IDN should be blocked.
+bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain);
+
+// Checks whether the given url is allowlisted by enterprise policy, and
+// thus no warnings should be shown on that host.
+bool IsAllowedByEnterprisePolicy(const PrefService* pref_service,
+ const GURL& url);
+
+// Add the given hosts to the allowlist policy setting.
+void SetEnterpriseAllowlistForTesting(PrefService* pref_service,
+ const std::vector<std::string>& hosts);
+
#endif // COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UTIL_H_
diff --git a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
index 1aed2eddeec..e57b0b974e2 100644
--- a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
+++ b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc
@@ -198,7 +198,7 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
{"scholar.foo.google.com.foo.com", "google.com",
TargetEmbeddingType::kInterstitial},
- // Targets should be longer than 6 characters.
+ // e2LDs should be longer than 3 characters.
{"hp.com-foo.com", "", TargetEmbeddingType::kNone},
// Targets with common words as e2LD are not considered embedded targets
@@ -210,8 +210,15 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) {
{"foo.office.org-foo.com", "", TargetEmbeddingType::kNone},
// Targets could be embedded without their dots and dashes.
+ {"googlecom-foo.com", "google.com", TargetEmbeddingType::kInterstitial},
{"foo.googlecom-foo.com", "google.com",
TargetEmbeddingType::kInterstitial},
+ // But should not be detected if they're using a common word. weather.com
+ // is on the top domain list, but 'weather' is a common word.
+ {"weathercom-foo.com", "", TargetEmbeddingType::kNone},
+ // And should also not be detected if they're too short. vk.com is on the
+ // top domain list, but is shorter than kMinE2LDLengthForTargetEmbedding.
+ {"vkcom-foo.com", "", TargetEmbeddingType::kNone},
// Ensure legitimate domains don't trigger.
{"foo.google.com", "", TargetEmbeddingType::kNone},