diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-29 10:46:47 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-11-02 12:02:10 +0000 |
commit | 99677208ff3b216fdfec551fbe548da5520cd6fb (patch) | |
tree | 476a4865c10320249360e859d8fdd3e01833b03a /chromium/components/lookalikes | |
parent | c30a6232df03e1efbd9f3b226777b07e087a1122 (diff) | |
download | qtwebengine-chromium-99677208ff3b216fdfec551fbe548da5520cd6fb.tar.gz |
BASELINE: Update Chromium to 86.0.4240.124
Change-Id: Ide0ff151e94cd665ae6521a446995d34a9d1d644
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/lookalikes')
7 files changed, 314 insertions, 11 deletions
diff --git a/chromium/components/lookalikes/DEPS b/chromium/components/lookalikes/DEPS index 563bb1e8d24..d725daeca95 100644 --- a/chromium/components/lookalikes/DEPS +++ b/chromium/components/lookalikes/DEPS @@ -1,5 +1,11 @@ include_rules = [ + "+components/pref_registry", + "+components/prefs", + "+components/security_interstitials/core", "+components/security_state", + "+components/strings/grit/components_strings.h", "+components/url_formatter", "+net/base", + "+services/metrics/public/cpp", + "+ui/base", ] diff --git a/chromium/components/lookalikes/core/BUILD.gn b/chromium/components/lookalikes/core/BUILD.gn index 65e89c84808..30aab561767 100644 --- a/chromium/components/lookalikes/core/BUILD.gn +++ b/chromium/components/lookalikes/core/BUILD.gn @@ -2,26 +2,33 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -import("//build/config/jumbo.gni") - -jumbo_static_library("core") { +static_library("core") { sources = [ + "lookalike_url_ui_util.cc", + "lookalike_url_ui_util.h", "lookalike_url_util.cc", "lookalike_url_util.h", ] deps = [ ":features", "//base", + "//components/pref_registry", + "//components/prefs:prefs", + "//components/security_interstitials/core", "//components/security_state/core:features", + "//components/strings", "//components/url_formatter", "//components/url_formatter/spoof_checks/top_domains:common", "//components/url_formatter/spoof_checks/top_domains:top500_domains", "//components/url_formatter/spoof_checks/top_domains:top500_domains_header", "//net", + "//services/metrics/public/cpp:metrics_cpp", + "//services/metrics/public/cpp:ukm_builders", + "//ui/base", ] } -jumbo_source_set("unit_tests") { +source_set("unit_tests") { testonly = true sources = [ "lookalike_url_util_unittest.cc" ] diff --git a/chromium/components/lookalikes/core/lookalike_url_ui_util.cc b/chromium/components/lookalikes/core/lookalike_url_ui_util.cc new file mode 100644 index 00000000000..7f90a141f29 --- /dev/null 
+++ b/chromium/components/lookalikes/core/lookalike_url_ui_util.cc @@ -0,0 +1,113 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/lookalikes/core/lookalike_url_ui_util.h" + +#include "build/build_config.h" +#include "components/lookalikes/core/lookalike_url_util.h" +#include "components/security_interstitials/core/common_string_util.h" +#include "components/strings/grit/components_strings.h" +#include "services/metrics/public/cpp/ukm_builders.h" +#include "services/metrics/public/cpp/ukm_recorder.h" +#include "ui/base/l10n/l10n_util.h" + +void RecordUkmForLookalikeUrlBlockingPage( + ukm::SourceId source_id, + LookalikeUrlMatchType match_type, + LookalikeUrlBlockingPageUserAction user_action) { + ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get(); + CHECK(ukm_recorder); + + ukm::builders::LookalikeUrl_NavigationSuggestion(source_id) + .SetMatchType(static_cast<int>(match_type)) + .SetUserAction(static_cast<int>(user_action)) + .Record(ukm_recorder); +} + +void ReportUkmForLookalikeUrlBlockingPageIfNeeded( + ukm::SourceId& source_id, + LookalikeUrlMatchType match_type, + LookalikeUrlBlockingPageUserAction action) { + // Rely on the saved SourceId because deconstruction happens after the next + // navigation occurs, so web contents points to the new destination. 
+ if (source_id != ukm::kInvalidSourceId) { + RecordUkmForLookalikeUrlBlockingPage(source_id, match_type, action); + source_id = ukm::kInvalidSourceId; + } +} + +void PopulateLookalikeUrlBlockingPageStrings( + base::DictionaryValue* load_time_data, + const GURL& safe_url, + const GURL& request_url) { + CHECK(load_time_data); + + PopulateStringsForSharedHTML(load_time_data); + load_time_data->SetString("tabTitle", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_TITLE)); + load_time_data->SetString( + "optInLink", + l10n_util::GetStringUTF16(IDS_SAFE_BROWSING_SCOUT_REPORTING_AGREE)); + + if (safe_url.is_valid()) { + const base::string16 hostname = + security_interstitials::common_string_util::GetFormattedHostName( + safe_url); + load_time_data->SetString( + "heading", + l10n_util::GetStringFUTF16(IDS_LOOKALIKE_URL_HEADING, hostname)); + load_time_data->SetString( + "primaryParagraph", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_PRIMARY_PARAGRAPH)); + load_time_data->SetString( + "proceedButtonText", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_IGNORE)); + load_time_data->SetString( + "primaryButtonText", + l10n_util::GetStringFUTF16(IDS_LOOKALIKE_URL_CONTINUE, hostname)); + } else { + // No safe URL available to suggest. This can happen when the navigated + // domain fails IDN spoof checks but isn't a lookalike of a known domain. + // TODO: Change to actual strings. 
+ load_time_data->SetString( + "heading", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_HEADING_NO_SUGGESTED_URL)); + load_time_data->SetString( + "primaryParagraph", + l10n_util::GetStringUTF16( + IDS_LOOKALIKE_URL_PRIMARY_PARAGRAPH_NO_SUGGESTED_URL)); + load_time_data->SetString( + "proceedButtonText", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_IGNORE)); + load_time_data->SetString( + "primaryButtonText", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_BACK_TO_SAFETY)); +#if defined(OS_IOS) + // On iOS, offer to close the page instead of navigating to NTP when the + // safe URL is empty or invalid, and unable to go back. + bool show_close_page = false; + load_time_data->GetBoolean("cant_go_back", &show_close_page); + if (show_close_page) { + load_time_data->SetString( + "primaryButtonText", + l10n_util::GetStringUTF16(IDS_LOOKALIKE_URL_CLOSE_PAGE)); + } +#endif + } + load_time_data->SetString("lookalikeRequestHostname", request_url.host()); +} + +void PopulateStringsForSharedHTML(base::DictionaryValue* load_time_data) { + load_time_data->SetBoolean("lookalike_url", true); + load_time_data->SetBoolean("overridable", false); + load_time_data->SetBoolean("hide_primary_button", false); + load_time_data->SetBoolean("show_recurrent_error_paragraph", false); + + load_time_data->SetString("recurrentErrorParagraph", ""); + load_time_data->SetString("openDetails", ""); + load_time_data->SetString("explanationParagraph", ""); + load_time_data->SetString("finalParagraph", ""); + + load_time_data->SetString("type", "LOOKALIKE"); +} diff --git a/chromium/components/lookalikes/core/lookalike_url_ui_util.h b/chromium/components/lookalikes/core/lookalike_url_ui_util.h new file mode 100644 index 00000000000..c15ea9a3186 --- /dev/null +++ b/chromium/components/lookalikes/core/lookalike_url_ui_util.h @@ -0,0 +1,36 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_ +#define COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_ + +#include "components/lookalikes/core/lookalike_url_util.h" +#include "services/metrics/public/cpp/ukm_source_id.h" + +namespace base { +class DictionaryValue; +} // namespace base + +// Allow easier reporting of UKM when no interstitial is shown. +void RecordUkmForLookalikeUrlBlockingPage( + ukm::SourceId source_id, + LookalikeUrlMatchType match_type, + LookalikeUrlBlockingPageUserAction user_action); + +// Record UKM if not already reported for this page. +void ReportUkmForLookalikeUrlBlockingPageIfNeeded( + ukm::SourceId& source_id, + LookalikeUrlMatchType match_type, + LookalikeUrlBlockingPageUserAction action); + +// Populates |load_time_data| for interstitial HTML. +void PopulateLookalikeUrlBlockingPageStrings( + base::DictionaryValue* load_time_data, + const GURL& safe_url, + const GURL& request_url); + +// Values added to get shared interstitial HTML to play nice. 
+void PopulateStringsForSharedHTML(base::DictionaryValue* load_time_data); + +#endif // COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UI_UTIL_H_ diff --git a/chromium/components/lookalikes/core/lookalike_url_util.cc b/chromium/components/lookalikes/core/lookalike_url_util.cc index 4a350ed122e..65c3b27b477 100644 --- a/chromium/components/lookalikes/core/lookalike_url_util.cc +++ b/chromium/components/lookalikes/core/lookalike_url_util.cc @@ -9,6 +9,7 @@ #include "base/bind.h" #include "base/callback.h" #include "base/feature_list.h" +#include "base/i18n/char_iterator.h" #include "base/macros.h" #include "base/memory/scoped_refptr.h" #include "base/memory/singleton.h" @@ -21,7 +22,9 @@ #include "base/task/post_task.h" #include "base/task/thread_pool.h" #include "base/time/default_clock.h" +#include "base/values.h" #include "components/lookalikes/core/features.h" +#include "components/security_interstitials/core/pref_names.h" #include "components/security_state/core/features.h" #include "components/url_formatter/spoof_checks/top_domains/top500_domains.h" #include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h" @@ -33,6 +36,10 @@ namespace lookalikes { const char kHistogramName[] = "NavigationSuggestion.Event"; +void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) { + registry->RegisterListPref(prefs::kLookalikeWarningAllowlistDomains); +} + } // namespace lookalikes namespace { @@ -59,6 +66,10 @@ const char* kCommonWords[] = {"shop", "jobs", "live", "info", "study", "ideal", "research", "france", "free", "mobile", "sky", "ask"}; +// What separators can be used to separate tokens in target embedding spoofs? +// e.g. www-google.com.example.com uses "-" (www-google) and "." (google.com). 
+const char kTargetEmbeddingSeparators[] = "-."; + bool SkeletonsMatch(const url_formatter::Skeletons& skeletons1, const url_formatter::Skeletons& skeletons2) { DCHECK(!skeletons1.empty()); @@ -175,7 +186,8 @@ void RecordEvent(NavigationSuggestionEvent event) { // StringPieces. std::vector<base::StringPiece> SplitDomainWithouteTLDIntoTokens( const std::string& host_without_etld) { - return base::SplitStringPiece(host_without_etld, "-.", base::TRIM_WHITESPACE, + return base::SplitStringPiece(host_without_etld, kTargetEmbeddingSeparators, + base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); } @@ -588,11 +600,29 @@ TargetEmbeddingType GetTargetEmbeddingType( // This check happens first so that we can exclude invalid eTLD+1s next. std::string embedded_target = GetMatchingTopDomainWithoutSeparators( hostname_tokens_without_etld[end - 1]); - if (!embedded_target.empty() && - !IsAllowedToBeEmbedded(etld_check_dominfo, etld_check_span, - in_target_allowlist)) { - *safe_hostname = embedded_target; - return TargetEmbeddingType::kInterstitial; + if (!embedded_target.empty()) { + // Extract the full possibly-spoofed domain. To get this, we take the + // hostname up until this point, strip off the no-separator bit (e.g. + // googlecom) and then re-add the separated version (e.g. google.com). + auto spoofed_domain = + etld_check_host.substr( + 0, etld_check_host.length() - + hostname_tokens_without_etld[end - 1].length()) + + embedded_target; + const auto no_separator_tokens = base::SplitStringPiece( + spoofed_domain, kTargetEmbeddingSeparators, base::TRIM_WHITESPACE, + base::SPLIT_WANT_NONEMPTY); + auto no_separator_dominfo = GetDomainInfo(embedded_target); + + // Only flag on domains that are long enough, don't use common words, and + // aren't target-allowlisted. 
+ if (no_separator_dominfo.domain_without_registry.length() > + kMinE2LDLengthForTargetEmbedding && + !IsAllowedToBeEmbedded(no_separator_dominfo, no_separator_tokens, + in_target_allowlist)) { + *safe_hostname = embedded_target; + return TargetEmbeddingType::kInterstitial; + } } // Exclude otherwise-invalid eTLDs. @@ -636,3 +666,90 @@ TargetEmbeddingType GetTargetEmbeddingType( } return TargetEmbeddingType::kNone; } + +bool IsASCII(UChar32 codepoint) { + return !(codepoint & ~0x7F); +} + +// Returns true if |codepoint| has emoji related properties. +bool IsEmojiRelatedCodepoint(UChar32 codepoint) { + return u_hasBinaryProperty(codepoint, UCHAR_EMOJI) || + // Characters that have emoji presentation by default (e.g. hourglass) + u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION) || + // Characters displayed as country flags when used as a valid pair. + // E.g. Regional Indicator Symbol Letter B used once in a string + // is rendered as 🇧, used twice is rendered as the flag of Barbados + // (with country code BB). It's therefore possible to come up with + // a spoof using regional indicator characters as text, but these + // domain names will be readily punycoded and detecting pairs isn't + // easy so we keep the code simple here. + u_hasBinaryProperty(codepoint, UCHAR_REGIONAL_INDICATOR) || + // Pictographs such as Black Cross On Shield (U+26E8). + u_hasBinaryProperty(codepoint, UCHAR_EXTENDED_PICTOGRAPHIC); +} + +// Returns true if |text| contains only ASCII characters, pictographs +// or emojis. This check is only used to determine if a domain that already +// failed spoof checks should be blocked by an interstitial. Ideally, we would +// check this for non-ASCII scripts as well (e.g. Cyrillic + emoji), but such +// usage isn't common. 
+bool IsASCIIAndEmojiOnly(const base::StringPiece16& text) { + base::i18n::UTF16CharIterator iter(text.data(), text.length()); + while (!iter.end()) { + const UChar32 codepoint = iter.get(); + if (!IsASCII(codepoint) && !IsEmojiRelatedCodepoint(codepoint)) { + return false; + } + iter.Advance(); + } + return true; +} + +bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain) { + // Here, only a subset of spoof checks that cause an IDN to fall back to + // punycode are configured to show an interstitial. + switch (navigated_domain.idn_result.spoof_check_result) { + case url_formatter::IDNSpoofChecker::Result::kNone: + case url_formatter::IDNSpoofChecker::Result::kSafe: + return false; + + case url_formatter::IDNSpoofChecker::Result::kICUSpoofChecks: + // If the eTLD+1 contains only a mix of ASCII + Emoji, allow. + return !IsASCIIAndEmojiOnly(navigated_domain.idn_result.result); + + case url_formatter::IDNSpoofChecker::Result::kDeviationCharacters: + // Failures because of deviation characters, especially ß, are common. 
+ return false; + + case url_formatter::IDNSpoofChecker::Result::kTLDSpecificCharacters: + case url_formatter::IDNSpoofChecker::Result::kUnsafeMiddleDot: + case url_formatter::IDNSpoofChecker::Result::kWholeScriptConfusable: + case url_formatter::IDNSpoofChecker::Result::kDigitLookalikes: + case url_formatter::IDNSpoofChecker::Result:: + kNonAsciiLatinCharMixedWithNonLatin: + case url_formatter::IDNSpoofChecker::Result::kDangerousPattern: + return true; + } +} + +bool IsAllowedByEnterprisePolicy(const PrefService* pref_service, + const GURL& url) { + const auto* list = + pref_service->GetList(prefs::kLookalikeWarningAllowlistDomains); + for (const auto& domain_val : *list) { + auto domain = domain_val.GetString(); + if (url.DomainIs(domain)) { + return true; + } + } + return false; +} + +void SetEnterpriseAllowlistForTesting(PrefService* pref_service, + const std::vector<std::string>& hosts) { + base::Value list(base::Value::Type::LIST); + for (const auto& host : hosts) { + list.Append(host); + } + pref_service->Set(prefs::kLookalikeWarningAllowlistDomains, std::move(list)); +} diff --git a/chromium/components/lookalikes/core/lookalike_url_util.h b/chromium/components/lookalikes/core/lookalike_url_util.h index 00946f6d909..cfac43cd0ae 100644 --- a/chromium/components/lookalikes/core/lookalike_url_util.h +++ b/chromium/components/lookalikes/core/lookalike_url_util.h @@ -10,6 +10,8 @@ #include "base/callback.h" #include "base/time/time.h" +#include "components/pref_registry/pref_registry_syncable.h" +#include "components/prefs/pref_service.h" #include "components/url_formatter/url_formatter.h" #include "url/gurl.h" @@ -17,6 +19,9 @@ class GURL; namespace lookalikes { extern const char kHistogramName[]; + +// Register applicable preferences with the provided registry. 
+void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry); } using LookalikeTargetAllowlistChecker = @@ -186,4 +191,16 @@ TargetEmbeddingType GetTargetEmbeddingType( const LookalikeTargetAllowlistChecker& in_target_allowlist, std::string* safe_hostname); +// Returns true if a navigation to an IDN should be blocked. +bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain); + +// Checks whether the given url is allowlisted by enterprise policy, and +// thus no warnings should be shown on that host. +bool IsAllowedByEnterprisePolicy(const PrefService* pref_service, + const GURL& url); + +// Add the given hosts to the allowlist policy setting. +void SetEnterpriseAllowlistForTesting(PrefService* pref_service, + const std::vector<std::string>& hosts); + #endif // COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UTIL_H_ diff --git a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc index 1aed2eddeec..e57b0b974e2 100644 --- a/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc +++ b/chromium/components/lookalikes/core/lookalike_url_util_unittest.cc @@ -198,7 +198,7 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) { {"scholar.foo.google.com.foo.com", "google.com", TargetEmbeddingType::kInterstitial}, - // Targets should be longer than 6 characters. + // e2LDs should be longer than 3 characters. {"hp.com-foo.com", "", TargetEmbeddingType::kNone}, // Targets with common words as e2LD are not considered embedded targets @@ -210,8 +210,15 @@ TEST(LookalikeUrlUtilTest, TargetEmbeddingTest) { {"foo.office.org-foo.com", "", TargetEmbeddingType::kNone}, // Targets could be embedded without their dots and dashes. + {"googlecom-foo.com", "google.com", TargetEmbeddingType::kInterstitial}, {"foo.googlecom-foo.com", "google.com", TargetEmbeddingType::kInterstitial}, + // But should not be detected if they're using a common word. 
weather.com + // is on the top domain list, but 'weather' is a common word. + {"weathercom-foo.com", "", TargetEmbeddingType::kNone}, + // And should also not be detected if they're too short. vk.com is on the + // top domain list, but is shorter than kMinE2LDLengthForTargetEmbedding. + {"vkcom-foo.com", "", TargetEmbeddingType::kNone}, // Ensure legitimate domains don't trigger. {"foo.google.com", "", TargetEmbeddingType::kNone}, |