diff options
author | meacer <meacer@chromium.org> | 2019-11-05 21:46:26 +0000 |
---|---|---|
committer | Michael Brüning <michael.bruning@qt.io> | 2020-03-10 15:48:15 +0000 |
commit | f938fe1765e30dea5789ddb9113d218abe2ec532 (patch) | |
tree | 13f016f15105c29974179112a97cedb648cc5e7e /chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | |
parent | 2643eee04e099c1b649167ec7e646d7832d19000 (diff) | |
download | qtwebengine-chromium-f938fe1765e30dea5789ddb9113d218abe2ec532.tar.gz |
[Backport] CVE-2020-6412 - Insufficient validation of untrusted input in Omnibox
Manual backport of patch originally reviewed on
https://chromium-review.googlesource.com/c/chromium/src/+/1881887:
Allow whole-script confusable Cyrillic domains only on Cyrillic TLDs
A whole-script confusable Cyrillic domain consists of entirely Cyrillic
characters that look identical to Latin characters (e.g. xn--80ak6aa92e[.]com
decodes to аррӏе[.]com where аррӏе is in fact the Cyrillic code point sequence U+0430 U+0440 U+0440 U+04CF U+0435).
A previous change allowed whole-script confusable Cyrillic characters on
non-ASCII top level domains only. This means that xn--80ak6aa92e[.]com remains
punycode (TLD is .com) but xn--80ak6aa92e[.]xn--p1ai is decoded as аррӏе[.]рф
(TLD is Cyrillic). However, this also allows spoofs in other non-ASCII TLDs
such as аррӏе[.]中国 so it's not a sufficient measure.
This change further limits allowable whole-script confusable Cyrillic domains
to Cyrillic TLDs (instead of non-ASCII) and a small list of additional TLDs
containing a large number of Cyrillic domains (bg, by, kz, pyc, ru, su,
ua, uz). The idea is that users familiar with Cyrillic are more likely
to encounter these TLDs and notice any discrepancies in the displayed
domain name.
Bug: 968505
Change-Id: I83dbb215c5177f3faa80e0e0c157aeb483fe7138
Reviewed-by: Jüri Valdmann <juri.valdmann@qt.io>
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r-- | chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | 30 |
1 files changed, 24 insertions, 6 deletions
```diff
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index d0c22538cd4..f394f73cd3f 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -256,7 +256,8 @@ IDNSpoofChecker::~IDNSpoofChecker() {
 
 bool IDNSpoofChecker::SafeToDisplayAsUnicode(
     base::StringPiece16 label,
-    base::StringPiece top_level_domain) {
+    base::StringPiece top_level_domain,
+    base::StringPiece16 top_level_domain_unicode) {
   UErrorCode status = U_ZERO_ERROR;
   int32_t result =
       uspoof_check(checker_, label.data(),
@@ -266,7 +267,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
   if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS))
     return false;
 
-  icu::UnicodeString label_string(FALSE, label.data(),
+  icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(),
                                   base::checked_cast<int32_t>(label.size()));
 
   // A punycode label with 'xn--' prefix is not subject to the URL
@@ -284,7 +285,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
     return false;
 
   // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
-  if (label_string.length() > 1 && top_level_domain != ".is" &&
+  if (label_string.length() > 1 && top_level_domain != "is" &&
       icelandic_characters_.containsSome(label_string))
 
   // Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az).
@@ -309,9 +310,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
   if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&
       kana_letters_exceptions_.containsNone(label_string) &&
       combining_diacritics_exceptions_.containsNone(label_string)) {
-    bool is_tld_ascii = !top_level_domain.starts_with(".xn--");
-    // Check Cyrillic confusable only for ASCII TLDs.
-    return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string);
+    // Check Cyrillic confusable only for TLDs where Cyrillic characters are
+    // uncommon.
+    return IsCyrillicTopLevelDomain(top_level_domain,
+                                    top_level_domain_unicode) ||
+           !IsMadeOfLatinAlikeCyrillic(label_string);
   }
 
   // Additional checks for |label| with multiple scripts, one of which is Latin.
@@ -592,6 +595,21 @@ bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(
          cyrillic_letters_latin_alike_.containsAll(cyrillic_in_label);
 }
 
+bool IDNSpoofChecker::IsCyrillicTopLevelDomain(
+    base::StringPiece tld,
+    base::StringPiece16 tld_unicode) const {
+  icu::UnicodeString tld_string(
+      FALSE /* isTerminated */, tld_unicode.data(),
+      base::checked_cast<int32_t>(tld_unicode.size()));
+  if (cyrillic_letters_.containsSome(tld_string)) {
+    return true;
+  }
+  // These ASCII TLDs contain a large number of domains with Cyrillic
+  // characters.
+  return tld == "bg" || tld == "by" || tld == "kz" || tld == "pyc" ||
+         tld == "ru" || tld == "su" || tld == "ua" || tld == "uz";
+}
+
 // static
 void IDNSpoofChecker::SetTrieParamsForTesting(
     const HuffmanTrieParams& trie_params) {
```