[Backport] CVE-2020-6412 - Insufficient validation of untrusted input in Omnibox

Manual backport of patch originally reviewed on https://chromium-review.googlesource.com/c/chromium/src/+/1881887: Allow whole-script confusable Cyrillic domains only on Cyrillic TLDs A whole-script confusable Cyrillic domain consists of entirely Cyrillic characters that look identical to Latin characters (e.g. xn--80ak6aa92e[.]com decodes to аррӏе[.]com where аррӏе is in fact '\x0430\x0440\x0440\x04cf\x0435'). A previous change allowed whole-script confusable Cyrillic characters on non-ASCII top level domains only. This means that xn--80ak6aa92e[.]com remains punycode (TLD is .com) but xn--80ak6aa92e[.]xn--p1ai is decoded as аррӏе[.]рф (TLD is Cyrillic). However, this also allows spoofs in other non-ASCII TLDs such as аррӏе[.]中国 so it's not a sufficient measure. This change further limits allowable whole-script confusable Cyrillic domains to Cyrillic TLDs (instead of non-ASCII) and a small list of additional TLDs containing a large number of Cyrillic domains (bg, by, kz, pyc, ru, su, ua, uz). The idea is that users familiar with Cyrillic are more likely to encounter these TLDs and notice any discrepancies in the displayed domain name. Bug: 968505 Change-Id: I83dbb215c5177f3faa80e0e0c157aeb483fe7138 Reviewed-by: Jüri Valdmann <juri.valdmann@qt.io>
author: meacer <meacer@chromium.org> 2019-11-05 21:46:26 +0000
committer: Michael Brüning <michael.bruning@qt.io> 2020-03-10 15:48:15 +0000
commit: f938fe1765e30dea5789ddb9113d218abe2ec532 (patch)
tree: 13f016f15105c29974179112a97cedb648cc5e7e /chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
parent: 2643eee04e099c1b649167ec7e646d7832d19000 (diff)
download: qtwebengine-chromium-f938fe1765e30dea5789ddb9113d218abe2ec532.tar.gz
1 files changed, 24 insertions, 6 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index d0c22538cd4..f394f73cd3f 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -256,7 +256,8 @@ IDNSpoofChecker::~IDNSpoofChecker() {
 
 bool IDNSpoofChecker::SafeToDisplayAsUnicode(
     base::StringPiece16 label,
-    base::StringPiece top_level_domain) {
+    base::StringPiece top_level_domain,
+    base::StringPiece16 top_level_domain_unicode) {
   UErrorCode status = U_ZERO_ERROR;
   int32_t result =
       uspoof_check(checker_, label.data(),
@@ -266,7 +267,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
   if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS))
     return false;
 
-  icu::UnicodeString label_string(FALSE, label.data(),
+  icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(),
                                   base::checked_cast<int32_t>(label.size()));
 
   // A punycode label with 'xn--' prefix is not subject to the URL
@@ -284,7 +285,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
     return false;
 
   // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
-  if (label_string.length() > 1 && top_level_domain != ".is" &&
+  if (label_string.length() > 1 && top_level_domain != "is" &&
       icelandic_characters_.containsSome(label_string))
 
   // Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az).
@@ -309,9 +310,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
   if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&
       kana_letters_exceptions_.containsNone(label_string) &&
       combining_diacritics_exceptions_.containsNone(label_string)) {
-    bool is_tld_ascii = !top_level_domain.starts_with(".xn--");
-    // Check Cyrillic confusable only for ASCII TLDs.
-    return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string);
+    // Check Cyrillic confusable only for TLDs where Cyrillic characters are
+    // uncommon.
+    return IsCyrillicTopLevelDomain(top_level_domain,
+                                    top_level_domain_unicode) ||
+           !IsMadeOfLatinAlikeCyrillic(label_string);
   }
 
   // Additional checks for |label| with multiple scripts, one of which is Latin.
@@ -592,6 +595,21 @@ bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(
          cyrillic_letters_latin_alike_.containsAll(cyrillic_in_label);
 }
 
+bool IDNSpoofChecker::IsCyrillicTopLevelDomain(
+    base::StringPiece tld,
+    base::StringPiece16 tld_unicode) const {
+  icu::UnicodeString tld_string(
+      FALSE /* isTerminated */, tld_unicode.data(),
+      base::checked_cast<int32_t>(tld_unicode.size()));
+  if (cyrillic_letters_.containsSome(tld_string)) {
+    return true;
+  }
+  // These ASCII TLDs contain a large number of domains with Cyrillic
+  // characters.
+  return tld == "bg" || tld == "by" || tld == "kz" || tld == "pyc" ||
+         tld == "ru" || tld == "su" || tld == "ua" || tld == "uz";
+}
+
 // static
 void IDNSpoofChecker::SetTrieParamsForTesting(
     const HuffmanTrieParams& trie_params) {
author	meacer <meacer@chromium.org>	2019-11-05 21:46:26 +0000
committer	Michael Brüning <michael.bruning@qt.io>	2020-03-10 15:48:15 +0000
commit	f938fe1765e30dea5789ddb9113d218abe2ec532 (patch)
tree	13f016f15105c29974179112a97cedb648cc5e7e /chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
parent	2643eee04e099c1b649167ec7e646d7832d19000 (diff)
download	qtwebengine-chromium-f938fe1765e30dea5789ddb9113d218abe2ec532.tar.gz