From 4d5dbe41ae38dff77b94d9c93944442fb2f4fabd Mon Sep 17 00:00:00 2001
From: meacer
Date: Fri, 25 Oct 2019 01:09:31 +0000
Subject: [Backport] CVE-2020-6401 (1/3) and CVE-2020-6411
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Manual backport of patch originally reviewed on
https://chromium-review.googlesource.com/c/chromium/src/+/1879992:
Restrict Latin Small Letter Thorn (U+00FE) to Icelandic domains

This character (þ) can be confused with both b and p when used in a domain
name. IDN spoof checker doesn't have a good way of flagging a character as
confusable with multiple characters, so it can't catch spoofs containing
this character. As a practical fix, this CL restricts this character to
domains under Iceland's ccTLD (.is). With this change, a domain name containing
"þ" with a non-.is TLD will be displayed in punycode in the UI.

This change affects less than 10 real world domains with limited popularity.

Bug: 798892, 843352, 904327, 1017707
Change-Id: I7ade7305a4235e51ed3c7d0a6acb1ce6df7544f1
Reviewed-by: Jüri Valdmann
---
 .../url_formatter/spoof_checks/idn_spoof_checker.cc | 18 ++++++++++++++----
 .../url_formatter/spoof_checks/idn_spoof_checker.h  |  3 ++-
 chromium/components/url_formatter/url_formatter.cc  | 21 +++++++++++----------
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index de66bffa980..933a985b510 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -180,7 +180,7 @@ IDNSpoofChecker::IDNSpoofChecker() {
 
   // Supplement the Unicode confusable list by the following mapping.
   // - {U+00E6 (æ), U+04D5 (ӕ)} => "ae"
-  // - {U+00FE (þ), U+03FC (ϼ), U+048F (ҏ)} => p
+  // - {U+03FC (ϼ), U+048F (ҏ)} => p
   // - {U+0127 (ħ), U+043D (н), U+045B (ћ), U+04A3 (ң), U+04A5 (ҥ),
   // U+04C8 (ӈ), U+04CA (ӊ), U+050B (ԋ), U+0527 (ԧ), U+0529 (ԩ)} => h
   // - {U+0138 (ĸ), U+03BA (κ), U+043A (к), U+049B (қ), U+049D (ҝ),
@@ -221,7 +221,7 @@ IDNSpoofChecker::IDNSpoofChecker() {
   extra_confusable_mapper_.reset(icu::Transliterator::createFromRules(
       UNICODE_STRING_SIMPLE("ExtraConf"),
       icu::UnicodeString::fromUTF8(
-          "[æӕ] > ae; [þϼҏ] > p; [ħнћңҥӈӊԋԧԩ] > h;"
+          "[æӕ] > ae; [ϼҏ] > p; [ħнћңҥӈӊԋԧԩ] > h;"
           "[ĸκкқҝҟҡӄԟ] > k; [ŋпԥกח] > n; œ > ce;"
           "[ŧтҭԏ七丅丆丁] > t; [ƅьҍв] > b; [ωшщพฟພຟ] > w;"
           "[мӎ] > m; [єҽҿၔ] > e; ґ > r; [ғӻ] > f;"
@@ -246,8 +246,9 @@ IDNSpoofChecker::~IDNSpoofChecker() {
   uspoof_close(checker_);
 }
 
-bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
-                                             bool is_tld_ascii) {
+bool IDNSpoofChecker::SafeToDisplayAsUnicode(
+    base::StringPiece16 label,
+    base::StringPiece top_level_domain) {
   UErrorCode status = U_ZERO_ERROR;
   int32_t result =
       uspoof_check(checker_, label.data(),
@@ -274,6 +275,14 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
   if (deviation_characters_.containsSome(label_string))
     return false;
 
+  // Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's
+  // used in modern Icelandic orthography, so allow it for the Icelandic ccTLD
+  // (.is) but block in any other TLD.
+  if (label_string.length() > 1 && label_string.indexOf("þ") != -1 &&
+      top_level_domain != ".is") {
+    return false;
+  }
+
   // If there's no script mixing, the input is regarded as safe without any
   // extra check unless it falls into one of three categories:
   // - contains Kana letter exceptions
@@ -291,6 +300,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
   if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&
       kana_letters_exceptions_.containsNone(label_string) &&
       combining_diacritics_exceptions_.containsNone(label_string)) {
+    bool is_tld_ascii = !top_level_domain.starts_with(".xn--");
     // Check Cyrillic confusable only for ASCII TLDs.
     return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string);
   }
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
index 1a30355caab..bd955952d45 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
@@ -54,7 +54,8 @@ class IDNSpoofChecker {
   // Returns true if |label| is safe to display as Unicode. In the event of
   // library failure, all IDN inputs will be treated as unsafe.
   // See the function body for details on the specific safety checks performed.
-  bool SafeToDisplayAsUnicode(base::StringPiece16 label, bool is_tld_ascii);
+  bool SafeToDisplayAsUnicode(base::StringPiece16 label,
+                              base::StringPiece top_level_domain);
 
   // Returns the matching top domain if |hostname| or the last few components of
   // |hostname| looks similar to one of top domains listed i
diff --git a/chromium/components/url_formatter/url_formatter.cc b/chromium/components/url_formatter/url_formatter.cc
index 236cac1aee4..e7d8f37ce8e 100644
--- a/chromium/components/url_formatter/url_formatter.cc
+++ b/chromium/components/url_formatter/url_formatter.cc
@@ -34,7 +34,7 @@ IDNConversionResult IDNToUnicodeWithAdjustments(
 
 bool IDNToUnicodeOneComponent(const base::char16* comp,
                               size_t comp_len,
-                              bool is_tld_ascii,
+                              base::StringPiece top_level_domain,
                               bool enable_spoof_checks,
                               base::string16* out,
                               bool* has_idn_component);
@@ -245,11 +245,10 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
   input16.reserve(host.length());
   input16.insert(input16.end(), host.begin(), host.end());
 
-  bool is_tld_ascii = true;
+  base::StringPiece top_level_domain;
   size_t last_dot = host.rfind('.');
-  if (last_dot != base::StringPiece::npos &&
-      host.substr(last_dot).starts_with(".xn--")) {
-    is_tld_ascii = false;
+  if (last_dot != base::StringPiece::npos) {
+    top_level_domain = host.substr(last_dot);
   }
 
   IDNConversionResult result;
@@ -270,7 +269,7 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
       // Add the substring that we just found.
       bool has_idn_component = false;
       converted_idn = IDNToUnicodeOneComponent(
-          input16.data() + component_start, component_length, is_tld_ascii,
+          input16.data() + component_start, component_length, top_level_domain,
           enable_spoof_checks, &out16, &has_idn_component);
       result.has_idn_component |= has_idn_component;
     }
@@ -320,8 +319,10 @@ IDNConversionResult UnsafeIDNToUnicodeWithAdjustments(
 // user. Note that this function does not deal with pure ASCII domain labels at
 // all even though it's possible to make up look-alike labels with ASCII
 // characters alone.
-bool IsIDNComponentSafe(base::StringPiece16 label, bool is_tld_ascii) {
-  return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, is_tld_ascii);
+bool IsIDNComponentSafe(base::StringPiece16 label,
+                        base::StringPiece top_level_domain) {
+  return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label,
+                                                          top_level_domain);
 }
 
 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
@@ -373,7 +374,7 @@ base::LazyInstance::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;
 // input has IDN, regardless of whether it was converted to unicode or not.
 bool IDNToUnicodeOneComponent(const base::char16* comp,
                               size_t comp_len,
-                              bool is_tld_ascii,
+                              base::StringPiece top_level_domain,
                               bool enable_spoof_checks,
                               base::string16* out,
                               bool* has_idn_component) {
@@ -419,7 +420,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
       if (IsIDNComponentSafe(
               base::StringPiece16(out->data() + original_length,
                                   base::checked_cast(output_length)),
-              is_tld_ascii)) {
+              top_level_domain)) {
         return true;
       }
     }
-- 
cgit v1.2.1