From b88a10e7a666792cc8a2d9a9310748a79b1f032b Mon Sep 17 00:00:00 2001 From: meacer Date: Fri, 25 Oct 2019 19:29:59 +0000 Subject: [Backport] CVE-2020-6401 (2/3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Manual backport of patch originally reviewed on https://chromium-review.googlesource.com/c/chromium/src/+/1881344: Restrict Latin Small Letter Eth (U+00F0) to Icelandic domains crrev.com/c/1879992 restricted Latin Small Letter Thorn to Icelandic domains. This CL does the same for Eth (ð) as it can be confused with the characters "o" and "d" in some fonts. This change affects less than 10 real world domains with limited popularity. Bug: 1017707, 929711 Change-Id: I8f0394d4c1531eb2051d38c78afe00f550d3da73 Reviewed-by: Jüri Valdmann --- .../url_formatter/spoof_checks/idn_spoof_checker.cc | 18 +++++++++++------- .../url_formatter/spoof_checks/idn_spoof_checker.h | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc index 933a985b510..9b37fd34b81 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc @@ -167,6 +167,14 @@ IDNSpoofChecker::IDNSpoofChecker() { status); lgc_letters_n_ascii_.freeze(); + // Latin small letter thorn ("þ", U+00FE) can be used to spoof both b and p. + // It's used in modern Icelandic orthography, so allow it for the Icelandic + // ccTLD (.is) but block in any other TLD. Also block Latin small letter eth + // ("ð", U+00F0) which can be used to spoof the letter o. + icelandic_characters_ = + icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00fe\\u00f0]"), status); + icelandic_characters_.freeze(); + // Used for diacritics-removal before the skeleton calculation. Add // "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark // removal; NFC". @@ -275,13 +283,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( if (deviation_characters_.containsSome(label_string)) return false; - // Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's - // used in modern Icelandic orthography, so allow it for the Icelandic ccTLD - // (.is) but block in any other TLD. - if (label_string.length() > 1 && label_string.indexOf("þ") != -1 && - top_level_domain != ".is") { - return false; - } + // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is). + if (label_string.length() > 1 && top_level_domain != ".is" && + icelandic_characters_.containsSome(label_string)) // If there's no script mixing, the input is regarded as safe without any // extra check unless it falls into one of three categories: diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h index bd955952d45..b981c403e3d 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h @@ -95,6 +95,7 @@ class IDNSpoofChecker { icu::UnicodeSet cyrillic_letters_; icu::UnicodeSet cyrillic_letters_latin_alike_; icu::UnicodeSet lgc_letters_n_ascii_; + icu::UnicodeSet icelandic_characters_; std::unique_ptr diacritic_remover_; std::unique_ptr extra_confusable_mapper_; -- cgit v1.2.1