summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: meacer <meacer@chromium.org> 2019-10-25 19:29:59 +0000
committer: Michael Brüning <michael.bruning@qt.io> 2020-03-06 12:03:56 +0000
commit: b88a10e7a666792cc8a2d9a9310748a79b1f032b (patch)
tree: 2465c8a3a139767efd4d706d0af178c0f1101ebc
parent: 4d5dbe41ae38dff77b94d9c93944442fb2f4fabd (diff)
download: qtwebengine-chromium-b88a10e7a666792cc8a2d9a9310748a79b1f032b.tar.gz
[Backport] CVE-2020-6401 (2/3)
Manual backport of patch originally reviewed on
https://chromium-review.googlesource.com/c/chromium/src/+/1881344:

Restrict Latin Small Letter Eth (U+00F0) to Icelandic domains

crrev.com/c/1879992 restricted Latin Small Letter Thorn to Icelandic domains. This CL does the same for Eth (ð), as it can be confused with the characters "o" and "d" in some fonts.

This change affects less than 10 real world domains with limited popularity.

Bug: 1017707, 929711
Change-Id: I8f0394d4c1531eb2051d38c78afe00f550d3da73
Reviewed-by: Jüri Valdmann <juri.valdmann@qt.io>
-rw-r--r-- chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc 18
-rw-r--r-- chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h 1
2 files changed, 12 insertions, 7 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index 933a985b510..9b37fd34b81 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -167,6 +167,14 @@ IDNSpoofChecker::IDNSpoofChecker() {
status);
lgc_letters_n_ascii_.freeze();
+ // Latin small letter thorn ("þ", U+00FE) can be used to spoof both b and p.
+ // It's used in modern Icelandic orthography, so allow it for the Icelandic
+ // ccTLD (.is) but block in any other TLD. Also block Latin small letter eth
+ // ("ð", U+00F0) which can be used to spoof the letter o.
+ icelandic_characters_ =
+ icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00fe\\u00f0]"), status);
+ icelandic_characters_.freeze();
+
// Used for diacritics-removal before the skeleton calculation. Add
// "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark
// removal; NFC".
@@ -275,13 +283,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
if (deviation_characters_.containsSome(label_string))
return false;
- // Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's
- // used in modern Icelandic orthography, so allow it for the Icelandic ccTLD
- // (.is) but block in any other TLD.
- if (label_string.length() > 1 && label_string.indexOf("þ") != -1 &&
- top_level_domain != ".is") {
- return false;
- }
+ // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
+ if (label_string.length() > 1 && top_level_domain != ".is" &&
+ icelandic_characters_.containsSome(label_string))
// If there's no script mixing, the input is regarded as safe without any
// extra check unless it falls into one of three categories:
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
index bd955952d45..b981c403e3d 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.h
@@ -95,6 +95,7 @@ class IDNSpoofChecker {
icu::UnicodeSet cyrillic_letters_;
icu::UnicodeSet cyrillic_letters_latin_alike_;
icu::UnicodeSet lgc_letters_n_ascii_;
+ icu::UnicodeSet icelandic_characters_;
std::unique_ptr<icu::Transliterator> diacritic_remover_;
std::unique_ptr<icu::Transliterator> extra_confusable_mapper_;