summary | refs | log | tree | commit | diff
path: root/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
diff options
context:
space:
mode:
author: meacer <meacer@chromium.org> 2019-10-25 01:09:31 +0000
committer: Michael Brüning <michael.bruning@qt.io> 2020-03-06 12:03:48 +0000
commit: 4d5dbe41ae38dff77b94d9c93944442fb2f4fabd (patch)
tree: 3867b8df4140b034d6ffd8341bf9181656581546 /chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
parent: d8c1659ae97b316a2424d120f67e78b71a532976 (diff)
download: qtwebengine-chromium-4d5dbe41ae38dff77b94d9c93944442fb2f4fabd.tar.gz
[Backport] CVE-2020-6401 (1/3) and CVE-2020-6411
Manual backport of patch originally reviewed on https://chromium-review.googlesource.com/c/chromium/src/+/1879992:

Restrict Latin Small Letter Thorn (U+00FE) to Icelandic domains

This character (þ) can be confused with both b and p when used in a domain name. The IDN spoof checker doesn't have a good way of flagging a character as confusable with multiple characters, so it can't catch spoofs containing this character.

As a practical fix, this CL restricts this character to domains under Iceland's ccTLD (.is). With this change, a domain name containing "þ" with a non-.is TLD will be displayed in punycode in the UI.

This change affects fewer than 10 real-world domains with limited popularity.

Bug: 798892, 843352, 904327, 1017707
Change-Id: I7ade7305a4235e51ed3c7d0a6acb1ce6df7544f1
Reviewed-by: Jüri Valdmann <juri.valdmann@qt.io>
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r-- chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | 18
1 file changed, 14 insertions, 4 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index de66bffa980..933a985b510 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -180,7 +180,7 @@ IDNSpoofChecker::IDNSpoofChecker() {
// Supplement the Unicode confusable list by the following mapping.
// - {U+00E6 (æ), U+04D5 (ӕ)} => "ae"
- // - {U+00FE (þ), U+03FC (ϼ), U+048F (ҏ)} => p
+ // - {U+03FC (ϼ), U+048F (ҏ)} => p
// - {U+0127 (ħ), U+043D (н), U+045B (ћ), U+04A3 (ң), U+04A5 (ҥ),
// U+04C8 (ӈ), U+04CA (ӊ), U+050B (ԋ), U+0527 (ԧ), U+0529 (ԩ)} => h
// - {U+0138 (ĸ), U+03BA (κ), U+043A (к), U+049B (қ), U+049D (ҝ),
@@ -221,7 +221,7 @@ IDNSpoofChecker::IDNSpoofChecker() {
extra_confusable_mapper_.reset(icu::Transliterator::createFromRules(
UNICODE_STRING_SIMPLE("ExtraConf"),
icu::UnicodeString::fromUTF8(
- "[æӕ] > ae; [þϼҏ] > p; [ħнћңҥӈӊԋԧԩ] > h;"
+ "[æӕ] > ae; [ϼҏ] > p; [ħнћңҥӈӊԋԧԩ] > h;"
"[ĸκкқҝҟҡӄԟ] > k; [ŋпԥกח] > n; œ > ce;"
"[ŧтҭԏ七丅丆丁] > t; [ƅьҍв] > b; [ωшщพฟພຟ] > w;"
"[мӎ] > m; [єҽҿၔ] > e; ґ > r; [ғӻ] > f;"
@@ -246,8 +246,9 @@ IDNSpoofChecker::~IDNSpoofChecker() {
uspoof_close(checker_);
}
-bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
- bool is_tld_ascii) {
+bool IDNSpoofChecker::SafeToDisplayAsUnicode(
+ base::StringPiece16 label,
+ base::StringPiece top_level_domain) {
UErrorCode status = U_ZERO_ERROR;
int32_t result =
uspoof_check(checker_, label.data(),
@@ -274,6 +275,14 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
if (deviation_characters_.containsSome(label_string))
return false;
+ // Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's
+ // used in modern Icelandic orthography, so allow it for the Icelandic ccTLD
+ // (.is) but block in any other TLD.
+ if (label_string.length() > 1 && label_string.indexOf("þ") != -1 &&
+ top_level_domain != ".is") {
+ return false;
+ }
+
// If there's no script mixing, the input is regarded as safe without any
// extra check unless it falls into one of three categories:
// - contains Kana letter exceptions
@@ -291,6 +300,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&
kana_letters_exceptions_.containsNone(label_string) &&
combining_diacritics_exceptions_.containsNone(label_string)) {
+ bool is_tld_ascii = !top_level_domain.starts_with(".xn--");
// Check Cyrillic confusable only for ASCII TLDs.
return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string);
}