summaryrefslogtreecommitdiff
path: root/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r--chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc18
1 files changed, 11 insertions, 7 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index 933a985b510..9b37fd34b81 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -167,6 +167,14 @@ IDNSpoofChecker::IDNSpoofChecker() {
status);
lgc_letters_n_ascii_.freeze();
+ // Latin small letter thorn ("þ", U+00FE) can be used to spoof both b and p,
+ // and Latin small letter eth ("ð", U+00F0) can be used to spoof the letter o.
+ // Both are used in modern Icelandic orthography, so allow them for the
+ // Icelandic ccTLD (.is) but block them in any other TLD.
+ icelandic_characters_ =
+ icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00fe\\u00f0]"), status);
+ icelandic_characters_.freeze();
+
// Used for diacritics-removal before the skeleton calculation. Add
// "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark
// removal; NFC".
@@ -275,13 +283,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
if (deviation_characters_.containsSome(label_string))
return false;
- // Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's
- // used in modern Icelandic orthography, so allow it for the Icelandic ccTLD
- // (.is) but block in any other TLD.
- if (label_string.length() > 1 && label_string.indexOf("þ") != -1 &&
- top_level_domain != ".is") {
- return false;
- }
+ // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
+ if (label_string.length() > 1 && top_level_domain != ".is" &&
+ icelandic_characters_.containsSome(label_string))
// If there's no script mixing, the input is regarded as safe without any
// extra check unless it falls into one of three categories: