diff options
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r-- | chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | 30 |
1 files changed, 24 insertions, 6 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc index d0c22538cd4..f394f73cd3f 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc @@ -256,7 +256,8 @@ IDNSpoofChecker::~IDNSpoofChecker() { bool IDNSpoofChecker::SafeToDisplayAsUnicode( base::StringPiece16 label, - base::StringPiece top_level_domain) { + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode) { UErrorCode status = U_ZERO_ERROR; int32_t result = uspoof_check(checker_, label.data(), @@ -266,7 +267,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS)) return false; - icu::UnicodeString label_string(FALSE, label.data(), + icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(), base::checked_cast<int32_t>(label.size())); // A punycode label with 'xn--' prefix is not subject to the URL @@ -284,7 +285,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( return false; // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is). - if (label_string.length() > 1 && top_level_domain != ".is" && + if (label_string.length() > 1 && top_level_domain != "is" && icelandic_characters_.containsSome(label_string)) // Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az). @@ -309,9 +310,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && kana_letters_exceptions_.containsNone(label_string) && combining_diacritics_exceptions_.containsNone(label_string)) { - bool is_tld_ascii = !top_level_domain.starts_with(".xn--"); - // Check Cyrillic confusable only for ASCII TLDs. - return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string); + // Check Cyrillic confusable only for TLDs where Cyrillic characters are + // uncommon. + return IsCyrillicTopLevelDomain(top_level_domain, + top_level_domain_unicode) || + !IsMadeOfLatinAlikeCyrillic(label_string); } // Additional checks for |label| with multiple scripts, one of which is Latin. @@ -592,6 +595,21 @@ bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( cyrillic_letters_latin_alike_.containsAll(cyrillic_in_label); } +bool IDNSpoofChecker::IsCyrillicTopLevelDomain( + base::StringPiece tld, + base::StringPiece16 tld_unicode) const { + icu::UnicodeString tld_string( + FALSE /* isTerminated */, tld_unicode.data(), + base::checked_cast<int32_t>(tld_unicode.size())); + if (cyrillic_letters_.containsSome(tld_string)) { + return true; + } + // These ASCII TLDs contain a large number of domains with Cyrillic + // characters. + return tld == "bg" || tld == "by" || tld == "kz" || tld == "pyc" || + tld == "ru" || tld == "su" || tld == "ua" || tld == "uz"; +} + // static void IDNSpoofChecker::SetTrieParamsForTesting( const HuffmanTrieParams& trie_params) { |