diff options
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r-- | chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc index f394f73cd3f..6afb8b9e736 100644 --- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc +++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc @@ -76,6 +76,35 @@ base::ThreadLocalStorage::Slot& DangerousPatternTLS() { return *dangerous_pattern_tls; } +// Allow middle dot (U+00B7) only on Catalan domains when between two 'l's, to +// permit the Catalan character ela geminada to be expressed. +// See https://tools.ietf.org/html/rfc5892#appendix-A.3 for details. +bool HasUnsafeMiddleDot(const icu::UnicodeString& label_string, + base::StringPiece top_level_domain) { + int last_index = 0; + while (true) { + int index = label_string.indexOf("·", last_index); + if (index < 0) { + break; + } + DCHECK_LT(index, label_string.length()); + if (top_level_domain != "cat") { + // Non-Catalan domains cannot contain middle dot. + return true; + } + // Middle dot at the beginning or end. + if (index == 0 || index == label_string.length() - 1) { + return true; + } + // Middle dot not surrounded by an 'l'. + if (label_string[index - 1] != 'l' || label_string[index + 1] != 'l') { + return true; + } + last_index = index + 1; + } + return false; +} + #include "components/url_formatter/top_domains/alexa_domains-trie-inc.cc" // All the domains in the above file have 3 or fewer labels. @@ -293,6 +322,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( label_string.indexOf("ə") != -1) return false; + // Disallow middle dot (U+00B7) when unsafe. + if (HasUnsafeMiddleDot(label_string, top_level_domain)) { + return false; + } + // If there's no script mixing, the input is regarded as safe without any // extra check unless it falls into one of three categories: // - contains Kana letter exceptions |