summary | refs | log | tree | commit | diff
path: root/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc')
-rw-r--r-- chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc | 34
1 files changed, 34 insertions, 0 deletions
diff --git a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
index f394f73cd3f..6afb8b9e736 100644
--- a/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -76,6 +76,35 @@ base::ThreadLocalStorage::Slot& DangerousPatternTLS() {
return *dangerous_pattern_tls;
}
+// Reports whether |label_string| holds a middle dot (U+00B7) that is unsafe to
+// display. The dot is tolerated only under the Catalan "cat" TLD, and only
+// when it sits directly between two 'l's (the Catalan ela geminada).
+// See https://tools.ietf.org/html/rfc5892#appendix-A.3 for details.
+bool HasUnsafeMiddleDot(const icu::UnicodeString& label_string,
+                        base::StringPiece top_level_domain) {
+  const bool is_catalan_tld = top_level_domain == "cat";
+  const int final_position = label_string.length() - 1;
+  int search_from = 0;
+  for (int dot = label_string.indexOf("·", search_from); dot >= 0;
+       dot = label_string.indexOf("·", search_from)) {
+    DCHECK_LT(dot, label_string.length());
+    // Any middle dot outside a Catalan domain is rejected outright.
+    if (!is_catalan_tld)
+      return true;
+    // A dot that opens or closes the label lacks a neighbour on one side.
+    if (dot == 0 || dot == final_position)
+      return true;
+    // Both immediate neighbours must be 'l'.
+    const bool between_two_ls =
+        label_string[dot - 1] == 'l' && label_string[dot + 1] == 'l';
+    if (!between_two_ls)
+      return true;
+    // Resume scanning just past this occurrence.
+    search_from = dot + 1;
+  }
+  return false;
+}
+
#include "components/url_formatter/top_domains/alexa_domains-trie-inc.cc"
// All the domains in the above file have 3 or fewer labels.
@@ -293,6 +322,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
label_string.indexOf("ə") != -1)
return false;
+ // Disallow middle dot (U+00B7) when unsafe.
+ if (HasUnsafeMiddleDot(label_string, top_level_domain)) {
+ return false;
+ }
+
// If there's no script mixing, the input is regarded as safe without any
// extra check unless it falls into one of three categories:
// - contains Kana letter exceptions