summaryrefslogtreecommitdiff
path: root/chromium/components/url_formatter/url_formatter.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/url_formatter/url_formatter.cc')
-rw-r--r--chromium/components/url_formatter/url_formatter.cc68
1 files changed, 47 insertions, 21 deletions
diff --git a/chromium/components/url_formatter/url_formatter.cc b/chromium/components/url_formatter/url_formatter.cc
index e7d8f37ce8e..4aada4a7ac4 100644
--- a/chromium/components/url_formatter/url_formatter.cc
+++ b/chromium/components/url_formatter/url_formatter.cc
@@ -35,6 +35,7 @@ IDNConversionResult IDNToUnicodeWithAdjustments(
bool IDNToUnicodeOneComponent(const base::char16* comp,
size_t comp_len,
base::StringPiece top_level_domain,
+ base::StringPiece16 top_level_domain_unicode,
bool enable_spoof_checks,
base::string16* out,
bool* has_idn_component);
@@ -234,6 +235,28 @@ base::string16 FormatViewSourceUrl(
base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =
LAZY_INSTANCE_INITIALIZER;
+// Computes the top level domain from |host|. top_level_domain_unicode will
+// contain the unicode version of top_level_domain. top_level_domain_unicode can
+// remain empty if the TLD is not well formed punycode.
+void GetTopLevelDomain(base::StringPiece host,
+ base::StringPiece* top_level_domain,
+ base::string16* top_level_domain_unicode) {
+ size_t last_dot = host.rfind('.');
+ if (last_dot == base::StringPiece::npos)
+ return;
+
+ *top_level_domain = host.substr(last_dot + 1);
+ base::string16 tld16;
+ tld16.reserve(top_level_domain->length());
+ tld16.insert(tld16.end(), top_level_domain->begin(), top_level_domain->end());
+
+ // Convert the TLD to unicode with the spoof checks disabled.
+ bool tld_has_idn_component = false;
+ IDNToUnicodeOneComponent(tld16.data(), tld16.size(), std::string(),
+ base::string16(), false /* enable_spoof_checks */,
+ top_level_domain_unicode, &tld_has_idn_component);
+}
+
IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
base::StringPiece host,
base::OffsetAdjuster::Adjustments* adjustments,
@@ -241,27 +264,25 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
if (adjustments)
adjustments->clear();
// Convert the ASCII input to a base::string16 for ICU.
- base::string16 input16;
- input16.reserve(host.length());
- input16.insert(input16.end(), host.begin(), host.end());
+ base::string16 host16;
+ host16.reserve(host.length());
+ host16.insert(host16.end(), host.begin(), host.end());
+ // Compute the top level domain to be used in spoof checks later.
base::StringPiece top_level_domain;
- size_t last_dot = host.rfind('.');
- if (last_dot != base::StringPiece::npos) {
- top_level_domain = host.substr(last_dot);
- }
+ base::string16 top_level_domain_unicode;
+ GetTopLevelDomain(host, &top_level_domain, &top_level_domain_unicode);
IDNConversionResult result;
// Do each component of the host separately, since we enforce script matching
// on a per-component basis.
base::string16 out16;
for (size_t component_start = 0, component_end;
- component_start < input16.length();
- component_start = component_end + 1) {
+ component_start < host16.length(); component_start = component_end + 1) {
// Find the end of the component.
- component_end = input16.find('.', component_start);
+ component_end = host16.find('.', component_start);
if (component_end == base::string16::npos)
- component_end = input16.length(); // For getting the last component.
+ component_end = host16.length(); // For getting the last component.
size_t component_length = component_end - component_start;
size_t new_component_start = out16.length();
bool converted_idn = false;
@@ -269,8 +290,9 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
// Add the substring that we just found.
bool has_idn_component = false;
converted_idn = IDNToUnicodeOneComponent(
- input16.data() + component_start, component_length, top_level_domain,
- enable_spoof_checks, &out16, &has_idn_component);
+ host16.data() + component_start, component_length, top_level_domain,
+ top_level_domain_unicode, enable_spoof_checks, &out16,
+ &has_idn_component);
result.has_idn_component |= has_idn_component;
}
size_t new_component_length = out16.length() - new_component_start;
@@ -281,7 +303,7 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
}
// Need to add the dot we just found (if we found one).
- if (component_end < input16.length())
+ if (component_end < host16.length())
out16.push_back('.');
}
@@ -294,7 +316,7 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
if (enable_spoof_checks && !result.matching_top_domain.empty()) {
if (adjustments)
adjustments->clear();
- result.result = input16;
+ result.result = host16;
}
}
@@ -320,9 +342,10 @@ IDNConversionResult UnsafeIDNToUnicodeWithAdjustments(
// all even though it's possible to make up look-alike labels with ASCII
// characters alone.
bool IsIDNComponentSafe(base::StringPiece16 label,
- base::StringPiece top_level_domain) {
- return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label,
- top_level_domain);
+ base::StringPiece top_level_domain,
+ base::StringPiece16 top_level_domain_unicode) {
+ return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(
+ label, top_level_domain, top_level_domain_unicode);
}
// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
@@ -375,6 +398,7 @@ base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;
bool IDNToUnicodeOneComponent(const base::char16* comp,
size_t comp_len,
base::StringPiece top_level_domain,
+ base::StringPiece16 top_level_domain_unicode,
bool enable_spoof_checks,
base::string16* out,
bool* has_idn_component) {
@@ -411,8 +435,10 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
if (U_SUCCESS(status) && info.errors == 0) {
*has_idn_component = true;
- // Converted successfully. Ensure that the converted component
- // can be safely displayed to the user.
+ // Converted successfully. At this point the length of the output string
+ // is original_length + output_length which may be shorter than the current
+ // length of |out|. Trim |out| and ensure that the converted component can
+ // be safely displayed to the user.
out->resize(original_length + output_length);
if (!enable_spoof_checks) {
return true;
@@ -420,7 +446,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
if (IsIDNComponentSafe(
base::StringPiece16(out->data() + original_length,
base::checked_cast<size_t>(output_length)),
- top_level_domain)) {
+ top_level_domain, top_level_domain_unicode)) {
return true;
}
}