diff options
Diffstat (limited to 'chromium/components/url_formatter/url_formatter.cc')
-rw-r--r-- | chromium/components/url_formatter/url_formatter.cc | 68 |
1 files changed, 47 insertions, 21 deletions
diff --git a/chromium/components/url_formatter/url_formatter.cc b/chromium/components/url_formatter/url_formatter.cc index e7d8f37ce8e..4aada4a7ac4 100644 --- a/chromium/components/url_formatter/url_formatter.cc +++ b/chromium/components/url_formatter/url_formatter.cc @@ -35,6 +35,7 @@ IDNConversionResult IDNToUnicodeWithAdjustments( bool IDNToUnicodeOneComponent(const base::char16* comp, size_t comp_len, base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode, bool enable_spoof_checks, base::string16* out, bool* has_idn_component); @@ -234,6 +235,28 @@ base::string16 FormatViewSourceUrl( base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker = LAZY_INSTANCE_INITIALIZER; +// Computes the top level domain from |host|. top_level_domain_unicode will +// contain the unicode version of top_level_domain. top_level_domain_unicode can +// remain empty if the TLD is not well formed punycode. +void GetTopLevelDomain(base::StringPiece host, + base::StringPiece* top_level_domain, + base::string16* top_level_domain_unicode) { + size_t last_dot = host.rfind('.'); + if (last_dot == base::StringPiece::npos) + return; + + *top_level_domain = host.substr(last_dot + 1); + base::string16 tld16; + tld16.reserve(top_level_domain->length()); + tld16.insert(tld16.end(), top_level_domain->begin(), top_level_domain->end()); + + // Convert the TLD to unicode with the spoof checks disabled. + bool tld_has_idn_component = false; + IDNToUnicodeOneComponent(tld16.data(), tld16.size(), std::string(), + base::string16(), false /* enable_spoof_checks */, + top_level_domain_unicode, &tld_has_idn_component); +} + IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( base::StringPiece host, base::OffsetAdjuster::Adjustments* adjustments, @@ -241,27 +264,25 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( if (adjustments) adjustments->clear(); // Convert the ASCII input to a base::string16 for ICU. - base::string16 input16; - input16.reserve(host.length()); - input16.insert(input16.end(), host.begin(), host.end()); + base::string16 host16; + host16.reserve(host.length()); + host16.insert(host16.end(), host.begin(), host.end()); + // Compute the top level domain to be used in spoof checks later. base::StringPiece top_level_domain; - size_t last_dot = host.rfind('.'); - if (last_dot != base::StringPiece::npos) { - top_level_domain = host.substr(last_dot); - } + base::string16 top_level_domain_unicode; + GetTopLevelDomain(host, &top_level_domain, &top_level_domain_unicode); IDNConversionResult result; // Do each component of the host separately, since we enforce script matching // on a per-component basis. base::string16 out16; for (size_t component_start = 0, component_end; - component_start < input16.length(); - component_start = component_end + 1) { + component_start < host16.length(); component_start = component_end + 1) { // Find the end of the component. - component_end = input16.find('.', component_start); + component_end = host16.find('.', component_start); if (component_end == base::string16::npos) - component_end = input16.length(); // For getting the last component. + component_end = host16.length(); // For getting the last component. size_t component_length = component_end - component_start; size_t new_component_start = out16.length(); bool converted_idn = false; @@ -269,8 +290,9 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( // Add the substring that we just found. bool has_idn_component = false; converted_idn = IDNToUnicodeOneComponent( - input16.data() + component_start, component_length, top_level_domain, - enable_spoof_checks, &out16, &has_idn_component); + host16.data() + component_start, component_length, top_level_domain, + top_level_domain_unicode, enable_spoof_checks, &out16, + &has_idn_component); result.has_idn_component |= has_idn_component; } size_t new_component_length = out16.length() - new_component_start; @@ -281,7 +303,7 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( } // Need to add the dot we just found (if we found one). - if (component_end < input16.length()) + if (component_end < host16.length()) out16.push_back('.'); } @@ -294,7 +316,7 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( if (enable_spoof_checks && !result.matching_top_domain.empty()) { if (adjustments) adjustments->clear(); - result.result = input16; + result.result = host16; } } @@ -320,9 +342,10 @@ IDNConversionResult UnsafeIDNToUnicodeWithAdjustments( // all even though it's possible to make up look-alike labels with ASCII // characters alone. bool IsIDNComponentSafe(base::StringPiece16 label, - base::StringPiece top_level_domain) { - return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(label, - top_level_domain); + base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode) { + return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode( + label, top_level_domain, top_level_domain_unicode); } // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to @@ -375,6 +398,7 @@ base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER; bool IDNToUnicodeOneComponent(const base::char16* comp, size_t comp_len, base::StringPiece top_level_domain, + base::StringPiece16 top_level_domain_unicode, bool enable_spoof_checks, base::string16* out, bool* has_idn_component) { @@ -411,8 +435,10 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, if (U_SUCCESS(status) && info.errors == 0) { *has_idn_component = true; - // Converted successfully. Ensure that the converted component - // can be safely displayed to the user. + // Converted successfully. At this point the length of the output string + // is original_length + output_length which may be shorter than the current + // length of |out|. Trim |out| and ensure that the converted component can + // be safely displayed to the user. out->resize(original_length + output_length); if (!enable_spoof_checks) { return true; @@ -420,7 +446,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, if (IsIDNComponentSafe( base::StringPiece16(out->data() + original_length, base::checked_cast<size_t>(output_length)), - top_level_domain)) { + top_level_domain, top_level_domain_unicode)) { return true; } } |