diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-09-01 11:08:40 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2021-10-01 12:16:21 +0000 |
commit | 03c549e0392f92c02536d3f86d5e1d8dfa3435ac (patch) | |
tree | fe49d170a929b34ba82cd10db1a0bd8e3760fa4b /chromium/url | |
parent | 5d013f5804a0d91fcf6c626b2d6fb6eca5c845b0 (diff) | |
download | qtwebengine-chromium-03c549e0392f92c02536d3f86d5e1d8dfa3435ac.tar.gz |
BASELINE: Update Chromium to 91.0.4472.160
Change-Id: I0def1f08a2412aeed79a9ab95dd50eb5c3f65f31
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/url')
40 files changed, 963 insertions, 490 deletions
diff --git a/chromium/url/BUILD.gn b/chromium/url/BUILD.gn index 73776036cd2..440aa18ffd7 100644 --- a/chromium/url/BUILD.gn +++ b/chromium/url/BUILD.gn @@ -140,7 +140,6 @@ if (is_android) { ] deps = [ "//base:base_java", - "//base:jni_java", "//third_party/androidx:androidx_annotation_annotation_java", ] annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ] @@ -291,7 +290,6 @@ if (is_android) { ":gurl_java", "//base:base_java", "//base:base_java_test_support", - "//base:jni_java", ] } @@ -313,7 +311,6 @@ if (is_android) { ":gurl_junit_test_support", "//base:base_java", "//base:base_java_test_support", - "//base:jni_java", "//content/public/test/android:content_java_test_support", "//third_party/android_support_test_runner:rules_java", "//third_party/android_support_test_runner:runner_java", diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc index d6196695810..dc3fb21c3ae 100644 --- a/chromium/url/gurl.cc +++ b/chromium/url/gurl.cc @@ -7,6 +7,7 @@ #include <stddef.h> #include <algorithm> +#include <memory> #include <ostream> #include <utility> @@ -27,7 +28,7 @@ GURL::GURL(const GURL& other) is_valid_(other.is_valid_), parsed_(other.parsed_) { if (other.inner_url_) - inner_url_.reset(new GURL(*other.inner_url_)); + inner_url_ = std::make_unique<GURL>(*other.inner_url_); // Valid filesystem urls should always have an inner_url_. DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); } @@ -68,8 +69,8 @@ GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid) InitializeFromCanonicalSpec(); } -template<typename STR> -void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec, +template <typename CharT> +void GURL::InitCanonical(base::BasicStringPiece<CharT> input_spec, bool trim_path_end) { url::StdStringCanonOutput output(&spec_); is_valid_ = url::Canonicalize( @@ -78,8 +79,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec, output.Complete(); // Must be done before using string. if (is_valid_ && SchemeIsFileSystem()) { - inner_url_.reset(new GURL(spec_.data(), parsed_.Length(), - *parsed_.inner_parsed(), true)); + inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), + *parsed_.inner_parsed(), true); } // Valid URLs always have non-empty specs. DCHECK(!is_valid_ || !spec_.empty()); @@ -87,9 +88,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec, void GURL::InitializeFromCanonicalSpec() { if (is_valid_ && SchemeIsFileSystem()) { - inner_url_.reset( - new GURL(spec_.data(), parsed_.Length(), - *parsed_.inner_parsed(), true)); + inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), + *parsed_.inner_parsed(), true); } #ifndef NDEBUG @@ -139,7 +139,7 @@ GURL& GURL::operator=(const GURL& other) { else if (inner_url_) *inner_url_ = *other.inner_url_; else - inner_url_.reset(new GURL(*other.inner_url_)); + inner_url_ = std::make_unique<GURL>(*other.inner_url_); return *this; } @@ -190,9 +190,9 @@ GURL GURL::Resolve(base::StringPiece relative) const { output.Complete(); result.is_valid_ = true; if (result.SchemeIsFileSystem()) { - result.inner_url_.reset( - new GURL(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } @@ -216,9 +216,9 @@ GURL GURL::Resolve(base::StringPiece16 relative) const { output.Complete(); result.is_valid_ = true; if (result.SchemeIsFileSystem()) { - result.inner_url_.reset( - new GURL(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } @@ -239,16 +239,16 @@ GURL GURL::ReplaceComponents( output.Complete(); if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_.reset(new GURL(result.spec_.data(), - result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } // Note: code duplicated above (it's inconvenient to use a template here). GURL GURL::ReplaceComponents( - const url::Replacements<base::char16>& replacements) const { + const url::Replacements<char16_t>& replacements) const { GURL result; // Not allowed for invalid URLs. @@ -262,9 +262,9 @@ GURL GURL::ReplaceComponents( output.Complete(); if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_.reset(new GURL(result.spec_.data(), - result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } diff --git a/chromium/url/gurl.h b/chromium/url/gurl.h index baa2ad328bc..7dcf1d55b05 100644 --- a/chromium/url/gurl.h +++ b/chromium/url/gurl.h @@ -13,7 +13,6 @@ #include "base/component_export.h" #include "base/debug/alias.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "third_party/perfetto/include/perfetto/tracing/traced_value_forward.h" #include "url/third_party/mozilla/url_parse.h" @@ -46,8 +45,8 @@ // will know to escape this and produce the desired result. class COMPONENT_EXPORT(URL) GURL { public: - typedef url::StringPieceReplacements<std::string> Replacements; - typedef url::StringPieceReplacements<base::string16> ReplacementsW; + typedef url::StringPieceReplacements<char> Replacements; + typedef url::StringPieceReplacements<char16_t> ReplacementsW; // Creates an empty, invalid URL. GURL(); @@ -167,8 +166,7 @@ class COMPONENT_EXPORT(URL) GURL { // Note that we use the more general url::Replacements type to give // callers extra flexibility rather than our override. GURL ReplaceComponents(const url::Replacements<char>& replacements) const; - GURL ReplaceComponents( - const url::Replacements<base::char16>& replacements) const; + GURL ReplaceComponents(const url::Replacements<char16_t>& replacements) const; // A helper function that is equivalent to replacing the path with a slash // and clearing out everything after that. We sometimes need to know just the @@ -450,8 +448,8 @@ class COMPONENT_EXPORT(URL) GURL { enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE }; GURL(const std::string& url_string, RetainWhiteSpaceSelector); - template<typename STR> - void InitCanonical(base::BasicStringPiece<STR> input_spec, + template <typename CharT> + void InitCanonical(base::BasicStringPiece<CharT> input_spec, bool trim_path_end); void InitializeFromCanonicalSpec(); diff --git a/chromium/url/gurl_fuzzer.cc b/chromium/url/gurl_fuzzer.cc index c5c22a68250..e3676ea1659 100644 --- a/chromium/url/gurl_fuzzer.cc +++ b/chromium/url/gurl_fuzzer.cc @@ -52,9 +52,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { CheckReplaceComponentsPreservesSpec(url_from_string_piece); } // Test for StringPiece16 if size is even. - if (size % 2 == 0) { + if (size % sizeof(char16_t) == 0) { base::StringPiece16 string_piece_input16( - reinterpret_cast<const base::char16*>(data), size / 2); + reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t)); const GURL url_from_string_piece16(string_piece_input16); CheckIdempotency(url_from_string_piece16); CheckReplaceComponentsPreservesSpec(url_from_string_piece16); @@ -78,10 +78,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { url_from_string_piece_part.Resolve(relative_string); - if (relative_size % 2 == 0) { - base::string16 relative_string16( - reinterpret_cast<const base::char16*>(data + size_t_bytes), - relative_size / 2); + if (relative_size % sizeof(char16_t) == 0) { + std::u16string relative_string16( + reinterpret_cast<const char16_t*>(data + size_t_bytes), + relative_size / sizeof(char16_t)); url_from_string_piece_part.Resolve(relative_string16); } } diff --git a/chromium/url/gurl_unittest.cc b/chromium/url/gurl_unittest.cc index 33195138681..6d23d6534a7 100644 --- a/chromium/url/gurl_unittest.cc +++ b/chromium/url/gurl_unittest.cc @@ -68,11 +68,11 @@ TEST(GURLTest, Types) { // the parser is already tested and works, so we are mostly interested if the // object does the right thing with the results. TEST(GURLTest, Components) { - GURL empty_url(base::UTF8ToUTF16("")); + GURL empty_url(u""); EXPECT_TRUE(empty_url.is_empty()); EXPECT_FALSE(empty_url.is_valid()); - GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref")); + GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref"); EXPECT_FALSE(url.is_empty()); EXPECT_TRUE(url.is_valid()); EXPECT_TRUE(url.SchemeIs("http")); @@ -268,21 +268,49 @@ TEST(GURLTest, ExtraSlashesBeforeAuthority) { EXPECT_EQ("/", url.path()); } -// Given an invalid URL, we should still get most of the components. +// Given invalid URLs, we should still get most of the components. TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) { - GURL url("http:google.com:foo"); - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); + constexpr struct InvalidURLTestExpectations { + const char* url; + const char* spec; + const char* scheme; + const char* host; + const char* port; + const char* path; + // Extend as needed... + } expectations[] = { + { + "http:google.com:foo", + "http://google.com:foo/", + "http", + "google.com", + "foo", + "/", + }, + { + "https:google.com:foo", + "https://google.com:foo/", + "https", + "google.com", + "foo", + "/", + }, + }; - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("foo", url.port()); - EXPECT_EQ(PORT_INVALID, url.IntPort()); - EXPECT_EQ("/", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); + for (const auto& e : expectations) { + const GURL url(e.url); + EXPECT_FALSE(url.is_valid()); + EXPECT_EQ(e.spec, url.possibly_invalid_spec()); + EXPECT_EQ(e.scheme, url.scheme()); + EXPECT_EQ("", url.username()); + EXPECT_EQ("", url.password()); + EXPECT_EQ(e.host, url.host()); + EXPECT_EQ(e.port, url.port()); + EXPECT_EQ(PORT_INVALID, url.IntPort()); + EXPECT_EQ(e.path, url.path()); + EXPECT_EQ("", url.query()); + EXPECT_EQ("", url.ref()); + } } TEST(GURLTest, Resolve) { @@ -314,6 +342,7 @@ TEST(GURLTest, Resolve) { // A non-standard base can be replaced with a standard absolute URL. {"data:blahblah", "http://google.com/", true, "http://google.com/"}, {"data:blahblah", "http:google.com", true, "http://google.com/"}, + {"data:blahblah", "https:google.com", true, "https://google.com/"}, // Filesystem URLs have different paths to test. {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, diff --git a/chromium/url/origin.h b/chromium/url/origin.h index 4f3e01790fb..ae6eacf2456 100644 --- a/chromium/url/origin.h +++ b/chromium/url/origin.h @@ -14,7 +14,6 @@ #include "base/debug/alias.h" #include "base/debug/crash_logging.h" #include "base/optional.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/unguessable_token.h" diff --git a/chromium/url/third_party/mozilla/url_parse.cc b/chromium/url/third_party/mozilla/url_parse.cc index 2b3003209ad..d882e3fd572 100644 --- a/chromium/url/third_party/mozilla/url_parse.cc +++ b/chromium/url/third_party/mozilla/url_parse.cc @@ -48,7 +48,7 @@ namespace url { namespace { // Returns true if the given character is a valid digit to use in a port. -inline bool IsPortDigit(base::char16 ch) { +inline bool IsPortDigit(char16_t ch) { return ch >= '0' && ch <= '9'; } @@ -812,13 +812,13 @@ bool ExtractScheme(const char* url, int url_len, Component* scheme) { return DoExtractScheme(url, url_len, scheme); } -bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) { +bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) { return DoExtractScheme(url, url_len, scheme); } // This handles everything that may be an authority terminator, including // backslash. For special backslash handling see DoParseAfterScheme. -bool IsAuthorityTerminator(base::char16 ch) { +bool IsAuthorityTerminator(char16_t ch) { return IsURLSlash(ch) || ch == '?' || ch == '#'; } @@ -828,7 +828,7 @@ void ExtractFileName(const char* url, DoExtractFileName(url, path, file_name); } -void ExtractFileName(const base::char16* url, +void ExtractFileName(const char16_t* url, const Component& path, Component* file_name) { DoExtractFileName(url, path, file_name); @@ -841,7 +841,7 @@ bool ExtractQueryKeyValue(const char* url, return DoExtractQueryKeyValue(url, query, key, value); } -bool ExtractQueryKeyValue(const base::char16* url, +bool ExtractQueryKeyValue(const char16_t* url, Component* query, Component* key, Component* value) { @@ -857,7 +857,7 @@ void ParseAuthority(const char* spec, DoParseAuthority(spec, auth, username, password, hostname, port_num); } -void ParseAuthority(const base::char16* spec, +void ParseAuthority(const char16_t* spec, const Component& auth, Component* username, Component* password, @@ -870,7 +870,7 @@ int ParsePort(const char* url, const Component& port) { return DoParsePort(url, port); } -int ParsePort(const base::char16* url, const Component& port) { +int ParsePort(const char16_t* url, const Component& port) { return DoParsePort(url, port); } @@ -878,7 +878,7 @@ void ParseStandardURL(const char* url, int url_len, Parsed* parsed) { DoParseStandardURL(url, url_len, parsed); } -void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) { +void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseStandardURL(url, url_len, parsed); } @@ -889,7 +889,7 @@ void ParsePathURL(const char* url, DoParsePathURL(url, url_len, trim_path_end, parsed); } -void ParsePathURL(const base::char16* url, +void ParsePathURL(const char16_t* url, int url_len, bool trim_path_end, Parsed* parsed) { @@ -900,7 +900,7 @@ void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) { DoParseFileSystemURL(url, url_len, parsed); } -void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) { +void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseFileSystemURL(url, url_len, parsed); } @@ -908,7 +908,7 @@ void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) { DoParseMailtoURL(url, url_len, parsed); } -void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) { +void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseMailtoURL(url, url_len, parsed); } @@ -920,7 +920,7 @@ void ParsePathInternal(const char* spec, ParsePath(spec, path, filepath, query, ref); } -void ParsePathInternal(const base::char16* spec, +void ParsePathInternal(const char16_t* spec, const Component& path, Component* filepath, Component* query, @@ -935,7 +935,7 @@ void ParseAfterScheme(const char* spec, DoParseAfterScheme(spec, spec_len, after_scheme, parsed); } -void ParseAfterScheme(const base::char16* spec, +void ParseAfterScheme(const char16_t* spec, int spec_len, int after_scheme, Parsed* parsed) { diff --git a/chromium/url/third_party/mozilla/url_parse.h b/chromium/url/third_party/mozilla/url_parse.h index 8a1c823d4f8..b662022b433 100644 --- a/chromium/url/third_party/mozilla/url_parse.h +++ b/chromium/url/third_party/mozilla/url_parse.h @@ -6,7 +6,6 @@ #define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_ #include "base/component_export.h" -#include "base/strings/string16.h" namespace url { @@ -202,7 +201,7 @@ struct COMPONENT_EXPORT(URL) Parsed { void clear_inner_parsed() { if (inner_parsed_) { delete inner_parsed_; - inner_parsed_ = NULL; + inner_parsed_ = nullptr; } } @@ -230,7 +229,7 @@ struct COMPONENT_EXPORT(URL) Parsed { COMPONENT_EXPORT(URL) void ParseStandardURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed); +void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed); // PathURL is for when the scheme is known not to have an authority (host) // section but that aren't file URLs either. The scheme is parsed, and @@ -242,7 +241,7 @@ void ParsePathURL(const char* url, bool trim_path_end, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParsePathURL(const base::char16* url, +void ParsePathURL(const char16_t* url, int url_len, bool trim_path_end, Parsed* parsed); @@ -252,19 +251,19 @@ void ParsePathURL(const base::char16* url, COMPONENT_EXPORT(URL) void ParseFileURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed); +void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed); // Filesystem URLs are structured differently than other URLs. COMPONENT_EXPORT(URL) void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed); +void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed); // MailtoURL is for mailto: urls. They are made up scheme,path,query COMPONENT_EXPORT(URL) void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed); +void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed); // Helper functions ----------------------------------------------------------- @@ -291,11 +290,11 @@ void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) bool ExtractScheme(const char* url, int url_len, Component* scheme); COMPONENT_EXPORT(URL) -bool ExtractScheme(const base::char16* url, int url_len, Component* scheme); +bool ExtractScheme(const char16_t* url, int url_len, Component* scheme); // Returns true if ch is a character that terminates the authority segment // of a URL. -COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(base::char16 ch); +COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch); // Does a best effort parse of input |spec|, in range |auth|. If a particular // component is not found, it will be set to invalid. @@ -307,7 +306,7 @@ void ParseAuthority(const char* spec, Component* hostname, Component* port_num); COMPONENT_EXPORT(URL) -void ParseAuthority(const base::char16* spec, +void ParseAuthority(const char16_t* spec, const Component& auth, Component* username, Component* password, @@ -323,7 +322,7 @@ void ParseAuthority(const base::char16* spec, enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port); COMPONENT_EXPORT(URL) -int ParsePort(const base::char16* url, const Component& port); +int ParsePort(const char16_t* url, const Component& port); // Extracts the range of the file name in the given url. The path must // already have been computed by the parse function, and the matching URL @@ -340,7 +339,7 @@ void ExtractFileName(const char* url, const Component& path, Component* file_name); COMPONENT_EXPORT(URL) -void ExtractFileName(const base::char16* url, +void ExtractFileName(const char16_t* url, const Component& path, Component* file_name); @@ -365,7 +364,7 @@ bool ExtractQueryKeyValue(const char* url, Component* key, Component* value); COMPONENT_EXPORT(URL) -bool ExtractQueryKeyValue(const base::char16* url, +bool ExtractQueryKeyValue(const char16_t* url, Component* query, Component* key, Component* value); diff --git a/chromium/url/url_canon.cc b/chromium/url/url_canon.cc index cde280d2be3..6cfdd23aed8 100644 --- a/chromium/url/url_canon.cc +++ b/chromium/url/url_canon.cc @@ -10,6 +10,6 @@ namespace url { template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) - CanonOutputT<base::char16>; + CanonOutputT<char16_t>; } // namespace url diff --git a/chromium/url/url_canon.h b/chromium/url/url_canon.h index b6d7533d0a7..83d4e016caf 100644 --- a/chromium/url/url_canon.h +++ b/chromium/url/url_canon.h @@ -8,9 +8,10 @@ #include <stdlib.h> #include <string.h> +#include <string> + #include "base/component_export.h" #include "base/export_template.h" -#include "base/strings/string16.h" #include "url/third_party/mozilla/url_parse.h" namespace url { @@ -178,18 +179,18 @@ class RawCanonOutputT : public CanonOutputT<T> { extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) - CanonOutputT<base::char16>; + CanonOutputT<char16_t>; // Normally, all canonicalization output is in narrow characters. We support // the templates so it can also be used internally if a wide buffer is // required. typedef CanonOutputT<char> CanonOutput; -typedef CanonOutputT<base::char16> CanonOutputW; +typedef CanonOutputT<char16_t> CanonOutputW; template<int fixed_capacity> class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {}; -template<int fixed_capacity> -class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {}; +template <int fixed_capacity> +class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {}; // Character set converter ---------------------------------------------------- // @@ -215,7 +216,7 @@ class COMPONENT_EXPORT(URL) CharsetConverter { // decimal, (such as "你") with escaping of the ampersand, number // sign, and semicolon (in the previous example it would be // "%26%2320320%3B"). This rule is based on what IE does in this situation. - virtual void ConvertFromUTF16(const base::char16* input, + virtual void ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) = 0; }; @@ -273,11 +274,11 @@ const char* RemoveURLWhitespace(const char* input, int* output_len, bool* potentially_dangling_markup); COMPONENT_EXPORT(URL) -const base::char16* RemoveURLWhitespace(const base::char16* input, - int input_len, - CanonOutputT<base::char16>* buffer, - int* output_len, - bool* potentially_dangling_markup); +const char16_t* RemoveURLWhitespace(const char16_t* input, + int input_len, + CanonOutputT<char16_t>* buffer, + int* output_len, + bool* potentially_dangling_markup); // IDN ------------------------------------------------------------------------ @@ -291,7 +292,7 @@ const base::char16* RemoveURLWhitespace(const base::char16* input, // // On error, returns false. The output in this case is undefined. COMPONENT_EXPORT(URL) -bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output); +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output); // Piece-by-piece canonicalizers ---------------------------------------------- // @@ -323,7 +324,7 @@ bool CanonicalizeScheme(const char* spec, CanonOutput* output, Component* out_scheme); COMPONENT_EXPORT(URL) -bool CanonicalizeScheme(const base::char16* spec, +bool CanonicalizeScheme(const char16_t* spec, const Component& scheme, CanonOutput* output, Component* out_scheme); @@ -347,9 +348,9 @@ bool CanonicalizeUserInfo(const char* username_source, Component* out_username, Component* out_password); COMPONENT_EXPORT(URL) -bool CanonicalizeUserInfo(const base::char16* username_source, +bool CanonicalizeUserInfo(const char16_t* username_source, const Component& username, - const base::char16* password_source, + const char16_t* password_source, const Component& password, CanonOutput* output, Component* out_username, @@ -411,7 +412,7 @@ bool CanonicalizeHost(const char* spec, CanonOutput* output, Component* out_host); COMPONENT_EXPORT(URL) -bool CanonicalizeHost(const base::char16* spec, +bool CanonicalizeHost(const char16_t* spec, const Component& host, CanonOutput* output, Component* out_host); @@ -426,7 +427,7 @@ void CanonicalizeHostVerbose(const char* spec, CanonOutput* output, CanonHostInfo* host_info); COMPONENT_EXPORT(URL) -void CanonicalizeHostVerbose(const base::char16* spec, +void CanonicalizeHostVerbose(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info); @@ -456,7 +457,7 @@ bool CanonicalizeHostSubstring(const char* spec, const Component& host, CanonOutput* output); COMPONENT_EXPORT(URL) -bool CanonicalizeHostSubstring(const base::char16* spec, +bool CanonicalizeHostSubstring(const char16_t* spec, const Component& host, CanonOutput* output); @@ -476,7 +477,7 @@ void CanonicalizeIPAddress(const char* spec, CanonOutput* output, CanonHostInfo* host_info); COMPONENT_EXPORT(URL) -void CanonicalizeIPAddress(const base::char16* spec, +void CanonicalizeIPAddress(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info); @@ -493,7 +494,7 @@ bool CanonicalizePort(const char* spec, CanonOutput* output, Component* out_port); COMPONENT_EXPORT(URL) -bool CanonicalizePort(const base::char16* spec, +bool CanonicalizePort(const char16_t* spec, const Component& port, int default_port_for_scheme, CanonOutput* output, @@ -519,11 +520,24 @@ bool CanonicalizePath(const char* spec, CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -bool CanonicalizePath(const base::char16* spec, +bool CanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); +// Like CanonicalizePath(), but does not assume that its operating on the +// entire path. It therefore does not prepend a slash, etc. +COMPONENT_EXPORT(URL) +bool CanonicalizePartialPath(const char* spec, + const Component& path, + CanonOutput* output, + Component* out_path); +COMPONENT_EXPORT(URL) +bool CanonicalizePartialPath(const char16_t* spec, + const Component& path, + CanonOutput* output, + Component* out_path); + // Canonicalizes the input as a file path. This is like CanonicalizePath except // that it also handles Windows drive specs. For example, the path can begin // with "c|\" and it will get properly canonicalized to "C:/". @@ -536,7 +550,7 @@ bool FileCanonicalizePath(const char* spec, CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -bool FileCanonicalizePath(const base::char16* spec, +bool FileCanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); @@ -560,7 +574,7 @@ void CanonicalizeQuery(const char* spec, CanonOutput* output, Component* out_query); COMPONENT_EXPORT(URL) -void CanonicalizeQuery(const base::char16* spec, +void CanonicalizeQuery(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output, @@ -578,7 +592,7 @@ void CanonicalizeRef(const char* spec, CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -void CanonicalizeRef(const base::char16* spec, +void CanonicalizeRef(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); @@ -603,7 +617,7 @@ bool CanonicalizeStandardURL(const char* spec, CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeStandardURL(const base::char16* spec, +bool CanonicalizeStandardURL(const char16_t* spec, int spec_len, const Parsed& parsed, SchemeType scheme_type, @@ -620,7 +634,7 @@ bool CanonicalizeFileURL(const char* spec, CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeFileURL(const base::char16* spec, +bool CanonicalizeFileURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, @@ -636,7 +650,7 @@ bool CanonicalizeFileSystemURL(const char* spec, CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeFileSystemURL(const base::char16* spec, +bool CanonicalizeFileSystemURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, @@ -652,12 +666,25 @@ bool CanonicalizePathURL(const char* spec, CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizePathURL(const base::char16* spec, +bool CanonicalizePathURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed); +// Use to canonicalize just the path component of a "path" URL; e.g. the +// path of a javascript URL. +COMPONENT_EXPORT(URL) +void CanonicalizePathURLPath(const char* source, + const Component& component, + CanonOutput* output, + Component* new_component); +COMPONENT_EXPORT(URL) +void CanonicalizePathURLPath(const char16_t* source, + const Component& component, + CanonOutput* output, + Component* new_component); + // Use for mailto URLs. This "canonicalizes" the URL into a path and query // component. It does not attempt to merge "to" fields. It uses UTF-8 for // the query encoding if there is a query. This is because a mailto URL is @@ -670,7 +697,7 @@ bool CanonicalizeMailtoURL(const char* spec, CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeMailtoURL(const base::char16* spec, +bool CanonicalizeMailtoURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, @@ -869,7 +896,7 @@ bool ReplaceStandardURL(const char* base, COMPONENT_EXPORT(URL) bool ReplaceStandardURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output, @@ -887,7 +914,7 @@ bool ReplaceFileSystemURL(const char* base, COMPONENT_EXPORT(URL) bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed); @@ -904,7 +931,7 @@ bool ReplaceFileURL(const char* base, COMPONENT_EXPORT(URL) bool ReplaceFileURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed); @@ -920,7 +947,7 @@ bool ReplacePathURL(const char* base, COMPONENT_EXPORT(URL) bool ReplacePathURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed); @@ -935,7 +962,7 @@ bool ReplaceMailtoURL(const char* base, COMPONENT_EXPORT(URL) bool ReplaceMailtoURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed); @@ -963,7 +990,7 @@ bool IsRelativeURL(const char* base, COMPONENT_EXPORT(URL) bool IsRelativeURL(const char* base, const Parsed& base_parsed, - const base::char16* fragment, + const char16_t* fragment, int fragment_len, bool is_base_hierarchical, bool* is_relative, @@ -1000,7 +1027,7 @@ COMPONENT_EXPORT(URL) bool ResolveRelativeURL(const char* base_url, const Parsed& base_parsed, bool base_is_file, - const base::char16* relative_url, + const char16_t* relative_url, const Component& relative_component, CharsetConverter* query_converter, CanonOutput* output, diff --git a/chromium/url/url_canon_etc.cc b/chromium/url/url_canon_etc.cc index 1ca9193ba71..7ddf1621026 100644 --- a/chromium/url/url_canon_etc.cc +++ b/chromium/url/url_canon_etc.cc @@ -329,16 +329,16 @@ const char* RemoveURLWhitespace(const char* input, potentially_dangling_markup); } -const base::char16* RemoveURLWhitespace(const base::char16* input, - int input_len, - CanonOutputT<base::char16>* buffer, - int* output_len, - bool* potentially_dangling_markup) { +const char16_t* RemoveURLWhitespace(const char16_t* input, + int input_len, + CanonOutputT<char16_t>* buffer, + int* output_len, + bool* potentially_dangling_markup) { return DoRemoveURLWhitespace(input, input_len, buffer, output_len, potentially_dangling_markup); } -char CanonicalSchemeChar(base::char16 ch) { +char CanonicalSchemeChar(char16_t ch) { if (ch >= 0x80) return 0; // Non-ASCII is not supported by schemes. return kSchemeCanonical[ch]; @@ -351,11 +351,11 @@ bool CanonicalizeScheme(const char* spec, return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme); } -bool CanonicalizeScheme(const base::char16* spec, +bool CanonicalizeScheme(const char16_t* spec, const Component& scheme, CanonOutput* output, Component* out_scheme) { - return DoScheme<base::char16, base::char16>(spec, scheme, output, out_scheme); + return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme); } bool CanonicalizeUserInfo(const char* username_source, @@ -370,16 +370,16 @@ bool CanonicalizeUserInfo(const char* username_source, output, out_username, out_password); } -bool CanonicalizeUserInfo(const base::char16* username_source, +bool CanonicalizeUserInfo(const char16_t* username_source, const Component& username, - const base::char16* password_source, + const char16_t* password_source, const Component& password, CanonOutput* output, Component* out_username, Component* out_password) { - return DoUserInfo<base::char16, base::char16>( - username_source, username, password_source, password, - output, out_username, out_password); + return DoUserInfo<char16_t, char16_t>(username_source, username, + password_source, password, output, + out_username, out_password); } bool CanonicalizePort(const char* spec, @@ -392,13 +392,13 @@ bool CanonicalizePort(const char* spec, output, out_port); } -bool CanonicalizePort(const base::char16* spec, +bool CanonicalizePort(const char16_t* spec, const Component& port, int default_port_for_scheme, CanonOutput* output, Component* out_port) { - return DoPort<base::char16, base::char16>(spec, port, default_port_for_scheme, - output, out_port); + return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output, + out_port); } void CanonicalizeRef(const char* spec, @@ -408,11 +408,11 @@ void CanonicalizeRef(const char* spec, DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref); } -void CanonicalizeRef(const base::char16* spec, +void CanonicalizeRef(const char16_t* spec, const Component& ref, CanonOutput* output, Component* out_ref) { - DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref); + DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref); } } // namespace url diff --git a/chromium/url/url_canon_filesystemurl.cc b/chromium/url/url_canon_filesystemurl.cc index 2cfaa29eafb..b36198a6bef 100644 --- a/chromium/url/url_canon_filesystemurl.cc +++ b/chromium/url/url_canon_filesystemurl.cc @@ -94,14 +94,14 @@ bool CanonicalizeFileSystemURL(const char* spec, new_parsed); } -bool CanonicalizeFileSystemURL(const base::char16* spec, +bool CanonicalizeFileSystemURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* charset_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeFileSystemURL<base::char16, base::char16>( - spec, URLComponentSource<base::char16>(spec), parsed, charset_converter, + return DoCanonicalizeFileSystemURL<char16_t, char16_t>( + spec, URLComponentSource<char16_t>(spec), parsed, charset_converter, output, new_parsed); } @@ -120,7 +120,7 @@ bool ReplaceFileSystemURL(const char* base, bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* new_parsed) { diff --git a/chromium/url/url_canon_fileurl.cc b/chromium/url/url_canon_fileurl.cc index 067ed58c51a..2aa582470ba 100644 --- a/chromium/url/url_canon_fileurl.cc +++ b/chromium/url/url_canon_fileurl.cc @@ -133,15 +133,15 @@ bool CanonicalizeFileURL(const char* spec, output, new_parsed); } -bool CanonicalizeFileURL(const base::char16* spec, +bool CanonicalizeFileURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeFileURL<base::char16, base::char16>( - URLComponentSource<base::char16>(spec), parsed, query_converter, - output, new_parsed); + return DoCanonicalizeFileURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, query_converter, output, + new_parsed); } bool FileCanonicalizePath(const char* spec, @@ -152,12 +152,12 @@ bool FileCanonicalizePath(const char* spec, output, out_path); } -bool FileCanonicalizePath(const base::char16* spec, +bool FileCanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { - return DoFileCanonicalizePath<base::char16, base::char16>(spec, path, - output, out_path); + return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output, + out_path); } bool ReplaceFileURL(const char* base, @@ -175,7 +175,7 @@ bool ReplaceFileURL(const char* base, bool ReplaceFileURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) { diff --git a/chromium/url/url_canon_host.cc b/chromium/url/url_canon_host.cc index 819a7831fe0..0d178c714a4 100644 --- a/chromium/url/url_canon_host.cc +++ b/chromium/url/url_canon_host.cc @@ -3,8 +3,10 @@ // found in the LICENSE file. #include "base/check.h" +#include "base/metrics/histogram_macros.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" +#include "url/url_canon_ip.h" namespace url { @@ -80,7 +82,7 @@ constexpr int kMaxHostBufferLength = kMaxHostLength*5; const int kTempHostBufferLen = 1024; typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer; -typedef RawCanonOutputT<base::char16, kTempHostBufferLen> StackBufferW; +typedef RawCanonOutputT<char16_t, kTempHostBufferLen> StackBufferW; // Scans a host name and fills in the output flags according to what we find. // |has_non_ascii| will be true if there are any non-7-bit characters, and @@ -174,7 +176,7 @@ bool DoSimpleHost(const INCHAR* host, } // Canonicalizes a host that requires IDN conversion. Returns true on success -bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) { +bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) { int original_output_len = output->length(); // So we can rewind below. // We need to escape URL before doing IDN conversion, since punicode strings @@ -296,8 +298,11 @@ bool DoComplexHost(const char* host, int host_len, // UTF-16 convert host to its ASCII version. The set up is already ready for // the backend, so we just pass through. The has_escaped flag should be set if // the input string requires unescaping. -bool DoComplexHost(const base::char16* host, int host_len, - bool has_non_ascii, bool has_escaped, CanonOutput* output) { +bool DoComplexHost(const char16_t* host, + int host_len, + bool has_non_ascii, + bool has_escaped, + CanonOutput* output) { if (has_escaped) { // Yikes, we have escaped characters with wide input. The escaped // characters should be interpreted as UTF-8. To solve this problem, @@ -374,6 +379,16 @@ void DoHost(const CHAR* spec, if (host_info->IsIPAddress()) { output->set_length(output_begin); output->Append(canon_ip.data(), canon_ip.length()); + } else if (host_info->family == CanonHostInfo::NEUTRAL) { + // Only need to call CheckHostnameSafety() for valid hosts that aren't IP + // addresses and aren't broken. + HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host); + // Don't record kOK. Ratio of OK to not-OK statuses is not meaningful at + // this layer, and hostnames are canonicalized a lot. + if (host_safety_status != HostSafetyStatus::kOk) { + UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus", + host_safety_status); + } } } else { // Canonicalization failed. Set BROKEN to notify the caller. @@ -395,12 +410,12 @@ bool CanonicalizeHost(const char* spec, return (host_info.family != CanonHostInfo::BROKEN); } -bool CanonicalizeHost(const base::char16* spec, +bool CanonicalizeHost(const char16_t* spec, const Component& host, CanonOutput* output, Component* out_host) { CanonHostInfo host_info; - DoHost<base::char16, base::char16>(spec, host, output, &host_info); + DoHost<char16_t, char16_t>(spec, host, output, &host_info); *out_host = host_info.out_host; return (host_info.family != CanonHostInfo::BROKEN); } @@ -412,11 +427,11 @@ void CanonicalizeHostVerbose(const char* spec, DoHost<char, unsigned char>(spec, host, output, host_info); } -void CanonicalizeHostVerbose(const base::char16* spec, +void CanonicalizeHostVerbose(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info) { - DoHost<base::char16, base::char16>(spec, host, output, host_info); + DoHost<char16_t, char16_t>(spec, host, output, host_info); } bool CanonicalizeHostSubstring(const char* spec, @@ -425,10 +440,10 @@ bool CanonicalizeHostSubstring(const char* spec, return DoHostSubstring<char, unsigned char>(spec, host, output); } -bool CanonicalizeHostSubstring(const base::char16* spec, +bool CanonicalizeHostSubstring(const char16_t* spec, const Component& host, CanonOutput* output) { - return DoHostSubstring<base::char16, base::char16>(spec, host, output); + return DoHostSubstring<char16_t, char16_t>(spec, host, output); } } // namespace url diff --git a/chromium/url/url_canon_icu.cc b/chromium/url/url_canon_icu.cc index a5a54e41c43..f1e05fdb5ea 100644 --- a/chromium/url/url_canon_icu.cc +++ b/chromium/url/url_canon_icu.cc @@ -9,7 +9,6 @@ #include <string.h> #include "base/check.h" -#include "base/i18n/uchar.h" #include "third_party/icu/source/common/unicode/ucnv.h" #include "third_party/icu/source/common/unicode/ucnv_cb.h" #include "third_party/icu/source/common/unicode/utypes.h" @@ -81,7 +80,7 @@ ICUCharsetConverter::ICUCharsetConverter(UConverter* converter) ICUCharsetConverter::~ICUCharsetConverter() = default; -void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input, +void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) { // Install our error handler. It will be called for character that can not @@ -95,9 +94,8 @@ void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input, do { UErrorCode err = U_ZERO_ERROR; char* dest = &output->data()[begin_offset]; - int required_capacity = - ucnv_fromUChars(converter_, dest, dest_capacity, - base::i18n::ToUCharPtr(input), input_len, &err); + int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity, + input, input_len, &err); if (err != U_BUFFER_OVERFLOW_ERROR) { output->set_length(begin_offset + required_capacity); return; diff --git a/chromium/url/url_canon_icu.h b/chromium/url/url_canon_icu.h index 367715119d2..e3b9aa0e3cd 100644 --- a/chromium/url/url_canon_icu.h +++ b/chromium/url/url_canon_icu.h @@ -26,7 +26,7 @@ class COMPONENT_EXPORT(URL) ICUCharsetConverter : public CharsetConverter { ~ICUCharsetConverter() override; - void ConvertFromUTF16(const base::char16* input, + void ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) override; diff --git a/chromium/url/url_canon_icu_unittest.cc b/chromium/url/url_canon_icu_unittest.cc index 4ce31d4eb2d..5e1fcf5e539 100644 --- a/chromium/url/url_canon_icu_unittest.cc +++ b/chromium/url/url_canon_icu_unittest.cc @@ -67,7 +67,7 @@ TEST(URLCanonIcuTest, ICUCharsetConverter) { std::string str; StdStringCanonOutput output(&str); - base::string16 input_str( + std::u16string input_str( test_utils::TruncateWStringToUTF16(icu_cases[i].input)); int input_len = static_cast<int>(input_str.length()); converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); @@ -84,7 +84,7 @@ TEST(URLCanonIcuTest, ICUCharsetConverter) { ICUCharsetConverter converter(conv.converter()); for (int i = static_size - 2; i <= static_size + 2; i++) { // Make a string with the appropriate length. - base::string16 input; + std::u16string input; for (int ch = 0; ch < i; ch++) input.push_back('a'); @@ -138,7 +138,7 @@ TEST(URLCanonIcuTest, QueryWithConverter) { } if (query_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(query_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); diff --git a/chromium/url/url_canon_internal.cc b/chromium/url/url_canon_internal.cc index a727ca2b271..03e1ad1a955 100644 --- a/chromium/url/url_canon_internal.cc +++ b/chromium/url/url_canon_internal.cc @@ -85,7 +85,7 @@ void DoOverrideComponent(const char* override_source, // may get resized while we're overriding a subsequent component. Instead, the // caller should use the beginning of the |utf8_buffer| as the string pointer // for all components once all overrides have been prepared. -bool PrepareUTF16OverrideComponent(const base::char16* override_source, +bool PrepareUTF16OverrideComponent(const char16_t* override_source, const Component& override_component, CanonOutput* utf8_buffer, Component* dest_component) { @@ -233,7 +233,7 @@ const char kCharToHexLookup[8] = { 0, // 0xE0 - 0xFF }; -const base::char16 kUnicodeReplacementCharacter = 0xfffd; +const char16_t kUnicodeReplacementCharacter = 0xfffd; void AppendStringOfType(const char* source, int length, SharedCharTypes type, @@ -241,11 +241,11 @@ void AppendStringOfType(const char* source, int length, DoAppendStringOfType<char, unsigned char>(source, length, type, output); } -void AppendStringOfType(const base::char16* source, int length, +void AppendStringOfType(const char16_t* source, + int length, SharedCharTypes type, CanonOutput* output) { - DoAppendStringOfType<base::char16, base::char16>( - source, length, type, output); + DoAppendStringOfType<char16_t, char16_t>(source, length, type, output); } bool ReadUTFChar(const char* str, int* begin, int length, @@ -261,7 +261,9 @@ bool ReadUTFChar(const char* str, int* begin, int length, return true; } -bool ReadUTFChar(const base::char16* str, int* begin, int length, +bool ReadUTFChar(const char16_t* str, + int* begin, + int length, unsigned* code_point_out) { // This depends on ints and int32s being the same thing. If they're not, it // will fail to compile. @@ -279,13 +281,15 @@ void AppendInvalidNarrowString(const char* spec, int begin, int end, DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output); } -void AppendInvalidNarrowString(const base::char16* spec, int begin, int end, +void AppendInvalidNarrowString(const char16_t* spec, + int begin, + int end, CanonOutput* output) { - DoAppendInvalidNarrowString<base::char16, base::char16>( - spec, begin, end, output); + DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output); } -bool ConvertUTF16ToUTF8(const base::char16* input, int input_len, +bool ConvertUTF16ToUTF8(const char16_t* input, + int input_len, CanonOutput* output) { bool success = true; for (int i = 0; i < input_len; i++) { @@ -296,8 +300,9 @@ bool ConvertUTF16ToUTF8(const base::char16* input, int input_len, return success; } -bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT<base::char16>* output) { +bool ConvertUTF8ToUTF16(const char* input, + int input_len, + CanonOutputT<char16_t>* output) { bool success = true; for (int i = 0; i < input_len; i++) { unsigned code_point; @@ -339,14 +344,14 @@ void SetupOverrideComponents(const char* base, } bool SetupUTF16OverrideComponents(const char* base, - const Replacements<base::char16>& repl, + const Replacements<char16_t>& repl, CanonOutput* utf8_buffer, URLComponentSource<char>* source, Parsed* parsed) { bool success = true; // Get the source and parsed structures of the things we are replacing. - const URLComponentSource<base::char16>& repl_source = repl.sources(); + const URLComponentSource<char16_t>& repl_source = repl.sources(); const Parsed& repl_parsed = repl.components(); success &= PrepareUTF16OverrideComponent( @@ -408,7 +413,7 @@ int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) { return 0; } -int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) { +int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) { if (radix != 10) return EINVAL; @@ -422,7 +427,7 @@ int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) { } for (int i = 0; i < written; ++i) { - buffer[i] = static_cast<base::char16>(temp[i]); + buffer[i] = char16_t{temp[i]}; } buffer[written] = '\0'; return 0; diff --git a/chromium/url/url_canon_internal.h b/chromium/url/url_canon_internal.h index 53ae8dd3ffc..5ace5f2ce73 100644 --- a/chromium/url/url_canon_internal.h +++ b/chromium/url/url_canon_internal.h @@ -79,7 +79,8 @@ inline bool IsComponentChar(unsigned char c) { void AppendStringOfType(const char* source, int length, SharedCharTypes type, CanonOutput* output); -void AppendStringOfType(const base::char16* source, int length, +void AppendStringOfType(const char16_t* source, + int length, SharedCharTypes type, CanonOutput* output); @@ -123,7 +124,7 @@ inline int IsDot(const CHAR* spec, int offset, int end) { // required for relative URL resolving to test for scheme equality. // // Returns 0 if the input character is not a valid scheme character. -char CanonicalSchemeChar(base::char16 ch); +char CanonicalSchemeChar(char16_t ch); // Write a single character, escaped, to the output. This always escapes: it // does no checking that thee character requires escaping. @@ -138,7 +139,7 @@ inline void AppendEscapedChar(UINCHAR ch, } // The character we'll substitute for undecodable or invalid characters. -extern const base::char16 kUnicodeReplacementCharacter; +extern const char16_t kUnicodeReplacementCharacter; // UTF-8 functions ------------------------------------------------------------ @@ -229,19 +230,19 @@ inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) { // can be incremented in a loop and will be ready for the next character. // (for a single-16-bit-word character, it will not be changed). COMPONENT_EXPORT(URL) -bool ReadUTFChar(const base::char16* str, +bool ReadUTFChar(const char16_t* str, int* begin, int length, unsigned* code_point_out); // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. inline void AppendUTF16Value(unsigned code_point, - CanonOutputT<base::char16>* output) { + CanonOutputT<char16_t>* output) { if (code_point > 0xffff) { - output->push_back(static_cast<base::char16>((code_point >> 10) + 0xd7c0)); - output->push_back(static_cast<base::char16>((code_point & 0x3ff) | 0xdc00)); + output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0)); + output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00)); } else { - output->push_back(static_cast<base::char16>(code_point)); + output->push_back(static_cast<char16_t>(code_point)); } } @@ -266,8 +267,10 @@ inline void AppendUTF16Value(unsigned code_point, // // Assumes that ch[begin] is within range in the array, but does not assume // that any following characters are. -inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin, - int length, CanonOutput* output) { +inline bool AppendUTF8EscapedChar(const char16_t* str, + int* begin, + int length, + CanonOutput* output) { // UTF-16 input. ReadUTFChar will handle invalid characters for us and give // us the kUnicodeReplacementCharacter, so we don't have to do special // checking after failure, just pass through the failure to the caller. @@ -301,7 +304,7 @@ inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length, inline bool Is8BitChar(char c) { return true; // this case is specialized to avoid a warning } -inline bool Is8BitChar(base::char16 c) { +inline bool Is8BitChar(char16_t c) { return c <= 255; } @@ -337,7 +340,9 @@ inline bool DecodeEscaped(const CHAR* spec, int* begin, int end, // the escaping rules are not guaranteed! void AppendInvalidNarrowString(const char* spec, int begin, int end, CanonOutput* output); -void AppendInvalidNarrowString(const base::char16* spec, int begin, int end, +void AppendInvalidNarrowString(const char16_t* spec, + int begin, + int end, CanonOutput* output); // Misc canonicalization helpers ---------------------------------------------- @@ -351,17 +356,17 @@ void AppendInvalidNarrowString(const base::char16* spec, int begin, int end, // return false in the failure case, and the caller should not continue as // normal. COMPONENT_EXPORT(URL) -bool ConvertUTF16ToUTF8(const base::char16* input, +bool ConvertUTF16ToUTF8(const char16_t* input, int input_len, CanonOutput* output); COMPONENT_EXPORT(URL) bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT<base::char16>* output); + CanonOutputT<char16_t>* output); // Converts from UTF-16 to 8-bit using the character set converter. If the // converter is NULL, this will use UTF-8. -void ConvertUTF16ToQueryEncoding(const base::char16* input, +void ConvertUTF16ToQueryEncoding(const char16_t* input, const Component& query, CharsetConverter* converter, CanonOutput* output); @@ -397,21 +402,21 @@ void SetupOverrideComponents(const char* base, // although we will have still done the override with "invalid characters" in // place of errors. bool SetupUTF16OverrideComponents(const char* base, - const Replacements<base::char16>& repl, + const Replacements<char16_t>& repl, CanonOutput* utf8_buffer, URLComponentSource<char>* source, Parsed* parsed); // Implemented in url_canon_path.cc, these are required by the relative URL // resolver as well, so we declare them here. -bool CanonicalizePartialPath(const char* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output); -bool CanonicalizePartialPath(const base::char16* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output); +bool CanonicalizePartialPathInternal(const char* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output); +bool CanonicalizePartialPathInternal(const char16_t* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output); #ifndef WIN32 @@ -419,7 +424,7 @@ bool CanonicalizePartialPath(const base::char16* spec, COMPONENT_EXPORT(URL) int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); COMPONENT_EXPORT(URL) -int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix); +int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix); // Secure template overloads for these functions template<size_t N> @@ -427,8 +432,8 @@ inline int _itoa_s(int value, char (&buffer)[N], int radix) { return _itoa_s(value, buffer, N, radix); } -template<size_t N> -inline int _itow_s(int value, base::char16 (&buffer)[N], int radix) { +template <size_t N> +inline int _itow_s(int value, char16_t (&buffer)[N], int radix) { return _itow_s(value, buffer, N, radix); } diff --git a/chromium/url/url_canon_ip.cc b/chromium/url/url_canon_ip.cc index 1859119f687..10e776d9e78 100644 --- a/chromium/url/url_canon_ip.cc +++ b/chromium/url/url_canon_ip.cc @@ -9,6 +9,8 @@ #include <limits> #include "base/check.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" #include "url/url_canon_internal.h" namespace url { @@ -593,6 +595,105 @@ bool DoCanonicalizeIPv6Address(const CHAR* spec, return true; } +// Method to check if something looks like a number. Used instead of +// IPv4ComponentToNumber() so that it counts things that look like bad base-8 +// (e.g. 09). +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +template <typename CHAR> +bool LooksLikeANumber(const CHAR* spec, const Component& component) { + // Empty components don't look like numbers. + if (!component.is_nonempty()) + return false; + + SharedCharTypes base = CHAR_DEC; + size_t start = component.begin; + if (component.len >= 2 && spec[start] == '0' && + (spec[start + 1] == 'x' || spec[start + 1] == 'X')) { + base = CHAR_HEX; + start += 2; + } + for (int i = start; i < component.end(); i++) { + if (!IsCharOfType(spec[i], base)) + return false; + } + return true; +} + +// Calculates the "HostSafetyStatus" of the provided hostname. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +template <typename CHAR> +HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec, + const Component& host) { + if (!host.is_nonempty()) + return HostSafetyStatus::kOk; + + // Find the last two components. + + // Number of identified components. Stops after second component. Does not + // include the empty terminal component, if the host ends with a dot. + int existing_components = 0; + // Parsed component values. Populated last component first. + Component components[2]; + + // Index of the character after the end of the current component. + int cur_component_end = host.end(); + + // Ignore terminal dot, if there is one. + if (spec[cur_component_end - 1] == '.') { + cur_component_end--; + // Nothing else to do if the host is just a dot. + if (host.begin == cur_component_end) + return HostSafetyStatus::kOk; + } + + for (int i = cur_component_end; /* nothing */; i--) { + DCHECK_GE(i, host.begin); + + // If `i` is not the first character of the component, continue. + if (i != host.begin && spec[i - 1] != '.') + continue; + + // Otherwise, i is the index of the the start of a component. + components[existing_components] = Component(i, cur_component_end - i); + existing_components++; + + // Finished parsing last component. + if (i == host.begin) + break; + + // If there's anything left to parse after the 2th component, nothing more + // to do. + if (existing_components == 2) + break; + + // The next component ends before the dot at spec[i]. `i` will be + // decremented when restarting the loop, so no need to modify it. + cur_component_end = i - 1; + } + + // If the last value doesn't look like a number, no need to do more work, as + // IPv6 and hostnames with non-numeric final components are all considered OK. + if (!LooksLikeANumber(spec, components[0])) + return HostSafetyStatus::kOk; + + url::RawCanonOutputT<char> ignored_output; + CanonHostInfo host_info; + CanonicalizeIPAddress(spec, host, &ignored_output, &host_info); + // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4 + // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean + // things like "].6" will correctly be considered already invalid, so will + // return kOk. + if (host_info.family != CanonHostInfo::NEUTRAL) + return HostSafetyStatus::kOk; + + if (LooksLikeANumber(spec, components[1])) + return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric; + + return HostSafetyStatus::kTopLevelDomainIsNumeric; +} + } // namespace void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) { @@ -650,11 +751,10 @@ bool FindIPv4Components(const char* spec, return DoFindIPv4Components<char, unsigned char>(spec, host, components); } -bool FindIPv4Components(const base::char16* spec, +bool FindIPv4Components(const char16_t* spec, const Component& host, Component components[4]) { - return DoFindIPv4Components<base::char16, base::char16>( - spec, host, components); + return DoFindIPv4Components<char16_t, char16_t>(spec, host, components); } void CanonicalizeIPAddress(const char* spec, @@ -669,15 +769,15 @@ void CanonicalizeIPAddress(const char* spec, return; } -void CanonicalizeIPAddress(const base::char16* spec, +void CanonicalizeIPAddress(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info) { - if (DoCanonicalizeIPv4Address<base::char16, base::char16>( - spec, host, output, host_info)) + if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output, + host_info)) return; - if (DoCanonicalizeIPv6Address<base::char16, base::char16>( - spec, host, output, host_info)) + if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output, + host_info)) return; } @@ -688,12 +788,12 @@ CanonHostInfo::Family IPv4AddressToNumber(const char* spec, return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components); } -CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec, +CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[4], int* num_ipv4_components) { - return DoIPv4AddressToNumber<base::char16>( - spec, host, address, num_ipv4_components); + return DoIPv4AddressToNumber<char16_t>(spec, host, address, + num_ipv4_components); } bool IPv6AddressToNumber(const char* spec, @@ -702,10 +802,19 @@ bool IPv6AddressToNumber(const char* spec, return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); } -bool IPv6AddressToNumber(const base::char16* spec, +bool IPv6AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[16]) { - return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address); + return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address); +} + +HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) { + return DoCheckHostnameSafety(spec, host); +} + +HostSafetyStatus CheckHostnameSafety(const char16_t* spec, + const Component& host) { + return DoCheckHostnameSafety(spec, host); } } // namespace url diff --git a/chromium/url/url_canon_ip.h b/chromium/url/url_canon_ip.h index d3cad367ff2..32d8f8875d7 100644 --- a/chromium/url/url_canon_ip.h +++ b/chromium/url/url_canon_ip.h @@ -5,8 +5,10 @@ #ifndef URL_URL_CANON_IP_H_ #define URL_URL_CANON_IP_H_ +#include <string> + #include "base/component_export.h" -#include "base/strings/string16.h" +#include "base/strings/string_piece_forward.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -43,7 +45,7 @@ bool FindIPv4Components(const char* spec, const Component& host, Component components[4]); COMPONENT_EXPORT(URL) -bool FindIPv4Components(const base::char16* spec, +bool FindIPv4Components(const char16_t* spec, const Component& host, Component components[4]); @@ -64,7 +66,7 @@ CanonHostInfo::Family IPv4AddressToNumber(const char* spec, unsigned char address[4], int* num_ipv4_components); COMPONENT_EXPORT(URL) -CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec, +CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[4], int* num_ipv4_components); @@ -79,10 +81,52 @@ bool IPv6AddressToNumber(const char* spec, const Component& host, unsigned char address[16]); COMPONENT_EXPORT(URL) -bool IPv6AddressToNumber(const base::char16* spec, +bool IPv6AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[16]); +// Temporary enum for collecting histograms at the DNS and URL level about +// hostname validity, for potentially updating the URL spec. +// +// This is used in histograms, so old values should not be reused, and new +// values should be added at the bottom. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +enum class HostSafetyStatus { + // Any canonical hostname that doesn't fit into any other class. IPv4 + // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are + // broken are also considered OK. + kOk = 0, + + // The top level domain looks numeric. This is basically means it either + // parses as a number per the URL spec, or is entirely numeric ("09" doesn't + // currently parse as a number, since the leading "0" indicates an octal + // value). + kTopLevelDomainIsNumeric = 1, + + // Both the top level domain and the next level domain look like a number, + // using the above definition. This is the case that is actually concerning - + // for these domains, the eTLD+1 is purely numeric, which means putting it as + // the hostname of a URL will potentially result in an IPv4 hostname. This is + // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this + // label will be returned instead. + kTwoHighestLevelDomainsAreNumeric = 2, + + kMaxValue = kTwoHighestLevelDomainsAreNumeric, +}; + +// Calculates the HostSafetyStatus of a hostname. Hostname should have been +// canonicalized. This function is only intended to be temporary, to inform +// decisions around tightening up what the URL parser considers valid hostnames. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +COMPONENT_EXPORT(URL) +HostSafetyStatus CheckHostnameSafety(const char* hostname, + const Component& host); +COMPONENT_EXPORT(URL) +HostSafetyStatus CheckHostnameSafety(const char16_t* hostname, + const Component& host); + } // namespace url #endif // URL_URL_CANON_IP_H_ diff --git a/chromium/url/url_canon_mailtourl.cc b/chromium/url/url_canon_mailtourl.cc index 8a7ff1ae6b7..f4fe2b4e819 100644 --- a/chromium/url/url_canon_mailtourl.cc +++ b/chromium/url/url_canon_mailtourl.cc @@ -90,13 +90,13 @@ bool CanonicalizeMailtoURL(const char* spec, URLComponentSource<char>(spec), parsed, output, new_parsed); } -bool CanonicalizeMailtoURL(const base::char16* spec, +bool CanonicalizeMailtoURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeMailtoURL<base::char16, base::char16>( - URLComponentSource<base::char16>(spec), parsed, output, new_parsed); + return DoCanonicalizeMailtoURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, output, new_parsed); } bool ReplaceMailtoURL(const char* base, @@ -113,7 +113,7 @@ bool ReplaceMailtoURL(const char* base, bool ReplaceMailtoURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed) { RawCanonOutput<1024> utf8; diff --git a/chromium/url/url_canon_path.cc b/chromium/url/url_canon_path.cc index c457ea32e61..d6fb64b5fa7 100644 --- a/chromium/url/url_canon_path.cc +++ b/chromium/url/url_canon_path.cc @@ -20,7 +20,8 @@ enum CharacterFlags { // table below more clear when neither ESCAPE or UNESCAPE is set. PASS = 0, - // This character requires special handling in DoPartialPath. Doing this test + // This character requires special handling in DoPartialPathInternal. Doing + // this test // first allows us to filter out the common cases of regular characters that // can be directly copied. SPECIAL = 1, @@ -235,10 +236,8 @@ void CheckForNestedEscapes(const CHAR* spec, } } -// Appends the given path to the output. It assumes that if the input path -// starts with a slash, it should be copied to the output. If no path has -// already been appended to the output (the case when not resolving -// relative URLs), the path should begin with a slash. +// Canonicalizes and appends the given path to the output. It assumes that if +// the input path starts with a slash, it should be copied to the output. // // If there are already path components (this mode is used when appending // relative paths for resolving), it assumes that the output already has @@ -248,11 +247,11 @@ void CheckForNestedEscapes(const CHAR* spec, // We do not collapse multiple slashes in a row to a single slash. It seems // no web browsers do this, and we don't want incompatibilities, even though // it would be correct for most systems. -template<typename CHAR, typename UCHAR> -bool DoPartialPath(const CHAR* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output) { +template <typename CHAR, typename UCHAR> +bool DoPartialPathInternal(const CHAR* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { int end = path.end(); // We use this variable to minimize the amount of work done when unescaping -- @@ -279,16 +278,12 @@ bool DoPartialPath(const CHAR* spec, // Needs special handling of some sort. int dotlen; if ((dotlen = IsDot(spec, i, end)) > 0) { - // See if this dot was preceded by a slash in the output. We - // assume that when canonicalizing paths, they will always - // start with a slash and not a dot, so we don't have to - // bounds check the output. + // See if this dot was preceded by a slash in the output. // // Note that we check this in the case of dots so we don't have to // special case slashes. Since slashes are much more common than // dots, this actually increases performance measurably (though // slightly). - DCHECK(output->length() > path_begin_in_output); if (output->length() > path_begin_in_output && output->at(output->length() - 1) == '/') { // Slash followed by a dot, check to see if this is means relative @@ -382,6 +377,21 @@ bool DoPartialPath(const CHAR* spec, return success; } +// Perform the same logic as in DoPartialPathInternal(), but updates the +// publicly exposed CanonOutput structure similar to DoPath(). Returns +// true if successful. +template <typename CHAR, typename UCHAR> +bool DoPartialPath(const CHAR* spec, + const Component& path, + CanonOutput* output, + Component* out_path) { + out_path->begin = output->length(); + bool success = + DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output); + out_path->len = output->length() - out_path->begin; + return success; +} + template<typename CHAR, typename UCHAR> bool DoPath(const CHAR* spec, const Component& path, @@ -397,7 +407,8 @@ bool DoPath(const CHAR* spec, if (!IsURLSlash(spec[path.begin])) output->push_back('/'); - success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output); + success = + DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output); } else { // No input, canonical path is a slash. output->push_back('/'); @@ -415,28 +426,41 @@ bool CanonicalizePath(const char* spec, return DoPath<char, unsigned char>(spec, path, output, out_path); } -bool CanonicalizePath(const base::char16* spec, +bool CanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { - return DoPath<base::char16, base::char16>(spec, path, output, out_path); + return DoPath<char16_t, char16_t>(spec, path, output, out_path); } bool CanonicalizePartialPath(const char* spec, const Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output, - output); + CanonOutput* output, + Component* out_path) { + return DoPartialPath<char, unsigned char>(spec, path, output, out_path); } -bool CanonicalizePartialPath(const base::char16* spec, +bool CanonicalizePartialPath(const char16_t* spec, const Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath<base::char16, base::char16>(spec, path, - path_begin_in_output, - output); + CanonOutput* output, + Component* out_path) { + return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path); +} + +bool CanonicalizePartialPathInternal(const char* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { + return DoPartialPathInternal<char, unsigned char>( + spec, path, path_begin_in_output, output); +} + +bool CanonicalizePartialPathInternal(const char16_t* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { + return DoPartialPathInternal<char16_t, char16_t>( + spec, path, path_begin_in_output, output); } } // namespace url diff --git a/chromium/url/url_canon_pathurl.cc b/chromium/url/url_canon_pathurl.cc index 7f30e608f5c..12c424829ee 100644 --- a/chromium/url/url_canon_pathurl.cc +++ b/chromium/url/url_canon_pathurl.cc @@ -88,13 +88,29 @@ bool CanonicalizePathURL(const char* spec, URLComponentSource<char>(spec), parsed, output, new_parsed); } -bool CanonicalizePathURL(const base::char16* spec, +bool CanonicalizePathURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizePathURL<base::char16, base::char16>( - URLComponentSource<base::char16>(spec), parsed, output, new_parsed); + return DoCanonicalizePathURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, output, new_parsed); +} + +void CanonicalizePathURLPath(const char* source, + const Component& component, + CanonOutput* output, + Component* new_component) { + DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0', + output, new_component); +} + +void CanonicalizePathURLPath(const char16_t* source, + const Component& component, + CanonOutput* output, + Component* new_component) { + DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0', + output, new_component); } bool ReplacePathURL(const char* base, @@ -111,7 +127,7 @@ bool ReplacePathURL(const char* base, bool ReplacePathURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed) { RawCanonOutput<1024> utf8; diff --git a/chromium/url/url_canon_query.cc b/chromium/url/url_canon_query.cc index bf59d104ca1..b3a1118cdf2 100644 --- a/chromium/url/url_canon_query.cc +++ b/chromium/url/url_canon_query.cc @@ -82,7 +82,7 @@ void RunConverter(const char* spec, // Runs the converter with the given UTF-16 input. We don't have to do // anything, but this overridden function allows us to use the same code // for both UTF-8 and UTF-16 input. -void RunConverter(const base::char16* spec, +void RunConverter(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output) { @@ -144,21 +144,20 @@ void CanonicalizeQuery(const char* spec, output, out_query); } -void CanonicalizeQuery(const base::char16* spec, +void CanonicalizeQuery(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output, Component* out_query) { - DoCanonicalizeQuery<base::char16, base::char16>(spec, query, converter, - output, out_query); + DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output, + out_query); } -void ConvertUTF16ToQueryEncoding(const base::char16* input, +void ConvertUTF16ToQueryEncoding(const char16_t* input, const Component& query, CharsetConverter* converter, CanonOutput* output) { - DoConvertToQueryEncoding<base::char16, base::char16>(input, query, - converter, output); + DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output); } } // namespace url diff --git a/chromium/url/url_canon_relative.cc b/chromium/url/url_canon_relative.cc index 9db211a22ef..2552251ad2a 100644 --- a/chromium/url/url_canon_relative.cc +++ b/chromium/url/url_canon_relative.cc @@ -358,8 +358,8 @@ bool DoResolveRelativePath(const char* base_url, int path_begin = output->length(); CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(), output); - success &= CanonicalizePartialPath(relative_url, path, path_begin, - output); + success &= CanonicalizePartialPathInternal(relative_url, path, path_begin, + output); out_parsed->path = MakeRange(path_begin, output->length()); // Copy the rest of the stuff after the path from the relative path. @@ -581,14 +581,14 @@ bool IsRelativeURL(const char* base, bool IsRelativeURL(const char* base, const Parsed& base_parsed, - const base::char16* fragment, + const char16_t* fragment, int fragment_len, bool is_base_hierarchical, bool* is_relative, Component* relative_component) { - return DoIsRelativeURL<base::char16>( - base, base_parsed, fragment, fragment_len, is_base_hierarchical, - is_relative, relative_component); + return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len, + is_base_hierarchical, is_relative, + relative_component); } bool ResolveRelativeURL(const char* base_url, @@ -607,14 +607,14 @@ bool ResolveRelativeURL(const char* base_url, bool ResolveRelativeURL(const char* base_url, const Parsed& base_parsed, bool base_is_file, - const base::char16* relative_url, + const char16_t* relative_url, const Component& relative_component, CharsetConverter* query_converter, CanonOutput* output, Parsed* out_parsed) { - return DoResolveRelativeURL<base::char16>( - base_url, base_parsed, base_is_file, relative_url, - relative_component, query_converter, output, out_parsed); + return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file, + relative_url, relative_component, + query_converter, output, out_parsed); } } // namespace url diff --git a/chromium/url/url_canon_stdstring.h b/chromium/url/url_canon_stdstring.h index c9a3feebbce..44edab7f56d 100644 --- a/chromium/url/url_canon_stdstring.h +++ b/chromium/url/url_canon_stdstring.h @@ -59,11 +59,11 @@ class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput { // references to std::strings. // Note: Extra const char* overloads are necessary to break ambiguities that // would otherwise exist for char literals. -template <typename STR> -class StringPieceReplacements : public Replacements<typename STR::value_type> { +template <typename CharT> +class StringPieceReplacements : public Replacements<CharT> { private: - using CharT = typename STR::value_type; - using StringPieceT = base::BasicStringPiece<STR>; + using StringT = std::basic_string<CharT>; + using StringPieceT = base::BasicStringPiece<CharT>; using ParentT = Replacements<CharT>; using SetterFun = void (ParentT::*)(const CharT*, const Component&); @@ -74,35 +74,35 @@ class StringPieceReplacements : public Replacements<typename STR::value_type> { public: void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); } void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); } - void SetSchemeStr(const STR&&) = delete; + void SetSchemeStr(const StringT&&) = delete; void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); } void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); } - void SetUsernameStr(const STR&&) = delete; + void SetUsernameStr(const StringT&&) = delete; void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); } void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); } - void SetPasswordStr(const STR&&) = delete; + void SetPasswordStr(const StringT&&) = delete; void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); } void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); } - void SetHostStr(const STR&&) = delete; + void SetHostStr(const StringT&&) = delete; void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); } void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); } - void SetPortStr(const STR&&) = delete; + void SetPortStr(const StringT&&) = delete; void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); } void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); } - void SetPathStr(const STR&&) = delete; + void SetPathStr(const StringT&&) = delete; void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); } void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); } - void SetQueryStr(const STR&&) = delete; + void SetQueryStr(const StringT&&) = delete; void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); } void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); } - void SetRefStr(const STR&&) = delete; + void SetRefStr(const StringT&&) = delete; }; } // namespace url diff --git a/chromium/url/url_canon_stdurl.cc b/chromium/url/url_canon_stdurl.cc index 8e59feeea95..c7e745445db 100644 --- a/chromium/url/url_canon_stdurl.cc +++ b/chromium/url/url_canon_stdurl.cc @@ -150,16 +150,16 @@ bool CanonicalizeStandardURL(const char* spec, output, new_parsed); } -bool CanonicalizeStandardURL(const base::char16* spec, +bool CanonicalizeStandardURL(const char16_t* spec, int spec_len, const Parsed& parsed, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeStandardURL<base::char16, base::char16>( - URLComponentSource<base::char16>(spec), parsed, scheme_type, - query_converter, output, new_parsed); + return DoCanonicalizeStandardURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter, + output, new_parsed); } // It might be nice in the future to optimize this so unchanged components don't @@ -189,7 +189,7 @@ bool ReplaceStandardURL(const char* base, // regular code path can be used. bool ReplaceStandardURL(const char* base, const Parsed& base_parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output, diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc index 625021950fd..fb27fe7b020 100644 --- a/chromium/url/url_canon_unittest.cc +++ b/chromium/url/url_canon_unittest.cc @@ -12,6 +12,7 @@ #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" +#include "url/url_canon_ip.h" #include "url/url_canon_stdstring.h" #include "url/url_test_utils.h" @@ -186,7 +187,7 @@ TEST(URLCanonTest, UTF) { out_str.clear(); StdStringCanonOutput output(&out_str); - base::string16 input_str( + std::u16string input_str( test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); int input_len = static_cast<int>(input_str.length()); bool success = true; @@ -205,7 +206,7 @@ TEST(URLCanonTest, UTF) { // UTF-16 -> UTF-8 std::string input8_str(utf_cases[i].input8); - base::string16 input16_str( + std::u16string input16_str( test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str)); @@ -258,7 +259,7 @@ TEST(URLCanonTest, Scheme) { out_str.clear(); StdStringCanonOutput output2(&out_str); - base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input)); + std::u16string wide_input(base::UTF8ToUTF16(scheme_cases[i].input)); in_comp.len = static_cast<int>(wide_input.length()); success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2, &out_comp); @@ -529,7 +530,7 @@ TEST(URLCanonTest, Host) { // Wide version. if (host_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(host_cases[i].input16)); int host_len = static_cast<int>(input16.length()); Component in_comp(0, host_len); @@ -580,7 +581,7 @@ TEST(URLCanonTest, Host) { // Wide version. if (host_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(host_cases[i].input16)); int host_len = static_cast<int>(input16.length()); Component in_comp(0, host_len); @@ -703,7 +704,7 @@ TEST(URLCanonTest, IPv4) { } // 16-bit version. - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(cases[i].input16)); component = Component(0, static_cast<int>(input16.length())); @@ -856,7 +857,7 @@ TEST(URLCanonTest, IPv6) { } // 16-bit version. - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(cases[i].input16)); component = Component(0, static_cast<int>(input16.length())); @@ -988,7 +989,7 @@ TEST(URLCanonTest, UserInfo) { // Now try the wide version out_str.clear(); StdStringCanonOutput output2(&out_str); - base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input)); + std::u16string wide_input(base::UTF8ToUTF16(user_info_cases[i].input)); success = CanonicalizeUserInfo(wide_input.c_str(), parsed.username, wide_input.c_str(), @@ -1051,7 +1052,7 @@ TEST(URLCanonTest, Port) { // Now try the wide version out_str.clear(); StdStringCanonOutput output2(&out_str); - base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input)); + std::u16string wide_input(base::UTF8ToUTF16(port_cases[i].input)); success = CanonicalizePort(wide_input.c_str(), in_comp, port_cases[i].default_port, @@ -1066,105 +1067,117 @@ TEST(URLCanonTest, Port) { } } -TEST(URLCanonTest, Path) { - DualComponentCase path_cases[] = { - // ----- path collapsing tests ----- - {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, - {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, - {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, - {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, - // double dots followed by a slash or the end of the string count - {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, - {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, - // don't count double dots when they aren't followed by a slash - {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, - // some in the middle - {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), - true}, - {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", - Component(0, 2), true}, - // we should not be able to go above the root - {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, - {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), - true}, - // escaped dots should be unescaped and treated the same as dots - {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, - {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, - {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", - "/..bar", Component(0, 6), true}, - // Multiple slashes in a row should be preserved and treated like empty - // directory names. - {"////../..", L"////../..", "//", Component(0, 2), true}, - - // ----- escaping tests ----- - {"/foo", L"/foo", "/foo", Component(0, 4), true}, - // Valid escape sequence - {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, - // Invalid escape sequence we should pass through unchanged. - {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, - {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, - // Invalid escape sequence: bad characters should be treated the same as - // the sourrounding text, not as escaped (in this case, UTF-8). - {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, - {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), - true}, - {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", - Component(0, 22), true}, - // Regular characters that are escaped should be unescaped - {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, - // Funny characters that are unescaped should be escaped - {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, - {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, - // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, - // Some characters should be passed through unchanged regardless of esc. - {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), - true}, - // Characters that are properly escaped should not have the case changed - // of hex letters. - {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), - true}, - // Funny characters that are unescaped should be escaped - {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, - // Backslashes should get converted to forward slashes - {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, - // Hashes found in paths (possibly only when the caller explicitly sets - // the path on an already-parsed URL) should be escaped. - {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, - // %7f should be allowed and %3D should not be unescaped (these were wrong - // in a previous version). - {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", - "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, - // @ should be passed through unchanged (escaped or unescaped). - {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, - // Nested escape sequences should result in escaping the leading '%' if - // unescaping would result in a new escape sequence. - {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, - {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, - {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, - // Make sure truncated "nested" escapes don't result in reading off the - // string end. - {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, - // Don't unescape the leading '%' if unescaping doesn't result in a valid - // new escape sequence. - {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, - {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, - // Don't erroneously downcast a UTF-16 charater in a way that makes it - // look like part of an escape sequence. - {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, - - // ----- encoding tests ----- - // Basic conversions - {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", - L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", - Component(0, 37), true}, - // Invalid unicode characters should fail. We only do validation on - // UTF-16 input, so this doesn't happen on 8-bit. - {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, - {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, - }; +DualComponentCase kCommonPathCases[] = { + // ----- path collapsing tests ----- + {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, + {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, + {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, + {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, + // double dots followed by a slash or the end of the string count + {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, + {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, + // don't count double dots when they aren't followed by a slash + {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, + // some in the middle + {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true}, + {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", + Component(0, 2), true}, + // we should not be able to go above the root + {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, + {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true}, + // escaped dots should be unescaped and treated the same as dots + {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, + {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, + {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", + "/..bar", Component(0, 6), true}, + // Multiple slashes in a row should be preserved and treated like empty + // directory names. + {"////../..", L"////../..", "//", Component(0, 2), true}, + + // ----- escaping tests ----- + {"/foo", L"/foo", "/foo", Component(0, 4), true}, + // Valid escape sequence + {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, + // Invalid escape sequence we should pass through unchanged. + {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, + {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, + // Invalid escape sequence: bad characters should be treated the same as + // the surrounding text, not as escaped (in this case, UTF-8). + {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, + {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true}, + {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), + true}, + // Regular characters that are escaped should be unescaped + {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, + // Funny characters that are unescaped should be escaped + {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, + {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, + // Invalid characters that are escaped should cause a failure. + {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, + // Some characters should be passed through unchanged regardless of esc. + {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), + true}, + // Characters that are properly escaped should not have the case changed + // of hex letters. + {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), + true}, + // Funny characters that are unescaped should be escaped + {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, + // Backslashes should get converted to forward slashes + {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, + // Hashes found in paths (possibly only when the caller explicitly sets + // the path on an already-parsed URL) should be escaped. + {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, + // %7f should be allowed and %3D should not be unescaped (these were wrong + // in a previous version). + {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", + "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, + // @ should be passed through unchanged (escaped or unescaped). + {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, + // Nested escape sequences should result in escaping the leading '%' if + // unescaping would result in a new escape sequence. + {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, + {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, + {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, + // Make sure truncated "nested" escapes don't result in reading off the + // string end. + {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, + // Don't unescape the leading '%' if unescaping doesn't result in a valid + // new escape sequence. + {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, + {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, + // Don't erroneously downcast a UTF-16 character in a way that makes it + // look like part of an escape sequence. + {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, + + // ----- encoding tests ----- + // Basic conversions + {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", + L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + Component(0, 37), true}, + // Invalid unicode characters should fail. We only do validation on + // UTF-16 input, so this doesn't happen on 8-bit. + {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, + {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, +}; - for (size_t i = 0; i < base::size(path_cases); i++) { +typedef bool (*CanonFunc8Bit)(const char*, + const Component&, + CanonOutput*, + Component*); +typedef bool (*CanonFunc16Bit)(const char16_t*, + const Component&, + CanonOutput*, + Component*); + +void DoPathTest(const DualComponentCase* path_cases, + size_t num_cases, + CanonFunc8Bit canon_func_8, + CanonFunc16Bit canon_func_16) { + for (size_t i = 0; i < num_cases; i++) { + testing::Message scope_message; + scope_message << path_cases[i].input8 << "," << path_cases[i].input16; + SCOPED_TRACE(scope_message); if (path_cases[i].input8) { int len = static_cast<int>(strlen(path_cases[i].input8)); Component in_comp(0, len); @@ -1172,7 +1185,7 @@ TEST(URLCanonTest, Path) { std::string out_str; StdStringCanonOutput output(&out_str); bool success = - CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp); + canon_func_8(path_cases[i].input8, in_comp, &output, &out_comp); output.Complete(); EXPECT_EQ(path_cases[i].expected_success, success); @@ -1182,7 +1195,7 @@ TEST(URLCanonTest, Path) { } if (path_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(path_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1191,7 +1204,7 @@ TEST(URLCanonTest, Path) { StdStringCanonOutput output(&out_str); bool success = - CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp); + canon_func_16(input16.c_str(), in_comp, &output, &out_comp); output.Complete(); EXPECT_EQ(path_cases[i].expected_success, success); @@ -1200,6 +1213,11 @@ TEST(URLCanonTest, Path) { EXPECT_EQ(path_cases[i].expected, out_str); } } +} + +TEST(URLCanonTest, Path) { + DoPathTest(kCommonPathCases, base::size(kCommonPathCases), CanonicalizePath, + CanonicalizePath); // Manual test: embedded NULLs should be escaped and the URL should be marked // as invalid. @@ -1215,6 +1233,18 @@ TEST(URLCanonTest, Path) { EXPECT_EQ("/ab%00c", out_str); } +TEST(URLCanonTest, PartialPath) { + DualComponentCase partial_path_cases[] = { + {".html", L".html", ".html", Component(0, 5), true}, + {"", L"", "", Component(0, 0), true}, + }; + + DoPathTest(kCommonPathCases, base::size(kCommonPathCases), + CanonicalizePartialPath, CanonicalizePartialPath); + DoPathTest(partial_path_cases, base::size(partial_path_cases), + CanonicalizePartialPath, CanonicalizePartialPath); +} + TEST(URLCanonTest, Query) { struct QueryCase { const char* input8; @@ -1258,7 +1288,7 @@ TEST(URLCanonTest, Query) { } if (query_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(query_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1332,7 +1362,7 @@ TEST(URLCanonTest, Ref) { // 16-bit input if (ref_cases[i].input16) { - base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(ref_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1952,6 +1982,53 @@ TEST(URLCanonTest, CanonicalizePathURL) { } } +TEST(URLCanonTest, CanonicalizePathURLPath) { + struct PathCase { + std::string input; + std::wstring input16; + std::string expected; + } path_cases[] = { + {"Foo", L"Foo", "Foo"}, + {"\":This /is interesting;?#", L"\":This /is interesting;?#", + "\":This /is interesting;?#"}, + {"\uFFFF", L"\uFFFF", "%EF%BF%BD"}, + }; + + for (size_t i = 0; i < base::size(path_cases); i++) { + // 8-bit string input + std::string out_str; + StdStringCanonOutput output(&out_str); + url::Component out_component; + CanonicalizePathURLPath(path_cases[i].input.data(), + Component(0, path_cases[i].input.size()), &output, + &out_component); + output.Complete(); + + EXPECT_EQ(path_cases[i].expected, out_str); + + EXPECT_EQ(0, out_component.begin); + EXPECT_EQ(path_cases[i].expected.size(), + static_cast<size_t>(out_component.len)); + + // 16-bit string input + std::string out_str16; + StdStringCanonOutput output16(&out_str16); + url::Component out_component16; + std::u16string input16( + test_utils::TruncateWStringToUTF16(path_cases[i].input16.data())); + CanonicalizePathURLPath(input16.c_str(), + Component(0, path_cases[i].input16.size()), + &output16, &out_component16); + output16.Complete(); + + EXPECT_EQ(path_cases[i].expected, out_str16); + + EXPECT_EQ(0, out_component16.begin); + EXPECT_EQ(path_cases[i].expected.size(), + static_cast<size_t>(out_component16.len)); + } +} + TEST(URLCanonTest, CanonicalizeMailtoURL) { struct URLCase { const char* input; @@ -2086,17 +2163,17 @@ TEST(URLCanonTest, _itow_s) { // We fill the buffer with 0xff to ensure that it's getting properly // null-terminated. We also allocate one byte more than what we tell // _itoa_s about, and ensure that the extra byte is untouched. - base::char16 buf[6]; + char16_t buf[6]; const char fill_mem = 0xff; - const base::char16 fill_char = 0xffff; + const char16_t fill_char = 0xffff; memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf)); + EXPECT_EQ(u"12", std::u16string(buf)); EXPECT_EQ(fill_char, buf[3]); // Test the edge cases - exactly the buffer size and one over EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf)); + EXPECT_EQ(u"1234", std::u16string(buf)); EXPECT_EQ(fill_char, buf[5]); memset(buf, fill_mem, sizeof(buf)); @@ -2106,13 +2183,12 @@ TEST(URLCanonTest, _itow_s) { // Test the template overload (note that this will see the full buffer) memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12, buf, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12"), - base::string16(buf)); + EXPECT_EQ(u"12", std::u16string(buf)); EXPECT_EQ(fill_char, buf[3]); memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12345, buf, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf)); + EXPECT_EQ(u"12345", std::u16string(buf)); EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10)); } @@ -2343,12 +2419,12 @@ TEST(URLCanonTest, ReplacementOverflow) { // Override two components, the path with something short, and the query with // something long enough to trigger the bug. - Replacements<base::char16> repl; - base::string16 new_query; + Replacements<char16_t> repl; + std::u16string new_query; for (int i = 0; i < 4800; i++) new_query.push_back('a'); - base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo")); + std::u16string new_path(test_utils::TruncateWStringToUTF16(L"/foo")); repl.SetPath(new_path.c_str(), Component(0, 4)); repl.SetQuery(new_query.c_str(), Component(0, static_cast<int>(new_query.length()))); @@ -2398,41 +2474,41 @@ TEST(URLCanonTest, IDNToASCII) { RawCanonOutputW<1024> output; // Basic ASCII test. - base::string16 str = base::UTF8ToUTF16("hello"); + std::u16string str = u"hello"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("hello"), base::string16(output.data())); + EXPECT_EQ(u"hello", std::u16string(output.data())); output.set_length(0); // Mixed ASCII/non-ASCII. - str = base::UTF8ToUTF16("hellö"); + str = u"hellö"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data())); + EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data())); output.set_length(0); // All non-ASCII. - str = base::UTF8ToUTF16("你好"); + str = u"你好"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--6qq79v"), base::string16(output.data())); + EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data())); output.set_length(0); // Characters that need mapping (the resulting Punycode is the encoding for // "1⁄4"). - str = base::UTF8ToUTF16("¼"); + str = u"¼"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--14-c6t"), base::string16(output.data())); + EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data())); output.set_length(0); // String to encode already starts with "xn--", and all ASCII. Should not // modify the string. - str = base::UTF8ToUTF16("xn--hell-8qa"); + str = u"xn--hell-8qa"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data())); + EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data())); output.set_length(0); // String to encode already starts with "xn--", and mixed ASCII/non-ASCII. // Should fail, due to a special case: if the label starts with "xn--", it // should be parsed as Punycode, which must be all ASCII. - str = base::UTF8ToUTF16("xn--hellö"); + str = u"xn--hellö"; EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); output.set_length(0); @@ -2440,9 +2516,120 @@ TEST(URLCanonTest, IDNToASCII) { // This tests that there is still an error for the character '⁄' (U+2044), // which would be a valid ASCII character, U+0044, if the high byte were // ignored. - str = base::UTF8ToUTF16("xn--1⁄4"); + str = u"xn--1⁄4"; EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); output.set_length(0); } +TEST(URLCanonTest, URLSafetyStatus) { + const struct { + const char* host; + HostSafetyStatus expected_safety_status; + } kTestCases[] = { + // Empty components are ok. + {"", HostSafetyStatus::kOk}, + {".", HostSafetyStatus::kOk}, + {"..", HostSafetyStatus::kOk}, + + // Hostnames with purely non-numeric components are ok. + {"com", HostSafetyStatus::kOk}, + {"a.com", HostSafetyStatus::kOk}, + {"a.b.com", HostSafetyStatus::kOk}, + + // Hostnames with components with letters and numbers are ok. + {"1com", HostSafetyStatus::kOk}, + {"0a.0com", HostSafetyStatus::kOk}, + {"0xa.0xb.0xcom", HostSafetyStatus::kOk}, + {"com1", HostSafetyStatus::kOk}, + {"a1.com1", HostSafetyStatus::kOk}, + {"a1.b1.com1", HostSafetyStatus::kOk}, + + // Hostnames components that are numbers that are before a final + // non-numeric component are ok. + {"1.com", HostSafetyStatus::kOk}, + {"0.1.2com", HostSafetyStatus::kOk}, + + // Invalid hostnames are ok. + {"[", HostSafetyStatus::kOk}, + + // IPv6 hostnames are ok. + {"[::]", HostSafetyStatus::kOk}, + {"[2001:db8::1]", HostSafetyStatus::kOk}, + + // IPv4 hostnames are ok. + {"1.2.3.4", HostSafetyStatus::kOk}, + // IPv4 hostnames with creative representations are ok. + {"01.02.03.04", HostSafetyStatus::kOk}, + {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk}, + {"1.2", HostSafetyStatus::kOk}, + {"1.2.3", HostSafetyStatus::kOk}, + {"0", HostSafetyStatus::kOk}, + {"0x0", HostSafetyStatus::kOk}, + {"07", HostSafetyStatus::kOk}, + + // Hostnames with a final problematic top level domain. + {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {".0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + + // Hostnames with problematic two highest level domains. + {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + }; + + for (const auto& test_case : kTestCases) { + // Test with ASCII. + SCOPED_TRACE(test_case.host); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(test_case.host, + Component(0, strlen(test_case.host)))); + + // Test with ASCII and terminal dot, which shouldn't affect results for + // anything that doesn't already end in a dot (or anything that only has + // dots). + std::string host_with_dot = test_case.host; + host_with_dot += "."; + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(host_with_dot.c_str(), + Component(0, host_with_dot.size()))); + + // Test with ASCII and characters that are not part of the component. + std::string host_with_bonus_characters = test_case.host; + host_with_bonus_characters = "00" + host_with_bonus_characters + "00"; + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(host_with_bonus_characters.c_str(), + Component(2, strlen(test_case.host)))); + + // Test with UTF-16. + std::u16string utf16 = base::UTF8ToUTF16(test_case.host); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size()))); + + // Test with UTF-16 and terminal dot. + std::u16string utf16_with_dot = base::UTF8ToUTF16(host_with_dot); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16_with_dot.c_str(), + Component(0, utf16_with_dot.size()))); + + // Test with UTF-16 and characters that are not part of the component. + std::u16string utf16_with_bonus_characters = + base::UTF8ToUTF16(host_with_bonus_characters); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16_with_bonus_characters.c_str(), + Component(2, utf16.size()))); + } +} + } // namespace url diff --git a/chromium/url/url_file.h b/chromium/url/url_file.h index 796d12c1880..45b8d9a5bee 100644 --- a/chromium/url/url_file.h +++ b/chromium/url/url_file.h @@ -16,7 +16,7 @@ namespace url { #ifdef WIN32 // We allow both "c:" and "c|" as drive identifiers. -inline bool IsWindowsDriveSeparator(base::char16 ch) { +inline bool IsWindowsDriveSeparator(char16_t ch) { return ch == ':' || ch == '|'; } diff --git a/chromium/url/url_idna_icu.cc b/chromium/url/url_idna_icu.cc index c92029c8e6d..4bd6a885dda 100644 --- a/chromium/url/url_idna_icu.cc +++ b/chromium/url/url_idna_icu.cc @@ -11,7 +11,6 @@ #include <ostream> #include "base/check_op.h" -#include "base/i18n/uchar.h" #include "base/no_destructor.h" #include "third_party/icu/source/common/unicode/uidna.h" #include "third_party/icu/source/common/unicode/utypes.h" @@ -83,7 +82,7 @@ UIDNA* GetUIDNA() { // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII // version with StringByteSink. That way, we can avoid C wrappers and additional // string conversion. -bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) { DCHECK(output->length() == 0); // Output buffer is assumed empty. UIDNA* uidna = GetUIDNA(); @@ -91,10 +90,8 @@ bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { while (true) { UErrorCode err = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; - int output_length = - uidna_nameToASCII(uidna, base::i18n::ToUCharPtr(src), src_len, - base::i18n::ToUCharPtr(output->data()), - output->capacity(), &info, &err); + int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(), + output->capacity(), &info, &err); if (U_SUCCESS(err) && info.errors == 0) { output->set_length(output_length); return true; diff --git a/chromium/url/url_idna_icu_alternatives_android.cc b/chromium/url/url_idna_icu_alternatives_android.cc index 6f31c81d16d..19df70f9987 100644 --- a/chromium/url/url_idna_icu_alternatives_android.cc +++ b/chromium/url/url_idna_icu_alternatives_android.cc @@ -4,9 +4,10 @@ #include <string.h> +#include <string> + #include "base/android/jni_android.h" #include "base/android/jni_string.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "url/url_canon_internal.h" #include "url/url_jni_headers/IDNStringUtil_jni.h" @@ -17,7 +18,7 @@ namespace url { // This uses the JDK's conversion function, which uses IDNA 2003, unlike the // ICU implementation. -bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) { DCHECK_EQ(0, output->length()); // Output buffer is assumed empty. JNIEnv* env = base::android::AttachCurrentThread(); @@ -30,7 +31,7 @@ bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { if (java_result.is_null()) return false; - base::string16 utf16_result = + std::u16string utf16_result = base::android::ConvertJavaStringToUTF16(java_result); output->Append(utf16_result.data(), static_cast<int>(utf16_result.size())); return true; diff --git a/chromium/url/url_idna_icu_alternatives_ios.mm b/chromium/url/url_idna_icu_alternatives_ios.mm index 66b844e8a81..c5da3593269 100644 --- a/chromium/url/url_idna_icu_alternatives_ios.mm +++ b/chromium/url/url_idna_icu_alternatives_ios.mm @@ -4,7 +4,8 @@ #include <string.h> -#include "base/strings/string16.h" +#include <string> + #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" @@ -14,7 +15,7 @@ namespace url { // Only allow ASCII to avoid ICU dependency. Use NSString+IDN // to convert non-ASCII URL prior to passing to API. -bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) { if (base::IsStringASCII(base::StringPiece16(src, src_len))) { output->Append(src, src_len); return true; diff --git a/chromium/url/url_parse_file.cc b/chromium/url/url_parse_file.cc index dac995941be..7d86c7b8f2d 100644 --- a/chromium/url/url_parse_file.cc +++ b/chromium/url/url_parse_file.cc @@ -215,7 +215,7 @@ void ParseFileURL(const char* url, int url_len, Parsed* parsed) { DoParseFileURL(url, url_len, parsed); } -void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) { +void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseFileURL(url, url_len, parsed); } diff --git a/chromium/url/url_parse_internal.h b/chromium/url/url_parse_internal.h index 76308780c46..4e2527a3642 100644 --- a/chromium/url/url_parse_internal.h +++ b/chromium/url/url_parse_internal.h @@ -12,13 +12,13 @@ namespace url { // We treat slashes and backslashes the same for IE compatibility. -inline bool IsURLSlash(base::char16 ch) { +inline bool IsURLSlash(char16_t ch) { return ch == '/' || ch == '\\'; } // Returns true if we should trim this character from the URL because it is a // space or a control character. -inline bool ShouldTrimFromURL(base::char16 ch) { +inline bool ShouldTrimFromURL(char16_t ch) { return ch <= ' '; } @@ -67,13 +67,12 @@ void ParsePathInternal(const char* spec, Component* filepath, Component* query, Component* ref); -void ParsePathInternal(const base::char16* spec, +void ParsePathInternal(const char16_t* spec, const Component& path, Component* filepath, Component* query, Component* ref); - // Given a spec and a pointer to the character after the colon following the // scheme, this parses it and fills in the structure, Every item in the parsed // structure is filled EXCEPT for the scheme, which is untouched. @@ -81,7 +80,7 @@ void ParseAfterScheme(const char* spec, int spec_len, int after_scheme, Parsed* parsed); -void ParseAfterScheme(const base::char16* spec, +void ParseAfterScheme(const char16_t* spec, int spec_len, int after_scheme, Parsed* parsed); diff --git a/chromium/url/url_test_utils.h b/chromium/url/url_test_utils.h index f4f51da4eb4..bb75c747e39 100644 --- a/chromium/url/url_test_utils.h +++ b/chromium/url/url_test_utils.h @@ -10,7 +10,6 @@ #include <string> -#include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/url_canon_internal.h" @@ -24,11 +23,11 @@ namespace test_utils { // in base bacause it passes invalid UTF-16 characters which is important for // test purposes. As a result, this is not meant to handle true UTF-32 encoded // strings. -inline base::string16 TruncateWStringToUTF16(const wchar_t* src) { - base::string16 str; +inline std::u16string TruncateWStringToUTF16(const wchar_t* src) { + std::u16string str; int length = static_cast<int>(wcslen(src)); for (int i = 0; i < length; ++i) { - str.push_back(static_cast<base::char16>(src[i])); + str.push_back(static_cast<char16_t>(src[i])); } return str; } diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc index 49cc6e689f3..c2456e20df8 100644 --- a/chromium/url/url_util.cc +++ b/chromium/url/url_util.cc @@ -137,7 +137,8 @@ template<typename CHAR> struct CharToStringPiece { template<> struct CharToStringPiece<char> { typedef base::StringPiece Piece; }; -template<> struct CharToStringPiece<base::char16> { +template <> +struct CharToStringPiece<char16_t> { typedef base::StringPiece16 Piece; }; @@ -468,10 +469,10 @@ void DoSchemeModificationPreamble() { // the SchemeRegistry has been used. // // This normally means you're trying to set up a new scheme too late or using - // the SchemeRegistry too early in your application's init process. Make sure - // that you haven't added any static GURL initializers in tests. + // the SchemeRegistry too early in your application's init process. DCHECK(!g_scheme_registries_used.load()) - << "Trying to add a scheme after the lists have been used."; + << "Trying to add a scheme after the lists have been used. " + "Make sure that you haven't added any static GURL initializers in tests."; // If this assert triggers, it means you've called Add*Scheme after // LockSchemeRegistries has been called (see the header file for @@ -557,6 +558,15 @@ void AddStandardScheme(const char* new_scheme, SchemeType type) { &GetSchemeRegistryWithoutLocking()->standard_schemes); } +std::vector<std::string> GetStandardSchemes() { + std::vector<std::string> result; + result.reserve(GetSchemeRegistry().standard_schemes.size()); + for (const auto& entry : GetSchemeRegistry().standard_schemes) { + result.push_back(entry.scheme); + } + return result; +} + void AddReferrerScheme(const char* new_scheme, SchemeType type) { DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistryWithoutLocking()->referrer_schemes); @@ -638,13 +648,13 @@ bool GetStandardSchemeType(const char* spec, return DoIsStandard(spec, scheme, type); } -bool GetStandardSchemeType(const base::char16* spec, +bool GetStandardSchemeType(const char16_t* spec, const Component& scheme, SchemeType* type) { return DoIsStandard(spec, scheme, type); } -bool IsStandard(const base::char16* spec, const Component& scheme) { +bool IsStandard(const char16_t* spec, const Component& scheme) { SchemeType unused_scheme_type; return DoIsStandard(spec, scheme, &unused_scheme_type); } @@ -662,7 +672,7 @@ bool FindAndCompareScheme(const char* str, return DoFindAndCompareScheme(str, str_len, compare, found_scheme); } -bool FindAndCompareScheme(const base::char16* str, +bool FindAndCompareScheme(const char16_t* str, int str_len, const char* compare, Component* found_scheme) { @@ -723,7 +733,7 @@ bool Canonicalize(const char* spec, charset_converter, output, output_parsed); } -bool Canonicalize(const base::char16* spec, +bool Canonicalize(const char16_t* spec, int spec_len, bool trim_path_end, CharsetConverter* charset_converter, @@ -749,7 +759,7 @@ bool ResolveRelative(const char* base_spec, bool ResolveRelative(const char* base_spec, int base_spec_len, const Parsed& base_parsed, - const base::char16* relative, + const char16_t* relative, int relative_length, CharsetConverter* charset_converter, CanonOutput* output, @@ -773,7 +783,7 @@ bool ReplaceComponents(const char* spec, bool ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* out_parsed) { @@ -853,7 +863,7 @@ bool CompareSchemeComponent(const char* spec, return DoCompareSchemeComponent(spec, component, compare_to); } -bool CompareSchemeComponent(const base::char16* spec, +bool CompareSchemeComponent(const char16_t* spec, const Component& component, const char* compare_to) { return DoCompareSchemeComponent(spec, component, compare_to); diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h index d4f5e1798dd..db7e08cc29c 100644 --- a/chromium/url/url_util.h +++ b/chromium/url/url_util.h @@ -10,7 +10,6 @@ #include <vector> #include "base/component_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -63,6 +62,14 @@ COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView(); COMPONENT_EXPORT(URL) void AddStandardScheme(const char* new_scheme, SchemeType scheme_type); +// Returns the list of schemes registered for "standard" URLs. Note, this +// should not be used if you just need to check if your protocol is standard +// or not. Instead use the IsStandard() function above as its much more +// efficient. This function should only be used where you need to perform +// other operations against the standard scheme list. +COMPONENT_EXPORT(URL) +std::vector<std::string> GetStandardSchemes(); + // Adds an application-defined scheme to the internal list of schemes allowed // for referrers. COMPONENT_EXPORT(URL) @@ -134,7 +141,7 @@ bool FindAndCompareScheme(const char* str, const char* compare, Component* found_scheme); COMPONENT_EXPORT(URL) -bool FindAndCompareScheme(const base::char16* str, +bool FindAndCompareScheme(const char16_t* str, int str_len, const char* compare, Component* found_scheme); @@ -144,7 +151,7 @@ inline bool FindAndCompareScheme(const std::string& str, return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), compare, found_scheme); } -inline bool FindAndCompareScheme(const base::string16& str, +inline bool FindAndCompareScheme(const std::u16string& str, const char* compare, Component* found_scheme) { return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), @@ -156,7 +163,7 @@ inline bool FindAndCompareScheme(const base::string16& str, COMPONENT_EXPORT(URL) bool IsStandard(const char* spec, const Component& scheme); COMPONENT_EXPORT(URL) -bool IsStandard(const base::char16* spec, const Component& scheme); +bool IsStandard(const char16_t* spec, const Component& scheme); // Returns true if the given scheme identified by |scheme| within |spec| is in // the list of allowed schemes for referrers (see AddReferrerScheme). @@ -171,7 +178,7 @@ bool GetStandardSchemeType(const char* spec, const Component& scheme, SchemeType* type); COMPONENT_EXPORT(URL) -bool GetStandardSchemeType(const base::char16* spec, +bool GetStandardSchemeType(const char16_t* spec, const Component& scheme, SchemeType* type); @@ -213,7 +220,7 @@ bool Canonicalize(const char* spec, CanonOutput* output, Parsed* output_parsed); COMPONENT_EXPORT(URL) -bool Canonicalize(const base::char16* spec, +bool Canonicalize(const char16_t* spec, int spec_len, bool trim_path_end, CharsetConverter* charset_converter, @@ -243,7 +250,7 @@ COMPONENT_EXPORT(URL) bool ResolveRelative(const char* base_spec, int base_spec_len, const Parsed& base_parsed, - const base::char16* relative, + const char16_t* relative, int relative_length, CharsetConverter* charset_converter, CanonOutput* output, @@ -265,7 +272,7 @@ COMPONENT_EXPORT(URL) bool ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed, - const Replacements<base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* out_parsed); diff --git a/chromium/url/url_util_internal.h b/chromium/url/url_util_internal.h index 756c736d369..91466e1d5d6 100644 --- a/chromium/url/url_util_internal.h +++ b/chromium/url/url_util_internal.h @@ -7,7 +7,6 @@ #include <string> -#include "base/strings/string16.h" #include "url/third_party/mozilla/url_parse.h" namespace url { @@ -17,7 +16,7 @@ namespace url { bool CompareSchemeComponent(const char* spec, const Component& component, const char* compare_to); -bool CompareSchemeComponent(const base::char16* spec, +bool CompareSchemeComponent(const char16_t* spec, const Component& component, const char* compare_to); diff --git a/chromium/url/url_util_unittest.cc b/chromium/url/url_util_unittest.cc index ea4cd82aa7a..d41da4f4d14 100644 --- a/chromium/url/url_util_unittest.cc +++ b/chromium/url/url_util_unittest.cc @@ -136,6 +136,16 @@ TEST_F(URLUtilTest, GetStandardSchemeType) { &scheme_type)); } +TEST_F(URLUtilTest, GetStandardSchemes) { + std::vector<std::string> expected = { + kHttpsScheme, kHttpScheme, kFileScheme, + kFtpScheme, kWssScheme, kWsScheme, + kFileSystemScheme, kQuicTransportScheme, "foo", + }; + AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY); + EXPECT_EQ(expected, GetStandardSchemes()); +} + TEST_F(URLUtilTest, ReplaceComponents) { Parsed parsed; RawCanonOutputT<char> output; @@ -236,34 +246,33 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) { for (size_t i = 0; i < base::size(decode_cases); i++) { const char* input = decode_cases[i].input; - RawCanonOutputT<base::char16> output; + RawCanonOutputT<char16_t> output; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8OrIsomorphic, &output); - EXPECT_EQ(decode_cases[i].output, - base::UTF16ToUTF8(base::string16(output.data(), - output.length()))); + EXPECT_EQ(decode_cases[i].output, base::UTF16ToUTF8(std::u16string( + output.data(), output.length()))); - RawCanonOutputT<base::char16> output_utf8; + RawCanonOutputT<char16_t> output_utf8; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8, &output_utf8); EXPECT_EQ(decode_cases[i].output, base::UTF16ToUTF8( - base::string16(output_utf8.data(), output_utf8.length()))); + std::u16string(output_utf8.data(), output_utf8.length()))); } // Our decode should decode %00 const char zero_input[] = "%00"; - RawCanonOutputT<base::char16> zero_output; + RawCanonOutputT<char16_t> zero_output; DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8, &zero_output); - EXPECT_NE("%00", base::UTF16ToUTF8( - base::string16(zero_output.data(), zero_output.length()))); + EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(), + zero_output.length()))); // Test the error behavior for invalid UTF-8. struct Utf8DecodeCase { const char* input; - std::vector<base::char16> expected_iso; - std::vector<base::char16> expected_utf8; + std::vector<char16_t> expected_iso; + std::vector<char16_t> expected_utf8; } utf8_decode_cases[] = { // %e5%a5%bd is a valid UTF-8 sequence. U+597D {"%e4%a0%e5%a5%bd", @@ -279,17 +288,17 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) { for (const auto& test : utf8_decode_cases) { const char* input = test.input; - RawCanonOutputT<base::char16> output_iso; + RawCanonOutputT<char16_t> output_iso; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8OrIsomorphic, &output_iso); - EXPECT_EQ(base::string16(test.expected_iso.data()), - base::string16(output_iso.data(), output_iso.length())); + EXPECT_EQ(std::u16string(test.expected_iso.data()), + std::u16string(output_iso.data(), output_iso.length())); - RawCanonOutputT<base::char16> output_utf8; + RawCanonOutputT<char16_t> output_utf8; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8, &output_utf8); - EXPECT_EQ(base::string16(test.expected_utf8.data()), - base::string16(output_utf8.data(), output_utf8.length())); + EXPECT_EQ(std::u16string(test.expected_utf8.data()), + std::u16string(output_utf8.data(), output_utf8.length())); } } |