summaryrefslogtreecommitdiff
path: root/chromium/url
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-09-01 11:08:40 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2021-10-01 12:16:21 +0000
commit: 03c549e0392f92c02536d3f86d5e1d8dfa3435ac (patch)
tree: fe49d170a929b34ba82cd10db1a0bd8e3760fa4b /chromium/url
parent: 5d013f5804a0d91fcf6c626b2d6fb6eca5c845b0 (diff)
download: qtwebengine-chromium-03c549e0392f92c02536d3f86d5e1d8dfa3435ac.tar.gz
BASELINE: Update Chromium to 91.0.4472.160
Change-Id: I0def1f08a2412aeed79a9ab95dd50eb5c3f65f31
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/url')
-rw-r--r-- chromium/url/BUILD.gn | 3
-rw-r--r-- chromium/url/gurl.cc | 44
-rw-r--r-- chromium/url/gurl.h | 12
-rw-r--r-- chromium/url/gurl_fuzzer.cc | 12
-rw-r--r-- chromium/url/gurl_unittest.cc | 59
-rw-r--r-- chromium/url/origin.h | 1
-rw-r--r-- chromium/url/third_party/mozilla/url_parse.cc | 26
-rw-r--r-- chromium/url/third_party/mozilla/url_parse.h | 25
-rw-r--r-- chromium/url/url_canon.cc | 2
-rw-r--r-- chromium/url/url_canon.h | 99
-rw-r--r-- chromium/url/url_canon_etc.cc | 36
-rw-r--r-- chromium/url/url_canon_filesystemurl.cc | 8
-rw-r--r-- chromium/url/url_canon_fileurl.cc | 16
-rw-r--r-- chromium/url/url_canon_host.cc | 35
-rw-r--r-- chromium/url/url_canon_icu.cc | 8
-rw-r--r-- chromium/url/url_canon_icu.h | 2
-rw-r--r-- chromium/url/url_canon_icu_unittest.cc | 6
-rw-r--r-- chromium/url/url_canon_internal.cc | 37
-rw-r--r-- chromium/url/url_canon_internal.h | 59
-rw-r--r-- chromium/url/url_canon_ip.cc | 135
-rw-r--r-- chromium/url/url_canon_ip.h | 52
-rw-r--r-- chromium/url/url_canon_mailtourl.cc | 8
-rw-r--r-- chromium/url/url_canon_path.cc | 80
-rw-r--r-- chromium/url/url_canon_pathurl.cc | 24
-rw-r--r-- chromium/url/url_canon_query.cc | 13
-rw-r--r-- chromium/url/url_canon_relative.cc | 20
-rw-r--r-- chromium/url/url_canon_stdstring.h | 24
-rw-r--r-- chromium/url/url_canon_stdurl.cc | 10
-rw-r--r-- chromium/url/url_canon_unittest.cc | 455
-rw-r--r-- chromium/url/url_file.h | 2
-rw-r--r-- chromium/url/url_idna_icu.cc | 9
-rw-r--r-- chromium/url/url_idna_icu_alternatives_android.cc | 7
-rw-r--r-- chromium/url/url_idna_icu_alternatives_ios.mm | 5
-rw-r--r-- chromium/url/url_parse_file.cc | 2
-rw-r--r-- chromium/url/url_parse_internal.h | 9
-rw-r--r-- chromium/url/url_test_utils.h | 7
-rw-r--r-- chromium/url/url_util.cc | 32
-rw-r--r-- chromium/url/url_util.h | 23
-rw-r--r-- chromium/url/url_util_internal.h | 3
-rw-r--r-- chromium/url/url_util_unittest.cc | 43
40 files changed, 963 insertions, 490 deletions
diff --git a/chromium/url/BUILD.gn b/chromium/url/BUILD.gn
index 73776036cd2..440aa18ffd7 100644
--- a/chromium/url/BUILD.gn
+++ b/chromium/url/BUILD.gn
@@ -140,7 +140,6 @@ if (is_android) {
]
deps = [
"//base:base_java",
- "//base:jni_java",
"//third_party/androidx:androidx_annotation_annotation_java",
]
annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
@@ -291,7 +290,6 @@ if (is_android) {
":gurl_java",
"//base:base_java",
"//base:base_java_test_support",
- "//base:jni_java",
]
}
@@ -313,7 +311,6 @@ if (is_android) {
":gurl_junit_test_support",
"//base:base_java",
"//base:base_java_test_support",
- "//base:jni_java",
"//content/public/test/android:content_java_test_support",
"//third_party/android_support_test_runner:rules_java",
"//third_party/android_support_test_runner:runner_java",
diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc
index d6196695810..dc3fb21c3ae 100644
--- a/chromium/url/gurl.cc
+++ b/chromium/url/gurl.cc
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <algorithm>
+#include <memory>
#include <ostream>
#include <utility>
@@ -27,7 +28,7 @@ GURL::GURL(const GURL& other)
is_valid_(other.is_valid_),
parsed_(other.parsed_) {
if (other.inner_url_)
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
// Valid filesystem urls should always have an inner_url_.
DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
}
@@ -68,8 +69,8 @@ GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
InitializeFromCanonicalSpec();
}
-template<typename STR>
-void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
+template <typename CharT>
+void GURL::InitCanonical(base::BasicStringPiece<CharT> input_spec,
bool trim_path_end) {
url::StdStringCanonOutput output(&spec_);
is_valid_ = url::Canonicalize(
@@ -78,8 +79,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
output.Complete(); // Must be done before using string.
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
// Valid URLs always have non-empty specs.
DCHECK(!is_valid_ || !spec_.empty());
@@ -87,9 +88,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
void GURL::InitializeFromCanonicalSpec() {
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(
- new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
#ifndef NDEBUG
@@ -139,7 +139,7 @@ GURL& GURL::operator=(const GURL& other) {
else if (inner_url_)
*inner_url_ = *other.inner_url_;
else
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
return *this;
}
@@ -190,9 +190,9 @@ GURL GURL::Resolve(base::StringPiece relative) const {
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -216,9 +216,9 @@ GURL GURL::Resolve(base::StringPiece16 relative) const {
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -239,16 +239,16 @@ GURL GURL::ReplaceComponents(
output.Complete();
if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
// Note: code duplicated above (it's inconvenient to use a template here).
GURL GURL::ReplaceComponents(
- const url::Replacements<base::char16>& replacements) const {
+ const url::Replacements<char16_t>& replacements) const {
GURL result;
// Not allowed for invalid URLs.
@@ -262,9 +262,9 @@ GURL GURL::ReplaceComponents(
output.Complete();
if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
diff --git a/chromium/url/gurl.h b/chromium/url/gurl.h
index baa2ad328bc..7dcf1d55b05 100644
--- a/chromium/url/gurl.h
+++ b/chromium/url/gurl.h
@@ -13,7 +13,6 @@
#include "base/component_export.h"
#include "base/debug/alias.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "third_party/perfetto/include/perfetto/tracing/traced_value_forward.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -46,8 +45,8 @@
// will know to escape this and produce the desired result.
class COMPONENT_EXPORT(URL) GURL {
public:
- typedef url::StringPieceReplacements<std::string> Replacements;
- typedef url::StringPieceReplacements<base::string16> ReplacementsW;
+ typedef url::StringPieceReplacements<char> Replacements;
+ typedef url::StringPieceReplacements<char16_t> ReplacementsW;
// Creates an empty, invalid URL.
GURL();
@@ -167,8 +166,7 @@ class COMPONENT_EXPORT(URL) GURL {
// Note that we use the more general url::Replacements type to give
// callers extra flexibility rather than our override.
GURL ReplaceComponents(const url::Replacements<char>& replacements) const;
- GURL ReplaceComponents(
- const url::Replacements<base::char16>& replacements) const;
+ GURL ReplaceComponents(const url::Replacements<char16_t>& replacements) const;
// A helper function that is equivalent to replacing the path with a slash
// and clearing out everything after that. We sometimes need to know just the
@@ -450,8 +448,8 @@ class COMPONENT_EXPORT(URL) GURL {
enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
GURL(const std::string& url_string, RetainWhiteSpaceSelector);
- template<typename STR>
- void InitCanonical(base::BasicStringPiece<STR> input_spec,
+ template <typename CharT>
+ void InitCanonical(base::BasicStringPiece<CharT> input_spec,
bool trim_path_end);
void InitializeFromCanonicalSpec();
diff --git a/chromium/url/gurl_fuzzer.cc b/chromium/url/gurl_fuzzer.cc
index c5c22a68250..e3676ea1659 100644
--- a/chromium/url/gurl_fuzzer.cc
+++ b/chromium/url/gurl_fuzzer.cc
@@ -52,9 +52,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
CheckReplaceComponentsPreservesSpec(url_from_string_piece);
}
// Test for StringPiece16 if size is even.
- if (size % 2 == 0) {
+ if (size % sizeof(char16_t) == 0) {
base::StringPiece16 string_piece_input16(
- reinterpret_cast<const base::char16*>(data), size / 2);
+ reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t));
const GURL url_from_string_piece16(string_piece_input16);
CheckIdempotency(url_from_string_piece16);
CheckReplaceComponentsPreservesSpec(url_from_string_piece16);
@@ -78,10 +78,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
url_from_string_piece_part.Resolve(relative_string);
- if (relative_size % 2 == 0) {
- base::string16 relative_string16(
- reinterpret_cast<const base::char16*>(data + size_t_bytes),
- relative_size / 2);
+ if (relative_size % sizeof(char16_t) == 0) {
+ std::u16string relative_string16(
+ reinterpret_cast<const char16_t*>(data + size_t_bytes),
+ relative_size / sizeof(char16_t));
url_from_string_piece_part.Resolve(relative_string16);
}
}
diff --git a/chromium/url/gurl_unittest.cc b/chromium/url/gurl_unittest.cc
index 33195138681..6d23d6534a7 100644
--- a/chromium/url/gurl_unittest.cc
+++ b/chromium/url/gurl_unittest.cc
@@ -68,11 +68,11 @@ TEST(GURLTest, Types) {
// the parser is already tested and works, so we are mostly interested if the
// object does the right thing with the results.
TEST(GURLTest, Components) {
- GURL empty_url(base::UTF8ToUTF16(""));
+ GURL empty_url(u"");
EXPECT_TRUE(empty_url.is_empty());
EXPECT_FALSE(empty_url.is_valid());
- GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
EXPECT_FALSE(url.is_empty());
EXPECT_TRUE(url.is_valid());
EXPECT_TRUE(url.SchemeIs("http"));
@@ -268,21 +268,49 @@ TEST(GURLTest, ExtraSlashesBeforeAuthority) {
EXPECT_EQ("/", url.path());
}
-// Given an invalid URL, we should still get most of the components.
+// Given invalid URLs, we should still get most of the components.
TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
- GURL url("http:google.com:foo");
- EXPECT_FALSE(url.is_valid());
- EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
+ constexpr struct InvalidURLTestExpectations {
+ const char* url;
+ const char* spec;
+ const char* scheme;
+ const char* host;
+ const char* port;
+ const char* path;
+ // Extend as needed...
+ } expectations[] = {
+ {
+ "http:google.com:foo",
+ "http://google.com:foo/",
+ "http",
+ "google.com",
+ "foo",
+ "/",
+ },
+ {
+ "https:google.com:foo",
+ "https://google.com:foo/",
+ "https",
+ "google.com",
+ "foo",
+ "/",
+ },
+ };
- EXPECT_EQ("http", url.scheme());
- EXPECT_EQ("", url.username());
- EXPECT_EQ("", url.password());
- EXPECT_EQ("google.com", url.host());
- EXPECT_EQ("foo", url.port());
- EXPECT_EQ(PORT_INVALID, url.IntPort());
- EXPECT_EQ("/", url.path());
- EXPECT_EQ("", url.query());
- EXPECT_EQ("", url.ref());
+ for (const auto& e : expectations) {
+ const GURL url(e.url);
+ EXPECT_FALSE(url.is_valid());
+ EXPECT_EQ(e.spec, url.possibly_invalid_spec());
+ EXPECT_EQ(e.scheme, url.scheme());
+ EXPECT_EQ("", url.username());
+ EXPECT_EQ("", url.password());
+ EXPECT_EQ(e.host, url.host());
+ EXPECT_EQ(e.port, url.port());
+ EXPECT_EQ(PORT_INVALID, url.IntPort());
+ EXPECT_EQ(e.path, url.path());
+ EXPECT_EQ("", url.query());
+ EXPECT_EQ("", url.ref());
+ }
}
TEST(GURLTest, Resolve) {
@@ -314,6 +342,7 @@ TEST(GURLTest, Resolve) {
// A non-standard base can be replaced with a standard absolute URL.
{"data:blahblah", "http://google.com/", true, "http://google.com/"},
{"data:blahblah", "http:google.com", true, "http://google.com/"},
+ {"data:blahblah", "https:google.com", true, "https://google.com/"},
// Filesystem URLs have different paths to test.
{"filesystem:http://www.google.com/type/", "foo.html", true,
"filesystem:http://www.google.com/type/foo.html"},
diff --git a/chromium/url/origin.h b/chromium/url/origin.h
index 4f3e01790fb..ae6eacf2456 100644
--- a/chromium/url/origin.h
+++ b/chromium/url/origin.h
@@ -14,7 +14,6 @@
#include "base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/optional.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/unguessable_token.h"
diff --git a/chromium/url/third_party/mozilla/url_parse.cc b/chromium/url/third_party/mozilla/url_parse.cc
index 2b3003209ad..d882e3fd572 100644
--- a/chromium/url/third_party/mozilla/url_parse.cc
+++ b/chromium/url/third_party/mozilla/url_parse.cc
@@ -48,7 +48,7 @@ namespace url {
namespace {
// Returns true if the given character is a valid digit to use in a port.
-inline bool IsPortDigit(base::char16 ch) {
+inline bool IsPortDigit(char16_t ch) {
return ch >= '0' && ch <= '9';
}
@@ -812,13 +812,13 @@ bool ExtractScheme(const char* url, int url_len, Component* scheme) {
return DoExtractScheme(url, url_len, scheme);
}
-bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) {
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) {
return DoExtractScheme(url, url_len, scheme);
}
// This handles everything that may be an authority terminator, including
// backslash. For special backslash handling see DoParseAfterScheme.
-bool IsAuthorityTerminator(base::char16 ch) {
+bool IsAuthorityTerminator(char16_t ch) {
return IsURLSlash(ch) || ch == '?' || ch == '#';
}
@@ -828,7 +828,7 @@ void ExtractFileName(const char* url,
DoExtractFileName(url, path, file_name);
}
-void ExtractFileName(const base::char16* url,
+void ExtractFileName(const char16_t* url,
const Component& path,
Component* file_name) {
DoExtractFileName(url, path, file_name);
@@ -841,7 +841,7 @@ bool ExtractQueryKeyValue(const char* url,
return DoExtractQueryKeyValue(url, query, key, value);
}
-bool ExtractQueryKeyValue(const base::char16* url,
+bool ExtractQueryKeyValue(const char16_t* url,
Component* query,
Component* key,
Component* value) {
@@ -857,7 +857,7 @@ void ParseAuthority(const char* spec,
DoParseAuthority(spec, auth, username, password, hostname, port_num);
}
-void ParseAuthority(const base::char16* spec,
+void ParseAuthority(const char16_t* spec,
const Component& auth,
Component* username,
Component* password,
@@ -870,7 +870,7 @@ int ParsePort(const char* url, const Component& port) {
return DoParsePort(url, port);
}
-int ParsePort(const base::char16* url, const Component& port) {
+int ParsePort(const char16_t* url, const Component& port) {
return DoParsePort(url, port);
}
@@ -878,7 +878,7 @@ void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
DoParseStandardURL(url, url_len, parsed);
}
-void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseStandardURL(url, url_len, parsed);
}
@@ -889,7 +889,7 @@ void ParsePathURL(const char* url,
DoParsePathURL(url, url_len, trim_path_end, parsed);
}
-void ParsePathURL(const base::char16* url,
+void ParsePathURL(const char16_t* url,
int url_len,
bool trim_path_end,
Parsed* parsed) {
@@ -900,7 +900,7 @@ void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
DoParseFileSystemURL(url, url_len, parsed);
}
-void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileSystemURL(url, url_len, parsed);
}
@@ -908,7 +908,7 @@ void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
DoParseMailtoURL(url, url_len, parsed);
}
-void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseMailtoURL(url, url_len, parsed);
}
@@ -920,7 +920,7 @@ void ParsePathInternal(const char* spec,
ParsePath(spec, path, filepath, query, ref);
}
-void ParsePathInternal(const base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
@@ -935,7 +935,7 @@ void ParseAfterScheme(const char* spec,
DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
}
-void ParseAfterScheme(const base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed) {
diff --git a/chromium/url/third_party/mozilla/url_parse.h b/chromium/url/third_party/mozilla/url_parse.h
index 8a1c823d4f8..b662022b433 100644
--- a/chromium/url/third_party/mozilla/url_parse.h
+++ b/chromium/url/third_party/mozilla/url_parse.h
@@ -6,7 +6,6 @@
#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
#include "base/component_export.h"
-#include "base/strings/string16.h"
namespace url {
@@ -202,7 +201,7 @@ struct COMPONENT_EXPORT(URL) Parsed {
void clear_inner_parsed() {
if (inner_parsed_) {
delete inner_parsed_;
- inner_parsed_ = NULL;
+ inner_parsed_ = nullptr;
}
}
@@ -230,7 +229,7 @@ struct COMPONENT_EXPORT(URL) Parsed {
COMPONENT_EXPORT(URL)
void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed);
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed);
// PathURL is for when the scheme is known not to have an authority (host)
// section but that aren't file URLs either. The scheme is parsed, and
@@ -242,7 +241,7 @@ void ParsePathURL(const char* url,
bool trim_path_end,
Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParsePathURL(const base::char16* url,
+void ParsePathURL(const char16_t* url,
int url_len,
bool trim_path_end,
Parsed* parsed);
@@ -252,19 +251,19 @@ void ParsePathURL(const base::char16* url,
COMPONENT_EXPORT(URL)
void ParseFileURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed);
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed);
// Filesystem URLs are structured differently than other URLs.
COMPONENT_EXPORT(URL)
void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed);
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed);
// MailtoURL is for mailto: urls. They are made up scheme,path,query
COMPONENT_EXPORT(URL)
void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed);
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed);
// Helper functions -----------------------------------------------------------
@@ -291,11 +290,11 @@ void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
bool ExtractScheme(const char* url, int url_len, Component* scheme);
COMPONENT_EXPORT(URL)
-bool ExtractScheme(const base::char16* url, int url_len, Component* scheme);
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme);
// Returns true if ch is a character that terminates the authority segment
// of a URL.
-COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(base::char16 ch);
+COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch);
// Does a best effort parse of input |spec|, in range |auth|. If a particular
// component is not found, it will be set to invalid.
@@ -307,7 +306,7 @@ void ParseAuthority(const char* spec,
Component* hostname,
Component* port_num);
COMPONENT_EXPORT(URL)
-void ParseAuthority(const base::char16* spec,
+void ParseAuthority(const char16_t* spec,
const Component& auth,
Component* username,
Component* password,
@@ -323,7 +322,7 @@ void ParseAuthority(const base::char16* spec,
enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port);
COMPONENT_EXPORT(URL)
-int ParsePort(const base::char16* url, const Component& port);
+int ParsePort(const char16_t* url, const Component& port);
// Extracts the range of the file name in the given url. The path must
// already have been computed by the parse function, and the matching URL
@@ -340,7 +339,7 @@ void ExtractFileName(const char* url,
const Component& path,
Component* file_name);
COMPONENT_EXPORT(URL)
-void ExtractFileName(const base::char16* url,
+void ExtractFileName(const char16_t* url,
const Component& path,
Component* file_name);
@@ -365,7 +364,7 @@ bool ExtractQueryKeyValue(const char* url,
Component* key,
Component* value);
COMPONENT_EXPORT(URL)
-bool ExtractQueryKeyValue(const base::char16* url,
+bool ExtractQueryKeyValue(const char16_t* url,
Component* query,
Component* key,
Component* value);
diff --git a/chromium/url/url_canon.cc b/chromium/url/url_canon.cc
index cde280d2be3..6cfdd23aed8 100644
--- a/chromium/url/url_canon.cc
+++ b/chromium/url/url_canon.cc
@@ -10,6 +10,6 @@ namespace url {
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL))
- CanonOutputT<base::char16>;
+ CanonOutputT<char16_t>;
} // namespace url
diff --git a/chromium/url/url_canon.h b/chromium/url/url_canon.h
index b6d7533d0a7..83d4e016caf 100644
--- a/chromium/url/url_canon.h
+++ b/chromium/url/url_canon.h
@@ -8,9 +8,10 @@
#include <stdlib.h>
#include <string.h>
+#include <string>
+
#include "base/component_export.h"
#include "base/export_template.h"
-#include "base/strings/string16.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
@@ -178,18 +179,18 @@ class RawCanonOutputT : public CanonOutputT<T> {
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
CanonOutputT<char>;
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
- CanonOutputT<base::char16>;
+ CanonOutputT<char16_t>;
// Normally, all canonicalization output is in narrow characters. We support
// the templates so it can also be used internally if a wide buffer is
// required.
typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<base::char16> CanonOutputW;
+typedef CanonOutputT<char16_t> CanonOutputW;
template<int fixed_capacity>
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
-template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
// Character set converter ----------------------------------------------------
//
@@ -215,7 +216,7 @@ class COMPONENT_EXPORT(URL) CharsetConverter {
// decimal, (such as "&#20320;") with escaping of the ampersand, number
// sign, and semicolon (in the previous example it would be
// "%26%2320320%3B"). This rule is based on what IE does in this situation.
- virtual void ConvertFromUTF16(const base::char16* input,
+ virtual void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) = 0;
};
@@ -273,11 +274,11 @@ const char* RemoveURLWhitespace(const char* input,
int* output_len,
bool* potentially_dangling_markup);
COMPONENT_EXPORT(URL)
-const base::char16* RemoveURLWhitespace(const base::char16* input,
- int input_len,
- CanonOutputT<base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
// IDN ------------------------------------------------------------------------
@@ -291,7 +292,7 @@ const base::char16* RemoveURLWhitespace(const base::char16* input,
//
// On error, returns false. The output in this case is undefined.
COMPONENT_EXPORT(URL)
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output);
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
@@ -323,7 +324,7 @@ bool CanonicalizeScheme(const char* spec,
CanonOutput* output,
Component* out_scheme);
COMPONENT_EXPORT(URL)
-bool CanonicalizeScheme(const base::char16* spec,
+bool CanonicalizeScheme(const char16_t* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme);
@@ -347,9 +348,9 @@ bool CanonicalizeUserInfo(const char* username_source,
Component* out_username,
Component* out_password);
COMPONENT_EXPORT(URL)
-bool CanonicalizeUserInfo(const base::char16* username_source,
+bool CanonicalizeUserInfo(const char16_t* username_source,
const Component& username,
- const base::char16* password_source,
+ const char16_t* password_source,
const Component& password,
CanonOutput* output,
Component* out_username,
@@ -411,7 +412,7 @@ bool CanonicalizeHost(const char* spec,
CanonOutput* output,
Component* out_host);
COMPONENT_EXPORT(URL)
-bool CanonicalizeHost(const base::char16* spec,
+bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host);
@@ -426,7 +427,7 @@ void CanonicalizeHostVerbose(const char* spec,
CanonOutput* output,
CanonHostInfo* host_info);
COMPONENT_EXPORT(URL)
-void CanonicalizeHostVerbose(const base::char16* spec,
+void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info);
@@ -456,7 +457,7 @@ bool CanonicalizeHostSubstring(const char* spec,
const Component& host,
CanonOutput* output);
COMPONENT_EXPORT(URL)
-bool CanonicalizeHostSubstring(const base::char16* spec,
+bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output);
@@ -476,7 +477,7 @@ void CanonicalizeIPAddress(const char* spec,
CanonOutput* output,
CanonHostInfo* host_info);
COMPONENT_EXPORT(URL)
-void CanonicalizeIPAddress(const base::char16* spec,
+void CanonicalizeIPAddress(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info);
@@ -493,7 +494,7 @@ bool CanonicalizePort(const char* spec,
CanonOutput* output,
Component* out_port);
COMPONENT_EXPORT(URL)
-bool CanonicalizePort(const base::char16* spec,
+bool CanonicalizePort(const char16_t* spec,
const Component& port,
int default_port_for_scheme,
CanonOutput* output,
@@ -519,11 +520,24 @@ bool CanonicalizePath(const char* spec,
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-bool CanonicalizePath(const base::char16* spec,
+bool CanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
+// Like CanonicalizePath(), but does not assume that its operating on the
+// entire path. It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+
// Canonicalizes the input as a file path. This is like CanonicalizePath except
// that it also handles Windows drive specs. For example, the path can begin
// with "c|\" and it will get properly canonicalized to "C:/".
@@ -536,7 +550,7 @@ bool FileCanonicalizePath(const char* spec,
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-bool FileCanonicalizePath(const base::char16* spec,
+bool FileCanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
@@ -560,7 +574,7 @@ void CanonicalizeQuery(const char* spec,
CanonOutput* output,
Component* out_query);
COMPONENT_EXPORT(URL)
-void CanonicalizeQuery(const base::char16* spec,
+void CanonicalizeQuery(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output,
@@ -578,7 +592,7 @@ void CanonicalizeRef(const char* spec,
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-void CanonicalizeRef(const base::char16* spec,
+void CanonicalizeRef(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
@@ -603,7 +617,7 @@ bool CanonicalizeStandardURL(const char* spec,
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeStandardURL(const base::char16* spec,
+bool CanonicalizeStandardURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
@@ -620,7 +634,7 @@ bool CanonicalizeFileURL(const char* spec,
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeFileURL(const base::char16* spec,
+bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
@@ -636,7 +650,7 @@ bool CanonicalizeFileSystemURL(const char* spec,
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeFileSystemURL(const base::char16* spec,
+bool CanonicalizeFileSystemURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
@@ -652,12 +666,25 @@ bool CanonicalizePathURL(const char* spec,
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizePathURL(const base::char16* spec,
+bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed);
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
@@ -670,7 +697,7 @@ bool CanonicalizeMailtoURL(const char* spec,
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeMailtoURL(const base::char16* spec,
+bool CanonicalizeMailtoURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
@@ -869,7 +896,7 @@ bool ReplaceStandardURL(const char* base,
COMPONENT_EXPORT(URL)
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
@@ -887,7 +914,7 @@ bool ReplaceFileSystemURL(const char* base,
COMPONENT_EXPORT(URL)
bool ReplaceFileSystemURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
@@ -904,7 +931,7 @@ bool ReplaceFileURL(const char* base,
COMPONENT_EXPORT(URL)
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
@@ -920,7 +947,7 @@ bool ReplacePathURL(const char* base,
COMPONENT_EXPORT(URL)
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed);
@@ -935,7 +962,7 @@ bool ReplaceMailtoURL(const char* base,
COMPONENT_EXPORT(URL)
bool ReplaceMailtoURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed);
@@ -963,7 +990,7 @@ bool IsRelativeURL(const char* base,
COMPONENT_EXPORT(URL)
bool IsRelativeURL(const char* base,
const Parsed& base_parsed,
- const base::char16* fragment,
+ const char16_t* fragment,
int fragment_len,
bool is_base_hierarchical,
bool* is_relative,
@@ -1000,7 +1027,7 @@ COMPONENT_EXPORT(URL)
bool ResolveRelativeURL(const char* base_url,
const Parsed& base_parsed,
bool base_is_file,
- const base::char16* relative_url,
+ const char16_t* relative_url,
const Component& relative_component,
CharsetConverter* query_converter,
CanonOutput* output,
diff --git a/chromium/url/url_canon_etc.cc b/chromium/url/url_canon_etc.cc
index 1ca9193ba71..7ddf1621026 100644
--- a/chromium/url/url_canon_etc.cc
+++ b/chromium/url/url_canon_etc.cc
@@ -329,16 +329,16 @@ const char* RemoveURLWhitespace(const char* input,
potentially_dangling_markup);
}
-const base::char16* RemoveURLWhitespace(const base::char16* input,
- int input_len,
- CanonOutputT<base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup) {
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
potentially_dangling_markup);
}
-char CanonicalSchemeChar(base::char16 ch) {
+char CanonicalSchemeChar(char16_t ch) {
if (ch >= 0x80)
return 0; // Non-ASCII is not supported by schemes.
return kSchemeCanonical[ch];
@@ -351,11 +351,11 @@ bool CanonicalizeScheme(const char* spec,
return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
}
-bool CanonicalizeScheme(const base::char16* spec,
+bool CanonicalizeScheme(const char16_t* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- return DoScheme<base::char16, base::char16>(spec, scheme, output, out_scheme);
+ return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme);
}
bool CanonicalizeUserInfo(const char* username_source,
@@ -370,16 +370,16 @@ bool CanonicalizeUserInfo(const char* username_source,
output, out_username, out_password);
}
-bool CanonicalizeUserInfo(const base::char16* username_source,
+bool CanonicalizeUserInfo(const char16_t* username_source,
const Component& username,
- const base::char16* password_source,
+ const char16_t* password_source,
const Component& password,
CanonOutput* output,
Component* out_username,
Component* out_password) {
- return DoUserInfo<base::char16, base::char16>(
- username_source, username, password_source, password,
- output, out_username, out_password);
+ return DoUserInfo<char16_t, char16_t>(username_source, username,
+ password_source, password, output,
+ out_username, out_password);
}
bool CanonicalizePort(const char* spec,
@@ -392,13 +392,13 @@ bool CanonicalizePort(const char* spec,
output, out_port);
}
-bool CanonicalizePort(const base::char16* spec,
+bool CanonicalizePort(const char16_t* spec,
const Component& port,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
- return DoPort<base::char16, base::char16>(spec, port, default_port_for_scheme,
- output, out_port);
+ return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output,
+ out_port);
}
void CanonicalizeRef(const char* spec,
@@ -408,11 +408,11 @@ void CanonicalizeRef(const char* spec,
DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
}
-void CanonicalizeRef(const base::char16* spec,
+void CanonicalizeRef(const char16_t* spec,
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);
+ DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref);
}
} // namespace url
diff --git a/chromium/url/url_canon_filesystemurl.cc b/chromium/url/url_canon_filesystemurl.cc
index 2cfaa29eafb..b36198a6bef 100644
--- a/chromium/url/url_canon_filesystemurl.cc
+++ b/chromium/url/url_canon_filesystemurl.cc
@@ -94,14 +94,14 @@ bool CanonicalizeFileSystemURL(const char* spec,
new_parsed);
}
-bool CanonicalizeFileSystemURL(const base::char16* spec,
+bool CanonicalizeFileSystemURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileSystemURL<base::char16, base::char16>(
- spec, URLComponentSource<base::char16>(spec), parsed, charset_converter,
+ return DoCanonicalizeFileSystemURL<char16_t, char16_t>(
+ spec, URLComponentSource<char16_t>(spec), parsed, charset_converter,
output, new_parsed);
}
@@ -120,7 +120,7 @@ bool ReplaceFileSystemURL(const char* base,
bool ReplaceFileSystemURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/chromium/url/url_canon_fileurl.cc b/chromium/url/url_canon_fileurl.cc
index 067ed58c51a..2aa582470ba 100644
--- a/chromium/url/url_canon_fileurl.cc
+++ b/chromium/url/url_canon_fileurl.cc
@@ -133,15 +133,15 @@ bool CanonicalizeFileURL(const char* spec,
output, new_parsed);
}
-bool CanonicalizeFileURL(const base::char16* spec,
+bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, query_converter,
- output, new_parsed);
+ return DoCanonicalizeFileURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, query_converter, output,
+ new_parsed);
}
bool FileCanonicalizePath(const char* spec,
@@ -152,12 +152,12 @@ bool FileCanonicalizePath(const char* spec,
output, out_path);
}
-bool FileCanonicalizePath(const base::char16* spec,
+bool FileCanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
- output, out_path);
+ return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
+ out_path);
}
bool ReplaceFileURL(const char* base,
@@ -175,7 +175,7 @@ bool ReplaceFileURL(const char* base,
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/chromium/url/url_canon_host.cc b/chromium/url/url_canon_host.cc
index 819a7831fe0..0d178c714a4 100644
--- a/chromium/url/url_canon_host.cc
+++ b/chromium/url/url_canon_host.cc
@@ -3,8 +3,10 @@
// found in the LICENSE file.
#include "base/check.h"
+#include "base/metrics/histogram_macros.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_canon_ip.h"
namespace url {
@@ -80,7 +82,7 @@ constexpr int kMaxHostBufferLength = kMaxHostLength*5;
const int kTempHostBufferLen = 1024;
typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;
-typedef RawCanonOutputT<base::char16, kTempHostBufferLen> StackBufferW;
+typedef RawCanonOutputT<char16_t, kTempHostBufferLen> StackBufferW;
// Scans a host name and fills in the output flags according to what we find.
// |has_non_ascii| will be true if there are any non-7-bit characters, and
@@ -174,7 +176,7 @@ bool DoSimpleHost(const INCHAR* host,
}
// Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) {
int original_output_len = output->length(); // So we can rewind below.
// We need to escape URL before doing IDN conversion, since punicode strings
@@ -296,8 +298,11 @@ bool DoComplexHost(const char* host, int host_len,
// UTF-16 convert host to its ASCII version. The set up is already ready for
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
-bool DoComplexHost(const base::char16* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char16_t* host,
+ int host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
if (has_escaped) {
// Yikes, we have escaped characters with wide input. The escaped
// characters should be interpreted as UTF-8. To solve this problem,
@@ -374,6 +379,16 @@ void DoHost(const CHAR* spec,
if (host_info->IsIPAddress()) {
output->set_length(output_begin);
output->Append(canon_ip.data(), canon_ip.length());
+ } else if (host_info->family == CanonHostInfo::NEUTRAL) {
+ // Only need to call CheckHostnameSafety() for valid hosts that aren't IP
+ // addresses and aren't broken.
+ HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host);
+ // Don't record kOk. Ratio of OK to not-OK statuses is not meaningful at
+ // this layer, and hostnames are canonicalized a lot.
+ if (host_safety_status != HostSafetyStatus::kOk) {
+ UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus",
+ host_safety_status);
+ }
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
@@ -395,12 +410,12 @@ bool CanonicalizeHost(const char* spec,
return (host_info.family != CanonHostInfo::BROKEN);
}
-bool CanonicalizeHost(const base::char16* spec,
+bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host) {
CanonHostInfo host_info;
- DoHost<base::char16, base::char16>(spec, host, output, &host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, &host_info);
*out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
@@ -412,11 +427,11 @@ void CanonicalizeHostVerbose(const char* spec,
DoHost<char, unsigned char>(spec, host, output, host_info);
}
-void CanonicalizeHostVerbose(const base::char16* spec,
+void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- DoHost<base::char16, base::char16>(spec, host, output, host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, host_info);
}
bool CanonicalizeHostSubstring(const char* spec,
@@ -425,10 +440,10 @@ bool CanonicalizeHostSubstring(const char* spec,
return DoHostSubstring<char, unsigned char>(spec, host, output);
}
-bool CanonicalizeHostSubstring(const base::char16* spec,
+bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output) {
- return DoHostSubstring<base::char16, base::char16>(spec, host, output);
+ return DoHostSubstring<char16_t, char16_t>(spec, host, output);
}
} // namespace url
diff --git a/chromium/url/url_canon_icu.cc b/chromium/url/url_canon_icu.cc
index a5a54e41c43..f1e05fdb5ea 100644
--- a/chromium/url/url_canon_icu.cc
+++ b/chromium/url/url_canon_icu.cc
@@ -9,7 +9,6 @@
#include <string.h>
#include "base/check.h"
-#include "base/i18n/uchar.h"
#include "third_party/icu/source/common/unicode/ucnv.h"
#include "third_party/icu/source/common/unicode/ucnv_cb.h"
#include "third_party/icu/source/common/unicode/utypes.h"
@@ -81,7 +80,7 @@ ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
ICUCharsetConverter::~ICUCharsetConverter() = default;
-void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input,
+void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) {
// Install our error handler. It will be called for character that can not
@@ -95,9 +94,8 @@ void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input,
do {
UErrorCode err = U_ZERO_ERROR;
char* dest = &output->data()[begin_offset];
- int required_capacity =
- ucnv_fromUChars(converter_, dest, dest_capacity,
- base::i18n::ToUCharPtr(input), input_len, &err);
+ int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
+ input, input_len, &err);
if (err != U_BUFFER_OVERFLOW_ERROR) {
output->set_length(begin_offset + required_capacity);
return;
diff --git a/chromium/url/url_canon_icu.h b/chromium/url/url_canon_icu.h
index 367715119d2..e3b9aa0e3cd 100644
--- a/chromium/url/url_canon_icu.h
+++ b/chromium/url/url_canon_icu.h
@@ -26,7 +26,7 @@ class COMPONENT_EXPORT(URL) ICUCharsetConverter : public CharsetConverter {
~ICUCharsetConverter() override;
- void ConvertFromUTF16(const base::char16* input,
+ void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) override;
diff --git a/chromium/url/url_canon_icu_unittest.cc b/chromium/url/url_canon_icu_unittest.cc
index 4ce31d4eb2d..5e1fcf5e539 100644
--- a/chromium/url/url_canon_icu_unittest.cc
+++ b/chromium/url/url_canon_icu_unittest.cc
@@ -67,7 +67,7 @@ TEST(URLCanonIcuTest, ICUCharsetConverter) {
std::string str;
StdStringCanonOutput output(&str);
- base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(icu_cases[i].input));
int input_len = static_cast<int>(input_str.length());
converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
@@ -84,7 +84,7 @@ TEST(URLCanonIcuTest, ICUCharsetConverter) {
ICUCharsetConverter converter(conv.converter());
for (int i = static_size - 2; i <= static_size + 2; i++) {
// Make a string with the appropriate length.
- base::string16 input;
+ std::u16string input;
for (int ch = 0; ch < i; ch++)
input.push_back('a');
@@ -138,7 +138,7 @@ TEST(URLCanonIcuTest, QueryWithConverter) {
}
if (query_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
diff --git a/chromium/url/url_canon_internal.cc b/chromium/url/url_canon_internal.cc
index a727ca2b271..03e1ad1a955 100644
--- a/chromium/url/url_canon_internal.cc
+++ b/chromium/url/url_canon_internal.cc
@@ -85,7 +85,7 @@ void DoOverrideComponent(const char* override_source,
// may get resized while we're overriding a subsequent component. Instead, the
// caller should use the beginning of the |utf8_buffer| as the string pointer
// for all components once all overrides have been prepared.
-bool PrepareUTF16OverrideComponent(const base::char16* override_source,
+bool PrepareUTF16OverrideComponent(const char16_t* override_source,
const Component& override_component,
CanonOutput* utf8_buffer,
Component* dest_component) {
@@ -233,7 +233,7 @@ const char kCharToHexLookup[8] = {
0, // 0xE0 - 0xFF
};
-const base::char16 kUnicodeReplacementCharacter = 0xfffd;
+const char16_t kUnicodeReplacementCharacter = 0xfffd;
void AppendStringOfType(const char* source, int length,
SharedCharTypes type,
@@ -241,11 +241,11 @@ void AppendStringOfType(const char* source, int length,
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
-void AppendStringOfType(const base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ int length,
SharedCharTypes type,
CanonOutput* output) {
- DoAppendStringOfType<base::char16, base::char16>(
- source, length, type, output);
+ DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
bool ReadUTFChar(const char* str, int* begin, int length,
@@ -261,7 +261,9 @@ bool ReadUTFChar(const char* str, int* begin, int length,
return true;
}
-bool ReadUTFChar(const base::char16* str, int* begin, int length,
+bool ReadUTFChar(const char16_t* str,
+ int* begin,
+ int length,
unsigned* code_point_out) {
// This depends on ints and int32s being the same thing. If they're not, it
// will fail to compile.
@@ -279,13 +281,15 @@ void AppendInvalidNarrowString(const char* spec, int begin, int end,
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
-void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ int begin,
+ int end,
CanonOutput* output) {
- DoAppendInvalidNarrowString<base::char16, base::char16>(
- spec, begin, end, output);
+ DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
-bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
+bool ConvertUTF16ToUTF8(const char16_t* input,
+ int input_len,
CanonOutput* output) {
bool success = true;
for (int i = 0; i < input_len; i++) {
@@ -296,8 +300,9 @@ bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
return success;
}
-bool ConvertUTF8ToUTF16(const char* input, int input_len,
- CanonOutputT<base::char16>* output) {
+bool ConvertUTF8ToUTF16(const char* input,
+ int input_len,
+ CanonOutputT<char16_t>* output) {
bool success = true;
for (int i = 0; i < input_len; i++) {
unsigned code_point;
@@ -339,14 +344,14 @@ void SetupOverrideComponents(const char* base,
}
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed) {
bool success = true;
// Get the source and parsed structures of the things we are replacing.
- const URLComponentSource<base::char16>& repl_source = repl.sources();
+ const URLComponentSource<char16_t>& repl_source = repl.sources();
const Parsed& repl_parsed = repl.components();
success &= PrepareUTF16OverrideComponent(
@@ -408,7 +413,7 @@ int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
return 0;
}
-int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) {
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
if (radix != 10)
return EINVAL;
@@ -422,7 +427,7 @@ int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) {
}
for (int i = 0; i < written; ++i) {
- buffer[i] = static_cast<base::char16>(temp[i]);
+ buffer[i] = char16_t{temp[i]};
}
buffer[written] = '\0';
return 0;
diff --git a/chromium/url/url_canon_internal.h b/chromium/url/url_canon_internal.h
index 53ae8dd3ffc..5ace5f2ce73 100644
--- a/chromium/url/url_canon_internal.h
+++ b/chromium/url/url_canon_internal.h
@@ -79,7 +79,8 @@ inline bool IsComponentChar(unsigned char c) {
void AppendStringOfType(const char* source, int length,
SharedCharTypes type,
CanonOutput* output);
-void AppendStringOfType(const base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ int length,
SharedCharTypes type,
CanonOutput* output);
@@ -123,7 +124,7 @@ inline int IsDot(const CHAR* spec, int offset, int end) {
// required for relative URL resolving to test for scheme equality.
//
// Returns 0 if the input character is not a valid scheme character.
-char CanonicalSchemeChar(base::char16 ch);
+char CanonicalSchemeChar(char16_t ch);
// Write a single character, escaped, to the output. This always escapes: it
// does no checking that thee character requires escaping.
@@ -138,7 +139,7 @@ inline void AppendEscapedChar(UINCHAR ch,
}
// The character we'll substitute for undecodable or invalid characters.
-extern const base::char16 kUnicodeReplacementCharacter;
+extern const char16_t kUnicodeReplacementCharacter;
// UTF-8 functions ------------------------------------------------------------
@@ -229,19 +230,19 @@ inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
// can be incremented in a loop and will be ready for the next character.
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const base::char16* str,
+bool ReadUTFChar(const char16_t* str,
int* begin,
int length,
unsigned* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
inline void AppendUTF16Value(unsigned code_point,
- CanonOutputT<base::char16>* output) {
+ CanonOutputT<char16_t>* output) {
if (code_point > 0xffff) {
- output->push_back(static_cast<base::char16>((code_point >> 10) + 0xd7c0));
- output->push_back(static_cast<base::char16>((code_point & 0x3ff) | 0xdc00));
+ output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0));
+ output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00));
} else {
- output->push_back(static_cast<base::char16>(code_point));
+ output->push_back(static_cast<char16_t>(code_point));
}
}
@@ -266,8 +267,10 @@ inline void AppendUTF16Value(unsigned code_point,
//
// Assumes that ch[begin] is within range in the array, but does not assume
// that any following characters are.
-inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
- int length, CanonOutput* output) {
+inline bool AppendUTF8EscapedChar(const char16_t* str,
+ int* begin,
+ int length,
+ CanonOutput* output) {
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
@@ -301,7 +304,7 @@ inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
inline bool Is8BitChar(char c) {
return true; // this case is specialized to avoid a warning
}
-inline bool Is8BitChar(base::char16 c) {
+inline bool Is8BitChar(char16_t c) {
return c <= 255;
}
@@ -337,7 +340,9 @@ inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
// the escaping rules are not guaranteed!
void AppendInvalidNarrowString(const char* spec, int begin, int end,
CanonOutput* output);
-void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ int begin,
+ int end,
CanonOutput* output);
// Misc canonicalization helpers ----------------------------------------------
@@ -351,17 +356,17 @@ void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
// return false in the failure case, and the caller should not continue as
// normal.
COMPONENT_EXPORT(URL)
-bool ConvertUTF16ToUTF8(const base::char16* input,
+bool ConvertUTF16ToUTF8(const char16_t* input,
int input_len,
CanonOutput* output);
COMPONENT_EXPORT(URL)
bool ConvertUTF8ToUTF16(const char* input,
int input_len,
- CanonOutputT<base::char16>* output);
+ CanonOutputT<char16_t>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
// converter is NULL, this will use UTF-8.
-void ConvertUTF16ToQueryEncoding(const base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output);
@@ -397,21 +402,21 @@ void SetupOverrideComponents(const char* base,
// although we will have still done the override with "invalid characters" in
// place of errors.
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed);
// Implemented in url_canon_path.cc, these are required by the relative URL
// resolver as well, so we declare them here.
-bool CanonicalizePartialPath(const char* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
-bool CanonicalizePartialPath(const base::char16* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output);
#ifndef WIN32
@@ -419,7 +424,7 @@ bool CanonicalizePartialPath(const base::char16* spec,
COMPONENT_EXPORT(URL)
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
COMPONENT_EXPORT(URL)
-int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix);
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix);
// Secure template overloads for these functions
template<size_t N>
@@ -427,8 +432,8 @@ inline int _itoa_s(int value, char (&buffer)[N], int radix) {
return _itoa_s(value, buffer, N, radix);
}
-template<size_t N>
-inline int _itow_s(int value, base::char16 (&buffer)[N], int radix) {
+template <size_t N>
+inline int _itow_s(int value, char16_t (&buffer)[N], int radix) {
return _itow_s(value, buffer, N, radix);
}
diff --git a/chromium/url/url_canon_ip.cc b/chromium/url/url_canon_ip.cc
index 1859119f687..10e776d9e78 100644
--- a/chromium/url/url_canon_ip.cc
+++ b/chromium/url/url_canon_ip.cc
@@ -9,6 +9,8 @@
#include <limits>
#include "base/check.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
namespace url {
@@ -593,6 +595,105 @@ bool DoCanonicalizeIPv6Address(const CHAR* spec,
return true;
}
+// Method to check if something looks like a number. Used instead of
+// IPv4ComponentToNumber() so that it counts things that look like bad base-8
+// (e.g. 09).
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+template <typename CHAR>
+bool LooksLikeANumber(const CHAR* spec, const Component& component) {
+ // Empty components don't look like numbers.
+ if (!component.is_nonempty())
+ return false;
+
+ SharedCharTypes base = CHAR_DEC;
+ size_t start = component.begin;
+ if (component.len >= 2 && spec[start] == '0' &&
+ (spec[start + 1] == 'x' || spec[start + 1] == 'X')) {
+ base = CHAR_HEX;
+ start += 2;
+ }
+ for (int i = start; i < component.end(); i++) {
+ if (!IsCharOfType(spec[i], base))
+ return false;
+ }
+ return true;
+}
+
+// Calculates the "HostSafetyStatus" of the provided hostname.
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+template <typename CHAR>
+HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec,
+ const Component& host) {
+ if (!host.is_nonempty())
+ return HostSafetyStatus::kOk;
+
+ // Find the last two components.
+
+ // Number of identified components. Stops after second component. Does not
+ // include the empty terminal component, if the host ends with a dot.
+ int existing_components = 0;
+ // Parsed component values. Populated last component first.
+ Component components[2];
+
+ // Index of the character after the end of the current component.
+ int cur_component_end = host.end();
+
+ // Ignore terminal dot, if there is one.
+ if (spec[cur_component_end - 1] == '.') {
+ cur_component_end--;
+ // Nothing else to do if the host is just a dot.
+ if (host.begin == cur_component_end)
+ return HostSafetyStatus::kOk;
+ }
+
+ for (int i = cur_component_end; /* nothing */; i--) {
+ DCHECK_GE(i, host.begin);
+
+ // If `i` is not the first character of the component, continue.
+ if (i != host.begin && spec[i - 1] != '.')
+ continue;
+
+ // Otherwise, i is the index of the start of a component.
+ components[existing_components] = Component(i, cur_component_end - i);
+ existing_components++;
+
+ // Finished parsing last component.
+ if (i == host.begin)
+ break;
+
+ // If there's anything left to parse after the 2nd component, nothing more
+ // to do.
+ if (existing_components == 2)
+ break;
+
+ // The next component ends before the dot at spec[i]. `i` will be
+ // decremented when restarting the loop, so no need to modify it.
+ cur_component_end = i - 1;
+ }
+
+ // If the last value doesn't look like a number, no need to do more work, as
+ // IPv6 and hostnames with non-numeric final components are all considered OK.
+ if (!LooksLikeANumber(spec, components[0]))
+ return HostSafetyStatus::kOk;
+
+ url::RawCanonOutputT<char> ignored_output;
+ CanonHostInfo host_info;
+ CanonicalizeIPAddress(spec, host, &ignored_output, &host_info);
+ // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4
+ // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean
+ // things like "].6" will correctly be considered already invalid, so will
+ // return kOk.
+ if (host_info.family != CanonHostInfo::NEUTRAL)
+ return HostSafetyStatus::kOk;
+
+ if (LooksLikeANumber(spec, components[1]))
+ return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric;
+
+ return HostSafetyStatus::kTopLevelDomainIsNumeric;
+}
+
} // namespace
void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
@@ -650,11 +751,10 @@ bool FindIPv4Components(const char* spec,
return DoFindIPv4Components<char, unsigned char>(spec, host, components);
}
-bool FindIPv4Components(const base::char16* spec,
+bool FindIPv4Components(const char16_t* spec,
const Component& host,
Component components[4]) {
- return DoFindIPv4Components<base::char16, base::char16>(
- spec, host, components);
+ return DoFindIPv4Components<char16_t, char16_t>(spec, host, components);
}
void CanonicalizeIPAddress(const char* spec,
@@ -669,15 +769,15 @@ void CanonicalizeIPAddress(const char* spec,
return;
}
-void CanonicalizeIPAddress(const base::char16* spec,
+void CanonicalizeIPAddress(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (DoCanonicalizeIPv4Address<base::char16, base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
- if (DoCanonicalizeIPv6Address<base::char16, base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
}
@@ -688,12 +788,12 @@ CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components);
}
-CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<base::char16>(
- spec, host, address, num_ipv4_components);
+ return DoIPv4AddressToNumber<char16_t>(spec, host, address,
+ num_ipv4_components);
}
bool IPv6AddressToNumber(const char* spec,
@@ -702,10 +802,19 @@ bool IPv6AddressToNumber(const char* spec,
return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
}
-bool IPv6AddressToNumber(const base::char16* spec,
+bool IPv6AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[16]) {
- return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address);
+ return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
+}
+
+HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) {
+ return DoCheckHostnameSafety(spec, host);
+}
+
+HostSafetyStatus CheckHostnameSafety(const char16_t* spec,
+ const Component& host) {
+ return DoCheckHostnameSafety(spec, host);
}
} // namespace url
diff --git a/chromium/url/url_canon_ip.h b/chromium/url/url_canon_ip.h
index d3cad367ff2..32d8f8875d7 100644
--- a/chromium/url/url_canon_ip.h
+++ b/chromium/url/url_canon_ip.h
@@ -5,8 +5,10 @@
#ifndef URL_URL_CANON_IP_H_
#define URL_URL_CANON_IP_H_
+#include <string>
+
#include "base/component_export.h"
-#include "base/strings/string16.h"
+#include "base/strings/string_piece_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -43,7 +45,7 @@ bool FindIPv4Components(const char* spec,
const Component& host,
Component components[4]);
COMPONENT_EXPORT(URL)
-bool FindIPv4Components(const base::char16* spec,
+bool FindIPv4Components(const char16_t* spec,
const Component& host,
Component components[4]);
@@ -64,7 +66,7 @@ CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
unsigned char address[4],
int* num_ipv4_components);
COMPONENT_EXPORT(URL)
-CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components);
@@ -79,10 +81,52 @@ bool IPv6AddressToNumber(const char* spec,
const Component& host,
unsigned char address[16]);
COMPONENT_EXPORT(URL)
-bool IPv6AddressToNumber(const base::char16* spec,
+bool IPv6AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[16]);
+// Temporary enum for collecting histograms at the DNS and URL level about
+// hostname validity, for potentially updating the URL spec.
+//
+// This is used in histograms, so old values should not be reused, and new
+// values should be added at the bottom.
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+enum class HostSafetyStatus {
+ // Any canonical hostname that doesn't fit into any other class. IPv4
+ // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are
+ // broken are also considered OK.
+ kOk = 0,
+
+ // The top level domain looks numeric. This basically means it either
+ // parses as a number per the URL spec, or is entirely numeric ("09" doesn't
+ // currently parse as a number, since the leading "0" indicates an octal
+ // value).
+ kTopLevelDomainIsNumeric = 1,
+
+ // Both the top level domain and the next level domain look like a number,
+ // using the above definition. This is the case that is actually concerning -
+ // for these domains, the eTLD+1 is purely numeric, which means putting it as
+ // the hostname of a URL will potentially result in an IPv4 hostname. This is
+ // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this
+ // label will be returned instead.
+ kTwoHighestLevelDomainsAreNumeric = 2,
+
+ kMaxValue = kTwoHighestLevelDomainsAreNumeric,
+};
+
+// Calculates the HostSafetyStatus of a hostname. Hostname should have been
+// canonicalized. This function is only intended to be temporary, to inform
+// decisions around tightening up what the URL parser considers valid hostnames.
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+COMPONENT_EXPORT(URL)
+HostSafetyStatus CheckHostnameSafety(const char* hostname,
+ const Component& host);
+COMPONENT_EXPORT(URL)
+HostSafetyStatus CheckHostnameSafety(const char16_t* hostname,
+ const Component& host);
+
} // namespace url
#endif // URL_URL_CANON_IP_H_
diff --git a/chromium/url/url_canon_mailtourl.cc b/chromium/url/url_canon_mailtourl.cc
index 8a7ff1ae6b7..f4fe2b4e819 100644
--- a/chromium/url/url_canon_mailtourl.cc
+++ b/chromium/url/url_canon_mailtourl.cc
@@ -90,13 +90,13 @@ bool CanonicalizeMailtoURL(const char* spec,
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizeMailtoURL(const base::char16* spec,
+bool CanonicalizeMailtoURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeMailtoURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizeMailtoURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
}
bool ReplaceMailtoURL(const char* base,
@@ -113,7 +113,7 @@ bool ReplaceMailtoURL(const char* base,
bool ReplaceMailtoURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/chromium/url/url_canon_path.cc b/chromium/url/url_canon_path.cc
index c457ea32e61..d6fb64b5fa7 100644
--- a/chromium/url/url_canon_path.cc
+++ b/chromium/url/url_canon_path.cc
@@ -20,7 +20,8 @@ enum CharacterFlags {
// table below more clear when neither ESCAPE or UNESCAPE is set.
PASS = 0,
- // This character requires special handling in DoPartialPath. Doing this test
+ // This character requires special handling in DoPartialPathInternal. Doing
+ // this test
// first allows us to filter out the common cases of regular characters that
// can be directly copied.
SPECIAL = 1,
@@ -235,10 +236,8 @@ void CheckForNestedEscapes(const CHAR* spec,
}
}
-// Appends the given path to the output. It assumes that if the input path
-// starts with a slash, it should be copied to the output. If no path has
-// already been appended to the output (the case when not resolving
-// relative URLs), the path should begin with a slash.
+// Canonicalizes and appends the given path to the output. It assumes that if
+// the input path starts with a slash, it should be copied to the output.
//
// If there are already path components (this mode is used when appending
// relative paths for resolving), it assumes that the output already has
@@ -248,11 +247,11 @@ void CheckForNestedEscapes(const CHAR* spec,
// We do not collapse multiple slashes in a row to a single slash. It seems
// no web browsers do this, and we don't want incompatibilities, even though
// it would be correct for most systems.
-template<typename CHAR, typename UCHAR>
-bool DoPartialPath(const CHAR* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
+template <typename CHAR, typename UCHAR>
+bool DoPartialPathInternal(const CHAR* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
int end = path.end();
// We use this variable to minimize the amount of work done when unescaping --
@@ -279,16 +278,12 @@ bool DoPartialPath(const CHAR* spec,
// Needs special handling of some sort.
int dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
- // See if this dot was preceded by a slash in the output. We
- // assume that when canonicalizing paths, they will always
- // start with a slash and not a dot, so we don't have to
- // bounds check the output.
+ // See if this dot was preceded by a slash in the output.
//
// Note that we check this in the case of dots so we don't have to
// special case slashes. Since slashes are much more common than
// dots, this actually increases performance measurably (though
// slightly).
- DCHECK(output->length() > path_begin_in_output);
if (output->length() > path_begin_in_output &&
output->at(output->length() - 1) == '/') {
// Slash followed by a dot, check to see if this means relative
@@ -382,6 +377,21 @@ bool DoPartialPath(const CHAR* spec,
return success;
}
+// Performs the same logic as in DoPartialPathInternal(), but updates the
+// publicly exposed CanonOutput structure similar to DoPath(). Returns
+// true if successful.
+template <typename CHAR, typename UCHAR>
+bool DoPartialPath(const CHAR* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path) {
+ out_path->begin = output->length();
+ bool success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ out_path->len = output->length() - out_path->begin;
+ return success;
+}
+
template<typename CHAR, typename UCHAR>
bool DoPath(const CHAR* spec,
const Component& path,
@@ -397,7 +407,8 @@ bool DoPath(const CHAR* spec,
if (!IsURLSlash(spec[path.begin]))
output->push_back('/');
- success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
} else {
// No input, canonical path is a slash.
output->push_back('/');
@@ -415,28 +426,41 @@ bool CanonicalizePath(const char* spec,
return DoPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePath(const base::char16* spec,
+bool CanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoPath<base::char16, base::char16>(spec, path, output, out_path);
+ return DoPath<char16_t, char16_t>(spec, path, output, out_path);
}
bool CanonicalizePartialPath(const char* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePartialPath(const base::char16* spec,
+bool CanonicalizePartialPath(const char16_t* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<base::char16, base::char16>(spec, path,
- path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path);
+}
+
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char, unsigned char>(
+ spec, path, path_begin_in_output, output);
+}
+
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char16_t, char16_t>(
+ spec, path, path_begin_in_output, output);
}
} // namespace url
diff --git a/chromium/url/url_canon_pathurl.cc b/chromium/url/url_canon_pathurl.cc
index 7f30e608f5c..12c424829ee 100644
--- a/chromium/url/url_canon_pathurl.cc
+++ b/chromium/url/url_canon_pathurl.cc
@@ -88,13 +88,29 @@ bool CanonicalizePathURL(const char* spec,
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizePathURL(const base::char16* spec,
+bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizePathURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizePathURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
+}
+
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
+ output, new_component);
+}
+
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
+ output, new_component);
}
bool ReplacePathURL(const char* base,
@@ -111,7 +127,7 @@ bool ReplacePathURL(const char* base,
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/chromium/url/url_canon_query.cc b/chromium/url/url_canon_query.cc
index bf59d104ca1..b3a1118cdf2 100644
--- a/chromium/url/url_canon_query.cc
+++ b/chromium/url/url_canon_query.cc
@@ -82,7 +82,7 @@ void RunConverter(const char* spec,
// Runs the converter with the given UTF-16 input. We don't have to do
// anything, but this overloaded function allows us to use the same code
// for both UTF-8 and UTF-16 input.
-void RunConverter(const base::char16* spec,
+void RunConverter(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
@@ -144,21 +144,20 @@ void CanonicalizeQuery(const char* spec,
output, out_query);
}
-void CanonicalizeQuery(const base::char16* spec,
+void CanonicalizeQuery(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output,
Component* out_query) {
- DoCanonicalizeQuery<base::char16, base::char16>(spec, query, converter,
- output, out_query);
+ DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output,
+ out_query);
}
-void ConvertUTF16ToQueryEncoding(const base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- DoConvertToQueryEncoding<base::char16, base::char16>(input, query,
- converter, output);
+ DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output);
}
} // namespace url
diff --git a/chromium/url/url_canon_relative.cc b/chromium/url/url_canon_relative.cc
index 9db211a22ef..2552251ad2a 100644
--- a/chromium/url/url_canon_relative.cc
+++ b/chromium/url/url_canon_relative.cc
@@ -358,8 +358,8 @@ bool DoResolveRelativePath(const char* base_url,
int path_begin = output->length();
CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(),
output);
- success &= CanonicalizePartialPath(relative_url, path, path_begin,
- output);
+ success &= CanonicalizePartialPathInternal(relative_url, path, path_begin,
+ output);
out_parsed->path = MakeRange(path_begin, output->length());
// Copy the rest of the stuff after the path from the relative path.
@@ -581,14 +581,14 @@ bool IsRelativeURL(const char* base,
bool IsRelativeURL(const char* base,
const Parsed& base_parsed,
- const base::char16* fragment,
+ const char16_t* fragment,
int fragment_len,
bool is_base_hierarchical,
bool* is_relative,
Component* relative_component) {
- return DoIsRelativeURL<base::char16>(
- base, base_parsed, fragment, fragment_len, is_base_hierarchical,
- is_relative, relative_component);
+ return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len,
+ is_base_hierarchical, is_relative,
+ relative_component);
}
bool ResolveRelativeURL(const char* base_url,
@@ -607,14 +607,14 @@ bool ResolveRelativeURL(const char* base_url,
bool ResolveRelativeURL(const char* base_url,
const Parsed& base_parsed,
bool base_is_file,
- const base::char16* relative_url,
+ const char16_t* relative_url,
const Component& relative_component,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* out_parsed) {
- return DoResolveRelativeURL<base::char16>(
- base_url, base_parsed, base_is_file, relative_url,
- relative_component, query_converter, output, out_parsed);
+ return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file,
+ relative_url, relative_component,
+ query_converter, output, out_parsed);
}
} // namespace url
diff --git a/chromium/url/url_canon_stdstring.h b/chromium/url/url_canon_stdstring.h
index c9a3feebbce..44edab7f56d 100644
--- a/chromium/url/url_canon_stdstring.h
+++ b/chromium/url/url_canon_stdstring.h
@@ -59,11 +59,11 @@ class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput {
// references to std::strings.
// Note: Extra const char* overloads are necessary to break ambiguities that
// would otherwise exist for char literals.
-template <typename STR>
-class StringPieceReplacements : public Replacements<typename STR::value_type> {
+template <typename CharT>
+class StringPieceReplacements : public Replacements<CharT> {
private:
- using CharT = typename STR::value_type;
- using StringPieceT = base::BasicStringPiece<STR>;
+ using StringT = std::basic_string<CharT>;
+ using StringPieceT = base::BasicStringPiece<CharT>;
using ParentT = Replacements<CharT>;
using SetterFun = void (ParentT::*)(const CharT*, const Component&);
@@ -74,35 +74,35 @@ class StringPieceReplacements : public Replacements<typename STR::value_type> {
public:
void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
- void SetSchemeStr(const STR&&) = delete;
+ void SetSchemeStr(const StringT&&) = delete;
void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
- void SetUsernameStr(const STR&&) = delete;
+ void SetUsernameStr(const StringT&&) = delete;
void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
- void SetPasswordStr(const STR&&) = delete;
+ void SetPasswordStr(const StringT&&) = delete;
void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
- void SetHostStr(const STR&&) = delete;
+ void SetHostStr(const StringT&&) = delete;
void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
- void SetPortStr(const STR&&) = delete;
+ void SetPortStr(const StringT&&) = delete;
void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
- void SetPathStr(const STR&&) = delete;
+ void SetPathStr(const StringT&&) = delete;
void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
- void SetQueryStr(const STR&&) = delete;
+ void SetQueryStr(const StringT&&) = delete;
void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
- void SetRefStr(const STR&&) = delete;
+ void SetRefStr(const StringT&&) = delete;
};
} // namespace url
diff --git a/chromium/url/url_canon_stdurl.cc b/chromium/url/url_canon_stdurl.cc
index 8e59feeea95..c7e745445db 100644
--- a/chromium/url/url_canon_stdurl.cc
+++ b/chromium/url/url_canon_stdurl.cc
@@ -150,16 +150,16 @@ bool CanonicalizeStandardURL(const char* spec,
output, new_parsed);
}
-bool CanonicalizeStandardURL(const base::char16* spec,
+bool CanonicalizeStandardURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeStandardURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, scheme_type,
- query_converter, output, new_parsed);
+ return DoCanonicalizeStandardURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter,
+ output, new_parsed);
}
// It might be nice in the future to optimize this so unchanged components don't
@@ -189,7 +189,7 @@ bool ReplaceStandardURL(const char* base,
// regular code path can be used.
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
diff --git a/chromium/url/url_canon_unittest.cc b/chromium/url/url_canon_unittest.cc
index 625021950fd..fb27fe7b020 100644
--- a/chromium/url/url_canon_unittest.cc
+++ b/chromium/url/url_canon_unittest.cc
@@ -12,6 +12,7 @@
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_canon_ip.h"
#include "url/url_canon_stdstring.h"
#include "url/url_test_utils.h"
@@ -186,7 +187,7 @@ TEST(URLCanonTest, UTF) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
int input_len = static_cast<int>(input_str.length());
bool success = true;
@@ -205,7 +206,7 @@ TEST(URLCanonTest, UTF) {
// UTF-16 -> UTF-8
std::string input8_str(utf_cases[i].input8);
- base::string16 input16_str(
+ std::u16string input16_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
@@ -258,7 +259,7 @@ TEST(URLCanonTest, Scheme) {
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
@@ -529,7 +530,7 @@ TEST(URLCanonTest, Host) {
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -580,7 +581,7 @@ TEST(URLCanonTest, Host) {
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -703,7 +704,7 @@ TEST(URLCanonTest, IPv4) {
}
// 16-bit version.
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
@@ -856,7 +857,7 @@ TEST(URLCanonTest, IPv6) {
}
// 16-bit version.
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
@@ -988,7 +989,7 @@ TEST(URLCanonTest, UserInfo) {
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1051,7 +1052,7 @@ TEST(URLCanonTest, Port) {
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(port_cases[i].input));
success = CanonicalizePort(wide_input.c_str(),
in_comp,
port_cases[i].default_port,
@@ -1066,105 +1067,117 @@ TEST(URLCanonTest, Port) {
}
}
-TEST(URLCanonTest, Path) {
- DualComponentCase path_cases[] = {
- // ----- path collapsing tests -----
- {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
- {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
- {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
- {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
- // double dots followed by a slash or the end of the string count
- {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
- {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
- // don't count double dots when they aren't followed by a slash
- {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
- // some in the middle
- {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8),
- true},
- {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
- Component(0, 2), true},
- // we should not be able to go above the root
- {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
- {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4),
- true},
- // escaped dots should be unescaped and treated the same as dots
- {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
- {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
- {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
- "/..bar", Component(0, 6), true},
- // Multiple slashes in a row should be preserved and treated like empty
- // directory names.
- {"////../..", L"////../..", "//", Component(0, 2), true},
-
- // ----- escaping tests -----
- {"/foo", L"/foo", "/foo", Component(0, 4), true},
- // Valid escape sequence
- {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
- // Invalid escape sequence we should pass through unchanged.
- {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
- {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
- // Invalid escape sequence: bad characters should be treated the same as
- // the sourrounding text, not as escaped (in this case, UTF-8).
- {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
- {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16),
- true},
- {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar",
- Component(0, 22), true},
- // Regular characters that are escaped should be unescaped
- {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
- // Funny characters that are unescaped should be escaped
- {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
- {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
- // Invalid characters that are escaped should cause a failure.
- {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
- // Some characters should be passed through unchanged regardless of esc.
- {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
- true},
- // Characters that are properly escaped should not have the case changed
- // of hex letters.
- {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
- true},
- // Funny characters that are unescaped should be escaped
- {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
- // Backslashes should get converted to forward slashes
- {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
- // Hashes found in paths (possibly only when the caller explicitly sets
- // the path on an already-parsed URL) should be escaped.
- {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
- // %7f should be allowed and %3D should not be unescaped (these were wrong
- // in a previous version).
- {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
- "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
- // @ should be passed through unchanged (escaped or unescaped).
- {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
- // Nested escape sequences should result in escaping the leading '%' if
- // unescaping would result in a new escape sequence.
- {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
- {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
- {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
- // Make sure truncated "nested" escapes don't result in reading off the
- // string end.
- {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
- // Don't unescape the leading '%' if unescaping doesn't result in a valid
- // new escape sequence.
- {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
- {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
- // Don't erroneously downcast a UTF-16 charater in a way that makes it
- // look like part of an escape sequence.
- {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
-
- // ----- encoding tests -----
- // Basic conversions
- {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
- L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
- Component(0, 37), true},
- // Invalid unicode characters should fail. We only do validation on
- // UTF-16 input, so this doesn't happen on 8-bit.
- {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
- {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
- };
+DualComponentCase kCommonPathCases[] = {
+ // ----- path collapsing tests -----
+ {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
+ {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
+ {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
+ {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
+ // double dots followed by a slash or the end of the string count
+ {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
+ {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
+ // don't count double dots when they aren't followed by a slash
+ {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
+ // some in the middle
+ {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true},
+ {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
+ Component(0, 2), true},
+ // we should not be able to go above the root
+ {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
+ {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true},
+ // escaped dots should be unescaped and treated the same as dots
+ {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
+ {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
+ {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
+ "/..bar", Component(0, 6), true},
+ // Multiple slashes in a row should be preserved and treated like empty
+ // directory names.
+ {"////../..", L"////../..", "//", Component(0, 2), true},
+
+ // ----- escaping tests -----
+ {"/foo", L"/foo", "/foo", Component(0, 4), true},
+ // Valid escape sequence
+ {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
+ // Invalid escape sequence we should pass through unchanged.
+ {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
+ {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
+ // Invalid escape sequence: bad characters should be treated the same as
+ // the surrounding text, not as escaped (in this case, UTF-8).
+ {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
+ {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true},
+ {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22),
+ true},
+ // Regular characters that are escaped should be unescaped
+ {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
+ // Funny characters that are unescaped should be escaped
+ {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
+ {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
+ // Invalid characters that are escaped should cause a failure.
+ {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+ // Some characters should be passed through unchanged regardless of esc.
+ {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
+ true},
+ // Characters that are properly escaped should not have the case changed
+ // of hex letters.
+ {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
+ true},
+ // Funny characters that are unescaped should be escaped
+ {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
+ // Backslashes should get converted to forward slashes
+ {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
+ // Hashes found in paths (possibly only when the caller explicitly sets
+ // the path on an already-parsed URL) should be escaped.
+ {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
+ // %7f should be allowed and %3D should not be unescaped (these were wrong
+ // in a previous version).
+ {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
+ "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
+ // @ should be passed through unchanged (escaped or unescaped).
+ {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
+ // Nested escape sequences should result in escaping the leading '%' if
+ // unescaping would result in a new escape sequence.
+ {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
+ {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
+ {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
+ // Make sure truncated "nested" escapes don't result in reading off the
+ // string end.
+ {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
+ // Don't unescape the leading '%' if unescaping doesn't result in a valid
+ // new escape sequence.
+ {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
+ {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
+ // Don't erroneously downcast a UTF-16 character in a way that makes it
+ // look like part of an escape sequence.
+ {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
+
+ // ----- encoding tests -----
+ // Basic conversions
+ {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+ L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
+ Component(0, 37), true},
+ // Invalid unicode characters should fail. We only do validation on
+ // UTF-16 input, so this doesn't happen on 8-bit.
+ {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
+ {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+};
- for (size_t i = 0; i < base::size(path_cases); i++) {
+typedef bool (*CanonFunc8Bit)(const char*,
+ const Component&,
+ CanonOutput*,
+ Component*);
+typedef bool (*CanonFunc16Bit)(const char16_t*,
+ const Component&,
+ CanonOutput*,
+ Component*);
+
+void DoPathTest(const DualComponentCase* path_cases,
+ size_t num_cases,
+ CanonFunc8Bit canon_func_8,
+ CanonFunc16Bit canon_func_16) {
+ for (size_t i = 0; i < num_cases; i++) {
+ testing::Message scope_message;
+ scope_message << path_cases[i].input8 << "," << path_cases[i].input16;
+ SCOPED_TRACE(scope_message);
if (path_cases[i].input8) {
int len = static_cast<int>(strlen(path_cases[i].input8));
Component in_comp(0, len);
@@ -1172,7 +1185,7 @@ TEST(URLCanonTest, Path) {
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp);
+ canon_func_8(path_cases[i].input8, in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1182,7 +1195,7 @@ TEST(URLCanonTest, Path) {
}
if (path_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(path_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1191,7 +1204,7 @@ TEST(URLCanonTest, Path) {
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp);
+ canon_func_16(input16.c_str(), in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1200,6 +1213,11 @@ TEST(URLCanonTest, Path) {
EXPECT_EQ(path_cases[i].expected, out_str);
}
}
+}
+
+TEST(URLCanonTest, Path) {
+ DoPathTest(kCommonPathCases, base::size(kCommonPathCases), CanonicalizePath,
+ CanonicalizePath);
// Manual test: embedded NULLs should be escaped and the URL should be marked
// as invalid.
@@ -1215,6 +1233,18 @@ TEST(URLCanonTest, Path) {
EXPECT_EQ("/ab%00c", out_str);
}
+TEST(URLCanonTest, PartialPath) {
+ DualComponentCase partial_path_cases[] = {
+ {".html", L".html", ".html", Component(0, 5), true},
+ {"", L"", "", Component(0, 0), true},
+ };
+
+ DoPathTest(kCommonPathCases, base::size(kCommonPathCases),
+ CanonicalizePartialPath, CanonicalizePartialPath);
+ DoPathTest(partial_path_cases, base::size(partial_path_cases),
+ CanonicalizePartialPath, CanonicalizePartialPath);
+}
+
TEST(URLCanonTest, Query) {
struct QueryCase {
const char* input8;
@@ -1258,7 +1288,7 @@ TEST(URLCanonTest, Query) {
}
if (query_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1332,7 +1362,7 @@ TEST(URLCanonTest, Ref) {
// 16-bit input
if (ref_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1952,6 +1982,53 @@ TEST(URLCanonTest, CanonicalizePathURL) {
}
}
+TEST(URLCanonTest, CanonicalizePathURLPath) {
+ struct PathCase {
+ std::string input;
+ std::wstring input16;
+ std::string expected;
+ } path_cases[] = {
+ {"Foo", L"Foo", "Foo"},
+ {"\":This /is interesting;?#", L"\":This /is interesting;?#",
+ "\":This /is interesting;?#"},
+ {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+ };
+
+ for (size_t i = 0; i < base::size(path_cases); i++) {
+ // 8-bit string input
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ url::Component out_component;
+ CanonicalizePathURLPath(path_cases[i].input.data(),
+ Component(0, path_cases[i].input.size()), &output,
+ &out_component);
+ output.Complete();
+
+ EXPECT_EQ(path_cases[i].expected, out_str);
+
+ EXPECT_EQ(0, out_component.begin);
+ EXPECT_EQ(path_cases[i].expected.size(),
+ static_cast<size_t>(out_component.len));
+
+ // 16-bit string input
+ std::string out_str16;
+ StdStringCanonOutput output16(&out_str16);
+ url::Component out_component16;
+ std::u16string input16(
+ test_utils::TruncateWStringToUTF16(path_cases[i].input16.data()));
+ CanonicalizePathURLPath(input16.c_str(),
+ Component(0, path_cases[i].input16.size()),
+ &output16, &out_component16);
+ output16.Complete();
+
+ EXPECT_EQ(path_cases[i].expected, out_str16);
+
+ EXPECT_EQ(0, out_component16.begin);
+ EXPECT_EQ(path_cases[i].expected.size(),
+ static_cast<size_t>(out_component16.len));
+ }
+}
+
TEST(URLCanonTest, CanonicalizeMailtoURL) {
struct URLCase {
const char* input;
@@ -2086,17 +2163,17 @@ TEST(URLCanonTest, _itow_s) {
// We fill the buffer with 0xff to ensure that it's getting properly
// null-terminated. We also allocate one byte more than what we tell
// _itoa_s about, and ensure that the extra byte is untouched.
- base::char16 buf[6];
+ char16_t buf[6];
const char fill_mem = 0xff;
- const base::char16 fill_char = 0xffff;
+ const char16_t fill_char = 0xffff;
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
// Test the edge cases - exactly the buffer size and one over
EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf));
+ EXPECT_EQ(u"1234", std::u16string(buf));
EXPECT_EQ(fill_char, buf[5]);
memset(buf, fill_mem, sizeof(buf));
@@ -2106,13 +2183,12 @@ TEST(URLCanonTest, _itow_s) {
// Test the template overload (note that this will see the full buffer)
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12"),
- base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12345, buf, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf));
+ EXPECT_EQ(u"12345", std::u16string(buf));
EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
}
@@ -2343,12 +2419,12 @@ TEST(URLCanonTest, ReplacementOverflow) {
// Override two components, the path with something short, and the query with
// something long enough to trigger the bug.
- Replacements<base::char16> repl;
- base::string16 new_query;
+ Replacements<char16_t> repl;
+ std::u16string new_query;
for (int i = 0; i < 4800; i++)
new_query.push_back('a');
- base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
+ std::u16string new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
repl.SetPath(new_path.c_str(), Component(0, 4));
repl.SetQuery(new_query.c_str(),
Component(0, static_cast<int>(new_query.length())));
@@ -2398,41 +2474,41 @@ TEST(URLCanonTest, IDNToASCII) {
RawCanonOutputW<1024> output;
// Basic ASCII test.
- base::string16 str = base::UTF8ToUTF16("hello");
+ std::u16string str = u"hello";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("hello"), base::string16(output.data()));
+ EXPECT_EQ(u"hello", std::u16string(output.data()));
output.set_length(0);
// Mixed ASCII/non-ASCII.
- str = base::UTF8ToUTF16("hellö");
+ str = u"hellö";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// All non-ASCII.
- str = base::UTF8ToUTF16("你好");
+ str = u"你好";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--6qq79v"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data()));
output.set_length(0);
// Characters that need mapping (the resulting Punycode is the encoding for
// "1⁄4").
- str = base::UTF8ToUTF16("¼");
+ str = u"¼";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--14-c6t"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and all ASCII. Should not
// modify the string.
- str = base::UTF8ToUTF16("xn--hell-8qa");
+ str = u"xn--hell-8qa";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
// Should fail, due to a special case: if the label starts with "xn--", it
// should be parsed as Punycode, which must be all ASCII.
- str = base::UTF8ToUTF16("xn--hellö");
+ str = u"xn--hellö";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
@@ -2440,9 +2516,120 @@ TEST(URLCanonTest, IDNToASCII) {
// This tests that there is still an error for the character '⁄' (U+2044),
// which would be a valid ASCII character, U+0044, if the high byte were
// ignored.
- str = base::UTF8ToUTF16("xn--1⁄4");
+ str = u"xn--1⁄4";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
}
+TEST(URLCanonTest, URLSafetyStatus) {
+ const struct {
+ const char* host;
+ HostSafetyStatus expected_safety_status;
+ } kTestCases[] = {
+ // Empty components are ok.
+ {"", HostSafetyStatus::kOk},
+ {".", HostSafetyStatus::kOk},
+ {"..", HostSafetyStatus::kOk},
+
+ // Hostnames with purely non-numeric components are ok.
+ {"com", HostSafetyStatus::kOk},
+ {"a.com", HostSafetyStatus::kOk},
+ {"a.b.com", HostSafetyStatus::kOk},
+
+ // Hostnames with components with letters and numbers are ok.
+ {"1com", HostSafetyStatus::kOk},
+ {"0a.0com", HostSafetyStatus::kOk},
+ {"0xa.0xb.0xcom", HostSafetyStatus::kOk},
+ {"com1", HostSafetyStatus::kOk},
+ {"a1.com1", HostSafetyStatus::kOk},
+ {"a1.b1.com1", HostSafetyStatus::kOk},
+
+ // Hostname components that are numbers and come before a final
+ // non-numeric component are ok.
+ {"1.com", HostSafetyStatus::kOk},
+ {"0.1.2com", HostSafetyStatus::kOk},
+
+ // Invalid hostnames are ok.
+ {"[", HostSafetyStatus::kOk},
+
+ // IPv6 hostnames are ok.
+ {"[::]", HostSafetyStatus::kOk},
+ {"[2001:db8::1]", HostSafetyStatus::kOk},
+
+ // IPv4 hostnames are ok.
+ {"1.2.3.4", HostSafetyStatus::kOk},
+ // IPv4 hostnames with creative representations are ok.
+ {"01.02.03.04", HostSafetyStatus::kOk},
+ {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk},
+ {"1.2", HostSafetyStatus::kOk},
+ {"1.2.3", HostSafetyStatus::kOk},
+ {"0", HostSafetyStatus::kOk},
+ {"0x0", HostSafetyStatus::kOk},
+ {"07", HostSafetyStatus::kOk},
+
+ // Hostnames with a final problematic top level domain.
+ {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {".0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+
+ // Hostnames with problematic two highest level domains.
+ {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ };
+
+ for (const auto& test_case : kTestCases) {
+ // Test with ASCII.
+ SCOPED_TRACE(test_case.host);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(test_case.host,
+ Component(0, strlen(test_case.host))));
+
+ // Test with ASCII and terminal dot, which shouldn't affect results for
+ // anything that doesn't already end in a dot (or anything that only has
+ // dots).
+ std::string host_with_dot = test_case.host;
+ host_with_dot += ".";
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(host_with_dot.c_str(),
+ Component(0, host_with_dot.size())));
+
+ // Test with ASCII and characters that are not part of the component.
+ std::string host_with_bonus_characters = test_case.host;
+ host_with_bonus_characters = "00" + host_with_bonus_characters + "00";
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(host_with_bonus_characters.c_str(),
+ Component(2, strlen(test_case.host))));
+
+ // Test with UTF-16.
+ std::u16string utf16 = base::UTF8ToUTF16(test_case.host);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size())));
+
+ // Test with UTF-16 and terminal dot.
+ std::u16string utf16_with_dot = base::UTF8ToUTF16(host_with_dot);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16_with_dot.c_str(),
+ Component(0, utf16_with_dot.size())));
+
+ // Test with UTF-16 and characters that are not part of the component.
+ std::u16string utf16_with_bonus_characters =
+ base::UTF8ToUTF16(host_with_bonus_characters);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16_with_bonus_characters.c_str(),
+ Component(2, utf16.size())));
+ }
+}
+
} // namespace url
diff --git a/chromium/url/url_file.h b/chromium/url/url_file.h
index 796d12c1880..45b8d9a5bee 100644
--- a/chromium/url/url_file.h
+++ b/chromium/url/url_file.h
@@ -16,7 +16,7 @@ namespace url {
#ifdef WIN32
// We allow both "c:" and "c|" as drive identifiers.
-inline bool IsWindowsDriveSeparator(base::char16 ch) {
+inline bool IsWindowsDriveSeparator(char16_t ch) {
return ch == ':' || ch == '|';
}
diff --git a/chromium/url/url_idna_icu.cc b/chromium/url/url_idna_icu.cc
index c92029c8e6d..4bd6a885dda 100644
--- a/chromium/url/url_idna_icu.cc
+++ b/chromium/url/url_idna_icu.cc
@@ -11,7 +11,6 @@
#include <ostream>
#include "base/check_op.h"
-#include "base/i18n/uchar.h"
#include "base/no_destructor.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
@@ -83,7 +82,7 @@ UIDNA* GetUIDNA() {
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
DCHECK(output->length() == 0); // Output buffer is assumed empty.
UIDNA* uidna = GetUIDNA();
@@ -91,10 +90,8 @@ bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- int output_length =
- uidna_nameToASCII(uidna, base::i18n::ToUCharPtr(src), src_len,
- base::i18n::ToUCharPtr(output->data()),
- output->capacity(), &info, &err);
+ int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
+ output->capacity(), &info, &err);
if (U_SUCCESS(err) && info.errors == 0) {
output->set_length(output_length);
return true;
diff --git a/chromium/url/url_idna_icu_alternatives_android.cc b/chromium/url/url_idna_icu_alternatives_android.cc
index 6f31c81d16d..19df70f9987 100644
--- a/chromium/url/url_idna_icu_alternatives_android.cc
+++ b/chromium/url/url_idna_icu_alternatives_android.cc
@@ -4,9 +4,10 @@
#include <string.h>
+#include <string>
+
#include "base/android/jni_android.h"
#include "base/android/jni_string.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "url/url_canon_internal.h"
#include "url/url_jni_headers/IDNStringUtil_jni.h"
@@ -17,7 +18,7 @@ namespace url {
// This uses the JDK's conversion function, which uses IDNA 2003, unlike the
// ICU implementation.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
DCHECK_EQ(0, output->length()); // Output buffer is assumed empty.
JNIEnv* env = base::android::AttachCurrentThread();
@@ -30,7 +31,7 @@ bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
if (java_result.is_null())
return false;
- base::string16 utf16_result =
+ std::u16string utf16_result =
base::android::ConvertJavaStringToUTF16(java_result);
output->Append(utf16_result.data(), static_cast<int>(utf16_result.size()));
return true;
diff --git a/chromium/url/url_idna_icu_alternatives_ios.mm b/chromium/url/url_idna_icu_alternatives_ios.mm
index 66b844e8a81..c5da3593269 100644
--- a/chromium/url/url_idna_icu_alternatives_ios.mm
+++ b/chromium/url/url_idna_icu_alternatives_ios.mm
@@ -4,7 +4,8 @@
#include <string.h>
-#include "base/strings/string16.h"
+#include <string>
+
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
@@ -14,7 +15,7 @@ namespace url {
// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
// to convert non-ASCII URL prior to passing to API.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
if (base::IsStringASCII(base::StringPiece16(src, src_len))) {
output->Append(src, src_len);
return true;
diff --git a/chromium/url/url_parse_file.cc b/chromium/url/url_parse_file.cc
index dac995941be..7d86c7b8f2d 100644
--- a/chromium/url/url_parse_file.cc
+++ b/chromium/url/url_parse_file.cc
@@ -215,7 +215,7 @@ void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
DoParseFileURL(url, url_len, parsed);
}
-void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileURL(url, url_len, parsed);
}
diff --git a/chromium/url/url_parse_internal.h b/chromium/url/url_parse_internal.h
index 76308780c46..4e2527a3642 100644
--- a/chromium/url/url_parse_internal.h
+++ b/chromium/url/url_parse_internal.h
@@ -12,13 +12,13 @@
namespace url {
// We treat slashes and backslashes the same for IE compatibility.
-inline bool IsURLSlash(base::char16 ch) {
+inline bool IsURLSlash(char16_t ch) {
return ch == '/' || ch == '\\';
}
// Returns true if we should trim this character from the URL because it is a
// space or a control character.
-inline bool ShouldTrimFromURL(base::char16 ch) {
+inline bool ShouldTrimFromURL(char16_t ch) {
return ch <= ' ';
}
@@ -67,13 +67,12 @@ void ParsePathInternal(const char* spec,
Component* filepath,
Component* query,
Component* ref);
-void ParsePathInternal(const base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
Component* ref);
-
// Given a spec and a pointer to the character after the colon following the
// scheme, this parses it and fills in the structure, Every item in the parsed
// structure is filled EXCEPT for the scheme, which is untouched.
@@ -81,7 +80,7 @@ void ParseAfterScheme(const char* spec,
int spec_len,
int after_scheme,
Parsed* parsed);
-void ParseAfterScheme(const base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed);
diff --git a/chromium/url/url_test_utils.h b/chromium/url/url_test_utils.h
index f4f51da4eb4..bb75c747e39 100644
--- a/chromium/url/url_test_utils.h
+++ b/chromium/url/url_test_utils.h
@@ -10,7 +10,6 @@
#include <string>
-#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/url_canon_internal.h"
@@ -24,11 +23,11 @@ namespace test_utils {
// in base bacause it passes invalid UTF-16 characters which is important for
// test purposes. As a result, this is not meant to handle true UTF-32 encoded
// strings.
-inline base::string16 TruncateWStringToUTF16(const wchar_t* src) {
- base::string16 str;
+inline std::u16string TruncateWStringToUTF16(const wchar_t* src) {
+ std::u16string str;
int length = static_cast<int>(wcslen(src));
for (int i = 0; i < length; ++i) {
- str.push_back(static_cast<base::char16>(src[i]));
+ str.push_back(static_cast<char16_t>(src[i]));
}
return str;
}
diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc
index 49cc6e689f3..c2456e20df8 100644
--- a/chromium/url/url_util.cc
+++ b/chromium/url/url_util.cc
@@ -137,7 +137,8 @@ template<typename CHAR> struct CharToStringPiece {
template<> struct CharToStringPiece<char> {
typedef base::StringPiece Piece;
};
-template<> struct CharToStringPiece<base::char16> {
+template <>
+struct CharToStringPiece<char16_t> {
typedef base::StringPiece16 Piece;
};
@@ -468,10 +469,10 @@ void DoSchemeModificationPreamble() {
// the SchemeRegistry has been used.
//
// This normally means you're trying to set up a new scheme too late or using
- // the SchemeRegistry too early in your application's init process. Make sure
- // that you haven't added any static GURL initializers in tests.
+ // the SchemeRegistry too early in your application's init process.
DCHECK(!g_scheme_registries_used.load())
- << "Trying to add a scheme after the lists have been used.";
+ << "Trying to add a scheme after the lists have been used. "
+ "Make sure that you haven't added any static GURL initializers in tests.";
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
@@ -557,6 +558,15 @@ void AddStandardScheme(const char* new_scheme, SchemeType type) {
&GetSchemeRegistryWithoutLocking()->standard_schemes);
}
+std::vector<std::string> GetStandardSchemes() {
+ std::vector<std::string> result;
+ result.reserve(GetSchemeRegistry().standard_schemes.size());
+ for (const auto& entry : GetSchemeRegistry().standard_schemes) {
+ result.push_back(entry.scheme);
+ }
+ return result;
+}
+
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
DoAddSchemeWithType(new_scheme, type,
&GetSchemeRegistryWithoutLocking()->referrer_schemes);
@@ -638,13 +648,13 @@ bool GetStandardSchemeType(const char* spec,
return DoIsStandard(spec, scheme, type);
}
-bool GetStandardSchemeType(const base::char16* spec,
+bool GetStandardSchemeType(const char16_t* spec,
const Component& scheme,
SchemeType* type) {
return DoIsStandard(spec, scheme, type);
}
-bool IsStandard(const base::char16* spec, const Component& scheme) {
+bool IsStandard(const char16_t* spec, const Component& scheme) {
SchemeType unused_scheme_type;
return DoIsStandard(spec, scheme, &unused_scheme_type);
}
@@ -662,7 +672,7 @@ bool FindAndCompareScheme(const char* str,
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
-bool FindAndCompareScheme(const base::char16* str,
+bool FindAndCompareScheme(const char16_t* str,
int str_len,
const char* compare,
Component* found_scheme) {
@@ -723,7 +733,7 @@ bool Canonicalize(const char* spec,
charset_converter, output, output_parsed);
}
-bool Canonicalize(const base::char16* spec,
+bool Canonicalize(const char16_t* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
@@ -749,7 +759,7 @@ bool ResolveRelative(const char* base_spec,
bool ResolveRelative(const char* base_spec,
int base_spec_len,
const Parsed& base_parsed,
- const base::char16* relative,
+ const char16_t* relative,
int relative_length,
CharsetConverter* charset_converter,
CanonOutput* output,
@@ -773,7 +783,7 @@ bool ReplaceComponents(const char* spec,
bool ReplaceComponents(const char* spec,
int spec_len,
const Parsed& parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* out_parsed) {
@@ -853,7 +863,7 @@ bool CompareSchemeComponent(const char* spec,
return DoCompareSchemeComponent(spec, component, compare_to);
}
-bool CompareSchemeComponent(const base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to) {
return DoCompareSchemeComponent(spec, component, compare_to);
diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h
index d4f5e1798dd..db7e08cc29c 100644
--- a/chromium/url/url_util.h
+++ b/chromium/url/url_util.h
@@ -10,7 +10,6 @@
#include <vector>
#include "base/component_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -63,6 +62,14 @@ COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();
COMPONENT_EXPORT(URL)
void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);
+// Returns the list of schemes registered for "standard" URLs. Note, this
+// should not be used if you just need to check if your protocol is standard
+// or not. Instead use the IsStandard() function below as it's much more
+// efficient. This function should only be used where you need to perform
+// other operations against the standard scheme list.
+COMPONENT_EXPORT(URL)
+std::vector<std::string> GetStandardSchemes();
+
// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
COMPONENT_EXPORT(URL)
@@ -134,7 +141,7 @@ bool FindAndCompareScheme(const char* str,
const char* compare,
Component* found_scheme);
COMPONENT_EXPORT(URL)
-bool FindAndCompareScheme(const base::char16* str,
+bool FindAndCompareScheme(const char16_t* str,
int str_len,
const char* compare,
Component* found_scheme);
@@ -144,7 +151,7 @@ inline bool FindAndCompareScheme(const std::string& str,
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
compare, found_scheme);
}
-inline bool FindAndCompareScheme(const base::string16& str,
+inline bool FindAndCompareScheme(const std::u16string& str,
const char* compare,
Component* found_scheme) {
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
@@ -156,7 +163,7 @@ inline bool FindAndCompareScheme(const base::string16& str,
COMPONENT_EXPORT(URL)
bool IsStandard(const char* spec, const Component& scheme);
COMPONENT_EXPORT(URL)
-bool IsStandard(const base::char16* spec, const Component& scheme);
+bool IsStandard(const char16_t* spec, const Component& scheme);
// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of allowed schemes for referrers (see AddReferrerScheme).
@@ -171,7 +178,7 @@ bool GetStandardSchemeType(const char* spec,
const Component& scheme,
SchemeType* type);
COMPONENT_EXPORT(URL)
-bool GetStandardSchemeType(const base::char16* spec,
+bool GetStandardSchemeType(const char16_t* spec,
const Component& scheme,
SchemeType* type);
@@ -213,7 +220,7 @@ bool Canonicalize(const char* spec,
CanonOutput* output,
Parsed* output_parsed);
COMPONENT_EXPORT(URL)
-bool Canonicalize(const base::char16* spec,
+bool Canonicalize(const char16_t* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
@@ -243,7 +250,7 @@ COMPONENT_EXPORT(URL)
bool ResolveRelative(const char* base_spec,
int base_spec_len,
const Parsed& base_parsed,
- const base::char16* relative,
+ const char16_t* relative,
int relative_length,
CharsetConverter* charset_converter,
CanonOutput* output,
@@ -265,7 +272,7 @@ COMPONENT_EXPORT(URL)
bool ReplaceComponents(const char* spec,
int spec_len,
const Parsed& parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* out_parsed);
diff --git a/chromium/url/url_util_internal.h b/chromium/url/url_util_internal.h
index 756c736d369..91466e1d5d6 100644
--- a/chromium/url/url_util_internal.h
+++ b/chromium/url/url_util_internal.h
@@ -7,7 +7,6 @@
#include <string>
-#include "base/strings/string16.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
@@ -17,7 +16,7 @@ namespace url {
bool CompareSchemeComponent(const char* spec,
const Component& component,
const char* compare_to);
-bool CompareSchemeComponent(const base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to);
diff --git a/chromium/url/url_util_unittest.cc b/chromium/url/url_util_unittest.cc
index ea4cd82aa7a..d41da4f4d14 100644
--- a/chromium/url/url_util_unittest.cc
+++ b/chromium/url/url_util_unittest.cc
@@ -136,6 +136,16 @@ TEST_F(URLUtilTest, GetStandardSchemeType) {
&scheme_type));
}
+TEST_F(URLUtilTest, GetStandardSchemes) {
+ std::vector<std::string> expected = {
+ kHttpsScheme, kHttpScheme, kFileScheme,
+ kFtpScheme, kWssScheme, kWsScheme,
+ kFileSystemScheme, kQuicTransportScheme, "foo",
+ };
+ AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY);
+ EXPECT_EQ(expected, GetStandardSchemes());
+}
+
TEST_F(URLUtilTest, ReplaceComponents) {
Parsed parsed;
RawCanonOutputT<char> output;
@@ -236,34 +246,33 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
for (size_t i = 0; i < base::size(decode_cases); i++) {
const char* input = decode_cases[i].input;
- RawCanonOutputT<base::char16> output;
+ RawCanonOutputT<char16_t> output;
DecodeURLEscapeSequences(input, strlen(input),
DecodeURLMode::kUTF8OrIsomorphic, &output);
- EXPECT_EQ(decode_cases[i].output,
- base::UTF16ToUTF8(base::string16(output.data(),
- output.length())));
+ EXPECT_EQ(decode_cases[i].output, base::UTF16ToUTF8(std::u16string(
+ output.data(), output.length())));
- RawCanonOutputT<base::char16> output_utf8;
+ RawCanonOutputT<char16_t> output_utf8;
DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
&output_utf8);
EXPECT_EQ(decode_cases[i].output,
base::UTF16ToUTF8(
- base::string16(output_utf8.data(), output_utf8.length())));
+ std::u16string(output_utf8.data(), output_utf8.length())));
}
// Our decode should decode %00
const char zero_input[] = "%00";
- RawCanonOutputT<base::char16> zero_output;
+ RawCanonOutputT<char16_t> zero_output;
DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8,
&zero_output);
- EXPECT_NE("%00", base::UTF16ToUTF8(
- base::string16(zero_output.data(), zero_output.length())));
+ EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(),
+ zero_output.length())));
// Test the error behavior for invalid UTF-8.
struct Utf8DecodeCase {
const char* input;
- std::vector<base::char16> expected_iso;
- std::vector<base::char16> expected_utf8;
+ std::vector<char16_t> expected_iso;
+ std::vector<char16_t> expected_utf8;
} utf8_decode_cases[] = {
// %e5%a5%bd is a valid UTF-8 sequence. U+597D
{"%e4%a0%e5%a5%bd",
@@ -279,17 +288,17 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
for (const auto& test : utf8_decode_cases) {
const char* input = test.input;
- RawCanonOutputT<base::char16> output_iso;
+ RawCanonOutputT<char16_t> output_iso;
DecodeURLEscapeSequences(input, strlen(input),
DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
- EXPECT_EQ(base::string16(test.expected_iso.data()),
- base::string16(output_iso.data(), output_iso.length()));
+ EXPECT_EQ(std::u16string(test.expected_iso.data()),
+ std::u16string(output_iso.data(), output_iso.length()));
- RawCanonOutputT<base::char16> output_utf8;
+ RawCanonOutputT<char16_t> output_utf8;
DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
&output_utf8);
- EXPECT_EQ(base::string16(test.expected_utf8.data()),
- base::string16(output_utf8.data(), output_utf8.length()));
+ EXPECT_EQ(std::u16string(test.expected_utf8.data()),
+ std::u16string(output_utf8.data(), output_utf8.length()));
}
}