summary | refs | log | tree | commit | diff
path: root/chromium/url
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-04-05 14:08:31 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-04-11 07:46:53 +0000
commit 6a4cabb866f66d4128a97cdc6d9d08ce074f1247 (patch)
tree ab00f70a5e89278d6a0d16ff0c42578dc4d84a2d /chromium/url
parent e733310db58160074f574c429d48f8308c0afe17 (diff)
download qtwebengine-chromium-6a4cabb866f66d4128a97cdc6d9d08ce074f1247.tar.gz
BASELINE: Update Chromium to 57.0.2987.144
Change-Id: I29db402ff696c71a04c4dbaec822c2e53efe0267
Reviewed-by: Peter Varga <pvarga@inf.u-szeged.hu>
Diffstat (limited to 'chromium/url')
-rw-r--r-- chromium/url/DEPS 3
-rw-r--r-- chromium/url/gurl.cc 33
-rw-r--r-- chromium/url/gurl_unittest.cc 1
-rw-r--r-- chromium/url/mojo/url_gurl_struct_traits_unittest.cc 2
-rw-r--r-- chromium/url/origin.cc 26
-rw-r--r-- chromium/url/origin.h 20
-rw-r--r-- chromium/url/origin_unittest.cc 9
-rw-r--r-- chromium/url/run_all_unittests.cc 5
-rw-r--r-- chromium/url/scheme_host_port.cc 21
-rw-r--r-- chromium/url/scheme_host_port.h 4
-rw-r--r-- chromium/url/scheme_host_port_unittest.cc 18
-rw-r--r-- chromium/url/third_party/mozilla/url_parse.cc 47
-rw-r--r-- chromium/url/third_party/mozilla/url_parse.h 3
-rw-r--r-- chromium/url/url_canon.h 5
-rw-r--r-- chromium/url/url_canon_relative.cc 17
-rw-r--r-- chromium/url/url_util.cc 258
-rw-r--r-- chromium/url/url_util.h 42
17 files changed, 338 insertions, 176 deletions
diff --git a/chromium/url/DEPS b/chromium/url/DEPS
index c89ac323c59..946d75fcd66 100644
--- a/chromium/url/DEPS
+++ b/chromium/url/DEPS
@@ -11,7 +11,6 @@ specific_include_rules = {
"+third_party/icu",
],
"run_all_unittests\.cc": [
- "+mojo/edk/embedder/embedder.h",
- "+mojo/edk/test/scoped_ipc_support.h",
+ "+mojo/edk/embedder",
],
}
diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc
index bdd35227b0d..43f30528593 100644
--- a/chromium/url/gurl.cc
+++ b/chromium/url/gurl.cc
@@ -108,9 +108,6 @@ GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
template<typename STR>
void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
bool trim_path_end) {
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- spec_.reserve(input_spec.size() + 32);
url::StdStringCanonOutput output(&spec_);
is_valid_ = url::Canonicalize(
input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -121,6 +118,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
*parsed_.inner_parsed(), true));
}
+ // Valid URLs always have non-empty specs.
+ DCHECK(!is_valid_ || !spec_.empty());
}
void GURL::InitializeFromCanonicalSpec() {
@@ -135,6 +134,7 @@ void GURL::InitializeFromCanonicalSpec() {
// what we would have produced. Skip checking for invalid URLs have no meaning
// and we can't always canonicalize then reproducibly.
if (is_valid_) {
+ DCHECK(!spec_.empty());
url::Component scheme;
// We can't do this check on the inner_url of a filesystem URL, as
// canonical_spec actually points to the start of the outer URL, so we'd
@@ -195,12 +195,7 @@ GURL GURL::Resolve(const std::string& relative) const {
return GURL();
GURL result;
-
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
parsed_, relative.data(),
static_cast<int>(relative.length()),
@@ -226,12 +221,7 @@ GURL GURL::Resolve(const base::string16& relative) const {
return GURL();
GURL result;
-
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
parsed_, relative.data(),
static_cast<int>(relative.length()),
@@ -259,11 +249,7 @@ GURL GURL::ReplaceComponents(
if (!is_valid_)
return GURL();
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
result.is_valid_ = url::ReplaceComponents(
spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
NULL, &output, &result.parsed_);
@@ -286,11 +272,7 @@ GURL GURL::ReplaceComponents(
if (!is_valid_)
return GURL();
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
result.is_valid_ = url::ReplaceComponents(
spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
NULL, &output, &result.parsed_);
@@ -440,14 +422,7 @@ std::string GURL::GetContent() const {
}
bool GURL::HostIsIPAddress() const {
- if (!is_valid_ || spec_.empty())
- return false;
-
- url::RawCanonOutputT<char, 128> ignored_output;
- url::CanonHostInfo host_info;
- url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
- &host_info);
- return host_info.IsIPAddress();
+ return is_valid_ && url::HostIsIPAddress(host_piece());
}
#ifdef WIN32
diff --git a/chromium/url/gurl_unittest.cc b/chromium/url/gurl_unittest.cc
index f8d4c05288a..24dee6c2a65 100644
--- a/chromium/url/gurl_unittest.cc
+++ b/chromium/url/gurl_unittest.cc
@@ -294,6 +294,7 @@ TEST(GURLTest, Resolve) {
{"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
{"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
{"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
+ {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
{"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
{"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
{"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
diff --git a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
index 4f7e908efc9..8556e0a4ca7 100644
--- a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
+++ b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
@@ -38,7 +38,7 @@ TEST(MojoGURLStructTraitsTest, Basic) {
base::MessageLoop message_loop;
mojom::UrlTestPtr proxy;
- UrlTestImpl impl(GetProxy(&proxy));
+ UrlTestImpl impl(MakeRequest(&proxy));
const char* serialize_cases[] = {
"http://www.google.com/",
diff --git a/chromium/url/origin.cc b/chromium/url/origin.cc
index 0cb4c10ced7..53600b1dc96 100644
--- a/chromium/url/origin.cc
+++ b/chromium/url/origin.cc
@@ -85,11 +85,21 @@ Origin::Origin(base::StringPiece scheme,
uint16_t port,
base::StringPiece suborigin,
SchemeHostPort::ConstructPolicy policy)
- : tuple_(scheme, host, port, policy) {
+ : tuple_(scheme.as_string(), host.as_string(), port, policy) {
unique_ = tuple_.IsInvalid();
suborigin_ = suborigin.as_string();
}
+Origin::Origin(std::string scheme,
+ std::string host,
+ uint16_t port,
+ std::string suborigin,
+ SchemeHostPort::ConstructPolicy policy)
+ : tuple_(std::move(scheme), std::move(host), port, policy) {
+ unique_ = tuple_.IsInvalid();
+ suborigin_ = std::move(suborigin);
+}
+
Origin::~Origin() {
}
@@ -101,18 +111,12 @@ Origin Origin::UnsafelyCreateOriginWithoutNormalization(
return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
}
-Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
- base::StringPiece host,
- uint16_t port) {
- return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
-}
-
Origin Origin::CreateFromNormalizedTupleWithSuborigin(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string scheme,
+ std::string host,
uint16_t port,
- base::StringPiece suborigin) {
- return Origin(scheme, host, port, suborigin,
+ std::string suborigin) {
+ return Origin(std::move(scheme), std::move(host), port, std::move(suborigin),
SchemeHostPort::ALREADY_CANONICALIZED);
}
diff --git a/chromium/url/origin.h b/chromium/url/origin.h
index 1c28588f311..4b838e420f3 100644
--- a/chromium/url/origin.h
+++ b/chromium/url/origin.h
@@ -104,18 +104,13 @@ class URL_EXPORT Origin {
// Creates an origin without sanity checking that the host is canonicalized.
// This should only be used when converting between already normalized types,
- // and should NOT be used for IPC.
- static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
- base::StringPiece host,
- uint16_t port);
-
- // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
- // as well.
+ // and should NOT be used for IPC. Method takes std::strings for use with move
+ // operators to avoid copies.
static Origin CreateFromNormalizedTupleWithSuborigin(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string scheme,
+ std::string host,
uint16_t port,
- base::StringPiece suborigin);
+ std::string suborigin);
~Origin();
@@ -173,6 +168,11 @@ class URL_EXPORT Origin {
uint16_t port,
base::StringPiece suborigin,
SchemeHostPort::ConstructPolicy policy);
+ Origin(std::string scheme,
+ std::string host,
+ uint16_t port,
+ std::string suborigin,
+ SchemeHostPort::ConstructPolicy policy);
SchemeHostPort tuple_;
bool unique_;
diff --git a/chromium/url/origin_unittest.cc b/chromium/url/origin_unittest.cc
index 0f17c26ef50..a5c30426f34 100644
--- a/chromium/url/origin_unittest.cc
+++ b/chromium/url/origin_unittest.cc
@@ -90,20 +90,11 @@ TEST(OriginTest, ConstructFromTuple) {
<< test_case.port;
}
SCOPED_TRACE(scope_message);
-
- url::Origin origin_without_suborigin =
- url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
- test_case.port);
-
url::Origin origin_with_suborigin =
url::Origin::CreateFromNormalizedTupleWithSuborigin(
test_case.scheme, test_case.host, test_case.port,
test_case.suborigin);
- EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
- EXPECT_EQ(test_case.host, origin_without_suborigin.host());
- EXPECT_EQ(test_case.port, origin_without_suborigin.port());
-
EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
EXPECT_EQ(test_case.host, origin_with_suborigin.host());
EXPECT_EQ(test_case.port, origin_with_suborigin.port());
diff --git a/chromium/url/run_all_unittests.cc b/chromium/url/run_all_unittests.cc
index c0b306a2e03..fcafd0331ee 100644
--- a/chromium/url/run_all_unittests.cc
+++ b/chromium/url/run_all_unittests.cc
@@ -13,7 +13,6 @@
#if !defined(OS_IOS)
#include "mojo/edk/embedder/embedder.h" // nogncheck
-#include "mojo/edk/test/scoped_ipc_support.h" // nogncheck
#endif
int main(int argc, char** argv) {
@@ -21,10 +20,6 @@ int main(int argc, char** argv) {
#if !defined(OS_IOS)
mojo::edk::Init();
- base::TestIOThread test_io_thread(base::TestIOThread::kAutoStart);
- std::unique_ptr<mojo::edk::test::ScopedIPCSupport> ipc_support;
- ipc_support.reset(
- new mojo::edk::test::ScopedIPCSupport(test_io_thread.task_runner()));
#endif
return base::LaunchUnitTests(
diff --git a/chromium/url/scheme_host_port.cc b/chromium/url/scheme_host_port.cc
index 5b359a76aa4..f0f56850f4d 100644
--- a/chromium/url/scheme_host_port.cc
+++ b/chromium/url/scheme_host_port.cc
@@ -59,12 +59,6 @@ bool IsValidInput(const base::StringPiece& scheme,
if (!is_standard)
return false;
- // These schemes do not follow the generic URL syntax, so we treat them as
- // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might
- // have a (scheme, host, port) tuple, they themselves do not).
- if (scheme == kFileSystemScheme || scheme == kBlobScheme)
- return false;
-
switch (scheme_type) {
case SCHEME_WITH_PORT:
// A URL with |scheme| is required to have the host and port (may be
@@ -116,24 +110,24 @@ bool IsValidInput(const base::StringPiece& scheme,
SchemeHostPort::SchemeHostPort() : port_(0) {
}
-SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string scheme,
+ std::string host,
uint16_t port,
ConstructPolicy policy)
: port_(0) {
if (!IsValidInput(scheme, host, port, policy))
return;
- scheme.CopyToString(&scheme_);
- host.CopyToString(&host_);
+ scheme_ = std::move(scheme);
+ host_ = std::move(host);
port_ = port;
}
SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
base::StringPiece host,
uint16_t port)
- : SchemeHostPort(scheme,
- host,
+ : SchemeHostPort(scheme.as_string(),
+ host.as_string(),
port,
ConstructPolicy::CHECK_CANONICALIZATION) {}
@@ -202,6 +196,9 @@ std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
if (IsInvalid())
return result;
+ // Reserve enough space for the "normal" case of scheme://host/.
+ result.reserve(scheme_.size() + host_.size() + 4);
+
if (!scheme_.empty()) {
parsed->scheme = Component(0, scheme_.length());
result.append(scheme_);
diff --git a/chromium/url/scheme_host_port.h b/chromium/url/scheme_host_port.h
index 065e4aa6059..b2e030dfff2 100644
--- a/chromium/url/scheme_host_port.h
+++ b/chromium/url/scheme_host_port.h
@@ -96,8 +96,8 @@ class URL_EXPORT SchemeHostPort {
// that the host and port are canonicalized. This should only be used when
// converting between already normalized types, and should NOT be used for
// IPC.
- SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
+ SchemeHostPort(std::string scheme,
+ std::string host,
uint16_t port,
ConstructPolicy policy);
diff --git a/chromium/url/scheme_host_port_unittest.cc b/chromium/url/scheme_host_port_unittest.cc
index 81d4371a85b..ba97a6a5492 100644
--- a/chromium/url/scheme_host_port_unittest.cc
+++ b/chromium/url/scheme_host_port_unittest.cc
@@ -42,11 +42,19 @@ TEST(SchemeHostPortTest, Invalid) {
EXPECT_TRUE(invalid.IsInvalid());
EXPECT_TRUE(invalid.Equals(invalid));
- const char* urls[] = {"data:text/html,Hello!",
- "javascript:alert(1)",
- "file://example.com:443/etc/passwd",
- "blob:https://example.com/uuid-goes-here",
- "filesystem:https://example.com/temporary/yay.png"};
+ const char* urls[] = {
+ "data:text/html,Hello!", "javascript:alert(1)",
+ "file://example.com:443/etc/passwd",
+
+ // These schemes do not follow the generic URL syntax, so make sure we
+ // treat them as invalid (scheme, host, port) tuples (even though such
+ // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves
+ // do not). This is only *implicitly* checked in the code, by means of
+ // blob schemes not being standard, and filesystem schemes having type
+ // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit
+ // checks no longer hold, this policy should be made explicit.
+ "blob:https://example.com/uuid-goes-here",
+ "filesystem:https://example.com/temporary/yay.png"};
for (auto* test : urls) {
SCOPED_TRACE(test);
diff --git a/chromium/url/third_party/mozilla/url_parse.cc b/chromium/url/third_party/mozilla/url_parse.cc
index ba842b87b5d..41768601244 100644
--- a/chromium/url/third_party/mozilla/url_parse.cc
+++ b/chromium/url/third_party/mozilla/url_parse.cc
@@ -175,6 +175,31 @@ void DoParseAuthority(const CHAR* spec,
}
}
+template <typename CHAR>
+inline void FindQueryAndRefParts(const CHAR* spec,
+ const Component& path,
+ int* query_separator,
+ int* ref_separator) {
+ int path_end = path.begin + path.len;
+ for (int i = path.begin; i < path_end; i++) {
+ switch (spec[i]) {
+ case '?':
+ // Only match the query string if it precedes the reference fragment
+ // and when we haven't found one already.
+ if (*query_separator < 0)
+ *query_separator = i;
+ break;
+ case '#':
+ // Record the first # sign only.
+ if (*ref_separator < 0) {
+ *ref_separator = i;
+ return;
+ }
+ break;
+ }
+ }
+}
+
template<typename CHAR>
void ParsePath(const CHAR* spec,
const Component& path,
@@ -193,25 +218,9 @@ void ParsePath(const CHAR* spec,
DCHECK(path.len > 0) << "We should never have 0 length paths";
// Search for first occurrence of either ? or #.
- int path_end = path.begin + path.len;
-
int query_separator = -1; // Index of the '?'
int ref_separator = -1; // Index of the '#'
- for (int i = path.begin; i < path_end; i++) {
- switch (spec[i]) {
- case '?':
- // Only match the query string if it precedes the reference fragment
- // and when we haven't found one already.
- if (ref_separator < 0 && query_separator < 0)
- query_separator = i;
- break;
- case '#':
- // Record the first # sign only.
- if (ref_separator < 0)
- ref_separator = i;
- break;
- }
- }
+ FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
// Markers pointing to the character after each of these corresponding
// components. The code below words from the end back to the beginning,
@@ -219,6 +228,7 @@ void ParsePath(const CHAR* spec,
int file_end, query_end;
// Ref fragment: from the # to the end of the path.
+ int path_end = path.begin + path.len;
if (ref_separator >= 0) {
file_end = query_end = ref_separator;
*ref = MakeRange(ref_separator + 1, path_end);
@@ -680,8 +690,7 @@ bool DoExtractQueryKeyValue(const CHAR* spec,
} // namespace
-Parsed::Parsed() : inner_parsed_(NULL) {
-}
+Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
Parsed::Parsed(const Parsed& other) :
scheme(other.scheme),
diff --git a/chromium/url/third_party/mozilla/url_parse.h b/chromium/url/third_party/mozilla/url_parse.h
index 222d6053232..968578badbb 100644
--- a/chromium/url/third_party/mozilla/url_parse.h
+++ b/chromium/url/third_party/mozilla/url_parse.h
@@ -177,6 +177,9 @@ struct URL_EXPORT Parsed {
// the string with the scheme stripped off.
Component GetContent() const;
+ // True if whitespace was removed from the URL during parsing.
+ bool whitespace_removed;
+
// This is used for nested URL types, currently only filesystem. If you
// parse a filesystem URL, the resulting Parsed will have a nested
// inner_parsed_ to hold the parsed inner URL's component information.
diff --git a/chromium/url/url_canon.h b/chromium/url/url_canon.h
index c4852e490b0..ff66c6e3086 100644
--- a/chromium/url/url_canon.h
+++ b/chromium/url/url_canon.h
@@ -117,6 +117,11 @@ class CanonOutputT {
cur_len_ += str_len;
}
+ void ReserveSizeIfNeeded(int estimated_size) {
+ if (estimated_size > buffer_len_)
+ Resize(estimated_size);
+ }
+
protected:
// Grows the given buffer so that it can fit at least |min_additional|
// characters. Returns true if the buffer could be resized, false on OOM.
diff --git a/chromium/url/url_canon_relative.cc b/chromium/url/url_canon_relative.cc
index e34ea2fa249..8259056f5e5 100644
--- a/chromium/url/url_canon_relative.cc
+++ b/chromium/url/url_canon_relative.cc
@@ -4,6 +4,8 @@
// Canonicalizer functions for working with and resolving relative URLs.
+#include <algorithm>
+
#include "base/logging.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -264,7 +266,7 @@ int CopyBaseDriveSpecIfNecessary(const char* base_url,
#endif // WIN32
// A subroutine of DoResolveRelativeURL, this resolves the URL knowning that
-// the input is a relative path or less (qyuery or ref).
+// the input is a relative path or less (query or ref).
template<typename CHAR>
bool DoResolveRelativePath(const char* base_url,
const Parsed& base_parsed,
@@ -280,7 +282,13 @@ bool DoResolveRelativePath(const char* base_url,
// also know we have a path so can copy up to there.
Component path, query, ref;
ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
- // Canonical URLs always have a path, so we can use that offset.
+
+ // Canonical URLs always have a path, so we can use that offset. Reserve
+ // enough room for the base URL, the new path, and some extra bytes for
+ // possible escaped characters.
+ output->ReserveSizeIfNeeded(
+ base_parsed.path.begin +
+ std::max(path.end(), std::max(query.end(), ref.end())) + 8);
output->Append(base_url, base_parsed.path.begin);
if (path.len > 0) {
@@ -394,6 +402,11 @@ bool DoResolveRelativeHost(const char* base_url,
replacements.SetQuery(relative_url, relative_parsed.query);
replacements.SetRef(relative_url, relative_parsed.ref);
+ // Length() does not include the old scheme, so make sure to add it from the
+ // base URL.
+ output->ReserveSizeIfNeeded(
+ replacements.components().Length() +
+ base_parsed.CountCharactersBefore(Parsed::USERNAME, false) + 8);
return ReplaceStandardURL(base_url, base_parsed, replacements,
query_converter, output, out_parsed);
}
diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc
index 0a84d5e23cb..2c8d6978cb7 100644
--- a/chromium/url/url_util.cc
+++ b/chromium/url/url_util.cc
@@ -6,12 +6,12 @@
#include <stddef.h>
#include <string.h>
-#include <vector>
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
#include "url/url_file.h"
#include "url/url_util_internal.h"
@@ -19,8 +19,14 @@ namespace url {
namespace {
-const int kNumStandardURLSchemes = 10;
-const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
+// Pass this enum through for methods which would like to know if whitespace
+// removal is necessary.
+enum WhitespaceRemovalPolicy {
+ REMOVE_WHITESPACE,
+ DO_NOT_REMOVE_WHITESPACE,
+};
+
+const SchemeWithType kStandardURLSchemes[] = {
{kHttpScheme, SCHEME_WITH_PORT},
{kHttpsScheme, SCHEME_WITH_PORT},
// Yes, file URLs can have a hostname, so file URLs should be handled as
@@ -36,21 +42,50 @@ const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
{kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
-const int kNumReferrerURLSchemes = 4;
-const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
+const SchemeWithType kReferrerURLSchemes[] = {
{kHttpScheme, SCHEME_WITH_PORT},
{kHttpsScheme, SCHEME_WITH_PORT},
{kHttpSuboriginScheme, SCHEME_WITH_PORT},
{kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
+const char* kSecureSchemes[] = {
+ kHttpsScheme,
+ kAboutScheme,
+ kDataScheme,
+ kWssScheme,
+};
+
+const char* kLocalSchemes[] = {
+ kFileScheme,
+};
+
+const char* kNoAccessSchemes[] = {
+ kAboutScheme,
+ kJavaScriptScheme,
+ kDataScheme,
+};
+
+const char* kCORSEnabledSchemes[] = {
+ kHttpScheme,
+ kHttpsScheme,
+ kDataScheme,
+};
+
+bool initialized = false;
+
// Lists of the currently installed standard and referrer schemes. These lists
-// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are
-// leaked on shutdown to prevent any destructors from being called that will
-// slow us down or cause problems.
+// are lazily initialized by Initialize and are leaked on shutdown to prevent
+// any destructors from being called that will slow us down or cause problems.
std::vector<SchemeWithType>* standard_schemes = nullptr;
std::vector<SchemeWithType>* referrer_schemes = nullptr;
+// Similar to above, initialized by the Init*Schemes methods.
+std::vector<std::string>* secure_schemes = nullptr;
+std::vector<std::string>* local_schemes = nullptr;
+std::vector<std::string>* no_access_schemes = nullptr;
+std::vector<std::string>* cors_enabled_schemes = nullptr;
+
// See the LockSchemeRegistries declaration in the header.
bool scheme_registries_locked = false;
@@ -65,27 +100,22 @@ template<> struct CharToStringPiece<base::char16> {
typedef base::StringPiece16 Piece;
};
-void InitSchemes(std::vector<SchemeWithType>** schemes,
- const SchemeWithType* initial_schemes,
+void InitSchemes(std::vector<std::string>** schemes,
+ const char** initial_schemes,
size_t size) {
- if (*schemes)
- return;
- *schemes = new std::vector<SchemeWithType>(size);
+ *schemes = new std::vector<std::string>(size);
for (size_t i = 0; i < size; i++) {
- (*schemes)->push_back(initial_schemes[i]);
+ (*(*schemes))[i] = initial_schemes[i];
}
}
-// Ensures that the standard_schemes list is initialized, does nothing if
-// it already has values.
-void InitStandardSchemes() {
- InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes);
-}
-
-// Ensures that the referrer_schemes list is initialized, does nothing if
-// it already has values.
-void InitReferrerSchemes() {
- InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes);
+void InitSchemesWithType(std::vector<SchemeWithType>** schemes,
+ const SchemeWithType* initial_schemes,
+ size_t size) {
+ *schemes = new std::vector<SchemeWithType>(size);
+ for (size_t i = 0; i < size; i++) {
+ (*(*schemes))[i] = initial_schemes[i];
+ }
}
// Given a string and a range inside the string, compares it to the given
@@ -125,7 +155,7 @@ bool DoIsInSchemes(const CHAR* spec,
template<typename CHAR>
bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
- InitStandardSchemes();
+ Initialize();
return DoIsInSchemes(spec, scheme, type, *standard_schemes);
}
@@ -154,19 +184,28 @@ bool DoFindAndCompareScheme(const CHAR* str,
return DoCompareSchemeComponent(spec, our_scheme, compare);
}
-template<typename CHAR>
-bool DoCanonicalize(const CHAR* in_spec,
- int in_spec_len,
+template <typename CHAR>
+bool DoCanonicalize(const CHAR* spec,
+ int spec_len,
bool trim_path_end,
+ WhitespaceRemovalPolicy whitespace_policy,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- // Remove any whitespace from the middle of the relative URL, possibly
- // copying to the new buffer.
+ // Reserve enough room in the output for the input, plus some extra so that
+ // we have room if we have to escape a few things without reallocating.
+ output->ReserveSizeIfNeeded(spec_len + 8);
+
+ // Remove any whitespace from the middle of the relative URL if necessary.
+ // Possibly this will result in copying to the new buffer.
RawCanonOutputT<CHAR> whitespace_buffer;
- int spec_len;
- const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
- &whitespace_buffer, &spec_len);
+ if (whitespace_policy == REMOVE_WHITESPACE) {
+ int original_len = spec_len;
+ spec =
+ RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len);
+ if (spec_len != original_len)
+ output_parsed->whitespace_removed = true;
+ }
Parsed parsed_input;
#ifdef WIN32
@@ -246,6 +285,9 @@ bool DoResolveRelative(const char* base_spec,
const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
&whitespace_buffer,
&relative_length);
+ if (in_relative_length != relative_length)
+ output_parsed->whitespace_removed = true;
+
bool base_is_authority_based = false;
bool base_is_hierarchical = false;
if (base_spec &&
@@ -271,6 +313,9 @@ bool DoResolveRelative(const char* base_spec,
return false;
}
+ // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
+ // ReserveRelativeURL, to enable more accurate buffer sizes.
+
// Pretend for a moment that |base_spec| is a standard URL. Normally
// non-standard URLs are treated as PathURLs, but if the base has an
// authority we would like to preserve it.
@@ -287,7 +332,8 @@ bool DoResolveRelative(const char* base_spec,
// based on base_parsed_authority instead of base_parsed) and needs to be
// re-created.
DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
- charset_converter, output, output_parsed);
+ REMOVE_WHITESPACE, charset_converter, output,
+ output_parsed);
return did_resolve_succeed;
}
} else if (is_relative) {
@@ -300,8 +346,9 @@ bool DoResolveRelative(const char* base_spec,
}
// Not relative, canonicalize the input.
- return DoCanonicalize(relative, relative_length, true, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(relative, relative_length, true,
+ DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
+ output_parsed);
}
template<typename CHAR>
@@ -348,8 +395,8 @@ bool DoReplaceComponents(const char* spec,
RawCanonOutput<128> recanonicalized;
Parsed recanonicalized_parsed;
DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
- charset_converter,
- &recanonicalized, &recanonicalized_parsed);
+ REMOVE_WHITESPACE, charset_converter, &recanonicalized,
+ &recanonicalized_parsed);
// Recurse using the version with the scheme already replaced. This will now
// use the replacement rules for the new scheme.
@@ -371,6 +418,12 @@ bool DoReplaceComponents(const char* spec,
charset_converter, output, out_parsed);
}
+ // TODO(csharrison): We could be smarter about size to reserve if this is done
+ // in callers below, and the code checks to see which components are being
+ // replaced, and with what length. If this ends up being a hot spot it should
+ // be changed.
+ output->ReserveSizeIfNeeded(spec_len + 8);
+
// If we get here, then we know the scheme doesn't need to be replaced, so can
// just key off the scheme in the spec to know how to do the replacements.
if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
@@ -394,9 +447,7 @@ bool DoReplaceComponents(const char* spec,
return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
}
-void DoAddScheme(const char* new_scheme,
- SchemeType type,
- std::vector<SchemeWithType>* schemes) {
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
DCHECK(schemes);
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
@@ -412,6 +463,29 @@ void DoAddScheme(const char* new_scheme,
if (scheme_len == 0)
return;
+ DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
+ schemes->push_back(std::string(new_scheme));
+}
+
+void DoAddSchemeWithType(const char* new_scheme,
+ SchemeType type,
+ std::vector<SchemeWithType>* schemes) {
+ DCHECK(schemes);
+ // If this assert triggers, it means you've called Add*Scheme after
+ // LockSchemeRegistries has been called (see the header file for
+ // LockSchemeRegistries for more).
+ //
+ // This normally means you're trying to set up a new scheme too late in your
+ // application's init process. Locate where your app does this initialization
+ // and calls LockSchemeRegistries, and add your new scheme there.
+ DCHECK(!scheme_registries_locked)
+ << "Trying to add a scheme after the lists have been locked.";
+
+ size_t scheme_len = strlen(new_scheme);
+ if (scheme_len == 0)
+ return;
+
+ DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
// Duplicate the scheme into a new buffer and add it to the list of standard
// schemes. This pointer will be leaked on shutdown.
char* dup_scheme = new char[scheme_len + 1];
@@ -427,29 +501,85 @@ void DoAddScheme(const char* new_scheme,
} // namespace
void Initialize() {
- InitStandardSchemes();
- InitReferrerSchemes();
+ if (initialized)
+ return;
+ InitSchemesWithType(&standard_schemes, kStandardURLSchemes,
+ arraysize(kStandardURLSchemes));
+ InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes,
+ arraysize(kReferrerURLSchemes));
+ InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes));
+ InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes));
+ InitSchemes(&no_access_schemes, kNoAccessSchemes,
+ arraysize(kNoAccessSchemes));
+ InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes,
+ arraysize(kCORSEnabledSchemes));
+ initialized = true;
}
void Shutdown() {
- if (standard_schemes) {
- delete standard_schemes;
- standard_schemes = NULL;
- }
- if (referrer_schemes) {
- delete referrer_schemes;
- referrer_schemes = NULL;
- }
+ initialized = false;
+ delete standard_schemes;
+ standard_schemes = nullptr;
+ delete referrer_schemes;
+ referrer_schemes = nullptr;
+ delete secure_schemes;
+ secure_schemes = nullptr;
+ delete local_schemes;
+ local_schemes = nullptr;
+ delete no_access_schemes;
+ no_access_schemes = nullptr;
+ delete cors_enabled_schemes;
+ cors_enabled_schemes = nullptr;
}
void AddStandardScheme(const char* new_scheme, SchemeType type) {
- InitStandardSchemes();
- DoAddScheme(new_scheme, type, standard_schemes);
+ Initialize();
+ DoAddSchemeWithType(new_scheme, type, standard_schemes);
}
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
- InitReferrerSchemes();
- DoAddScheme(new_scheme, type, referrer_schemes);
+ Initialize();
+ DoAddSchemeWithType(new_scheme, type, referrer_schemes);
+}
+
+void AddSecureScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, secure_schemes);
+}
+
+const std::vector<std::string>& GetSecureSchemes() {
+ Initialize();
+ return *secure_schemes;
+}
+
+void AddLocalScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, local_schemes);
+}
+
+const std::vector<std::string>& GetLocalSchemes() {
+ Initialize();
+ return *local_schemes;
+}
+
+void AddNoAccessScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, no_access_schemes);
+}
+
+const std::vector<std::string>& GetNoAccessSchemes() {
+ Initialize();
+ return *no_access_schemes;
+}
+
+void AddCORSEnabledScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, cors_enabled_schemes);
+}
+
+const std::vector<std::string>& GetCORSEnabledSchemes() {
+ Initialize();
+ return *cors_enabled_schemes;
}
void LockSchemeRegistries() {
@@ -473,7 +603,7 @@ bool IsStandard(const base::char16* spec, const Component& scheme) {
}
bool IsReferrerScheme(const char* spec, const Component& scheme) {
- InitReferrerSchemes();
+ Initialize();
SchemeType unused_scheme_type;
return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
}
@@ -529,14 +659,22 @@ bool DomainIs(base::StringPiece canonicalized_host,
return true;
}
+bool HostIsIPAddress(base::StringPiece host) {
+ url::RawCanonOutputT<char, 128> ignored_output;
+ url::CanonHostInfo host_info;
+ url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
+ &ignored_output, &host_info);
+ return host_info.IsIPAddress();
+}
+
bool Canonicalize(const char* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+ charset_converter, output, output_parsed);
}
bool Canonicalize(const base::char16* spec,
@@ -545,8 +683,8 @@ bool Canonicalize(const base::char16* spec,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+ charset_converter, output, output_parsed);
}
bool ResolveRelative(const char* base_spec,
diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h
index 724ce956a7f..a4b74b13e5d 100644
--- a/chromium/url/url_util.h
+++ b/chromium/url/url_util.h
@@ -6,6 +6,7 @@
#define URL_URL_UTIL_H_
#include <string>
+#include <vector>
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
@@ -57,25 +58,44 @@ struct URL_EXPORT SchemeWithType {
SchemeType type;
};
+// The following Add*Scheme methods are not threadsafe and cannot be called
+// concurrently with any other url_util function. They will assert if the lists
+// of schemes have been locked (see LockSchemeRegistries).
+
// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
+
URL_EXPORT void AddStandardScheme(const char* new_scheme,
SchemeType scheme_type);
// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
URL_EXPORT void AddReferrerScheme(const char* new_scheme,
SchemeType scheme_type);
+// Adds an application-defined scheme to the list of schemes that do not trigger
+// mixed content warnings.
+URL_EXPORT void AddSecureScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
+
+// Adds an application-defined scheme to the list of schemes that normal pages
+// cannot link to or access (i.e., with the same security rules as those applied
+// to "file" URLs).
+URL_EXPORT void AddLocalScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
+
+// Adds an application-defined scheme to the list of schemes that cause pages
+// loaded with them to not have access to pages loaded with any other URL
+// scheme.
+URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can be sent
+// CORS requests.
+URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
+
// Sets a flag to prevent future calls to Add*Scheme from succeeding.
//
// This is designed to help prevent errors for multithreaded applications.
@@ -133,7 +153,7 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec,
const Component& scheme,
SchemeType* type);
-// Domains ---------------------------------------------------------------------
+// Hosts ----------------------------------------------------------------------
// Returns true if the |canonicalized_host| matches or is in the same domain as
// the given |lower_ascii_domain| string. For example, if the canonicalized
@@ -146,6 +166,10 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec,
URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
base::StringPiece lower_ascii_domain);
+// Returns true if the hostname is an IP address. Note: this function isn't very
+// cheap, as it must re-parse the host to verify.
+URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
+
// URL library wrappers --------------------------------------------------------
// Parses the given spec according to the extracted scheme type. Normal users