diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-04-05 14:08:31 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-04-11 07:46:53 +0000 |
commit | 6a4cabb866f66d4128a97cdc6d9d08ce074f1247 (patch) | |
tree | ab00f70a5e89278d6a0d16ff0c42578dc4d84a2d /chromium/url | |
parent | e733310db58160074f574c429d48f8308c0afe17 (diff) | |
download | qtwebengine-chromium-6a4cabb866f66d4128a97cdc6d9d08ce074f1247.tar.gz |
BASELINE: Update Chromium to 57.0.2987.144
Change-Id: I29db402ff696c71a04c4dbaec822c2e53efe0267
Reviewed-by: Peter Varga <pvarga@inf.u-szeged.hu>
Diffstat (limited to 'chromium/url')
-rw-r--r-- | chromium/url/DEPS | 3 | ||||
-rw-r--r-- | chromium/url/gurl.cc | 33 | ||||
-rw-r--r-- | chromium/url/gurl_unittest.cc | 1 | ||||
-rw-r--r-- | chromium/url/mojo/url_gurl_struct_traits_unittest.cc | 2 | ||||
-rw-r--r-- | chromium/url/origin.cc | 26 | ||||
-rw-r--r-- | chromium/url/origin.h | 20 | ||||
-rw-r--r-- | chromium/url/origin_unittest.cc | 9 | ||||
-rw-r--r-- | chromium/url/run_all_unittests.cc | 5 | ||||
-rw-r--r-- | chromium/url/scheme_host_port.cc | 21 | ||||
-rw-r--r-- | chromium/url/scheme_host_port.h | 4 | ||||
-rw-r--r-- | chromium/url/scheme_host_port_unittest.cc | 18 | ||||
-rw-r--r-- | chromium/url/third_party/mozilla/url_parse.cc | 47 | ||||
-rw-r--r-- | chromium/url/third_party/mozilla/url_parse.h | 3 | ||||
-rw-r--r-- | chromium/url/url_canon.h | 5 | ||||
-rw-r--r-- | chromium/url/url_canon_relative.cc | 17 | ||||
-rw-r--r-- | chromium/url/url_util.cc | 258 | ||||
-rw-r--r-- | chromium/url/url_util.h | 42 |
17 files changed, 338 insertions, 176 deletions
diff --git a/chromium/url/DEPS b/chromium/url/DEPS index c89ac323c59..946d75fcd66 100644 --- a/chromium/url/DEPS +++ b/chromium/url/DEPS @@ -11,7 +11,6 @@ specific_include_rules = { "+third_party/icu", ], "run_all_unittests\.cc": [ - "+mojo/edk/embedder/embedder.h", - "+mojo/edk/test/scoped_ipc_support.h", + "+mojo/edk/embedder", ], } diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc index bdd35227b0d..43f30528593 100644 --- a/chromium/url/gurl.cc +++ b/chromium/url/gurl.cc @@ -108,9 +108,6 @@ GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid) template<typename STR> void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec, bool trim_path_end) { - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - spec_.reserve(input_spec.size() + 32); url::StdStringCanonOutput output(&spec_); is_valid_ = url::Canonicalize( input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end, @@ -121,6 +118,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec, inner_url_.reset(new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true)); } + // Valid URLs always have non-empty specs. + DCHECK(!is_valid_ || !spec_.empty()); } void GURL::InitializeFromCanonicalSpec() { @@ -135,6 +134,7 @@ void GURL::InitializeFromCanonicalSpec() { // what we would have produced. Skip checking for invalid URLs have no meaning // and we can't always canonicalize then reproducibly. if (is_valid_) { + DCHECK(!spec_.empty()); url::Component scheme; // We can't do this check on the inner_url of a filesystem URL, as // canonical_spec actually points to the start of the outer URL, so we'd @@ -195,12 +195,7 @@ GURL GURL::Resolve(const std::string& relative) const { return GURL(); GURL result; - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); url::StdStringCanonOutput output(&result.spec_); - if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), parsed_, relative.data(), static_cast<int>(relative.length()), @@ -226,12 +221,7 @@ GURL GURL::Resolve(const base::string16& relative) const { return GURL(); GURL result; - - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); url::StdStringCanonOutput output(&result.spec_); - if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), parsed_, relative.data(), static_cast<int>(relative.length()), @@ -259,11 +249,7 @@ GURL GURL::ReplaceComponents( if (!is_valid_) return GURL(); - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); url::StdStringCanonOutput output(&result.spec_); - result.is_valid_ = url::ReplaceComponents( spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, NULL, &output, &result.parsed_); @@ -286,11 +272,7 @@ GURL GURL::ReplaceComponents( if (!is_valid_) return GURL(); - // Reserve enough room in the output for the input, plus some extra so that - // we have room if we have to escape a few things without reallocating. - result.spec_.reserve(spec_.size() + 32); url::StdStringCanonOutput output(&result.spec_); - result.is_valid_ = url::ReplaceComponents( spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, NULL, &output, &result.parsed_); @@ -440,14 +422,7 @@ std::string GURL::GetContent() const { } bool GURL::HostIsIPAddress() const { - if (!is_valid_ || spec_.empty()) - return false; - - url::RawCanonOutputT<char, 128> ignored_output; - url::CanonHostInfo host_info; - url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output, - &host_info); - return host_info.IsIPAddress(); + return is_valid_ && url::HostIsIPAddress(host_piece()); } #ifdef WIN32 diff --git a/chromium/url/gurl_unittest.cc b/chromium/url/gurl_unittest.cc index f8d4c05288a..24dee6c2a65 100644 --- a/chromium/url/gurl_unittest.cc +++ b/chromium/url/gurl_unittest.cc @@ -294,6 +294,7 @@ TEST(GURLTest, Resolve) { {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"}, {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"}, {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, + {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"}, {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, diff --git a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc index 4f7e908efc9..8556e0a4ca7 100644 --- a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc +++ b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc @@ -38,7 +38,7 @@ TEST(MojoGURLStructTraitsTest, Basic) { base::MessageLoop message_loop; mojom::UrlTestPtr proxy; - UrlTestImpl impl(GetProxy(&proxy)); + UrlTestImpl impl(MakeRequest(&proxy)); const char* serialize_cases[] = { "http://www.google.com/", diff --git a/chromium/url/origin.cc b/chromium/url/origin.cc index 0cb4c10ced7..53600b1dc96 100644 --- a/chromium/url/origin.cc +++ b/chromium/url/origin.cc @@ -85,11 +85,21 @@ Origin::Origin(base::StringPiece scheme, uint16_t port, base::StringPiece suborigin, SchemeHostPort::ConstructPolicy policy) - : tuple_(scheme, host, port, policy) { + : tuple_(scheme.as_string(), host.as_string(), port, policy) { unique_ = tuple_.IsInvalid(); suborigin_ = suborigin.as_string(); } +Origin::Origin(std::string scheme, + std::string host, + uint16_t port, + std::string suborigin, + SchemeHostPort::ConstructPolicy policy) + : tuple_(std::move(scheme), std::move(host), port, policy) { + unique_ = tuple_.IsInvalid(); + suborigin_ = std::move(suborigin); +} + Origin::~Origin() { } @@ -101,18 +111,12 @@ Origin Origin::UnsafelyCreateOriginWithoutNormalization( return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION); } -Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme, - base::StringPiece host, - uint16_t port) { - return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, ""); -} - Origin Origin::CreateFromNormalizedTupleWithSuborigin( - base::StringPiece scheme, - base::StringPiece host, + std::string scheme, + std::string host, uint16_t port, - base::StringPiece suborigin) { - return Origin(scheme, host, port, suborigin, + std::string suborigin) { + return Origin(std::move(scheme), std::move(host), port, std::move(suborigin), SchemeHostPort::ALREADY_CANONICALIZED); } diff --git a/chromium/url/origin.h b/chromium/url/origin.h index 1c28588f311..4b838e420f3 100644 --- a/chromium/url/origin.h +++ b/chromium/url/origin.h @@ -104,18 +104,13 @@ class URL_EXPORT Origin { // Creates an origin without sanity checking that the host is canonicalized. // This should only be used when converting between already normalized types, - // and should NOT be used for IPC. - static Origin CreateFromNormalizedTuple(base::StringPiece scheme, - base::StringPiece host, - uint16_t port); - - // Same as CreateFromNormalizedTuple() above, but adds a suborigin component - // as well. + // and should NOT be used for IPC. Method takes std::strings for use with move + // operators to avoid copies. static Origin CreateFromNormalizedTupleWithSuborigin( - base::StringPiece scheme, - base::StringPiece host, + std::string scheme, + std::string host, uint16_t port, - base::StringPiece suborigin); + std::string suborigin); ~Origin(); @@ -173,6 +168,11 @@ class URL_EXPORT Origin { uint16_t port, base::StringPiece suborigin, SchemeHostPort::ConstructPolicy policy); + Origin(std::string scheme, + std::string host, + uint16_t port, + std::string suborigin, + SchemeHostPort::ConstructPolicy policy); SchemeHostPort tuple_; bool unique_; diff --git a/chromium/url/origin_unittest.cc b/chromium/url/origin_unittest.cc index 0f17c26ef50..a5c30426f34 100644 --- a/chromium/url/origin_unittest.cc +++ b/chromium/url/origin_unittest.cc @@ -90,20 +90,11 @@ TEST(OriginTest, ConstructFromTuple) { << test_case.port; } SCOPED_TRACE(scope_message); - - url::Origin origin_without_suborigin = - url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host, - test_case.port); - url::Origin origin_with_suborigin = url::Origin::CreateFromNormalizedTupleWithSuborigin( test_case.scheme, test_case.host, test_case.port, test_case.suborigin); - EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme()); - EXPECT_EQ(test_case.host, origin_without_suborigin.host()); - EXPECT_EQ(test_case.port, origin_without_suborigin.port()); - EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme()); EXPECT_EQ(test_case.host, origin_with_suborigin.host()); EXPECT_EQ(test_case.port, origin_with_suborigin.port()); diff --git a/chromium/url/run_all_unittests.cc b/chromium/url/run_all_unittests.cc index c0b306a2e03..fcafd0331ee 100644 --- a/chromium/url/run_all_unittests.cc +++ b/chromium/url/run_all_unittests.cc @@ -13,7 +13,6 @@ #if !defined(OS_IOS) #include "mojo/edk/embedder/embedder.h" // nogncheck -#include "mojo/edk/test/scoped_ipc_support.h" // nogncheck #endif int main(int argc, char** argv) { @@ -21,10 +20,6 @@ int main(int argc, char** argv) { #if !defined(OS_IOS) mojo::edk::Init(); - base::TestIOThread test_io_thread(base::TestIOThread::kAutoStart); - std::unique_ptr<mojo::edk::test::ScopedIPCSupport> ipc_support; - ipc_support.reset( - new mojo::edk::test::ScopedIPCSupport(test_io_thread.task_runner())); #endif return base::LaunchUnitTests( diff --git a/chromium/url/scheme_host_port.cc b/chromium/url/scheme_host_port.cc index 5b359a76aa4..f0f56850f4d 100644 --- a/chromium/url/scheme_host_port.cc +++ b/chromium/url/scheme_host_port.cc @@ -59,12 +59,6 @@ bool IsValidInput(const base::StringPiece& scheme, if (!is_standard) return false; - // These schemes do not follow the generic URL syntax, so we treat them as - // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might - // have a (scheme, host, port) tuple, they themselves do not). - if (scheme == kFileSystemScheme || scheme == kBlobScheme) - return false; - switch (scheme_type) { case SCHEME_WITH_PORT: // A URL with |scheme| is required to have the host and port (may be @@ -116,24 +110,24 @@ bool IsValidInput(const base::StringPiece& scheme, SchemeHostPort::SchemeHostPort() : port_(0) { } -SchemeHostPort::SchemeHostPort(base::StringPiece scheme, - base::StringPiece host, +SchemeHostPort::SchemeHostPort(std::string scheme, + std::string host, uint16_t port, ConstructPolicy policy) : port_(0) { if (!IsValidInput(scheme, host, port, policy)) return; - scheme.CopyToString(&scheme_); - host.CopyToString(&host_); + scheme_ = std::move(scheme); + host_ = std::move(host); port_ = port; } SchemeHostPort::SchemeHostPort(base::StringPiece scheme, base::StringPiece host, uint16_t port) - : SchemeHostPort(scheme, - host, + : SchemeHostPort(scheme.as_string(), + host.as_string(), port, ConstructPolicy::CHECK_CANONICALIZATION) {} @@ -202,6 +196,9 @@ std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const { if (IsInvalid()) return result; + // Reserve enough space for the "normal" case of scheme://host/. + result.reserve(scheme_.size() + host_.size() + 4); + if (!scheme_.empty()) { parsed->scheme = Component(0, scheme_.length()); result.append(scheme_); diff --git a/chromium/url/scheme_host_port.h b/chromium/url/scheme_host_port.h index 065e4aa6059..b2e030dfff2 100644 --- a/chromium/url/scheme_host_port.h +++ b/chromium/url/scheme_host_port.h @@ -96,8 +96,8 @@ class URL_EXPORT SchemeHostPort { // that the host and port are canonicalized. This should only be used when // converting between already normalized types, and should NOT be used for // IPC. - SchemeHostPort(base::StringPiece scheme, - base::StringPiece host, + SchemeHostPort(std::string scheme, + std::string host, uint16_t port, ConstructPolicy policy); diff --git a/chromium/url/scheme_host_port_unittest.cc b/chromium/url/scheme_host_port_unittest.cc index 81d4371a85b..ba97a6a5492 100644 --- a/chromium/url/scheme_host_port_unittest.cc +++ b/chromium/url/scheme_host_port_unittest.cc @@ -42,11 +42,19 @@ TEST(SchemeHostPortTest, Invalid) { EXPECT_TRUE(invalid.IsInvalid()); EXPECT_TRUE(invalid.Equals(invalid)); - const char* urls[] = {"data:text/html,Hello!", - "javascript:alert(1)", - "file://example.com:443/etc/passwd", - "blob:https://example.com/uuid-goes-here", - "filesystem:https://example.com/temporary/yay.png"}; + const char* urls[] = { + "data:text/html,Hello!", "javascript:alert(1)", + "file://example.com:443/etc/passwd", + + // These schemes do not follow the generic URL syntax, so make sure we + // treat them as invalid (scheme, host, port) tuples (even though such + // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves + // do not). This is only *implicitly* checked in the code, by means of + // blob schemes not being standard, and filesystem schemes having type + // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit + // checks no longer hold, this policy should be made explicit. + "blob:https://example.com/uuid-goes-here", + "filesystem:https://example.com/temporary/yay.png"}; for (auto* test : urls) { SCOPED_TRACE(test); diff --git a/chromium/url/third_party/mozilla/url_parse.cc b/chromium/url/third_party/mozilla/url_parse.cc index ba842b87b5d..41768601244 100644 --- a/chromium/url/third_party/mozilla/url_parse.cc +++ b/chromium/url/third_party/mozilla/url_parse.cc @@ -175,6 +175,31 @@ void DoParseAuthority(const CHAR* spec, } } +template <typename CHAR> +inline void FindQueryAndRefParts(const CHAR* spec, + const Component& path, + int* query_separator, + int* ref_separator) { + int path_end = path.begin + path.len; + for (int i = path.begin; i < path_end; i++) { + switch (spec[i]) { + case '?': + // Only match the query string if it precedes the reference fragment + // and when we haven't found one already. + if (*query_separator < 0) + *query_separator = i; + break; + case '#': + // Record the first # sign only. + if (*ref_separator < 0) { + *ref_separator = i; + return; + } + break; + } + } +} + template<typename CHAR> void ParsePath(const CHAR* spec, const Component& path, @@ -193,25 +218,9 @@ void ParsePath(const CHAR* spec, DCHECK(path.len > 0) << "We should never have 0 length paths"; // Search for first occurrence of either ? or #. - int path_end = path.begin + path.len; - int query_separator = -1; // Index of the '?' int ref_separator = -1; // Index of the '#' - for (int i = path.begin; i < path_end; i++) { - switch (spec[i]) { - case '?': - // Only match the query string if it precedes the reference fragment - // and when we haven't found one already. - if (ref_separator < 0 && query_separator < 0) - query_separator = i; - break; - case '#': - // Record the first # sign only. - if (ref_separator < 0) - ref_separator = i; - break; - } - } + FindQueryAndRefParts(spec, path, &query_separator, &ref_separator); // Markers pointing to the character after each of these corresponding // components. The code below words from the end back to the beginning, @@ -219,6 +228,7 @@ void ParsePath(const CHAR* spec, int file_end, query_end; // Ref fragment: from the # to the end of the path. + int path_end = path.begin + path.len; if (ref_separator >= 0) { file_end = query_end = ref_separator; *ref = MakeRange(ref_separator + 1, path_end); @@ -680,8 +690,7 @@ bool DoExtractQueryKeyValue(const CHAR* spec, } // namespace -Parsed::Parsed() : inner_parsed_(NULL) { -} +Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {} Parsed::Parsed(const Parsed& other) : scheme(other.scheme), diff --git a/chromium/url/third_party/mozilla/url_parse.h b/chromium/url/third_party/mozilla/url_parse.h index 222d6053232..968578badbb 100644 --- a/chromium/url/third_party/mozilla/url_parse.h +++ b/chromium/url/third_party/mozilla/url_parse.h @@ -177,6 +177,9 @@ struct URL_EXPORT Parsed { // the string with the scheme stripped off. Component GetContent() const; + // True if whitespace was removed from the URL during parsing. + bool whitespace_removed; + // This is used for nested URL types, currently only filesystem. If you // parse a filesystem URL, the resulting Parsed will have a nested // inner_parsed_ to hold the parsed inner URL's component information. diff --git a/chromium/url/url_canon.h b/chromium/url/url_canon.h index c4852e490b0..ff66c6e3086 100644 --- a/chromium/url/url_canon.h +++ b/chromium/url/url_canon.h @@ -117,6 +117,11 @@ class CanonOutputT { cur_len_ += str_len; } + void ReserveSizeIfNeeded(int estimated_size) { + if (estimated_size > buffer_len_) + Resize(estimated_size); + } + protected: // Grows the given buffer so that it can fit at least |min_additional| // characters. Returns true if the buffer could be resized, false on OOM. diff --git a/chromium/url/url_canon_relative.cc b/chromium/url/url_canon_relative.cc index e34ea2fa249..8259056f5e5 100644 --- a/chromium/url/url_canon_relative.cc +++ b/chromium/url/url_canon_relative.cc @@ -4,6 +4,8 @@ // Canonicalizer functions for working with and resolving relative URLs. +#include <algorithm> + #include "base/logging.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" @@ -264,7 +266,7 @@ int CopyBaseDriveSpecIfNecessary(const char* base_url, #endif // WIN32 // A subroutine of DoResolveRelativeURL, this resolves the URL knowning that -// the input is a relative path or less (qyuery or ref). +// the input is a relative path or less (query or ref). template<typename CHAR> bool DoResolveRelativePath(const char* base_url, const Parsed& base_parsed, @@ -280,7 +282,13 @@ bool DoResolveRelativePath(const char* base_url, // also know we have a path so can copy up to there. Component path, query, ref; ParsePathInternal(relative_url, relative_component, &path, &query, &ref); - // Canonical URLs always have a path, so we can use that offset. + + // Canonical URLs always have a path, so we can use that offset. Reserve + // enough room for the base URL, the new path, and some extra bytes for + // possible escaped characters. + output->ReserveSizeIfNeeded( + base_parsed.path.begin + + std::max(path.end(), std::max(query.end(), ref.end())) + 8); output->Append(base_url, base_parsed.path.begin); if (path.len > 0) { @@ -394,6 +402,11 @@ bool DoResolveRelativeHost(const char* base_url, replacements.SetQuery(relative_url, relative_parsed.query); replacements.SetRef(relative_url, relative_parsed.ref); + // Length() does not include the old scheme, so make sure to add it from the + // base URL. + output->ReserveSizeIfNeeded( + replacements.components().Length() + + base_parsed.CountCharactersBefore(Parsed::USERNAME, false) + 8); return ReplaceStandardURL(base_url, base_parsed, replacements, query_converter, output, out_parsed); } diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc index 0a84d5e23cb..2c8d6978cb7 100644 --- a/chromium/url/url_util.cc +++ b/chromium/url/url_util.cc @@ -6,12 +6,12 @@ #include <stddef.h> #include <string.h> -#include <vector> #include "base/debug/leak_annotations.h" #include "base/logging.h" #include "base/strings/string_util.h" #include "url/url_canon_internal.h" +#include "url/url_constants.h" #include "url/url_file.h" #include "url/url_util_internal.h" @@ -19,8 +19,14 @@ namespace url { namespace { -const int kNumStandardURLSchemes = 10; -const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { +// Pass this enum through for methods which would like to know if whitespace +// removal is necessary. +enum WhitespaceRemovalPolicy { + REMOVE_WHITESPACE, + DO_NOT_REMOVE_WHITESPACE, +}; + +const SchemeWithType kStandardURLSchemes[] = { {kHttpScheme, SCHEME_WITH_PORT}, {kHttpsScheme, SCHEME_WITH_PORT}, // Yes, file URLs can have a hostname, so file URLs should be handled as @@ -36,21 +42,50 @@ const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { {kHttpsSuboriginScheme, SCHEME_WITH_PORT}, }; -const int kNumReferrerURLSchemes = 4; -const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = { +const SchemeWithType kReferrerURLSchemes[] = { {kHttpScheme, SCHEME_WITH_PORT}, {kHttpsScheme, SCHEME_WITH_PORT}, {kHttpSuboriginScheme, SCHEME_WITH_PORT}, {kHttpsSuboriginScheme, SCHEME_WITH_PORT}, }; +const char* kSecureSchemes[] = { + kHttpsScheme, + kAboutScheme, + kDataScheme, + kWssScheme, +}; + +const char* kLocalSchemes[] = { + kFileScheme, +}; + +const char* kNoAccessSchemes[] = { + kAboutScheme, + kJavaScriptScheme, + kDataScheme, +}; + +const char* kCORSEnabledSchemes[] = { + kHttpScheme, + kHttpsScheme, + kDataScheme, +}; + +bool initialized = false; + // Lists of the currently installed standard and referrer schemes. These lists -// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are -// leaked on shutdown to prevent any destructors from being called that will -// slow us down or cause problems. +// are lazily initialized by Initialize and are leaked on shutdown to prevent +// any destructors from being called that will slow us down or cause problems. std::vector<SchemeWithType>* standard_schemes = nullptr; std::vector<SchemeWithType>* referrer_schemes = nullptr; +// Similar to above, initialized by the Init*Schemes methods. +std::vector<std::string>* secure_schemes = nullptr; +std::vector<std::string>* local_schemes = nullptr; +std::vector<std::string>* no_access_schemes = nullptr; +std::vector<std::string>* cors_enabled_schemes = nullptr; + // See the LockSchemeRegistries declaration in the header. bool scheme_registries_locked = false; @@ -65,27 +100,22 @@ template<> struct CharToStringPiece<base::char16> { typedef base::StringPiece16 Piece; }; -void InitSchemes(std::vector<SchemeWithType>** schemes, - const SchemeWithType* initial_schemes, +void InitSchemes(std::vector<std::string>** schemes, + const char** initial_schemes, size_t size) { - if (*schemes) - return; - *schemes = new std::vector<SchemeWithType>(size); + *schemes = new std::vector<std::string>(size); for (size_t i = 0; i < size; i++) { - (*schemes)->push_back(initial_schemes[i]); + (*(*schemes))[i] = initial_schemes[i]; } } -// Ensures that the standard_schemes list is initialized, does nothing if -// it already has values. -void InitStandardSchemes() { - InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes); -} - -// Ensures that the referrer_schemes list is initialized, does nothing if -// it already has values. -void InitReferrerSchemes() { - InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes); +void InitSchemesWithType(std::vector<SchemeWithType>** schemes, + const SchemeWithType* initial_schemes, + size_t size) { + *schemes = new std::vector<SchemeWithType>(size); + for (size_t i = 0; i < size; i++) { + (*(*schemes))[i] = initial_schemes[i]; + } } // Given a string and a range inside the string, compares it to the given @@ -125,7 +155,7 @@ bool DoIsInSchemes(const CHAR* spec, template<typename CHAR> bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) { - InitStandardSchemes(); + Initialize(); return DoIsInSchemes(spec, scheme, type, *standard_schemes); } @@ -154,19 +184,28 @@ bool DoFindAndCompareScheme(const CHAR* str, return DoCompareSchemeComponent(spec, our_scheme, compare); } -template<typename CHAR> -bool DoCanonicalize(const CHAR* in_spec, - int in_spec_len, +template <typename CHAR> +bool DoCanonicalize(const CHAR* spec, + int spec_len, bool trim_path_end, + WhitespaceRemovalPolicy whitespace_policy, CharsetConverter* charset_converter, CanonOutput* output, Parsed* output_parsed) { - // Remove any whitespace from the middle of the relative URL, possibly - // copying to the new buffer. + // Reserve enough room in the output for the input, plus some extra so that + // we have room if we have to escape a few things without reallocating. + output->ReserveSizeIfNeeded(spec_len + 8); + + // Remove any whitespace from the middle of the relative URL if necessary. + // Possibly this will result in copying to the new buffer. RawCanonOutputT<CHAR> whitespace_buffer; - int spec_len; - const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, - &whitespace_buffer, &spec_len); + if (whitespace_policy == REMOVE_WHITESPACE) { + int original_len = spec_len; + spec = + RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len); + if (spec_len != original_len) + output_parsed->whitespace_removed = true; + } Parsed parsed_input; #ifdef WIN32 @@ -246,6 +285,9 @@ bool DoResolveRelative(const char* base_spec, const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, &whitespace_buffer, &relative_length); + if (in_relative_length != relative_length) + output_parsed->whitespace_removed = true; + bool base_is_authority_based = false; bool base_is_hierarchical = false; if (base_spec && @@ -271,6 +313,9 @@ bool DoResolveRelative(const char* base_spec, return false; } + // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and + // ReserveRelativeURL, to enable more accurate buffer sizes. + // Pretend for a moment that |base_spec| is a standard URL. Normally // non-standard URLs are treated as PathURLs, but if the base has an // authority we would like to preserve it. @@ -287,7 +332,8 @@ bool DoResolveRelative(const char* base_spec, // based on base_parsed_authority instead of base_parsed) and needs to be // re-created. DoCanonicalize(temporary_output.data(), temporary_output.length(), true, - charset_converter, output, output_parsed); + REMOVE_WHITESPACE, charset_converter, output, + output_parsed); return did_resolve_succeed; } } else if (is_relative) { @@ -300,8 +346,9 @@ bool DoResolveRelative(const char* base_spec, } // Not relative, canonicalize the input. - return DoCanonicalize(relative, relative_length, true, charset_converter, - output, output_parsed); + return DoCanonicalize(relative, relative_length, true, + DO_NOT_REMOVE_WHITESPACE, charset_converter, output, + output_parsed); } template<typename CHAR> @@ -348,8 +395,8 @@ bool DoReplaceComponents(const char* spec, RawCanonOutput<128> recanonicalized; Parsed recanonicalized_parsed; DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, - charset_converter, - &recanonicalized, &recanonicalized_parsed); + REMOVE_WHITESPACE, charset_converter, &recanonicalized, + &recanonicalized_parsed); // Recurse using the version with the scheme already replaced. This will now // use the replacement rules for the new scheme. @@ -371,6 +418,12 @@ bool DoReplaceComponents(const char* spec, charset_converter, output, out_parsed); } + // TODO(csharrison): We could be smarter about size to reserve if this is done + // in callers below, and the code checks to see which components are being + // replaced, and with what length. If this ends up being a hot spot it should + // be changed. + output->ReserveSizeIfNeeded(spec_len + 8); + // If we get here, then we know the scheme doesn't need to be replaced, so can // just key off the scheme in the spec to know how to do the replacements. if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) { @@ -394,9 +447,7 @@ bool DoReplaceComponents(const char* spec, return ReplacePathURL(spec, parsed, replacements, output, out_parsed); } -void DoAddScheme(const char* new_scheme, - SchemeType type, - std::vector<SchemeWithType>* schemes) { +void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) { DCHECK(schemes); // If this assert triggers, it means you've called Add*Scheme after // LockSchemeRegistries has been called (see the header file for @@ -412,6 +463,29 @@ void DoAddScheme(const char* new_scheme, if (scheme_len == 0) return; + DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); + schemes->push_back(std::string(new_scheme)); +} + +void DoAddSchemeWithType(const char* new_scheme, + SchemeType type, + std::vector<SchemeWithType>* schemes) { + DCHECK(schemes); + // If this assert triggers, it means you've called Add*Scheme after + // LockSchemeRegistries has been called (see the header file for + // LockSchemeRegistries for more). + // + // This normally means you're trying to set up a new scheme too late in your + // application's init process. Locate where your app does this initialization + // and calls LockSchemeRegistries, and add your new scheme there. + DCHECK(!scheme_registries_locked) + << "Trying to add a scheme after the lists have been locked."; + + size_t scheme_len = strlen(new_scheme); + if (scheme_len == 0) + return; + + DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); // Duplicate the scheme into a new buffer and add it to the list of standard // schemes. This pointer will be leaked on shutdown. char* dup_scheme = new char[scheme_len + 1]; @@ -427,29 +501,85 @@ void DoAddScheme(const char* new_scheme, } // namespace void Initialize() { - InitStandardSchemes(); - InitReferrerSchemes(); + if (initialized) + return; + InitSchemesWithType(&standard_schemes, kStandardURLSchemes, + arraysize(kStandardURLSchemes)); + InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes, + arraysize(kReferrerURLSchemes)); + InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes)); + InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes)); + InitSchemes(&no_access_schemes, kNoAccessSchemes, + arraysize(kNoAccessSchemes)); + InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes, + arraysize(kCORSEnabledSchemes)); + initialized = true; } void Shutdown() { - if (standard_schemes) { - delete standard_schemes; - standard_schemes = NULL; - } - if (referrer_schemes) { - delete referrer_schemes; - referrer_schemes = NULL; - } + initialized = false; + delete standard_schemes; + standard_schemes = nullptr; + delete referrer_schemes; + referrer_schemes = nullptr; + delete secure_schemes; + secure_schemes = nullptr; + delete local_schemes; + local_schemes = nullptr; + delete no_access_schemes; + no_access_schemes = nullptr; + delete cors_enabled_schemes; + cors_enabled_schemes = nullptr; } void AddStandardScheme(const char* new_scheme, SchemeType type) { - InitStandardSchemes(); - DoAddScheme(new_scheme, type, standard_schemes); + Initialize(); + DoAddSchemeWithType(new_scheme, type, standard_schemes); } void AddReferrerScheme(const char* new_scheme, SchemeType type) { - InitReferrerSchemes(); - DoAddScheme(new_scheme, type, referrer_schemes); + Initialize(); + DoAddSchemeWithType(new_scheme, type, referrer_schemes); +} + +void AddSecureScheme(const char* new_scheme) { + Initialize(); + DoAddScheme(new_scheme, secure_schemes); +} + +const std::vector<std::string>& GetSecureSchemes() { + Initialize(); + return *secure_schemes; +} + +void AddLocalScheme(const char* new_scheme) { + Initialize(); + DoAddScheme(new_scheme, local_schemes); +} + +const std::vector<std::string>& GetLocalSchemes() { + Initialize(); + return *local_schemes; +} + +void AddNoAccessScheme(const char* new_scheme) { + Initialize(); + DoAddScheme(new_scheme, no_access_schemes); +} + +const std::vector<std::string>& GetNoAccessSchemes() { + Initialize(); + return *no_access_schemes; +} + +void AddCORSEnabledScheme(const char* new_scheme) { + Initialize(); + DoAddScheme(new_scheme, cors_enabled_schemes); +} + +const std::vector<std::string>& GetCORSEnabledSchemes() { + Initialize(); + return *cors_enabled_schemes; } void LockSchemeRegistries() { @@ -473,7 +603,7 @@ bool IsStandard(const base::char16* spec, const Component& scheme) { } bool IsReferrerScheme(const char* spec, const Component& scheme) { - InitReferrerSchemes(); + Initialize(); SchemeType unused_scheme_type; return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes); } @@ -529,14 +659,22 @@ bool DomainIs(base::StringPiece canonicalized_host, return true; } +bool HostIsIPAddress(base::StringPiece host) { + url::RawCanonOutputT<char, 128> ignored_output; + url::CanonHostInfo host_info; + url::CanonicalizeIPAddress(host.data(), Component(0, host.length()), + &ignored_output, &host_info); + return host_info.IsIPAddress(); +} + bool Canonicalize(const char* spec, int spec_len, bool trim_path_end, CharsetConverter* charset_converter, CanonOutput* output, Parsed* output_parsed) { - return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, - output, output_parsed); + return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, + charset_converter, output, output_parsed); } bool Canonicalize(const base::char16* spec, @@ -545,8 +683,8 @@ bool Canonicalize(const base::char16* spec, CharsetConverter* charset_converter, CanonOutput* output, Parsed* output_parsed) { - return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, - output, output_parsed); + return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, + charset_converter, output, output_parsed); } bool ResolveRelative(const char* base_spec, diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h index 724ce956a7f..a4b74b13e5d 100644 --- a/chromium/url/url_util.h +++ b/chromium/url/url_util.h @@ -6,6 +6,7 @@ #define URL_URL_UTIL_H_ #include <string> +#include <vector> #include "base/strings/string16.h" #include "base/strings/string_piece.h" @@ -57,25 +58,44 @@ struct URL_EXPORT SchemeWithType { SchemeType type; }; +// The following Add*Scheme method are not threadsafe and can not be called +// concurrently with any other url_util function. They will assert if the lists +// of schemes have been locked (see LockSchemeRegistries). + // Adds an application-defined scheme to the internal list of "standard-format" // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). -// -// This function is not threadsafe and can not be called concurrently with any -// other url_util function. It will assert if the lists of schemes have -// been locked (see LockSchemeRegistries). + URL_EXPORT void AddStandardScheme(const char* new_scheme, SchemeType scheme_type); // Adds an application-defined scheme to the internal list of schemes allowed // for referrers. -// -// This function is not threadsafe and can not be called concurrently with any -// other url_util function. It will assert if the lists of schemes have -// been locked (see LockSchemeRegistries). URL_EXPORT void AddReferrerScheme(const char* new_scheme, SchemeType scheme_type); +// Adds an application-defined scheme to the list of schemes that do not trigger +// mixed content warnings. +URL_EXPORT void AddSecureScheme(const char* new_scheme); +URL_EXPORT const std::vector<std::string>& GetSecureSchemes(); + +// Adds an application-defined scheme to the list of schemes that normal pages +// cannot link to or access (i.e., with the same security rules as those applied +// to "file" URLs). +URL_EXPORT void AddLocalScheme(const char* new_scheme); +URL_EXPORT const std::vector<std::string>& GetLocalSchemes(); + +// Adds an application-defined scheme to the list of schemes that cause pages +// loaded with them to not have access to pages loaded with any other URL +// scheme. +URL_EXPORT void AddNoAccessScheme(const char* new_scheme); +URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes(); + +// Adds an application-defined scheme to the list of schemes that can be sent +// CORS requests. +URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme); +URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes(); + // Sets a flag to prevent future calls to Add*Scheme from succeeding. // // This is designed to help prevent errors for multithreaded applications. @@ -133,7 +153,7 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec, const Component& scheme, SchemeType* type); -// Domains --------------------------------------------------------------------- +// Hosts ---------------------------------------------------------------------- // Returns true if the |canonicalized_host| matches or is in the same domain as // the given |lower_ascii_domain| string. For example, if the canonicalized @@ -146,6 +166,10 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec, URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host, base::StringPiece lower_ascii_domain); +// Returns true if the hostname is an IP address. Note: this function isn't very +// cheap, as it must re-parse the host to verify. +URL_EXPORT bool HostIsIPAddress(base::StringPiece host); + // URL library wrappers -------------------------------------------------------- // Parses the given spec according to the extracted scheme type. Normal users |