BASELINE: Update Chromium to 57.0.2987.144

Change-Id: I29db402ff696c71a04c4dbaec822c2e53efe0267
Reviewed-by: Peter Varga <pvarga@inf.u-szeged.hu>
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-04-05 14:08:31 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2017-04-11 07:46:53 +0000
commit: 6a4cabb866f66d4128a97cdc6d9d08ce074f1247 (patch)
tree: ab00f70a5e89278d6a0d16ff0c42578dc4d84a2d /chromium/url
parent: e733310db58160074f574c429d48f8308c0afe17 (diff)
download: qtwebengine-chromium-6a4cabb866f66d4128a97cdc6d9d08ce074f1247.tar.gz
17 files changed, 338 insertions, 176 deletions
diff --git a/chromium/url/DEPS b/chromium/url/DEPS
index c89ac323c59..946d75fcd66 100644
--- a/chromium/url/DEPS
+++ b/chromium/url/DEPS
@@ -11,7 +11,6 @@ specific_include_rules = {
     "+third_party/icu",
   ],
   "run_all_unittests\.cc": [
-    "+mojo/edk/embedder/embedder.h",
-    "+mojo/edk/test/scoped_ipc_support.h",
+    "+mojo/edk/embedder",
   ],
 }
diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc
index bdd35227b0d..43f30528593 100644
--- a/chromium/url/gurl.cc
+++ b/chromium/url/gurl.cc
@@ -108,9 +108,6 @@ GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
 template<typename STR>
 void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
                          bool trim_path_end) {
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  spec_.reserve(input_spec.size() + 32);
   url::StdStringCanonOutput output(&spec_);
   is_valid_ = url::Canonicalize(
       input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -121,6 +118,8 @@ void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
     inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
                               *parsed_.inner_parsed(), true));
   }
+  // Valid URLs always have non-empty specs.
+  DCHECK(!is_valid_ || !spec_.empty());
 }
 
 void GURL::InitializeFromCanonicalSpec() {
@@ -135,6 +134,7 @@ void GURL::InitializeFromCanonicalSpec() {
   // what we would have produced. Skip checking for invalid URLs have no meaning
   // and we can't always canonicalize then reproducibly.
   if (is_valid_) {
+    DCHECK(!spec_.empty());
     url::Component scheme;
     // We can't do this check on the inner_url of a filesystem URL, as
     // canonical_spec actually points to the start of the outer URL, so we'd
@@ -195,12 +195,7 @@ GURL GURL::Resolve(const std::string& relative) const {
     return GURL();
 
   GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
@@ -226,12 +221,7 @@ GURL GURL::Resolve(const base::string16& relative) const {
     return GURL();
 
   GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
@@ -259,11 +249,7 @@ GURL GURL::ReplaceComponents(
   if (!is_valid_)
     return GURL();
 
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   result.is_valid_ = url::ReplaceComponents(
       spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
       NULL, &output, &result.parsed_);
@@ -286,11 +272,7 @@ GURL GURL::ReplaceComponents(
   if (!is_valid_)
     return GURL();
 
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   result.is_valid_ = url::ReplaceComponents(
       spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
       NULL, &output, &result.parsed_);
@@ -440,14 +422,7 @@ std::string GURL::GetContent() const {
 }
 
 bool GURL::HostIsIPAddress() const {
-  if (!is_valid_ || spec_.empty())
-     return false;
-
-  url::RawCanonOutputT<char, 128> ignored_output;
-  url::CanonHostInfo host_info;
-  url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
-                             &host_info);
-  return host_info.IsIPAddress();
+  return is_valid_ && url::HostIsIPAddress(host_piece());
 }
 
 #ifdef WIN32
diff --git a/chromium/url/gurl_unittest.cc b/chromium/url/gurl_unittest.cc
index f8d4c05288a..24dee6c2a65 100644
--- a/chromium/url/gurl_unittest.cc
+++ b/chromium/url/gurl_unittest.cc
@@ -294,6 +294,7 @@ TEST(GURLTest, Resolve) {
     {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
     {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
     {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
+    {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
     {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
     {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
     {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
diff --git a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
index 4f7e908efc9..8556e0a4ca7 100644
--- a/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
+++ b/chromium/url/mojo/url_gurl_struct_traits_unittest.cc
@@ -38,7 +38,7 @@ TEST(MojoGURLStructTraitsTest, Basic) {
   base::MessageLoop message_loop;
 
   mojom::UrlTestPtr proxy;
-  UrlTestImpl impl(GetProxy(&proxy));
+  UrlTestImpl impl(MakeRequest(&proxy));
 
   const char* serialize_cases[] = {
     "http://www.google.com/",
diff --git a/chromium/url/origin.cc b/chromium/url/origin.cc
index 0cb4c10ced7..53600b1dc96 100644
--- a/chromium/url/origin.cc
+++ b/chromium/url/origin.cc
@@ -85,11 +85,21 @@ Origin::Origin(base::StringPiece scheme,
                uint16_t port,
                base::StringPiece suborigin,
                SchemeHostPort::ConstructPolicy policy)
-    : tuple_(scheme, host, port, policy) {
+    : tuple_(scheme.as_string(), host.as_string(), port, policy) {
   unique_ = tuple_.IsInvalid();
   suborigin_ = suborigin.as_string();
 }
 
+Origin::Origin(std::string scheme,
+               std::string host,
+               uint16_t port,
+               std::string suborigin,
+               SchemeHostPort::ConstructPolicy policy)
+    : tuple_(std::move(scheme), std::move(host), port, policy) {
+  unique_ = tuple_.IsInvalid();
+  suborigin_ = std::move(suborigin);
+}
+
 Origin::~Origin() {
 }
 
@@ -101,18 +111,12 @@ Origin Origin::UnsafelyCreateOriginWithoutNormalization(
   return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
 }
 
-Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
-                                         base::StringPiece host,
-                                         uint16_t port) {
-  return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
-}
-
 Origin Origin::CreateFromNormalizedTupleWithSuborigin(
-    base::StringPiece scheme,
-    base::StringPiece host,
+    std::string scheme,
+    std::string host,
     uint16_t port,
-    base::StringPiece suborigin) {
-  return Origin(scheme, host, port, suborigin,
+    std::string suborigin) {
+  return Origin(std::move(scheme), std::move(host), port, std::move(suborigin),
                 SchemeHostPort::ALREADY_CANONICALIZED);
 }
 
diff --git a/chromium/url/origin.h b/chromium/url/origin.h
index 1c28588f311..4b838e420f3 100644
--- a/chromium/url/origin.h
+++ b/chromium/url/origin.h
@@ -104,18 +104,13 @@ class URL_EXPORT Origin {
 
   // Creates an origin without sanity checking that the host is canonicalized.
   // This should only be used when converting between already normalized types,
-  // and should NOT be used for IPC.
-  static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
-                                          base::StringPiece host,
-                                          uint16_t port);
-
-  // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
-  // as well.
+  // and should NOT be used for IPC. Method takes std::strings for use with move
+  // operators to avoid copies.
   static Origin CreateFromNormalizedTupleWithSuborigin(
-      base::StringPiece scheme,
-      base::StringPiece host,
+      std::string scheme,
+      std::string host,
       uint16_t port,
-      base::StringPiece suborigin);
+      std::string suborigin);
 
   ~Origin();
 
@@ -173,6 +168,11 @@ class URL_EXPORT Origin {
          uint16_t port,
          base::StringPiece suborigin,
          SchemeHostPort::ConstructPolicy policy);
+  Origin(std::string scheme,
+         std::string host,
+         uint16_t port,
+         std::string suborigin,
+         SchemeHostPort::ConstructPolicy policy);
 
   SchemeHostPort tuple_;
   bool unique_;
diff --git a/chromium/url/origin_unittest.cc b/chromium/url/origin_unittest.cc
index 0f17c26ef50..a5c30426f34 100644
--- a/chromium/url/origin_unittest.cc
+++ b/chromium/url/origin_unittest.cc
@@ -90,20 +90,11 @@ TEST(OriginTest, ConstructFromTuple) {
                     << test_case.port;
     }
     SCOPED_TRACE(scope_message);
-
-    url::Origin origin_without_suborigin =
-        url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
-                                               test_case.port);
-
     url::Origin origin_with_suborigin =
         url::Origin::CreateFromNormalizedTupleWithSuborigin(
             test_case.scheme, test_case.host, test_case.port,
             test_case.suborigin);
 
-    EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
-    EXPECT_EQ(test_case.host, origin_without_suborigin.host());
-    EXPECT_EQ(test_case.port, origin_without_suborigin.port());
-
     EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
     EXPECT_EQ(test_case.host, origin_with_suborigin.host());
     EXPECT_EQ(test_case.port, origin_with_suborigin.port());
diff --git a/chromium/url/run_all_unittests.cc b/chromium/url/run_all_unittests.cc
index c0b306a2e03..fcafd0331ee 100644
--- a/chromium/url/run_all_unittests.cc
+++ b/chromium/url/run_all_unittests.cc
@@ -13,7 +13,6 @@
 
 #if !defined(OS_IOS)
 #include "mojo/edk/embedder/embedder.h"  // nogncheck
-#include "mojo/edk/test/scoped_ipc_support.h"  // nogncheck
 #endif
 
 int main(int argc, char** argv) {
@@ -21,10 +20,6 @@ int main(int argc, char** argv) {
 
 #if !defined(OS_IOS)
   mojo::edk::Init();
-  base::TestIOThread test_io_thread(base::TestIOThread::kAutoStart);
-  std::unique_ptr<mojo::edk::test::ScopedIPCSupport> ipc_support;
-  ipc_support.reset(
-      new mojo::edk::test::ScopedIPCSupport(test_io_thread.task_runner()));
 #endif
 
   return base::LaunchUnitTests(
diff --git a/chromium/url/scheme_host_port.cc b/chromium/url/scheme_host_port.cc
index 5b359a76aa4..f0f56850f4d 100644
--- a/chromium/url/scheme_host_port.cc
+++ b/chromium/url/scheme_host_port.cc
@@ -59,12 +59,6 @@ bool IsValidInput(const base::StringPiece& scheme,
   if (!is_standard)
     return false;
 
-  // These schemes do not follow the generic URL syntax, so we treat them as
-  // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might
-  // have a (scheme, host, port) tuple, they themselves do not).
-  if (scheme == kFileSystemScheme || scheme == kBlobScheme)
-    return false;
-
   switch (scheme_type) {
     case SCHEME_WITH_PORT:
       // A URL with |scheme| is required to have the host and port (may be
@@ -116,24 +110,24 @@ bool IsValidInput(const base::StringPiece& scheme,
 SchemeHostPort::SchemeHostPort() : port_(0) {
 }
 
-SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
-                               base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string scheme,
+                               std::string host,
                                uint16_t port,
                                ConstructPolicy policy)
     : port_(0) {
   if (!IsValidInput(scheme, host, port, policy))
     return;
 
-  scheme.CopyToString(&scheme_);
-  host.CopyToString(&host_);
+  scheme_ = std::move(scheme);
+  host_ = std::move(host);
   port_ = port;
 }
 
 SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
                                base::StringPiece host,
                                uint16_t port)
-    : SchemeHostPort(scheme,
-                     host,
+    : SchemeHostPort(scheme.as_string(),
+                     host.as_string(),
                      port,
                      ConstructPolicy::CHECK_CANONICALIZATION) {}
 
@@ -202,6 +196,9 @@ std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
   if (IsInvalid())
     return result;
 
+  // Reserve enough space for the "normal" case of scheme://host/.
+  result.reserve(scheme_.size() + host_.size() + 4);
+
   if (!scheme_.empty()) {
     parsed->scheme = Component(0, scheme_.length());
     result.append(scheme_);
diff --git a/chromium/url/scheme_host_port.h b/chromium/url/scheme_host_port.h
index 065e4aa6059..b2e030dfff2 100644
--- a/chromium/url/scheme_host_port.h
+++ b/chromium/url/scheme_host_port.h
@@ -96,8 +96,8 @@ class URL_EXPORT SchemeHostPort {
   // that the host and port are canonicalized. This should only be used when
   // converting between already normalized types, and should NOT be used for
   // IPC.
-  SchemeHostPort(base::StringPiece scheme,
-                 base::StringPiece host,
+  SchemeHostPort(std::string scheme,
+                 std::string host,
                  uint16_t port,
                  ConstructPolicy policy);
 
diff --git a/chromium/url/scheme_host_port_unittest.cc b/chromium/url/scheme_host_port_unittest.cc
index 81d4371a85b..ba97a6a5492 100644
--- a/chromium/url/scheme_host_port_unittest.cc
+++ b/chromium/url/scheme_host_port_unittest.cc
@@ -42,11 +42,19 @@ TEST(SchemeHostPortTest, Invalid) {
   EXPECT_TRUE(invalid.IsInvalid());
   EXPECT_TRUE(invalid.Equals(invalid));
 
-  const char* urls[] = {"data:text/html,Hello!",
-                        "javascript:alert(1)",
-                        "file://example.com:443/etc/passwd",
-                        "blob:https://example.com/uuid-goes-here",
-                        "filesystem:https://example.com/temporary/yay.png"};
+  const char* urls[] = {
+      "data:text/html,Hello!", "javascript:alert(1)",
+      "file://example.com:443/etc/passwd",
+
+      // These schemes do not follow the generic URL syntax, so make sure we
+      // treat them as invalid (scheme, host, port) tuples (even though such
+      // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves
+      // do not). This is only *implicitly* checked in the code, by means of
+      // blob schemes not being standard, and filesystem schemes having type
+      // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit
+      // checks no longer hold, this policy should be made explicit.
+      "blob:https://example.com/uuid-goes-here",
+      "filesystem:https://example.com/temporary/yay.png"};
 
   for (auto* test : urls) {
     SCOPED_TRACE(test);
diff --git a/chromium/url/third_party/mozilla/url_parse.cc b/chromium/url/third_party/mozilla/url_parse.cc
index ba842b87b5d..41768601244 100644
--- a/chromium/url/third_party/mozilla/url_parse.cc
+++ b/chromium/url/third_party/mozilla/url_parse.cc
@@ -175,6 +175,31 @@ void DoParseAuthority(const CHAR* spec,
   }
 }
 
+template <typename CHAR>
+inline void FindQueryAndRefParts(const CHAR* spec,
+                          const Component& path,
+                          int* query_separator,
+                          int* ref_separator) {
+  int path_end = path.begin + path.len;
+  for (int i = path.begin; i < path_end; i++) {
+    switch (spec[i]) {
+      case '?':
+        // Only match the query string if it precedes the reference fragment
+        // and when we haven't found one already.
+        if (*query_separator < 0)
+          *query_separator = i;
+        break;
+      case '#':
+        // Record the first # sign only.
+        if (*ref_separator < 0) {
+          *ref_separator = i;
+          return;
+        }
+        break;
+    }
+  }
+}
+
 template<typename CHAR>
 void ParsePath(const CHAR* spec,
                const Component& path,
@@ -193,25 +218,9 @@ void ParsePath(const CHAR* spec,
   DCHECK(path.len > 0) << "We should never have 0 length paths";
 
   // Search for first occurrence of either ? or #.
-  int path_end = path.begin + path.len;
-
   int query_separator = -1;  // Index of the '?'
   int ref_separator = -1;    // Index of the '#'
-  for (int i = path.begin; i < path_end; i++) {
-    switch (spec[i]) {
-      case '?':
-        // Only match the query string if it precedes the reference fragment
-        // and when we haven't found one already.
-        if (ref_separator < 0 && query_separator < 0)
-          query_separator = i;
-        break;
-      case '#':
-        // Record the first # sign only.
-        if (ref_separator < 0)
-          ref_separator = i;
-        break;
-    }
-  }
+  FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
 
   // Markers pointing to the character after each of these corresponding
   // components. The code below words from the end back to the beginning,
@@ -219,6 +228,7 @@ void ParsePath(const CHAR* spec,
   int file_end, query_end;
 
   // Ref fragment: from the # to the end of the path.
+  int path_end = path.begin + path.len;
   if (ref_separator >= 0) {
     file_end = query_end = ref_separator;
     *ref = MakeRange(ref_separator + 1, path_end);
@@ -680,8 +690,7 @@ bool DoExtractQueryKeyValue(const CHAR* spec,
 
 }  // namespace
 
-Parsed::Parsed() : inner_parsed_(NULL) {
-}
+Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
 
 Parsed::Parsed(const Parsed& other) :
     scheme(other.scheme),
diff --git a/chromium/url/third_party/mozilla/url_parse.h b/chromium/url/third_party/mozilla/url_parse.h
index 222d6053232..968578badbb 100644
--- a/chromium/url/third_party/mozilla/url_parse.h
+++ b/chromium/url/third_party/mozilla/url_parse.h
@@ -177,6 +177,9 @@ struct URL_EXPORT Parsed {
   // the string with the scheme stripped off.
   Component GetContent() const;
 
+  // True if whitespace was removed from the URL during parsing.
+  bool whitespace_removed;
+
   // This is used for nested URL types, currently only filesystem.  If you
   // parse a filesystem URL, the resulting Parsed will have a nested
   // inner_parsed_ to hold the parsed inner URL's component information.
diff --git a/chromium/url/url_canon.h b/chromium/url/url_canon.h
index c4852e490b0..ff66c6e3086 100644
--- a/chromium/url/url_canon.h
+++ b/chromium/url/url_canon.h
@@ -117,6 +117,11 @@ class CanonOutputT {
     cur_len_ += str_len;
   }
 
+  void ReserveSizeIfNeeded(int estimated_size) {
+    if (estimated_size > buffer_len_)
+      Resize(estimated_size);
+  }
+
  protected:
   // Grows the given buffer so that it can fit at least |min_additional|
   // characters. Returns true if the buffer could be resized, false on OOM.
diff --git a/chromium/url/url_canon_relative.cc b/chromium/url/url_canon_relative.cc
index e34ea2fa249..8259056f5e5 100644
--- a/chromium/url/url_canon_relative.cc
+++ b/chromium/url/url_canon_relative.cc
@@ -4,6 +4,8 @@
 
 // Canonicalizer functions for working with and resolving relative URLs.
 
+#include <algorithm>
+
 #include "base/logging.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
@@ -264,7 +266,7 @@ int CopyBaseDriveSpecIfNecessary(const char* base_url,
 #endif  // WIN32
 
 // A subroutine of DoResolveRelativeURL, this resolves the URL knowning that
-// the input is a relative path or less (qyuery or ref).
+// the input is a relative path or less (query or ref).
 template<typename CHAR>
 bool DoResolveRelativePath(const char* base_url,
                            const Parsed& base_parsed,
@@ -280,7 +282,13 @@ bool DoResolveRelativePath(const char* base_url,
   // also know we have a path so can copy up to there.
   Component path, query, ref;
   ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
-  // Canonical URLs always have a path, so we can use that offset.
+
+  // Canonical URLs always have a path, so we can use that offset. Reserve
+  // enough room for the base URL, the new path, and some extra bytes for
+  // possible escaped characters.
+  output->ReserveSizeIfNeeded(
+      base_parsed.path.begin +
+      std::max(path.end(), std::max(query.end(), ref.end())) + 8);
   output->Append(base_url, base_parsed.path.begin);
 
   if (path.len > 0) {
@@ -394,6 +402,11 @@ bool DoResolveRelativeHost(const char* base_url,
   replacements.SetQuery(relative_url, relative_parsed.query);
   replacements.SetRef(relative_url, relative_parsed.ref);
 
+  // Length() does not include the old scheme, so make sure to add it from the
+  // base URL.
+  output->ReserveSizeIfNeeded(
+      replacements.components().Length() +
+      base_parsed.CountCharactersBefore(Parsed::USERNAME, false) + 8);
   return ReplaceStandardURL(base_url, base_parsed, replacements,
                             query_converter, output, out_parsed);
 }
diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc
index 0a84d5e23cb..2c8d6978cb7 100644
--- a/chromium/url/url_util.cc
+++ b/chromium/url/url_util.cc
@@ -6,12 +6,12 @@
 
 #include <stddef.h>
 #include <string.h>
-#include <vector>
 
 #include "base/debug/leak_annotations.h"
 #include "base/logging.h"
 #include "base/strings/string_util.h"
 #include "url/url_canon_internal.h"
+#include "url/url_constants.h"
 #include "url/url_file.h"
 #include "url/url_util_internal.h"
 
@@ -19,8 +19,14 @@ namespace url {
 
 namespace {
 
-const int kNumStandardURLSchemes = 10;
-const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
+// Pass this enum through for methods which would like to know if whitespace
+// removal is necessary.
+enum WhitespaceRemovalPolicy {
+  REMOVE_WHITESPACE,
+  DO_NOT_REMOVE_WHITESPACE,
+};
+
+const SchemeWithType kStandardURLSchemes[] = {
     {kHttpScheme, SCHEME_WITH_PORT},
     {kHttpsScheme, SCHEME_WITH_PORT},
     // Yes, file URLs can have a hostname, so file URLs should be handled as
@@ -36,21 +42,50 @@ const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
     {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
-const int kNumReferrerURLSchemes = 4;
-const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
+const SchemeWithType kReferrerURLSchemes[] = {
     {kHttpScheme, SCHEME_WITH_PORT},
     {kHttpsScheme, SCHEME_WITH_PORT},
     {kHttpSuboriginScheme, SCHEME_WITH_PORT},
     {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
+const char* kSecureSchemes[] = {
+  kHttpsScheme,
+  kAboutScheme,
+  kDataScheme,
+  kWssScheme,
+};
+
+const char* kLocalSchemes[] = {
+  kFileScheme,
+};
+
+const char* kNoAccessSchemes[] = {
+  kAboutScheme,
+  kJavaScriptScheme,
+  kDataScheme,
+};
+
+const char* kCORSEnabledSchemes[] = {
+  kHttpScheme,
+  kHttpsScheme,
+  kDataScheme,
+};
+
+bool initialized = false;
+
 // Lists of the currently installed standard and referrer schemes. These lists
-// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are
-// leaked on shutdown to prevent any destructors from being called that will
-// slow us down or cause problems.
+// are lazily initialized by Initialize and are leaked on shutdown to prevent
+// any destructors from being called that will slow us down or cause problems.
 std::vector<SchemeWithType>* standard_schemes = nullptr;
 std::vector<SchemeWithType>* referrer_schemes = nullptr;
 
+// Similar to above, initialized by the Init*Schemes methods.
+std::vector<std::string>* secure_schemes = nullptr;
+std::vector<std::string>* local_schemes = nullptr;
+std::vector<std::string>* no_access_schemes = nullptr;
+std::vector<std::string>* cors_enabled_schemes = nullptr;
+
 // See the LockSchemeRegistries declaration in the header.
 bool scheme_registries_locked = false;
 
@@ -65,27 +100,22 @@ template<> struct CharToStringPiece<base::char16> {
   typedef base::StringPiece16 Piece;
 };
 
-void InitSchemes(std::vector<SchemeWithType>** schemes,
-                 const SchemeWithType* initial_schemes,
+void InitSchemes(std::vector<std::string>** schemes,
+                 const char** initial_schemes,
                  size_t size) {
-  if (*schemes)
-    return;
-  *schemes = new std::vector<SchemeWithType>(size);
+  *schemes = new std::vector<std::string>(size);
   for (size_t i = 0; i < size; i++) {
-    (*schemes)->push_back(initial_schemes[i]);
+    (*(*schemes))[i] = initial_schemes[i];
   }
 }
 
-// Ensures that the standard_schemes list is initialized, does nothing if
-// it already has values.
-void InitStandardSchemes() {
-  InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes);
-}
-
-// Ensures that the referrer_schemes list is initialized, does nothing if
-// it already has values.
-void InitReferrerSchemes() {
-  InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes);
+void InitSchemesWithType(std::vector<SchemeWithType>** schemes,
+                         const SchemeWithType* initial_schemes,
+                         size_t size) {
+  *schemes = new std::vector<SchemeWithType>(size);
+  for (size_t i = 0; i < size; i++) {
+    (*(*schemes))[i] = initial_schemes[i];
+  }
 }
 
 // Given a string and a range inside the string, compares it to the given
@@ -125,7 +155,7 @@ bool DoIsInSchemes(const CHAR* spec,
 
 template<typename CHAR>
 bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
-  InitStandardSchemes();
+  Initialize();
   return DoIsInSchemes(spec, scheme, type, *standard_schemes);
 }
 
@@ -154,19 +184,28 @@ bool DoFindAndCompareScheme(const CHAR* str,
   return DoCompareSchemeComponent(spec, our_scheme, compare);
 }
 
-template<typename CHAR>
-bool DoCanonicalize(const CHAR* in_spec,
-                    int in_spec_len,
+template <typename CHAR>
+bool DoCanonicalize(const CHAR* spec,
+                    int spec_len,
                     bool trim_path_end,
+                    WhitespaceRemovalPolicy whitespace_policy,
                     CharsetConverter* charset_converter,
                     CanonOutput* output,
                     Parsed* output_parsed) {
-  // Remove any whitespace from the middle of the relative URL, possibly
-  // copying to the new buffer.
+  // Reserve enough room in the output for the input, plus some extra so that
+  // we have room if we have to escape a few things without reallocating.
+  output->ReserveSizeIfNeeded(spec_len + 8);
+
+  // Remove any whitespace from the middle of the relative URL if necessary.
+  // Possibly this will result in copying to the new buffer.
   RawCanonOutputT<CHAR> whitespace_buffer;
-  int spec_len;
-  const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
-                                         &whitespace_buffer, &spec_len);
+  if (whitespace_policy == REMOVE_WHITESPACE) {
+    int original_len = spec_len;
+    spec =
+        RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len);
+    if (spec_len != original_len)
+      output_parsed->whitespace_removed = true;
+  }
 
   Parsed parsed_input;
 #ifdef WIN32
@@ -246,6 +285,9 @@ bool DoResolveRelative(const char* base_spec,
   const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
                                              &whitespace_buffer,
                                              &relative_length);
+  if (in_relative_length != relative_length)
+    output_parsed->whitespace_removed = true;
+
   bool base_is_authority_based = false;
   bool base_is_hierarchical = false;
   if (base_spec &&
@@ -271,6 +313,9 @@ bool DoResolveRelative(const char* base_spec,
     return false;
   }
 
+  // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
+  // ResolveRelativeURL, to enable more accurate buffer sizes.
+
   // Pretend for a moment that |base_spec| is a standard URL. Normally
   // non-standard URLs are treated as PathURLs, but if the base has an
   // authority we would like to preserve it.
@@ -287,7 +332,8 @@ bool DoResolveRelative(const char* base_spec,
       // based on base_parsed_authority instead of base_parsed) and needs to be
       // re-created.
       DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
-                     charset_converter, output, output_parsed);
+                     REMOVE_WHITESPACE, charset_converter, output,
+                     output_parsed);
       return did_resolve_succeed;
     }
   } else if (is_relative) {
@@ -300,8 +346,9 @@ bool DoResolveRelative(const char* base_spec,
   }
 
   // Not relative, canonicalize the input.
-  return DoCanonicalize(relative, relative_length, true, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(relative, relative_length, true,
+                        DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
+                        output_parsed);
 }
 
 template<typename CHAR>
@@ -348,8 +395,8 @@ bool DoReplaceComponents(const char* spec,
     RawCanonOutput<128> recanonicalized;
     Parsed recanonicalized_parsed;
     DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
-                   charset_converter,
-                   &recanonicalized, &recanonicalized_parsed);
+                   REMOVE_WHITESPACE, charset_converter, &recanonicalized,
+                   &recanonicalized_parsed);
 
     // Recurse using the version with the scheme already replaced. This will now
     // use the replacement rules for the new scheme.
@@ -371,6 +418,12 @@ bool DoReplaceComponents(const char* spec,
                                charset_converter, output, out_parsed);
   }
 
+  // TODO(csharrison): We could be smarter about size to reserve if this is done
+  // in callers below, and the code checks to see which components are being
+  // replaced, and with what length. If this ends up being a hot spot it should
+  // be changed.
+  output->ReserveSizeIfNeeded(spec_len + 8);
+
   // If we get here, then we know the scheme doesn't need to be replaced, so can
   // just key off the scheme in the spec to know how to do the replacements.
   if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
@@ -394,9 +447,7 @@ bool DoReplaceComponents(const char* spec,
   return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
 }
 
-void DoAddScheme(const char* new_scheme,
-                 SchemeType type,
-                 std::vector<SchemeWithType>* schemes) {
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
   DCHECK(schemes);
   // If this assert triggers, it means you've called Add*Scheme after
   // LockSchemeRegistries has been called (see the header file for
@@ -412,6 +463,29 @@ void DoAddScheme(const char* new_scheme,
   if (scheme_len == 0)
     return;
 
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
+  schemes->push_back(std::string(new_scheme));
+}
+
+void DoAddSchemeWithType(const char* new_scheme,
+                         SchemeType type,
+                         std::vector<SchemeWithType>* schemes) {
+  DCHECK(schemes);
+  // If this assert triggers, it means you've called Add*Scheme after
+  // LockSchemeRegistries has been called (see the header file for
+  // LockSchemeRegistries for more).
+  //
+  // This normally means you're trying to set up a new scheme too late in your
+  // application's init process. Locate where your app does this initialization
+  // and calls LockSchemeRegistries, and add your new scheme there.
+  DCHECK(!scheme_registries_locked)
+      << "Trying to add a scheme after the lists have been locked.";
+
+  size_t scheme_len = strlen(new_scheme);
+  if (scheme_len == 0)
+    return;
+
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
   // Duplicate the scheme into a new buffer and add it to the list of standard
   // schemes. This pointer will be leaked on shutdown.
   char* dup_scheme = new char[scheme_len + 1];
@@ -427,29 +501,85 @@ void DoAddScheme(const char* new_scheme,
 }  // namespace
 
 void Initialize() {
-  InitStandardSchemes();
-  InitReferrerSchemes();
+  if (initialized)
+    return;
+  InitSchemesWithType(&standard_schemes, kStandardURLSchemes,
+                      arraysize(kStandardURLSchemes));
+  InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes,
+                      arraysize(kReferrerURLSchemes));
+  InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes));
+  InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes));
+  InitSchemes(&no_access_schemes, kNoAccessSchemes,
+              arraysize(kNoAccessSchemes));
+  InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes,
+              arraysize(kCORSEnabledSchemes));
+  initialized = true;
 }
 
 void Shutdown() {
-  if (standard_schemes) {
-    delete standard_schemes;
-    standard_schemes = NULL;
-  }
-  if (referrer_schemes) {
-    delete referrer_schemes;
-    referrer_schemes = NULL;
-  }
+  initialized = false;
+  delete standard_schemes;
+  standard_schemes = nullptr;
+  delete referrer_schemes;
+  referrer_schemes = nullptr;
+  delete secure_schemes;
+  secure_schemes = nullptr;
+  delete local_schemes;
+  local_schemes = nullptr;
+  delete no_access_schemes;
+  no_access_schemes = nullptr;
+  delete cors_enabled_schemes;
+  cors_enabled_schemes = nullptr;
 }
 
 void AddStandardScheme(const char* new_scheme, SchemeType type) {
-  InitStandardSchemes();
-  DoAddScheme(new_scheme, type, standard_schemes);
+  Initialize();
+  DoAddSchemeWithType(new_scheme, type, standard_schemes);
 }
 
 void AddReferrerScheme(const char* new_scheme, SchemeType type) {
-  InitReferrerSchemes();
-  DoAddScheme(new_scheme, type, referrer_schemes);
+  Initialize();
+  DoAddSchemeWithType(new_scheme, type, referrer_schemes);
+}
+
+void AddSecureScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, secure_schemes);
+}
+
+const std::vector<std::string>& GetSecureSchemes() {
+  Initialize();
+  return *secure_schemes;
+}
+
+void AddLocalScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, local_schemes);
+}
+
+const std::vector<std::string>& GetLocalSchemes() {
+  Initialize();
+  return *local_schemes;
+}
+
+void AddNoAccessScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, no_access_schemes);
+}
+
+const std::vector<std::string>& GetNoAccessSchemes() {
+  Initialize();
+  return *no_access_schemes;
+}
+
+void AddCORSEnabledScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, cors_enabled_schemes);
+}
+
+const std::vector<std::string>& GetCORSEnabledSchemes() {
+  Initialize();
+  return *cors_enabled_schemes;
 }
 
 void LockSchemeRegistries() {
@@ -473,7 +603,7 @@ bool IsStandard(const base::char16* spec, const Component& scheme) {
 }
 
 bool IsReferrerScheme(const char* spec, const Component& scheme) {
-  InitReferrerSchemes();
+  Initialize();
   SchemeType unused_scheme_type;
   return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
 }
@@ -529,14 +659,22 @@ bool DomainIs(base::StringPiece canonicalized_host,
   return true;
 }
 
+bool HostIsIPAddress(base::StringPiece host) {
+  url::RawCanonOutputT<char, 128> ignored_output;
+  url::CanonHostInfo host_info;
+  url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
+                             &ignored_output, &host_info);
+  return host_info.IsIPAddress();
+}
+
 bool Canonicalize(const char* spec,
                   int spec_len,
                   bool trim_path_end,
                   CharsetConverter* charset_converter,
                   CanonOutput* output,
                   Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
 }
 
 bool Canonicalize(const base::char16* spec,
@@ -545,8 +683,8 @@ bool Canonicalize(const base::char16* spec,
                   CharsetConverter* charset_converter,
                   CanonOutput* output,
                   Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
 }
 
 bool ResolveRelative(const char* base_spec,
diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h
index 724ce956a7f..a4b74b13e5d 100644
--- a/chromium/url/url_util.h
+++ b/chromium/url/url_util.h
@@ -6,6 +6,7 @@
 #define URL_URL_UTIL_H_
 
 #include <string>
+#include <vector>
 
 #include "base/strings/string16.h"
 #include "base/strings/string_piece.h"
@@ -57,25 +58,44 @@ struct URL_EXPORT SchemeWithType {
   SchemeType type;
 };
 
+// The following Add*Scheme methods are not threadsafe and cannot be called
+// concurrently with any other url_util function. They will assert if the lists
+// of schemes have been locked (see LockSchemeRegistries).
+
 // Adds an application-defined scheme to the internal list of "standard-format"
 // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
+
 URL_EXPORT void AddStandardScheme(const char* new_scheme,
                                   SchemeType scheme_type);
 
 // Adds an application-defined scheme to the internal list of schemes allowed
 // for referrers.
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
 URL_EXPORT void AddReferrerScheme(const char* new_scheme,
                                   SchemeType scheme_type);
 
+// Adds an application-defined scheme to the list of schemes that do not trigger
+// mixed content warnings.
+URL_EXPORT void AddSecureScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
+
+// Adds an application-defined scheme to the list of schemes that normal pages
+// cannot link to or access (i.e., with the same security rules as those applied
+// to "file" URLs).
+URL_EXPORT void AddLocalScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
+
+// Adds an application-defined scheme to the list of schemes that cause pages
+// loaded with them to not have access to pages loaded with any other URL
+// scheme.
+URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can be sent
+// CORS requests.
+URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
+
 // Sets a flag to prevent future calls to Add*Scheme from succeeding.
 //
 // This is designed to help prevent errors for multithreaded applications.
@@ -133,7 +153,7 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec,
                                       const Component& scheme,
                                       SchemeType* type);
 
-// Domains ---------------------------------------------------------------------
+// Hosts -----------------------------------------------------------------------
 
 // Returns true if the |canonicalized_host| matches or is in the same domain as
 // the given |lower_ascii_domain| string. For example, if the canonicalized
@@ -146,6 +166,10 @@ URL_EXPORT bool GetStandardSchemeType(const char* spec,
 URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
                          base::StringPiece lower_ascii_domain);
 
+// Returns true if the hostname is an IP address. Note: this function isn't very
+// cheap, as it must re-parse the host to verify.
+URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
+
 // URL library wrappers --------------------------------------------------------
 
 // Parses the given spec according to the extracted scheme type. Normal users
author	Allan Sandfeld Jensen <allan.jensen@qt.io>	2017-04-05 14:08:31 +0200
committer	Allan Sandfeld Jensen <allan.jensen@qt.io>	2017-04-11 07:46:53 +0000
commit	6a4cabb866f66d4128a97cdc6d9d08ce074f1247 (patch)
tree	ab00f70a5e89278d6a0d16ff0c42578dc4d84a2d /chromium/url
parent	e733310db58160074f574c429d48f8308c0afe17 (diff)
download	qtwebengine-chromium-6a4cabb866f66d4128a97cdc6d9d08ce074f1247.tar.gz