diff options
author | Jüri Valdmann <juri.valdmann@qt.io> | 2018-04-24 15:04:11 +0200 |
---|---|---|
committer | Jüri Valdmann <juri.valdmann@qt.io> | 2018-07-24 13:36:45 +0000 |
commit | f7e094b3c3b91e8b1652547b5a6f6be46f0618df (patch) | |
tree | 44c5c57814eb5dd78ec50895302e3a970629334e | |
parent | 18f063e21c123b2a339735ab7129be74bed2b807 (diff) | |
download | qtwebengine-chromium-f7e094b3c3b91e8b1652547b5a6f6be46f0618df.tar.gz |
Extend url library for WebEngine custom schemes
Adds (another) parallel scheme registry in url/url_util_qt, which is then used
in Chromium and Blink to specialize URL handling for WebEngine custom schemes.
The registry is transmitted from the main process to subprocesses via a new
command-line flag (--webengine-schemes), since the scheme lists in url/url_util
are locked before IPC is initialized.
Task-number: QTBUG-62536
Change-Id: Id26811a18d4c740cc4d281d2da5720304a235a41
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- | chromium/content/browser/browser_url_handler_impl.cc | 5 | ||||
-rw-r--r-- | chromium/content/common/url_schemes.cc | 35 | ||||
-rw-r--r-- | chromium/third_party/blink/renderer/platform/weborigin/scheme_registry.cc | 10 | ||||
-rw-r--r-- | chromium/third_party/blink/renderer/platform/weborigin/security_origin.cc | 21 | ||||
-rw-r--r-- | chromium/url/BUILD.gn | 3 | ||||
-rw-r--r-- | chromium/url/gurl.cc | 9 | ||||
-rw-r--r-- | chromium/url/gurl.h | 3 | ||||
-rw-r--r-- | chromium/url/origin.cc | 3 | ||||
-rw-r--r-- | chromium/url/scheme_host_port.cc | 50 | ||||
-rw-r--r-- | chromium/url/url_canon_stdurl.cc | 6 | ||||
-rw-r--r-- | chromium/url/url_util_qt.cc | 189 | ||||
-rw-r--r-- | chromium/url/url_util_qt.h | 56 |
12 files changed, 387 insertions, 3 deletions
diff --git a/chromium/content/browser/browser_url_handler_impl.cc b/chromium/content/browser/browser_url_handler_impl.cc index ef78ddd5e26..f895fa6840e 100644 --- a/chromium/content/browser/browser_url_handler_impl.cc +++ b/chromium/content/browser/browser_url_handler_impl.cc @@ -14,6 +14,7 @@ #include "content/public/common/url_constants.h" #include "content/public/common/url_utils.h" #include "url/gurl.h" +#include "url/url_util_qt.h" namespace content { @@ -41,6 +42,10 @@ static bool HandleViewSource(GURL* url, BrowserContext* browser_context) { all_allowed_sub_schemes.push_back(default_allowed_sub_schemes[i]); GetContentClient()->browser()->GetAdditionalViewSourceSchemes( &all_allowed_sub_schemes); + for (auto& cs : url::CustomScheme::GetSchemes()) { + if (cs.flags & url::CustomScheme::ViewSourceAllowed) + all_allowed_sub_schemes.push_back(cs.name); + } bool is_sub_scheme_allowed = false; for (size_t i = 0; i < all_allowed_sub_schemes.size(); ++i) { diff --git a/chromium/content/common/url_schemes.cc b/chromium/content/common/url_schemes.cc index a269e1de2cb..51eadfc7ad5 100644 --- a/chromium/content/common/url_schemes.cc +++ b/chromium/content/common/url_schemes.cc @@ -14,6 +14,7 @@ #include "content/public/common/url_constants.h" #include "services/network/public/cpp/cors/cors_legacy.h" #include "url/url_util.h" +#include "url/url_util_qt.h" namespace content { namespace { @@ -89,6 +90,31 @@ void RegisterContentSchemes(bool lock_schemes) { for (auto& scheme : schemes.empty_document_schemes) url::AddEmptyDocumentScheme(scheme.c_str()); + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // Since ContentClient::Schemes::standard_types doesn't have types + // (url::SchemeType), we need to bypass AddAdditionalSchemes and add our + // 'standard custom schemes' directly. Although the other scheme lists could + // be filled also in AddAdditionalSchemes by QtWebEngineCore, to follow the + // principle of the separation of concerns, we add them here instead. 
This + // way, from the perspective of QtWebEngineCore, everything to do with custom + // scheme parsing is fully encapsulated behind url::CustomScheme. The + // complexity of QtWebEngineCore is reduced while the complexity of + // url::CustomScheme is not significantly increased (since the functionality + // is needed anyway). + for (auto& cs : url::CustomScheme::GetSchemes()) { + if (cs.type != url::SCHEME_WITHOUT_AUTHORITY) + url::AddStandardScheme(cs.name.c_str(), cs.type); + if (cs.flags & url::CustomScheme::Secure) + url::AddSecureScheme(cs.name.c_str()); + if (cs.flags & url::CustomScheme::Local) + url::AddLocalScheme(cs.name.c_str()); + if (cs.flags & url::CustomScheme::NoAccessAllowed) + url::AddNoAccessScheme(cs.name.c_str()); + if (cs.flags & url::CustomScheme::ContentSecurityPolicyIgnored) + url::AddCSPBypassingScheme(cs.name.c_str()); + } + // Prevent future modification of the scheme lists. This is to prevent // accidental creation of data races in the program. Add*Scheme aren't // threadsafe so must be called when GURL isn't used on any other thread. This @@ -108,6 +134,15 @@ void RegisterContentSchemes(bool lock_schemes) { GetMutableSecureOrigins() = std::move(schemes.secure_origins); network::cors::legacy::RegisterSecureOrigins(GetSecureOrigins()); + + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // This list only applies to Chromium proper whereas Blink uses its own + // hardcoded list (see blink::URLSchemesRegistry). 
+ for (auto& cs : url::CustomScheme::GetSchemes()) { + if (cs.flags & url::CustomScheme::ServiceWorkersAllowed) + GetMutableServiceWorkerSchemes().push_back(cs.name); + } } const std::vector<std::string>& GetSavableSchemes() { diff --git a/chromium/third_party/blink/renderer/platform/weborigin/scheme_registry.cc b/chromium/third_party/blink/renderer/platform/weborigin/scheme_registry.cc index 8b511f95242..feb1d819d08 100644 --- a/chromium/third_party/blink/renderer/platform/weborigin/scheme_registry.cc +++ b/chromium/third_party/blink/renderer/platform/weborigin/scheme_registry.cc @@ -31,6 +31,7 @@ #include "third_party/blink/renderer/platform/wtf/threading.h" #include "third_party/blink/renderer/platform/wtf/threading_primitives.h" #include "url/url_util.h" +#include "url/url_util_qt.h" namespace blink { @@ -66,6 +67,15 @@ class URLSchemesRegistry final { } for (auto& scheme : url::GetEmptyDocumentSchemes()) empty_document_schemes.insert(scheme.c_str()); + + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // Non-blink Chromium has its own version of this list (see + // content::RegisterContentSchemes). 
+ for (auto& cs : url::CustomScheme::GetSchemes()) { + if (cs.flags & url::CustomScheme::ServiceWorkersAllowed) + service_worker_schemes.insert(String(cs.name.c_str())); + } } ~URLSchemesRegistry() = default; diff --git a/chromium/third_party/blink/renderer/platform/weborigin/security_origin.cc b/chromium/third_party/blink/renderer/platform/weborigin/security_origin.cc index 7d8c8610648..fedd380f57c 100644 --- a/chromium/third_party/blink/renderer/platform/weborigin/security_origin.cc +++ b/chromium/third_party/blink/renderer/platform/weborigin/security_origin.cc @@ -46,6 +46,7 @@ #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h" #include "url/url_canon.h" #include "url/url_canon_ip.h" +#include "url/url_util_qt.h" namespace blink { @@ -138,6 +139,26 @@ SecurityOrigin::SecurityOrigin(const KURL& url) // document.domain starts as m_host, but can be set by the DOM. domain_ = host_; + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // If DefaultPortForProtocol and IsDefaultPortForProtocol were appropriately + // extended, then SecurityOrigin would *almost* work without the following + // code. The only problem is that can_load_local_resources_ would be set for + // Local schemes and not LocalAccessAllowed schemes. 
+ if (const url::CustomScheme* cs = url::CustomScheme::FindScheme(StringUTF8Adaptor(protocol_).AsStringPiece())) { + if (cs->has_port_component()) { + if (!effective_port_) // 0 is kInvalidPort + effective_port_ = cs->default_port; + if (port_ == cs->default_port) + port_ = kInvalidPort; + } else { + effective_port_ = kInvalidPort; + port_ = kInvalidPort; + } + can_load_local_resources_ = cs->flags & url::CustomScheme::LocalAccessAllowed; + return; + } + if (IsDefaultPortForProtocol(port_, protocol_)) port_ = kInvalidPort; diff --git a/chromium/url/BUILD.gn b/chromium/url/BUILD.gn index c4deb10db9c..9dbeaf48c31 100644 --- a/chromium/url/BUILD.gn +++ b/chromium/url/BUILD.gn @@ -54,6 +54,8 @@ component("url") { "url_util.cc", "url_util.h", "url_util_internal.h", + "url_util_qt.cc", + "url_util_qt.h", ] defines = [ "URL_IMPLEMENTATION" ] @@ -126,6 +128,7 @@ test("url_unittests") { "url_canon_unittest.cc", "url_parse_unittest.cc", "url_test_utils.h", + "url_util_qt_unittest.cc", "url_util_unittest.cc", ] diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc index cec69c7cc49..9028f4873a4 100644 --- a/chromium/url/gurl.cc +++ b/chromium/url/gurl.cc @@ -16,6 +16,7 @@ #include "base/trace_event/memory_usage_estimator.h" #include "url/url_canon_stdstring.h" #include "url/url_util.h" +#include "url/url_util_qt.h" namespace { @@ -278,7 +279,7 @@ GURL GURL::ReplaceComponents( GURL GURL::GetOrigin() const { // This doesn't make sense for invalid or nonstandard URLs, so return // the empty URL. - if (!is_valid_ || !IsStandard()) + if (!is_valid_ || (!IsStandard() && !IsCustom())) return GURL(); if (SchemeIsFileSystem()) @@ -311,7 +312,7 @@ GURL GURL::GetAsReferrer() const { GURL GURL::GetWithEmptyPath() const { // This doesn't make sense for invalid or nonstandard URLs, so return // the empty URL. 
- if (!is_valid_ || !IsStandard()) + if (!is_valid_ || (!IsStandard() && !IsCustom())) return GURL(); // We could optimize this since we know that the URL is canonical, and we are @@ -340,6 +341,10 @@ bool GURL::IsStandard() const { return url::IsStandard(spec_.data(), parsed_.scheme); } +bool GURL::IsCustom() const { + return url::CustomScheme::FindScheme(scheme_piece()); +} + bool GURL::IsAboutBlank() const { if (!SchemeIs(url::kAboutScheme)) return false; diff --git a/chromium/url/gurl.h b/chromium/url/gurl.h index 96ea8645da6..6052b444a39 100644 --- a/chromium/url/gurl.h +++ b/chromium/url/gurl.h @@ -213,6 +213,9 @@ class URL_EXPORT GURL { // by calling SchemeIsFile[System]. bool IsStandard() const; + // Qt WebEngine custom scheme. + bool IsCustom() const; + // Returns true when the url is of the form about:blank, about:blank?foo or // about:blank/#foo. bool IsAboutBlank() const; diff --git a/chromium/url/origin.cc b/chromium/url/origin.cc index 3d5e0c519d3..076718b3652 100644 --- a/chromium/url/origin.cc +++ b/chromium/url/origin.cc @@ -14,13 +14,14 @@ #include "url/url_canon_stdstring.h" #include "url/url_constants.h" #include "url/url_util.h" +#include "url/url_util_qt.h" namespace url { Origin::Origin() : unique_(true) {} Origin Origin::Create(const GURL& url) { - if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob())) + if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob() && !url.IsCustom())) return Origin(); SchemeHostPort tuple; diff --git a/chromium/url/scheme_host_port.cc b/chromium/url/scheme_host_port.cc index c4f0ba5d5cc..24a478b8823 100644 --- a/chromium/url/scheme_host_port.cc +++ b/chromium/url/scheme_host_port.cc @@ -18,6 +18,7 @@ #include "url/url_canon_stdstring.h" #include "url/url_constants.h" #include "url/url_util.h" +#include "url/url_util_qt.h" namespace url { @@ -51,6 +52,22 @@ bool IsValidInput(const base::StringPiece& scheme, const base::StringPiece& host, uint16_t port, SchemeHostPort::ConstructPolicy policy) { 
+ // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // Differences between standard and custom schemes: + // + // - SCHEME_WITH_HOST: host part is optional for standard schemes and + // mandatory for custom schemes. Among the standard schemes, 'file' has an + // optional host part, so that's probably why it's optional. + // + // - SCHEME_WITHOUT_AUTHORITY: disallowed for standard schemes, allowed for + // custom schemes. The idea being that all pages from such a scheme, for + // example 'qrc', should belong to the same origin. + if (const CustomScheme* cs = CustomScheme::FindScheme(scheme)) + return (cs->has_host_component() == !host.empty() && + cs->has_port_component() == (port != 0) && + (policy != SchemeHostPort::CHECK_CANONICALIZATION || host.empty() || IsCanonicalHost(host))); + SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; bool is_standard = GetStandardSchemeType( scheme.data(), @@ -163,6 +180,26 @@ bool SchemeHostPort::IsInvalid() const { } std::string SchemeHostPort::Serialize() const { + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // We break from the standard format here and skip the double-slashes for + // custom schemes of type SCHEME_WITHOUT_AUTHORITY. This seems to be the only + // way to ensure that invariants like + // + // GURL(x.Serialize()) == x.GetURL() for all SchemeHostPort x + // + // and + // + // y == Origin::Create(GURL(y.Serialize())) for all Origin y + // + // hold without changing the URL parser. URLs of this type are parsed with the + // PathURL parser, which would include the double-slashes in the path + // component instead of ignoring them as part of the authority syntax like + // they are supposed to be. + if (const CustomScheme* cs = CustomScheme::FindScheme(scheme_)) + if (!cs->has_host_component()) + return scheme_ + ":"; + // Null checking for |parsed| in SerializeInternal is probably slower than // just filling it in and discarding it here. 
url::Parsed parsed; @@ -170,6 +207,19 @@ std::string SchemeHostPort::Serialize() const { } GURL SchemeHostPort::GetURL() const { + // NOTE(juvaldma)(Chromium 67.0.3396.47) + // + // See note in Serialize(). We also skip the extra slash workaround for custom + // schemes of type SCHEME_WITHOUT_AUTHORITY, since that only applies to + // StandardURL canonicalization. + if (const CustomScheme* cs = CustomScheme::FindScheme(scheme_)) { + if (!cs->has_host_component()) { + url::Parsed parsed; + parsed.scheme = Component(0, scheme_.length()); + return GURL(scheme_ + ":", parsed, true); + } + } + url::Parsed parsed; std::string serialized = SerializeInternal(&parsed); diff --git a/chromium/url/url_canon_stdurl.cc b/chromium/url/url_canon_stdurl.cc index c6193221985..21625455270 100644 --- a/chromium/url/url_canon_stdurl.cc +++ b/chromium/url/url_canon_stdurl.cc @@ -8,6 +8,7 @@ #include "url/url_canon.h" #include "url/url_canon_internal.h" #include "url/url_constants.h" +#include "url/url_util_qt.h" namespace url { @@ -137,6 +138,11 @@ int DefaultPortForScheme(const char* scheme, int scheme_len) { default_port = 80; break; } + + if (default_port == PORT_UNSPECIFIED) + if (const CustomScheme* cs = CustomScheme::FindScheme(base::StringPiece(scheme, scheme_len))) + return cs->default_port; + return default_port; } diff --git a/chromium/url/url_util_qt.cc b/chromium/url/url_util_qt.cc new file mode 100644 index 00000000000..8588f1b549e --- /dev/null +++ b/chromium/url/url_util_qt.cc @@ -0,0 +1,189 @@ +#include "url/url_util_qt.h" + +#include "base/command_line.h" +#include "base/no_destructor.h" +#include "base/numerics/safe_conversions.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" +#include "url/gurl.h" +#include "url/url_canon.h" +#include "url/url_util.h" + +namespace url { + +namespace { + +std::string ToString(const CustomScheme& cs) +{ + std::string serialized; + + serialized += cs.name; + serialized += ':'; + + switch 
(cs.type) { + case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION: + serialized += 'u'; + serialized += base::NumberToString(cs.default_port); + break; + case SCHEME_WITH_HOST_AND_PORT: + serialized += 'p'; + serialized += base::NumberToString(cs.default_port); + break; + case SCHEME_WITH_HOST: + serialized += 'h'; + break; + case SCHEME_WITHOUT_AUTHORITY: + break; + } + + if (cs.flags & CustomScheme::Secure) + serialized += 's'; + if (cs.flags & CustomScheme::Local) + serialized += 'l'; + if (cs.flags & CustomScheme::LocalAccessAllowed) + serialized += 'L'; + if (cs.flags & CustomScheme::NoAccessAllowed) + serialized += 'N'; + if (cs.flags & CustomScheme::ServiceWorkersAllowed) + serialized += 'W'; + if (cs.flags & CustomScheme::ViewSourceAllowed) + serialized += 'V'; + if (cs.flags & CustomScheme::ContentSecurityPolicyIgnored) + serialized += 'C'; + + return serialized; +} + +class Parser { +public: + void CharacterArrived(char ch) { + switch (state) { + case NAME: CharacterArrivedWhileParsingName(ch); break; + case OPTIONS: CharacterArrivedWhileParsingOptions(ch); break; + case PORT: CharacterArrivedWhileParsingPort(ch); break; + } + } + + void EndReached() { + if (!default_port_string.empty()) + FlushPort(); + if (!cs.name.empty()) + Flush(); + } + +private: + void CharacterArrivedWhileParsingName(char ch) { + switch (ch) { + case ':': state = OPTIONS; break; + case ';': Flush(); break; + default: cs.name += ch; break; + } + } + + void CharacterArrivedWhileParsingOptions(char ch) { + switch (ch) { + case 'u': cs.type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; state = PORT; break; + case 'p': cs.type = SCHEME_WITH_HOST_AND_PORT; state = PORT; break; + case 'h': cs.type = SCHEME_WITH_HOST; break; + case 's': cs.flags |= CustomScheme::Secure; break; + case 'l': cs.flags |= CustomScheme::Local; break; + case 'L': cs.flags |= CustomScheme::LocalAccessAllowed; break; + case 'N': cs.flags |= CustomScheme::NoAccessAllowed; break; + case 'W': cs.flags |= 
CustomScheme::ServiceWorkersAllowed; break; + case 'V': cs.flags |= CustomScheme::ViewSourceAllowed; break; + case 'C': cs.flags |= CustomScheme::ContentSecurityPolicyIgnored; break; + case ';': Flush(); state = NAME; break; + default: CHECK(false) << "Unexpected character '" << ch << "'."; + } + } + + void CharacterArrivedWhileParsingPort(char ch) { + if (base::IsAsciiDigit(ch)) { + default_port_string += ch; + return; + } + + FlushPort(); + + state = OPTIONS; + CharacterArrivedWhileParsingOptions(ch); + } + + void FlushPort() { + CHECK(base::StringToInt(default_port_string, &cs.default_port)) + << "Failed to parse '" << default_port_string << "'."; + default_port_string.clear(); + } + + void Flush() { + CustomScheme::AddScheme(cs); + cs = CustomScheme(); + } + + enum { NAME, OPTIONS, PORT } state = NAME; + CustomScheme cs; + std::string default_port_string; +}; + +} // namespace + +std::vector<CustomScheme>& CustomScheme::GetMutableSchemes() { + static base::NoDestructor<std::vector<CustomScheme>> schemes; + return *schemes; +} + +const std::vector<CustomScheme>& CustomScheme::GetSchemes() { + return GetMutableSchemes(); +} + +void CustomScheme::ClearSchemes() +{ + GetMutableSchemes().clear(); +} + +void CustomScheme::AddScheme(const CustomScheme& cs) +{ + DCHECK(!cs.name.empty()); + DCHECK_EQ(cs.has_port_component(), (cs.default_port != PORT_UNSPECIFIED)) + << "Scheme '" << cs.name << "' has invalid configuration."; + DCHECK_EQ(base::ToLowerASCII(cs.name), cs.name) + << "Scheme '" << cs.name << "' should be lower-case."; + DCHECK(!FindScheme(cs.name)) + << "Scheme '" << cs.name << "' already added."; + + GetMutableSchemes().push_back(cs); +} + +const CustomScheme* CustomScheme::FindScheme(base::StringPiece name) +{ + for (const CustomScheme& cs : GetSchemes()) + if (base::LowerCaseEqualsASCII(name, cs.name)) + return &cs; + return nullptr; +} + +const char CustomScheme::kCommandLineFlag[] = "webengine-schemes"; + +void 
CustomScheme::SaveSchemes(base::CommandLine* command_line) +{ + std::string serialized; + + for (const CustomScheme& cs : GetSchemes()) { + if (!serialized.empty()) + serialized += ';'; + serialized += ToString(cs); + } + + command_line->AppendSwitchASCII(kCommandLineFlag, std::move(serialized)); +} + +void CustomScheme::LoadSchemes(const base::CommandLine* command_line) +{ + std::string serialized = command_line->GetSwitchValueASCII(kCommandLineFlag); + Parser parser; + for (char ch : serialized) + parser.CharacterArrived(ch); + parser.EndReached(); +} + +} // namespace url diff --git a/chromium/url/url_util_qt.h b/chromium/url/url_util_qt.h new file mode 100644 index 00000000000..4daf147d45b --- /dev/null +++ b/chromium/url/url_util_qt.h @@ -0,0 +1,56 @@ +#ifndef URL_URL_UTIL_QT_H_ +#define URL_URL_UTIL_QT_H_ + +#include <string> + +#include "url/url_util.h" + +namespace base { +class CommandLine; +} // namespace base + +namespace url { + +// Configuration of a custom scheme. +// +// Each process has a list of CustomSchemes. The list is filled in the main +// process and transmitted to subprocesses via command-line flags +// (SaveSchemes/LoadSchemes). We cannot use IPC for this because the url library +// scheme lists are filled and locked before IPC is initialized. +// +// To implement the required semantics, the lists are accessed not only from the +// url library but all over the codebase (grep CustomScheme). 
+struct URL_EXPORT CustomScheme { + enum Flag { + Secure = 0x1, + Local = 0x2, + LocalAccessAllowed = 0x4, + NoAccessAllowed = 0x8, + ServiceWorkersAllowed = 0x10, + ViewSourceAllowed = 0x20, + ContentSecurityPolicyIgnored = 0x40, + }; + + std::string name; + SchemeType type = SCHEME_WITHOUT_AUTHORITY; + int default_port = PORT_UNSPECIFIED; + int flags = 0; + + bool has_host_component() const { return type != SCHEME_WITHOUT_AUTHORITY; } + bool has_port_component() const { return type <= SCHEME_WITH_HOST_AND_PORT; } + + static const std::vector<CustomScheme>& GetSchemes(); + static std::vector<CustomScheme>& GetMutableSchemes(); + static void ClearSchemes(); + + static void AddScheme(const CustomScheme& cs); + static const CustomScheme* FindScheme(base::StringPiece name); + + static const char kCommandLineFlag[]; + static void SaveSchemes(base::CommandLine* command_line); + static void LoadSchemes(const base::CommandLine* command_line); +}; + +} // namespace url + +#endif // URL_URL_UTIL_QT_H_ |