diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-10-24 11:30:15 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-10-30 12:56:19 +0000 |
commit | 6036726eb981b6c4b42047513b9d3f4ac865daac (patch) | |
tree | 673593e70678e7789766d1f732eb51f613a2703b /chromium/url | |
parent | 466052c4e7c052268fd931888cd58961da94c586 (diff) | |
download | qtwebengine-chromium-6036726eb981b6c4b42047513b9d3f4ac865daac.tar.gz |
BASELINE: Update Chromium to 70.0.3538.78
Change-Id: Ie634710bf039e26c1957f4ae45e101bd4c434ae7
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/url')
-rw-r--r-- | chromium/url/BUILD.gn | 5 | ||||
-rw-r--r-- | chromium/url/DEPS | 2 | ||||
-rw-r--r-- | chromium/url/gurl.cc | 8 | ||||
-rw-r--r-- | chromium/url/gurl.h | 4 | ||||
-rw-r--r-- | chromium/url/origin.cc | 61 | ||||
-rw-r--r-- | chromium/url/origin.h | 118 | ||||
-rw-r--r-- | chromium/url/origin_unittest.cc | 180 | ||||
-rw-r--r-- | chromium/url/url_canon_icu.cc | 86 | ||||
-rw-r--r-- | chromium/url/url_idna_icu.cc | 106 | ||||
-rw-r--r-- | chromium/url/url_idna_icu_alternatives_android.cc (renamed from chromium/url/url_canon_icu_alternatives_android.cc) | 0 | ||||
-rw-r--r-- | chromium/url/url_idna_icu_alternatives_ios.mm (renamed from chromium/url/url_canon_icu_alternatives_ios.mm) | 0 | ||||
-rw-r--r-- | chromium/url/url_util.cc | 21 | ||||
-rw-r--r-- | chromium/url/url_util.h | 5 | ||||
-rw-r--r-- | chromium/url/url_util_unittest.cc | 5 |
14 files changed, 406 insertions, 195 deletions
diff --git a/chromium/url/BUILD.gn b/chromium/url/BUILD.gn index 57bbe16c15e..1365fcea0c4 100644 --- a/chromium/url/BUILD.gn +++ b/chromium/url/BUILD.gn @@ -76,7 +76,7 @@ component("url") { # ICU support. if (use_platform_icu_alternatives) { if (is_android) { - sources += [ "url_canon_icu_alternatives_android.cc" ] + sources += [ "url_idna_icu_alternatives_android.cc" ] deps += [ ":url_features", ":url_java", @@ -85,7 +85,7 @@ component("url") { "//base/third_party/dynamic_annotations", ] } else if (is_ios) { - sources += [ "url_canon_icu_alternatives_ios.mm" ] + sources += [ "url_idna_icu_alternatives_ios.mm" ] } else { assert(false, "ICU alternative is not implemented for platform: " + target_os) @@ -95,6 +95,7 @@ component("url") { sources += [ "url_canon_icu.cc", "url_canon_icu.h", + "url_idna_icu.cc", ] deps += [ "//third_party/icu" ] } diff --git a/chromium/url/DEPS b/chromium/url/DEPS index c43ecab890e..8ee3d2de047 100644 --- a/chromium/url/DEPS +++ b/chromium/url/DEPS @@ -10,7 +10,7 @@ specific_include_rules = { "gurl_fuzzer.cc": [ "+base/i18n", ], - "url_canon_icu(\.cc|_unittest\.cc)": [ + "url_(canon|idna)_icu(\.cc|_unittest\.cc)": [ "+third_party/icu", ], "run_all_unittests\.cc": [ diff --git a/chromium/url/gurl.cc b/chromium/url/gurl.cc index 8ec69021129..d9f36f67556 100644 --- a/chromium/url/gurl.cc +++ b/chromium/url/gurl.cc @@ -19,8 +19,6 @@ namespace { -static base::LazyInstance<std::string>::Leaky empty_string = - LAZY_INSTANCE_INITIALIZER; static base::LazyInstance<GURL>::Leaky empty_gurl = LAZY_INSTANCE_INITIALIZER; } // namespace @@ -166,7 +164,7 @@ const std::string& GURL::spec() const { return spec_; DCHECK(false) << "Trying to get the spec of an invalid URL!"; - return empty_string.Get(); + return base::EmptyString(); } bool GURL::operator<(const GURL& other) const { @@ -178,7 +176,7 @@ bool GURL::operator>(const GURL& other) const { } // Note: code duplicated below (it's inconvenient to use a template here). 
-GURL GURL::Resolve(const std::string& relative) const { +GURL GURL::Resolve(base::StringPiece relative) const { // Not allowed for invalid URLs. if (!is_valid_) return GURL(); @@ -204,7 +202,7 @@ GURL GURL::Resolve(const std::string& relative) const { } // Note: code duplicated above (it's inconvenient to use a template here). -GURL GURL::Resolve(const base::string16& relative) const { +GURL GURL::Resolve(base::StringPiece16 relative) const { // Not allowed for invalid URLs. if (!is_valid_) return GURL(); diff --git a/chromium/url/gurl.h b/chromium/url/gurl.h index 32ef5de756d..58ab96dc68a 100644 --- a/chromium/url/gurl.h +++ b/chromium/url/gurl.h @@ -151,8 +151,8 @@ class URL_EXPORT GURL { // // It is an error to resolve a URL relative to an invalid URL. The result // will be the empty URL. - GURL Resolve(const std::string& relative) const; - GURL Resolve(const base::string16& relative) const; + GURL Resolve(base::StringPiece relative) const; + GURL Resolve(base::StringPiece16 relative) const; // Creates a new GURL by replacing the current URL's components with the // supplied versions. See the Replacements class in url_canon.h for more. diff --git a/chromium/url/origin.cc b/chromium/url/origin.cc index 3d5e0c519d3..f8c664c932f 100644 --- a/chromium/url/origin.cc +++ b/chromium/url/origin.cc @@ -17,7 +17,7 @@ namespace url { -Origin::Origin() : unique_(true) {} +Origin::Origin() {} Origin Origin::Create(const GURL& url) { if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob())) @@ -43,15 +43,36 @@ Origin Origin::Create(const GURL& url) { return Origin(std::move(tuple)); } -Origin::Origin(SchemeHostPort tuple) - : tuple_(std::move(tuple)), unique_(false) { - DCHECK(!tuple_.IsInvalid()); +// Note: this is very similar to Create(const GURL&), but opaque origins are +// created with CreateUniqueOpaque() rather than the default constructor. 
+Origin Origin::CreateCanonical(const GURL& url) { + if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob())) + return CreateUniqueOpaque(); + + SchemeHostPort tuple; + + if (url.SchemeIsFileSystem()) { + tuple = SchemeHostPort(*url.inner_url()); + } else if (url.SchemeIsBlob()) { + // If we're dealing with a 'blob:' URL, https://url.spec.whatwg.org/#origin + // defines the origin as the origin of the URL which results from parsing + // the "path", which boils down to everything after the scheme. GURL's + // 'GetContent()' gives us exactly that. + tuple = SchemeHostPort(GURL(url.GetContent())); + } else { + tuple = SchemeHostPort(url); + } + + if (tuple.IsInvalid()) + return CreateUniqueOpaque(); + + return Origin(std::move(tuple)); } -Origin::Origin(const Origin&) = default; -Origin& Origin::operator=(const Origin&) = default; -Origin::Origin(Origin&&) = default; -Origin& Origin::operator=(Origin&&) = default; +Origin::Origin(const Origin& other) = default; +Origin& Origin::operator=(const Origin& other) = default; +Origin::Origin(Origin&& other) = default; +Origin& Origin::operator=(Origin&& other) = default; Origin::~Origin() = default; @@ -94,24 +115,30 @@ GURL Origin::GetURL() const { if (scheme() == kFileScheme) return GURL("file:///"); - GURL tuple_url(tuple_.GetURL()); - - return tuple_url; + return tuple_.GetURL(); } bool Origin::IsSameOriginWith(const Origin& other) const { - if (unique_ || other.unique_) - return false; - - return tuple_.Equals(other.tuple_); + return tuple_.Equals(other.tuple_) && + (!unique() || (nonce_ && nonce_ == other.nonce_)); } bool Origin::DomainIs(base::StringPiece canonical_domain) const { - return !unique_ && url::DomainIs(tuple_.host(), canonical_domain); + return !unique() && url::DomainIs(tuple_.host(), canonical_domain); } bool Origin::operator<(const Origin& other) const { - return tuple_ < other.tuple_; + return std::tie(tuple_, nonce_) < std::tie(other.tuple_, other.nonce_); +} + +Origin 
Origin::CreateUniqueOpaque() { + return Origin(ConstructAsOpaque::kTag); +} + +Origin::Origin(ConstructAsOpaque) : nonce_(base::UnguessableToken::Create()) {} + +Origin::Origin(SchemeHostPort tuple) : tuple_(std::move(tuple)) { + DCHECK(!tuple_.IsInvalid()); } std::ostream& operator<<(std::ostream& out, const url::Origin& origin) { diff --git a/chromium/url/origin.h b/chromium/url/origin.h index aee680d45bc..75807359d77 100644 --- a/chromium/url/origin.h +++ b/chromium/url/origin.h @@ -10,8 +10,11 @@ #include <string> #include "base/debug/alias.h" +#include "base/optional.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" +#include "base/strings/string_util.h" +#include "base/unguessable_token.h" #include "url/scheme_host_port.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -22,7 +25,10 @@ class GURL; namespace url { -// An Origin is a tuple of (scheme, host, port), as described in RFC 6454. +// Per https://html.spec.whatwg.org/multipage/origin.html#origin, an origin is +// either: +// - a tuple origin of (scheme, host, port) as described in RFC 6454. +// - an opaque origin with an internal value // // TL;DR: If you need to make a security-relevant decision, use 'url::Origin'. // If you only need to extract the bits of a URL which are relevant for a @@ -30,31 +36,48 @@ namespace url { // // STL;SDR: If you aren't making actual network connections, use 'url::Origin'. // -// 'Origin', like 'SchemeHostPort', is composed of a tuple of (scheme, host, -// port), but contains a number of additional concepts which make it appropriate -// for use as a security boundary and access control mechanism between contexts. -// // This class ought to be used when code needs to determine if two resources // are "same-origin", and when a canonical serialization of an origin is -// required. Note that some origins are "unique", meaning that they are not -// same-origin with any other origin (including themselves). +// required. 
Note that the canonical serialization of an origin *must not* be +// used to determine if two resources are same-origin. +// +// A tuple origin, like 'SchemeHostPort', is composed of a tuple of (scheme, +// host, port), but contains a number of additional concepts which make it +// appropriate for use as a security boundary and access control mechanism +// between contexts. Two tuple origins are same-origin if the tuples are equal. +// A tuple origin may also be re-created from its serialization. +// +// An opaque origin is cross-origin to any origin, including itself and copies +// of itself. Unlike tuple origins, an opaque origin cannot be re-created from +// its serialization, which is always the string "null". +// +// TODO(https://crbug.com/768460): work is in progress to associate an internal +// globally unique identifier with an opaque origin: completing this work will +// allow a copy of an opaque origin to be same-origin to the original instance +// of that opaque origin. +// +// IMPORTANT: Since opaque origins always serialize as the string "null", it is +// *never* safe to use the serialization for security checks! +// +// A tuple origin and an opaque origin are never same-origin. // // There are a few subtleties to note: // -// * Invalid and non-standard GURLs are parsed as unique origins. This includes +// * A default constructed Origin is opaque, but unlike the spec definition, has +// no associated identifier. A default constructed Origin is cross-origin to +// every other Origin object. +// +// * Invalid and non-standard GURLs are parsed as opaque origins. This includes // non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'. // // * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the // internals of the URL. That is, 'filesystem:https://example.com/temporary/f' // is parsed as ('https', 'example.com', 443). 
// -// * Unique origins all serialize to the string "null"; this means that the -// serializations of two unique origins are identical to each other, though -// the origins themselves are not "the same". This means that origins' -// serializations must not be relied upon for security checks. -// // * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0), // but their behavior may differ from embedder to embedder. +// TODO(dcheng): This behavior is not consistent with Blink's notion of file +// URLs, which always creates an opaque origin. // // * The host component of an IPv6 address includes brackets, just like the URL // representation. @@ -78,16 +101,20 @@ namespace url { // } class URL_EXPORT Origin { public: - // Creates a unique Origin. + // Creates an opaque and always unique Origin. The returned Origin is + // always cross-origin to any Origin, including itself. Origin(); // Creates an Origin from |url|, as described at // https://url.spec.whatwg.org/#origin, with the following additions: // - // 1. If |url| is invalid or non-standard, a unique Origin is constructed. + // 1. If |url| is invalid or non-standard, an opaque Origin is constructed. // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed // out of everything in the URL which follows the scheme). // 3. 'file' URLs all parse as ("file", "", 0). + // + // If this method returns an opaque Origin, the returned Origin will be + // cross-origin to any Origin, including itself. static Origin Create(const GURL& url); // Copyable and movable. @@ -97,8 +124,8 @@ class URL_EXPORT Origin { Origin& operator=(Origin&&); // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters - // must be valid and canonicalized. Do not use this method to create unique - // origins. Use Origin() for that. + // must be valid and canonicalized. Do not use this method to create opaque + // origins. Use Origin() or Origin::CreateOpaque() for that. 
// // This constructor should be used in order to pass 'Origin' objects back and // forth over IPC (as transitioning through GURL would risk potentially @@ -119,12 +146,17 @@ class URL_EXPORT Origin { ~Origin(); - // For unique origins, these return ("", "", 0). - const std::string& scheme() const { return tuple_.scheme(); } - const std::string& host() const { return tuple_.host(); } - uint16_t port() const { return tuple_.port(); } + // For opaque origins, these return ("", "", 0). + const std::string& scheme() const { + return !unique() ? tuple_.scheme() : base::EmptyString(); + } + const std::string& host() const { + return !unique() ? tuple_.host() : base::EmptyString(); + } + uint16_t port() const { return !unique() ? tuple_.port() : 0; } - bool unique() const { return unique_; } + // TODO(dcheng): Rename this to opaque(). + bool unique() const { return tuple_.IsInvalid(); } // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with // the addition that all Origins with a 'file' scheme serialize to "file://". @@ -157,11 +189,49 @@ class URL_EXPORT Origin { bool operator<(const Origin& other) const; private: - // |tuple| must be valid, implying that the created Origin is never unique. + friend class OriginTest; + + // Creates a new opaque origin that is guaranteed to be cross-origin to all + // currently existing origins. An origin created by this method retains its + // identity across copies. Copies are guaranteed to be same-origin to each + // other, e.g. + // + // url::Origin a = Origin::CreateUniqueOpaque(); + // url::Origin b = Origin::CreateUniqueOpaque(); + // url::Origin c = a; + // url::Origin d = b; + // + // |a| and |c| are same-origin, since |c| was copied from |a|. |b| and |d| are + // same-origin as well, since |d| was copied from |b|. All other combinations + // of origins are considered cross-origin, e.g. 
|a| is cross-origin to |b| and + // |d|, |b| is cross-origin to |a| and |c|, |c| is cross-origin to |b| and + // |d|, and |d| is cross-origin to |a| and |c|. + // + // Note that this is private internal helper, since relatively few locations + // should be responsible for deriving a canonical origin from a GURL. + static Origin CreateUniqueOpaque(); + + // Similar to Create(const GURL&). However, if the returned Origin is an + // opaque origin, it will be created with CreateUniqueOpaque(), have an + // associated identity, and be considered same-origin to copies of itself. + static Origin CreateCanonical(const GURL&); + + enum class ConstructAsOpaque { kTag }; + explicit Origin(ConstructAsOpaque); + + // |tuple| must be valid, implying that the created Origin is never an opaque + // origin. explicit Origin(SchemeHostPort tuple); + // Helpers for managing union for destroy, copy, and move. + // The tuple is used for tuple origins (e.g. https://example.com:80). This + // is expected to be the common case. |IsInvalid()| will be true for opaque + // origins. SchemeHostPort tuple_; - bool unique_; + + // The nonce is used for maintaining identity of an opaque origin. This + // nonce is preserved when an opaque origin is copied or moved. 
+ base::Optional<base::UnguessableToken> nonce_; }; URL_EXPORT std::ostream& operator<<(std::ostream& out, const Origin& origin); diff --git a/chromium/url/origin_unittest.cc b/chromium/url/origin_unittest.cc index 0f6feaaccca..bfa095ac27a 100644 --- a/chromium/url/origin_unittest.cc +++ b/chromium/url/origin_unittest.cc @@ -11,12 +11,12 @@ #include "url/gurl.h" #include "url/origin.h" -namespace { +namespace url { void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { EXPECT_EQ(a, b); - const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); - const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); + const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); + const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin); EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len); EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin); @@ -35,14 +35,37 @@ void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len); } -TEST(OriginTest, UniqueOriginComparison) { - url::Origin unique_origin; +class OriginTest : public ::testing::Test { + protected: + Origin CreateUniqueOpaque() { return Origin::CreateUniqueOpaque(); } + + Origin CreateCanonical(const GURL& url) { + return Origin::CreateCanonical(url); + } +}; + +TEST_F(OriginTest, OpaqueOriginComparison) { + // A default constructed Origin should be cross origin to everything, + // including itself. + Origin unique_origin; EXPECT_EQ("", unique_origin.scheme()); EXPECT_EQ("", unique_origin.host()); EXPECT_EQ(0, unique_origin.port()); EXPECT_TRUE(unique_origin.unique()); EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin)); + // An opaque Origin with a nonce should be same origin to itself though. 
+ Origin opaque_origin = CreateUniqueOpaque(); + EXPECT_EQ("", opaque_origin.scheme()); + EXPECT_EQ("", opaque_origin.host()); + EXPECT_EQ(0, opaque_origin.port()); + EXPECT_TRUE(opaque_origin.unique()); + EXPECT_TRUE(opaque_origin.IsSameOriginWith(opaque_origin)); + + // The default constructed Origin and the opaque Origin should always be + // cross origin to each other. + EXPECT_FALSE(opaque_origin.IsSameOriginWith(unique_origin)); + const char* const urls[] = {"data:text/html,Hello!", "javascript:alert(1)", "about:blank", @@ -53,20 +76,62 @@ TEST(OriginTest, UniqueOriginComparison) { for (auto* test_url : urls) { SCOPED_TRACE(test_url); GURL url(test_url); - url::Origin origin = url::Origin::Create(url); - EXPECT_EQ("", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); - EXPECT_TRUE(origin.unique()); - EXPECT_FALSE(origin.IsSameOriginWith(origin)); - EXPECT_FALSE(unique_origin.IsSameOriginWith(origin)); - EXPECT_FALSE(origin.IsSameOriginWith(unique_origin)); - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); + // no nonce mode of opaque origins + { + Origin origin = Origin::Create(url); + EXPECT_EQ("", origin.scheme()); + EXPECT_EQ("", origin.host()); + EXPECT_EQ(0, origin.port()); + EXPECT_TRUE(origin.unique()); + // An opaque Origin with no nonce is always cross-origin to itself. + EXPECT_FALSE(origin.IsSameOriginWith(origin)); + // A copy of |origin| should be cross-origin as well. + Origin origin_copy = origin; + EXPECT_EQ("", origin_copy.scheme()); + EXPECT_EQ("", origin_copy.host()); + EXPECT_EQ(0, origin_copy.port()); + EXPECT_TRUE(origin_copy.unique()); + EXPECT_FALSE(origin.IsSameOriginWith(origin_copy)); + // And it should always be cross-origin to another opaque Origin. + EXPECT_FALSE(origin.IsSameOriginWith(opaque_origin)); + // As well as the default constructed Origin. + EXPECT_FALSE(origin.IsSameOriginWith(unique_origin)); + // Re-creating from the URL should also be cross-origin. 
+ EXPECT_FALSE(origin.IsSameOriginWith(Origin::Create(url))); + + ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); + } + + // opaque origins with a nonce + { + Origin origin = CreateCanonical(url); + EXPECT_EQ("", origin.scheme()); + EXPECT_EQ("", origin.host()); + EXPECT_EQ(0, origin.port()); + EXPECT_TRUE(origin.unique()); + // An opaque Origin with a nonce is always same-origin to itself. + EXPECT_TRUE(origin.IsSameOriginWith(origin)); + // A copy of |origin| should be same-origin as well. + Origin origin_copy = origin; + EXPECT_EQ("", origin_copy.scheme()); + EXPECT_EQ("", origin_copy.host()); + EXPECT_EQ(0, origin_copy.port()); + EXPECT_TRUE(origin_copy.unique()); + EXPECT_TRUE(origin.IsSameOriginWith(origin_copy)); + // But it should always be cross origin to another opaque Origin. + EXPECT_FALSE(origin.IsSameOriginWith(opaque_origin)); + // As well as the default constructed Origin. + EXPECT_FALSE(origin.IsSameOriginWith(unique_origin)); + // Re-creating from the URL should also be cross origin. 
+ EXPECT_FALSE(origin.IsSameOriginWith(CreateCanonical(url))); + + ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); + } } } -TEST(OriginTest, ConstructFromTuple) { +TEST_F(OriginTest, ConstructFromTuple) { struct TestCases { const char* const scheme; const char* const host; @@ -82,7 +147,7 @@ TEST(OriginTest, ConstructFromTuple) { scope_message << test_case.scheme << "://" << test_case.host << ":" << test_case.port; SCOPED_TRACE(scope_message); - url::Origin origin = url::Origin::CreateFromNormalizedTuple( + Origin origin = Origin::CreateFromNormalizedTuple( test_case.scheme, test_case.host, test_case.port); EXPECT_EQ(test_case.scheme, origin.scheme()); @@ -91,9 +156,9 @@ TEST(OriginTest, ConstructFromTuple) { } } -TEST(OriginTest, ConstructFromGURL) { - url::Origin different_origin = - url::Origin::Create(GURL("https://not-in-the-list.test/")); +TEST_F(OriginTest, ConstructFromGURL) { + Origin different_origin = + Origin::Create(GURL("https://not-in-the-list.test/")); struct TestCases { const char* const url; @@ -146,7 +211,7 @@ TEST(OriginTest, ConstructFromGURL) { SCOPED_TRACE(test_case.url); GURL url(test_case.url); EXPECT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); + Origin origin = Origin::Create(url); EXPECT_EQ(test_case.expected_scheme, origin.scheme()); EXPECT_EQ(test_case.expected_host, origin.host()); EXPECT_EQ(test_case.expected_port, origin.port()); @@ -159,7 +224,7 @@ TEST(OriginTest, ConstructFromGURL) { } } -TEST(OriginTest, Serialization) { +TEST_F(OriginTest, Serialization) { struct TestCases { const char* const url; const char* const expected; @@ -179,7 +244,7 @@ TEST(OriginTest, Serialization) { SCOPED_TRACE(test_case.url); GURL url(test_case.url); EXPECT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); + Origin origin = Origin::Create(url); std::string serialized = origin.Serialize(); ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL()); @@ -192,7 +257,7 @@ TEST(OriginTest, 
Serialization) { } } -TEST(OriginTest, Comparison) { +TEST_F(OriginTest, Comparison) { // These URLs are arranged in increasing order: const char* const urls[] = { "data:uniqueness", @@ -206,19 +271,44 @@ TEST(OriginTest, Comparison) { "https://b:81", }; - for (size_t i = 0; i < arraysize(urls); i++) { - GURL current_url(urls[i]); - url::Origin current = url::Origin::Create(current_url); - for (size_t j = i; j < arraysize(urls); j++) { - GURL compare_url(urls[j]); - url::Origin to_compare = url::Origin::Create(compare_url); - EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; - EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; + { + // Unlike below, pre-creation here isn't necessary, since the old creation + // path doesn't populate a nonce. It makes for easier copy and paste though. + std::vector<Origin> origins; + for (const auto* test_url : urls) + origins.push_back(CreateCanonical(GURL(test_url))); + + for (size_t i = 0; i < origins.size(); i++) { + const Origin& current = origins[i]; + for (size_t j = i; j < origins.size(); j++) { + const Origin& to_compare = origins[j]; + EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; + EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; + } + } + } + + // Validate the comparison logic still works when creating a canonical origin, + // when any created opaque origins contain a nonce. + { + // Pre-create the origins, as the internal nonce for unique origins changes + // with each freshly-constructed Origin (that's not copied). 
+ std::vector<Origin> origins; + for (const auto* test_url : urls) + origins.push_back(CreateCanonical(GURL(test_url))); + + for (size_t i = 0; i < origins.size(); i++) { + const Origin& current = origins[i]; + for (size_t j = i; j < origins.size(); j++) { + const Origin& to_compare = origins[j]; + EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; + EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; + } } } } -TEST(OriginTest, UnsafelyCreate) { +TEST_F(OriginTest, UnsafelyCreate) { struct TestCase { const char* scheme; const char* host; @@ -235,7 +325,7 @@ TEST(OriginTest, UnsafelyCreate) { for (const auto& test : cases) { SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( + Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization( test.scheme, test.host, test.port); EXPECT_EQ(test.scheme, origin.scheme()); EXPECT_EQ(test.host, origin.host()); @@ -247,7 +337,7 @@ TEST(OriginTest, UnsafelyCreate) { } } -TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) { +TEST_F(OriginTest, UnsafelyCreateUniqueOnInvalidInput) { struct TestCases { const char* scheme; const char* host; @@ -272,7 +362,7 @@ TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) { for (const auto& test : cases) { SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( + Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization( test.scheme, test.host, test.port); EXPECT_EQ("", origin.scheme()); EXPECT_EQ("", origin.host()); @@ -284,7 +374,7 @@ TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) { } } -TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) { +TEST_F(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) { struct TestCases { const char* scheme; size_t scheme_length; @@ -301,7 +391,7 @@ TEST(OriginTest, 
UnsafelyCreateUniqueViaEmbeddedNulls) { for (const auto& test : cases) { SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( + Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization( std::string(test.scheme, test.scheme_length), std::string(test.host, test.host_length), test.port); EXPECT_EQ("", origin.scheme()); @@ -314,7 +404,7 @@ TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) { } } -TEST(OriginTest, DomainIs) { +TEST_F(OriginTest, DomainIs) { const struct { const char* url; const char* lower_ascii_domain; @@ -355,7 +445,7 @@ TEST(OriginTest, DomainIs) { << ")"); GURL url(test_case.url); ASSERT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); + Origin origin = Origin::Create(url); EXPECT_EQ(test_case.expected_domain_is, origin.DomainIs(test_case.lower_ascii_domain)); @@ -364,17 +454,17 @@ TEST(OriginTest, DomainIs) { // If the URL is invalid, DomainIs returns false. GURL invalid_url("google.com"); ASSERT_FALSE(invalid_url.is_valid()); - EXPECT_FALSE(url::Origin::Create(invalid_url).DomainIs("google.com")); + EXPECT_FALSE(Origin::Create(invalid_url).DomainIs("google.com")); // Unique origins. 
- EXPECT_FALSE(url::Origin().DomainIs("")); - EXPECT_FALSE(url::Origin().DomainIs("com")); + EXPECT_FALSE(Origin().DomainIs("")); + EXPECT_FALSE(Origin().DomainIs("com")); } -TEST(OriginTest, DebugAlias) { - url::Origin origin1 = url::Origin::Create(GURL("https://foo.com/bar")); +TEST_F(OriginTest, DebugAlias) { + Origin origin1 = Origin::Create(GURL("https://foo.com/bar")); DEBUG_ALIAS_FOR_ORIGIN(origin1_debug_alias, origin1); EXPECT_STREQ("https://foo.com", origin1_debug_alias); } -} // namespace +} // namespace url diff --git a/chromium/url/url_canon_icu.cc b/chromium/url/url_canon_icu.cc index 254d7e44bd1..02cc2b4ff4f 100644 --- a/chromium/url/url_canon_icu.cc +++ b/chromium/url/url_canon_icu.cc @@ -2,17 +2,15 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// ICU integration functions. +// ICU-based character set converter. #include <stdint.h> #include <stdlib.h> #include <string.h> -#include "base/lazy_instance.h" #include "base/logging.h" #include "third_party/icu/source/common/unicode/ucnv.h" #include "third_party/icu/source/common/unicode/ucnv_cb.h" -#include "third_party/icu/source/common/unicode/uidna.h" #include "third_party/icu/source/common/unicode/utypes.h" #include "url/url_canon_icu.h" #include "url/url_canon_internal.h" // for _itoa_s @@ -74,46 +72,6 @@ class AppendHandlerInstaller { const void* old_context_; }; -// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to -// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). -// -// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned -// code points allowed) to IDNA 2008 with -// the backward compatibility in mind. What it does: -// -// 1. Use the up-to-date Unicode data. -// 2. Define a case folding/mapping with the up-to-date Unicode data as -// in IDNA 2003. -// 3. Use transitional mechanism for 4 deviation characters (sharp-s, -// final sigma, ZWJ and ZWNJ) for now. -// 4. 
Continue to allow symbols and punctuations. -// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules. -// 6. Do not apply STD3 rules -// 7. Do not allow unassigned code points. -// -// It also closely matches what IE 10 does except for the BiDi check ( -// http://goo.gl/3XBhqw ). -// See http://http://unicode.org/reports/tr46/ and references therein -// for more details. -struct UIDNAWrapper { - UIDNAWrapper() { - UErrorCode err = U_ZERO_ERROR; - // TODO(jungshik): Change options as different parties (browsers, - // registrars, search engines) converge toward a consensus. - value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); - if (U_FAILURE(err)) { - CHECK(false) << "failed to open UTS46 data with error: " - << u_errorName(err) - << ". If you see this error message in a test environment " - << "your test environment likely lacks the required data " - << "tables for libicu. See https://crbug.com/778929."; - value = NULL; - } - } - - UIDNA* value; -}; - } // namespace ICUCharsetConverter::ICUCharsetConverter(UConverter* converter) @@ -149,46 +107,4 @@ void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input, } while (true); } -static base::LazyInstance<UIDNAWrapper>::Leaky - g_uidna = LAZY_INSTANCE_INITIALIZER; - -// Converts the Unicode input representing a hostname to ASCII using IDN rules. -// The output must be ASCII, but is represented as wide characters. -// -// On success, the output will be filled with the ASCII host name and it will -// return true. Unlike most other canonicalization functions, this assumes that -// the output is empty. The beginning of the host will be at offset 0, and -// the length of the output will be set to the length of the new host name. -// -// On error, this will return false. The output in this case is undefined. -// TODO(jungshik): use UTF-8/ASCII version of nameToASCII. -// Change the function signature and callers accordingly to avoid unnecessary -// conversions in our code. 
In addition, consider using icu::IDNA's UTF-8/ASCII -// version with StringByteSink. That way, we can avoid C wrappers and additional -// string conversion. -bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { - DCHECK(output->length() == 0); // Output buffer is assumed empty. - - UIDNA* uidna = g_uidna.Get().value; - DCHECK(uidna != NULL); - while (true) { - UErrorCode err = U_ZERO_ERROR; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(), - output->capacity(), &info, &err); - if (U_SUCCESS(err) && info.errors == 0) { - output->set_length(output_length); - return true; - } - - // TODO(jungshik): Look at info.errors to handle them case-by-case basis - // if necessary. - if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0) - return false; // Unknown error, give up. - - // Not enough room in our buffer, expand. - output->Resize(output_length); - } -} - } // namespace url diff --git a/chromium/url/url_idna_icu.cc b/chromium/url/url_idna_icu.cc new file mode 100644 index 00000000000..601736ee93c --- /dev/null +++ b/chromium/url/url_idna_icu.cc @@ -0,0 +1,106 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// ICU-based IDNA converter. + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "base/lazy_instance.h" +#include "base/logging.h" +#include "third_party/icu/source/common/unicode/uidna.h" +#include "third_party/icu/source/common/unicode/utypes.h" +#include "url/url_canon_icu.h" +#include "url/url_canon_internal.h" // for _itoa_s + +namespace url { + +namespace { + +// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to +// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). 
+// +// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned +// code points allowed) to IDNA 2008 with +// the backward compatibility in mind. What it does: +// +// 1. Use the up-to-date Unicode data. +// 2. Define a case folding/mapping with the up-to-date Unicode data as +// in IDNA 2003. +// 3. Use transitional mechanism for 4 deviation characters (sharp-s, +// final sigma, ZWJ and ZWNJ) for now. +// 4. Continue to allow symbols and punctuations. +// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules. +// 6. Do not apply STD3 rules +// 7. Do not allow unassigned code points. +// +// It also closely matches what IE 10 does except for the BiDi check ( +// http://goo.gl/3XBhqw ). +// See http://http://unicode.org/reports/tr46/ and references therein +// for more details. +struct UIDNAWrapper { + UIDNAWrapper() { + UErrorCode err = U_ZERO_ERROR; + // TODO(jungshik): Change options as different parties (browsers, + // registrars, search engines) converge toward a consensus. + value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); + if (U_FAILURE(err)) { + CHECK(false) << "failed to open UTS46 data with error: " + << u_errorName(err) + << ". If you see this error message in a test environment " + << "your test environment likely lacks the required data " + << "tables for libicu. See https://crbug.com/778929."; + value = NULL; + } + } + + UIDNA* value; +}; + +} // namespace + +static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = + LAZY_INSTANCE_INITIALIZER; + +// Converts the Unicode input representing a hostname to ASCII using IDN rules. +// The output must be ASCII, but is represented as wide characters. +// +// On success, the output will be filled with the ASCII host name and it will +// return true. Unlike most other canonicalization functions, this assumes that +// the output is empty. The beginning of the host will be at offset 0, and +// the length of the output will be set to the length of the new host name. 
+// +// On error, this will return false. The output in this case is undefined. +// TODO(jungshik): use UTF-8/ASCII version of nameToASCII. +// Change the function signature and callers accordingly to avoid unnecessary +// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII +// version with StringByteSink. That way, we can avoid C wrappers and additional +// string conversion. +bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { + DCHECK(output->length() == 0); // Output buffer is assumed empty. + + UIDNA* uidna = g_uidna.Get().value; + DCHECK(uidna != NULL); + while (true) { + UErrorCode err = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(), + output->capacity(), &info, &err); + if (U_SUCCESS(err) && info.errors == 0) { + output->set_length(output_length); + return true; + } + + // TODO(jungshik): Look at info.errors to handle them case-by-case basis + // if necessary. + if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0) + return false; // Unknown error, give up. + + // Not enough room in our buffer, expand. 
+ output->Resize(output_length); + } +} + +} // namespace url diff --git a/chromium/url/url_canon_icu_alternatives_android.cc b/chromium/url/url_idna_icu_alternatives_android.cc index daa0582604f..daa0582604f 100644 --- a/chromium/url/url_canon_icu_alternatives_android.cc +++ b/chromium/url/url_idna_icu_alternatives_android.cc diff --git a/chromium/url/url_canon_icu_alternatives_ios.mm b/chromium/url/url_idna_icu_alternatives_ios.mm index 66b844e8a81..66b844e8a81 100644 --- a/chromium/url/url_canon_icu_alternatives_ios.mm +++ b/chromium/url/url_idna_icu_alternatives_ios.mm diff --git a/chromium/url/url_util.cc b/chromium/url/url_util.cc index dd4e914b8d0..4bede9dd29c 100644 --- a/chromium/url/url_util.cc +++ b/chromium/url/url_util.cc @@ -808,6 +808,7 @@ DecodeURLResult DecodeURLEscapeSequences(const char* input, } } + int output_initial_length = output->length(); bool did_utf8_decode = false; bool did_isomorphic_decode = false; // Convert that 8-bit to UTF-16. It's not clear IE does this at all to @@ -829,21 +830,21 @@ DecodeURLResult DecodeURLEscapeSequences(const char* input, i = next_character; did_utf8_decode = true; } else { - // If there are any sequences that are not valid UTF-8, we keep - // invalid code points and promote to UTF-16. We copy all characters - // from the current position to the end of the identified sequence. - while (i < next_character) { - output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); - i++; - } - output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); + // If there are any sequences that are not valid UTF-8, we + // revert |output| changes, and promote any bytes to UTF-16. We + // copy all characters from the beginning to the end of the + // identified sequence. 
+ output->set_length(output_initial_length); + did_utf8_decode = false; + for (int j = 0; j < unescaped_chars.length(); ++j) + output->push_back(static_cast<unsigned char>(unescaped_chars.at(j))); did_isomorphic_decode = true; + break; } } } - if (did_utf8_decode && did_isomorphic_decode) - return DecodeURLResult::kMixed; + DCHECK(!(did_utf8_decode && did_isomorphic_decode)); if (did_isomorphic_decode) return DecodeURLResult::kIsomorphic; if (did_utf8_decode) diff --git a/chromium/url/url_util.h b/chromium/url/url_util.h index 20b2344a121..32e7f0d9eac 100644 --- a/chromium/url/url_util.h +++ b/chromium/url/url_util.h @@ -258,12 +258,13 @@ enum class DecodeURLResult { // Did UTF-8 decode only. kUTF8, // Did byte to Unicode mapping only. + // https://infra.spec.whatwg.org/#isomorphic-decode kIsomorphic, - // Did both of UTF-8 decode and isomorphic decode. - kMixed, }; // Unescapes the given string using URL escaping rules. +// This function tries to decode non-ASCII characters in UTF-8 first, +// then in isomorphic encoding if UTF-8 decoding failed. URL_EXPORT DecodeURLResult DecodeURLEscapeSequences(const char* input, int length, CanonOutputW* output); diff --git a/chromium/url/url_util_unittest.cc b/chromium/url/url_util_unittest.cc index 526d63fbafa..65f3435e9d1 100644 --- a/chromium/url/url_util_unittest.cc +++ b/chromium/url/url_util_unittest.cc @@ -245,9 +245,10 @@ TEST_F(URLUtilTest, DecodeURLEscapeSequences) { // Test the error behavior for invalid UTF-8. { const char invalid_input[] = "%e4%a0%e5%a5%bd"; - const base::char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; + const base::char16 invalid_expected[6] = {0x00e4, 0x00a0, 0x00e5, + 0x00a5, 0x00bd, 0}; RawCanonOutputT<base::char16> invalid_output; - EXPECT_EQ(DecodeURLResult::kMixed, + EXPECT_EQ(DecodeURLResult::kIsomorphic, DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), &invalid_output)); EXPECT_EQ(base::string16(invalid_expected), |