summaryrefslogtreecommitdiff
path: root/chromium/third_party/blink/renderer/modules/url_pattern
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/blink/renderer/modules/url_pattern')
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/BUILD.gn18
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/DEPS2
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/2dcf128c70be5016986fa5965a89eb839fd6cc3c1
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/345433f6443349a932caefebc1754a7da500a8851
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/43e53712d966badcd72516f9a9df30486173d8bc1
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/4eba2aa9b6632b032ad9affab48eed570c3a7bec1
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/60aff90a381901bfbb4fd3d1753a5a86878428211
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/97cccba9cd38cd5138376093447ea6382a4df2201
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/b0c2fea6b0fbc79ebcebf728cf1c76d4b35091211
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/d376eb4568f7c9ab01409838be90c31db1a9e7551
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/dc9e31b18d4686a0f8dec64b5602d5a426ab0f441
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/ecac354bd05b4e6328a498c43291ad2e129134fc1
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/idls.gni13
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.cc1064
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.h78
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.idl17
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.cc464
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h83
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.cc390
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.h113
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_fuzzer.cc29
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.cc453
-rw-r--r--chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h192
23 files changed, 2010 insertions, 916 deletions
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/BUILD.gn b/chromium/third_party/blink/renderer/modules/url_pattern/BUILD.gn
index 6bc2956e0c0..dd47096a003 100644
--- a/chromium/third_party/blink/renderer/modules/url_pattern/BUILD.gn
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/BUILD.gn
@@ -2,12 +2,19 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+import("//testing/libfuzzer/fuzzer_test.gni")
import("//third_party/blink/renderer/modules/modules.gni")
blink_modules_sources("url_pattern") {
sources = [
"url_pattern.cc",
"url_pattern.h",
+ "url_pattern_canon.cc",
+ "url_pattern_canon.h",
+ "url_pattern_component.cc",
+ "url_pattern_component.h",
+ "url_pattern_parser.cc",
+ "url_pattern_parser.h",
]
public_deps = [
@@ -15,3 +22,14 @@ blink_modules_sources("url_pattern") {
"//third_party/liburlpattern",
]
}
+
+if (use_libfuzzer) {
+ fuzzer_test("url_pattern_fuzzer") {
+ sources = [ "url_pattern_fuzzer.cc" ]
+ deps = [
+ "//third_party/blink/renderer/modules:modules",
+ "//third_party/blink/renderer/platform:blink_fuzzer_test_support",
+ ]
+ seed_corpus = "fuzzer_seed_corpus"
+ }
+}
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/DEPS b/chromium/third_party/blink/renderer/modules/url_pattern/DEPS
index f5a16143eb0..35dd35a70ba 100644
--- a/chromium/third_party/blink/renderer/modules/url_pattern/DEPS
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/DEPS
@@ -5,4 +5,6 @@
include_rules = [
"+base/strings/string_util.h",
"+third_party/liburlpattern",
+ "+url/url_canon.h",
+ "+url/url_util.h",
]
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/2dcf128c70be5016986fa5965a89eb839fd6cc3c b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/2dcf128c70be5016986fa5965a89eb839fd6cc3c
new file mode 100644
index 00000000000..6d1f1cad6fd
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/2dcf128c70be5016986fa5965a89eb839fd6cc3c
@@ -0,0 +1 @@
+https://example.com/count/([0-9]+)
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/345433f6443349a932caefebc1754a7da500a885 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/345433f6443349a932caefebc1754a7da500a885
new file mode 100644
index 00000000000..9388cc3a9d0
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/345433f6443349a932caefebc1754a7da500a885
@@ -0,0 +1 @@
+https://example.com/foo
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/43e53712d966badcd72516f9a9df30486173d8bc b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/43e53712d966badcd72516f9a9df30486173d8bc
new file mode 100644
index 00000000000..ee9ccfb46eb
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/43e53712d966badcd72516f9a9df30486173d8bc
@@ -0,0 +1 @@
+https://example.com::port?/foo
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/4eba2aa9b6632b032ad9affab48eed570c3a7bec b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/4eba2aa9b6632b032ad9affab48eed570c3a7bec
new file mode 100644
index 00000000000..f71ae5546df
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/4eba2aa9b6632b032ad9affab48eed570c3a7bec
@@ -0,0 +1 @@
+https://:user::pass@example.com/foo
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/60aff90a381901bfbb4fd3d1753a5a8687842821 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/60aff90a381901bfbb4fd3d1753a5a8687842821
new file mode 100644
index 00000000000..25978ff030a
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/60aff90a381901bfbb4fd3d1753a5a8687842821
@@ -0,0 +1 @@
+https://example.com/:product
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/97cccba9cd38cd5138376093447ea6382a4df220 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/97cccba9cd38cd5138376093447ea6382a4df220
new file mode 100644
index 00000000000..581fd125184
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/97cccba9cd38cd5138376093447ea6382a4df220
@@ -0,0 +1 @@
+http{s}?://example.com/foo
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/b0c2fea6b0fbc79ebcebf728cf1c76d4b3509121 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/b0c2fea6b0fbc79ebcebf728cf1c76d4b3509121
new file mode 100644
index 00000000000..f9d3f896b75
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/b0c2fea6b0fbc79ebcebf728cf1c76d4b3509121
@@ -0,0 +1 @@
+https://example.com/count/:value([0-9]+)
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/d376eb4568f7c9ab01409838be90c31db1a9e755 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/d376eb4568f7c9ab01409838be90c31db1a9e755
new file mode 100644
index 00000000000..db759a3cbf4
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/d376eb4568f7c9ab01409838be90c31db1a9e755
@@ -0,0 +1 @@
+https://{:sub.}?example.com/:product?/index.html
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/dc9e31b18d4686a0f8dec64b5602d5a426ab0f44 b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/dc9e31b18d4686a0f8dec64b5602d5a426ab0f44
new file mode 100644
index 00000000000..f9354e7b6d0
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/dc9e31b18d4686a0f8dec64b5602d5a426ab0f44
@@ -0,0 +1 @@
+https://example.com/foo?bar#baz
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/ecac354bd05b4e6328a498c43291ad2e129134fc b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/ecac354bd05b4e6328a498c43291ad2e129134fc
new file mode 100644
index 00000000000..7d615d8c3b2
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/fuzzer_seed_corpus/ecac354bd05b4e6328a498c43291ad2e129134fc
@@ -0,0 +1 @@
+https://{*.}example.com/foo
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/idls.gni b/chromium/third_party/blink/renderer/modules/url_pattern/idls.gni
deleted file mode 100644
index 9f0ad7e912f..00000000000
--- a/chromium/third_party/blink/renderer/modules/url_pattern/idls.gni
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2020 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-modules_idl_files = [ "url_pattern.idl" ]
-
-modules_dictionary_idl_files = [
- "url_pattern_component_result.idl",
- "url_pattern_init.idl",
- "url_pattern_result.idl",
-]
-
-modules_dependency_idl_files = []
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.cc b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.cc
index e0e2f943dfb..3abf9254028 100644
--- a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.cc
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.cc
@@ -6,10 +6,13 @@
#include "base/strings/string_util.h"
#include "third_party/blink/renderer/bindings/core/v8/script_regexp.h"
-#include "third_party/blink/renderer/bindings/modules/v8/usv_string_or_url_pattern_init.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_union_urlpatterninit_usvstring.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component_result.h"
+#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_init.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_result.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_component.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h"
#include "third_party/blink/renderer/platform/bindings/exception_state.h"
#include "third_party/blink/renderer/platform/weborigin/kurl.h"
#include "third_party/blink/renderer/platform/weborigin/security_origin.h"
@@ -17,88 +20,15 @@
#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
#include "third_party/liburlpattern/pattern.h"
+#include "third_party/liburlpattern/tokenize.h"
namespace blink {
-// A struct representing all the information needed to match a particular
-// component of a URL.
-class URLPattern::Component final
- : public GarbageCollected<URLPattern::Component> {
- public:
- bool Match(StringView input, Vector<String>* group_list) const {
- return regexp->Match(input, /*start_from=*/0, /*match_length=*/nullptr,
- group_list) == 0;
- }
-
- void Trace(Visitor* visitor) const { visitor->Trace(regexp); }
-
- // The parsed pattern.
- liburlpattern::Pattern pattern;
-
- // The pattern compiled down to a js regular expression.
- Member<ScriptRegexp> regexp;
-
- // The names to be applied to the regular expression capture groups. Note,
- // liburlpattern regular expressions do not use named capture groups directly.
- Vector<String> name_list;
-
- Component(liburlpattern::Pattern p, ScriptRegexp* r, Vector<String> n)
- : pattern(p), regexp(r), name_list(std::move(n)) {}
-};
+using url_pattern::Component;
+using url_pattern::ValueType;
namespace {
-// The default pattern string for components that are not specified in the
-// URLPattern constructor.
-const char* kDefaultPattern = "*";
-
-// The liburlpattern::Options to use for most component patterns. We
-// default to strict mode and case sensitivity. In addition, most
-// components have no concept of a delimiter or prefix character.
-const liburlpattern::Options& DefaultOptions() {
- DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, options,
- ({.delimiter_list = "",
- .prefix_list = "",
- .sensitive = true,
- .strict = true}));
- return options;
-}
-
-// The liburlpattern::Options to use for hostname patterns. This uses a
-// "." delimiter controlling how far a named group like ":bar" will match
-// by default. Note, hostnames are case insensitive but we require case
-// sensitivity here. This assumes that the hostname values have already
-// been normalized to lower case as in URL().
-const liburlpattern::Options& HostnameOptions() {
- DEFINE_STATIC_LOCAL(liburlpattern::Options, options,
- ({.delimiter_list = ".",
- .prefix_list = "",
- .sensitive = true,
- .strict = true}));
- return options;
-}
-
-// The liburlpattern::Options to use for pathname patterns. This uses a
-// "/" delimiter controlling how far a named group like ":bar" will match
-// by default. It also configures "/" to be treated as an automatic
-// prefix before groups.
-const liburlpattern::Options& PathnameOptions() {
- DEFINE_STATIC_LOCAL(liburlpattern::Options, options,
- ({.delimiter_list = "/",
- .prefix_list = "/",
- .sensitive = true,
- .strict = true}));
- return options;
-}
-
-// An enum indicating whether the associated component values be operated
-// on are for patterns or URLs. Validation and canonicalization will
-// do different things depending on the type.
-enum class ValueType {
- kPattern,
- kURL,
-};
-
// Utility function to determine if a pathname is absolute or not. For
// kURL values this mainly consists of a check for a leading slash. For
// patterns we do some additional checking for escaped or grouped slashes.
@@ -128,213 +58,6 @@ bool IsAbsolutePathname(const String& pathname, ValueType type) {
return false;
}
-String StringFromCanonOutput(const url::CanonOutput& output,
- const url::Component& component) {
- return String::FromUTF8(output.data() + component.begin, component.len);
-}
-
-std::string StdStringFromCanonOutput(const url::CanonOutput& output,
- const url::Component& component) {
- return std::string(output.data() + component.begin, component.len);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the protocol component.
-absl::StatusOr<std::string> ProtocolEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- bool result = url::CanonicalizeScheme(
- input.data(), url::Component(0, static_cast<int>(input.size())),
- &canon_output, &component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid protocol '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a protocol string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern.
-String CanonicalizeProtocol(const String& input,
- ValueType type,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- bool result = false;
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
- if (input.Is8Bit()) {
- StringUTF8Adaptor utf8(input);
- result = url::CanonicalizeScheme(
- utf8.data(), url::Component(0, utf8.size()), &canon_output, &component);
- } else {
- result = url::CanonicalizeScheme(input.Characters16(),
- url::Component(0, input.length()),
- &canon_output, &component);
- }
-
- if (!result) {
- exception_state.ThrowTypeError("Invalid protocol '" + input + "'.");
- return String();
- }
-
- return StringFromCanonOutput(canon_output, component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the username component.
-absl::StatusOr<std::string> UsernameEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component username_component;
- url::Component password_component;
-
- bool result = url::CanonicalizeUserInfo(
- input.data(), url::Component(0, static_cast<int>(input.size())), "",
- url::Component(0, 0), &canon_output, &username_component,
- &password_component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid username pattern '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, username_component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the password component.
-absl::StatusOr<std::string> PasswordEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component username_component;
- url::Component password_component;
-
- bool result = url::CanonicalizeUserInfo(
- "", url::Component(0, 0), input.data(),
- url::Component(0, static_cast<int>(input.size())), &canon_output,
- &username_component, &password_component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid password pattern '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, password_component);
-}
-
-// Utility function to canonicalize username and/or password strings. Throws
-// an exception if either is invalid. The canonicalization and/or validation
-// will differ depending on whether |type| is kURL or kPattern. On success
-// |username_out| and |password_out| will contain the canonical values.
-void CanonicalizeUsernameAndPassword(const String& username,
- const String& password,
- ValueType type,
- String& username_out,
- String& password_out,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- username_out = username;
- password_out = password;
- return;
- }
-
- bool result = false;
- url::RawCanonOutputT<char> canon_output;
- url::Component username_component;
- url::Component password_component;
-
- if (username && password && username.Is8Bit() && password.Is8Bit()) {
- StringUTF8Adaptor username_utf8(username);
- StringUTF8Adaptor password_utf8(password);
- result = url::CanonicalizeUserInfo(
- username_utf8.data(), url::Component(0, username_utf8.size()),
- password_utf8.data(), url::Component(0, password_utf8.size()),
- &canon_output, &username_component, &password_component);
-
- } else {
- String username16(username);
- String password16(password);
- username16.Ensure16Bit();
- password16.Ensure16Bit();
- result = url::CanonicalizeUserInfo(
- username16.Characters16(), url::Component(0, username16.length()),
- password16.Characters16(), url::Component(0, password16.length()),
- &canon_output, &username_component, &password_component);
- }
-
- if (!result) {
- exception_state.ThrowTypeError("Invalid username '" + username +
- "' and/or password '" + password + "'.");
- return;
- }
-
- if (username_component.len != -1)
- username_out = StringFromCanonOutput(canon_output, username_component);
- if (password_component.len != -1)
- password_out = StringFromCanonOutput(canon_output, password_component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the hostname component.
-absl::StatusOr<std::string> HostnameEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- bool result = url::CanonicalizeHost(
- input.data(), url::Component(0, static_cast<int>(input.size())),
- &canon_output, &component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid hostname pattern '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a hostname string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern.
-String CanonicalizeHostname(const String& input,
- ValueType type,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- bool success = false;
- String result = SecurityOrigin::CanonicalizeHost(input, &success);
- if (!success) {
- exception_state.ThrowTypeError("Invalid hostname '" + input + "'.");
- return String();
- }
-
- return result;
-}
-
// Utility function to determine if the default port for the given protocol
// matches the given port number.
bool IsProtocolDefaultPort(const String& protocol, const String& port) {
@@ -353,260 +76,6 @@ bool IsProtocolDefaultPort(const String& protocol, const String& port) {
return default_port != url::PORT_UNSPECIFIED && default_port == port_number;
}
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the port component.
-absl::StatusOr<std::string> PortEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- bool result = url::CanonicalizePort(
- input.data(), url::Component(0, static_cast<int>(input.size())),
- url::PORT_UNSPECIFIED, &canon_output, &component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid port pattern '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a port string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern. The |protocol|
-// must be provided in order to handle default ports correctly.
-String CanonicalizePort(const String& input,
- ValueType type,
- const String& protocol,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- int default_port = url::PORT_UNSPECIFIED;
- if (!input.IsEmpty()) {
- StringUTF8Adaptor protocol_utf8(protocol);
- default_port =
- url::DefaultPortForScheme(protocol_utf8.data(), protocol_utf8.size());
- }
-
- // Since ports only consist of digits there should be no encoding needed.
- // Therefore we directly use the UTF8 encoding version of CanonicalizePort().
- StringUTF8Adaptor utf8(input);
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
- if (!url::CanonicalizePort(utf8.data(), url::Component(0, utf8.size()),
- default_port, &canon_output, &component)) {
- exception_state.ThrowTypeError("Invalid port '" + input + "'.");
- return String();
- }
-
- return component.len == -1 ? g_empty_string
- : StringFromCanonOutput(canon_output, component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the pathname component using "standard" URL
-// behavior.
-absl::StatusOr<std::string> StandardURLPathnameEncodeCallback(
- absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- bool result = url::CanonicalizePartialPath(
- input.data(), url::Component(0, static_cast<int>(input.size())),
- &canon_output, &component);
-
- if (!result) {
- return absl::InvalidArgumentError("Invalid pathname pattern '" +
- std::string(input) + "'.");
- }
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the pathname component using "path" URL
-// behavior. This is like "cannot-be-a-base" URL behavior in the spec.
-absl::StatusOr<std::string> PathURLPathnameEncodeCallback(
- absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- url::CanonicalizePathURLPath(
- input.data(), url::Component(0, static_cast<int>(input.size())),
- &canon_output, &component);
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a pathname string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern.
-String CanonicalizePathname(const String& protocol,
- const String& input,
- ValueType type,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- // Determine if we are using "standard" or "path" URL canonicalization
- // for the pathname. In spec terms the "path" URL behavior corresponds
- // to "cannot-be-a-base" URLs. We make this determination based on the
- // protocol string since we cannot look at the number of slashes between
- // components like the URL spec. If this is inadequate the developer
- // can use the baseURL property to get more strict URL behavior.
- //
- // We default to "standard" URL behavior to match how the empty protocol
- // string in the URLPattern constructor results in the pathname pattern
- // getting "standard" URL canonicalization.
- bool standard = false;
- if (protocol.IsEmpty()) {
- standard = true;
- } else if (protocol.Is8Bit()) {
- StringUTF8Adaptor utf8(protocol);
- standard = url::IsStandard(utf8.data(), url::Component(0, utf8.size()));
- } else {
- standard = url::IsStandard(protocol.Characters16(),
- url::Component(0, protocol.length()));
- }
-
- // Do not enforce absolute pathnames here since we can't enforce it
- // it consistently in the URLPattern constructor. This allows us to
- // produce a match when the exact same fixed pathname string is passed
- // to both the constructor and test()/exec(). Similarly, we use
- // url::CanonicalizePartialPath() below instead of url::CanonicalizePath()
- // to avoid pre-pending a slash at the start of the string.
-
- bool result = false;
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- const auto canonicalize_path = [&](const auto* data, int length) {
- if (standard) {
- return url::CanonicalizePartialPath(data, url::Component(0, length),
- &canon_output, &component);
- }
- url::CanonicalizePathURLPath(data, url::Component(0, length), &canon_output,
- &component);
- return true;
- };
-
- if (input.Is8Bit()) {
- StringUTF8Adaptor utf8(input);
- result = canonicalize_path(utf8.data(), utf8.size());
- } else {
- result = canonicalize_path(input.Characters16(), input.length());
- }
-
- if (!result) {
- exception_state.ThrowTypeError("Invalid pathname '" + input + "'.");
- return String();
- }
-
- return StringFromCanonOutput(canon_output, component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the search component.
-absl::StatusOr<std::string> SearchEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- url::CanonicalizeQuery(input.data(),
- url::Component(0, static_cast<int>(input.size())),
- /*converter=*/nullptr, &canon_output, &component);
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a search string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern.
-String CanonicalizeSearch(const String& input,
- ValueType type,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
- if (input.Is8Bit()) {
- StringUTF8Adaptor utf8(input);
- url::CanonicalizeQuery(utf8.data(), url::Component(0, utf8.size()),
- /*converter=*/nullptr, &canon_output, &component);
- } else {
- url::CanonicalizeQuery(input.Characters16(),
- url::Component(0, input.length()),
- /*converter=*/nullptr, &canon_output, &component);
- }
-
- return StringFromCanonOutput(canon_output, component);
-}
-
-// A callback to be passed to the liburlpattern::Parse() method that performs
-// validation and encoding for the hash component.
-absl::StatusOr<std::string> HashEncodeCallback(absl::string_view input) {
- if (input.empty())
- return std::string();
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
-
- url::CanonicalizeRef(input.data(),
- url::Component(0, static_cast<int>(input.size())),
- &canon_output, &component);
-
- return StdStringFromCanonOutput(canon_output, component);
-}
-
-// Utility function to canonicalize a hash string. Throws an exception
-// if the input is invalid. The canonicalization and/or validation will
-// differ depending on whether |type| is kURL or kPattern.
-String CanonicalizeHash(const String& input,
- ValueType type,
- ExceptionState& exception_state) {
- if (type == ValueType::kPattern) {
- // Canonicalization for patterns is handled during compilation via
- // encoding callbacks.
- return input;
- }
-
- url::RawCanonOutputT<char> canon_output;
- url::Component component;
- if (input.Is8Bit()) {
- StringUTF8Adaptor utf8(input);
- url::CanonicalizeRef(utf8.data(), url::Component(0, utf8.size()),
- &canon_output, &component);
- } else {
- url::CanonicalizeRef(input.Characters16(),
- url::Component(0, input.length()), &canon_output,
- &component);
- }
-
- return StringFromCanonOutput(canon_output, component);
-}
-
// A utility method that takes a URLPatternInit, splits it apart, and applies
// the individual component values in the given set of strings. The strings
// are only applied if a value is present in the init structure.
@@ -643,33 +112,37 @@ void ApplyInit(const URLPatternInit* init,
port =
base_url.Port() > 0 ? String::Number(base_url.Port()) : g_empty_string;
pathname = base_url.GetPath() ? base_url.GetPath() : g_empty_string;
-
- // Do no propagate search or hash from the base URL. This matches the
- // behavior when resolving a relative URL against a base URL.
+ search = base_url.Query() ? base_url.Query() : g_empty_string;
+ hash = base_url.HasFragmentIdentifier() ? base_url.FragmentIdentifier()
+ : g_empty_string;
}
// Apply the URLPatternInit component values on top of the default and
// baseURL values.
if (init->hasProtocol()) {
- protocol = CanonicalizeProtocol(init->protocol(), type, exception_state);
+ protocol = url_pattern::CanonicalizeProtocol(init->protocol(), type,
+ exception_state);
if (exception_state.HadException())
return;
}
if (init->hasUsername() || init->hasPassword()) {
String init_username = init->hasUsername() ? init->username() : String();
String init_password = init->hasPassword() ? init->password() : String();
- CanonicalizeUsernameAndPassword(init_username, init_password, type,
- username, password, exception_state);
+ url_pattern::CanonicalizeUsernameAndPassword(init_username, init_password,
+ type, username, password,
+ exception_state);
if (exception_state.HadException())
return;
}
if (init->hasHostname()) {
- hostname = CanonicalizeHostname(init->hostname(), type, exception_state);
+ hostname = url_pattern::CanonicalizeHostname(init->hostname(), type,
+ exception_state);
if (exception_state.HadException())
return;
}
if (init->hasPort()) {
- port = CanonicalizePort(init->port(), type, protocol, exception_state);
+ port = url_pattern::CanonicalizePort(init->port(), type, protocol,
+ exception_state);
if (exception_state.HadException())
return;
}
@@ -689,17 +162,19 @@ void ApplyInit(const URLPatternInit* init,
pathname = base_url.GetPath().Substring(0, slash_index + 1) + pathname;
}
}
- pathname = CanonicalizePathname(protocol, pathname, type, exception_state);
+ pathname = url_pattern::CanonicalizePathname(protocol, pathname, type,
+ exception_state);
if (exception_state.HadException())
return;
}
if (init->hasSearch()) {
- search = CanonicalizeSearch(init->search(), type, exception_state);
+ search =
+ url_pattern::CanonicalizeSearch(init->search(), type, exception_state);
if (exception_state.HadException())
return;
}
if (init->hasHash()) {
- hash = CanonicalizeHash(init->hash(), type, exception_state);
+ hash = url_pattern::CanonicalizeHash(init->hash(), type, exception_state);
if (exception_state.HadException())
return;
}
@@ -707,7 +182,52 @@ void ApplyInit(const URLPatternInit* init,
} // namespace
+URLPattern* URLPattern::Create(const V8URLPatternInput* input,
+ const String& base_url,
+ ExceptionState& exception_state) {
+ if (input->GetContentType() ==
+ V8URLPatternInput::ContentType::kURLPatternInit) {
+ exception_state.ThrowTypeError(
+ "Invalid second argument baseURL '" + base_url +
+ "' provided with a URLPatternInit input. Use the "
+ "URLPatternInit.baseURL property instead.");
+ return nullptr;
+ }
+
+ const auto& input_string = input->GetAsUSVString();
+
+ url_pattern::Parser parser(input_string);
+ parser.Parse(exception_state);
+ if (exception_state.HadException())
+ return nullptr;
+
+ URLPatternInit* init = parser.GetResult();
+ if (!base_url && !init->hasProtocol()) {
+ exception_state.ThrowTypeError(
+ "Relative constructor string '" + input_string +
+ "' must have a base URL passed as the second argument.");
+ return nullptr;
+ }
+
+ if (base_url)
+ init->setBaseURL(base_url);
+
+ return Create(init, parser.GetProtocolComponent(), exception_state);
+}
+
+URLPattern* URLPattern::Create(const V8URLPatternInput* input,
+ ExceptionState& exception_state) {
+ if (input->IsURLPatternInit()) {
+ return URLPattern::Create(input->GetAsURLPatternInit(),
+ /*precomputed_protocol_component=*/nullptr,
+ exception_state);
+ }
+
+ return Create(input, /*base_url=*/String(), exception_state);
+}
+
URLPattern* URLPattern::Create(const URLPatternInit* init,
+ Component* precomputed_protocol_component,
ExceptionState& exception_state) {
// Each component defaults to a wildcard matching any input. We use
// the null string as a shorthand for the default.
@@ -734,80 +254,54 @@ URLPattern* URLPattern::Create(const URLPatternInit* init,
if (IsProtocolDefaultPort(protocol, port))
port = "";
- // Compile each component pattern into a Component structure that can
- // be used for matching. Components that match any input may have a
- // nullptr Component struct pointer.
+ // Compile each component pattern into a Component structure that
+ // can be used for matching.
- auto* protocol_component =
- CompilePattern(protocol, "protocol", ProtocolEncodeCallback,
- DefaultOptions(), exception_state);
+ auto* protocol_component = precomputed_protocol_component;
+ if (!protocol_component) {
+ protocol_component =
+ Component::Compile(protocol, Component::Type::kProtocol,
+ /*protocol_component=*/nullptr, exception_state);
+ }
if (exception_state.HadException())
return nullptr;
auto* username_component =
- CompilePattern(username, "username", UsernameEncodeCallback,
- DefaultOptions(), exception_state);
+ Component::Compile(username, Component::Type::kUsername,
+ protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
auto* password_component =
- CompilePattern(password, "password", PasswordEncodeCallback,
- DefaultOptions(), exception_state);
+ Component::Compile(password, Component::Type::kPassword,
+ protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
auto* hostname_component =
- CompilePattern(hostname, "hostname", HostnameEncodeCallback,
- HostnameOptions(), exception_state);
+ Component::Compile(hostname, Component::Type::kHostname,
+ protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
- auto* port_component = CompilePattern(port, "port", PortEncodeCallback,
- DefaultOptions(), exception_state);
+ auto* port_component = Component::Compile(
+ port, Component::Type::kPort, protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
- // Different types of URLs use different canonicalization for pathname.
- // A "standard" URL flattens `.`/`..` and performs full percent encoding.
- // A "path" URL does not flatten and uses a more lax percent encoding.
- // The spec calls "path" URLs as "cannot-be-a-base-URL" URLs:
- //
- // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
- //
- // We prefer "standard" URL here by checking to see if the protocol
- // pattern matches any of the known standard protocol strings. So
- // an exact pattern of `http` will match, but so will `http{s}?` and
- // `*`.
- //
- // If the protocol pattern does not match any of the known standard URL
- // protocols then we fall back to the "path" URL behavior. This will
- // normally be triggered by `data`, `javascript`, `about`, etc. It
- // will also be triggered for custom protocol strings. We favor "path"
- // behavior here because its better to under canonicalize since the
- // developer can always manually canonicalize the pathname for a custom
- // protocol.
- //
- // ShouldTreatAsStandardURL can by a bit expensive, so only do it if we
- // actually have a pathname pattern to compile.
- liburlpattern::EncodeCallback pathname_encode = PathURLPathnameEncodeCallback;
- if (!pathname.IsNull() && ShouldTreatAsStandardURL(protocol_component)) {
- pathname_encode = StandardURLPathnameEncodeCallback;
- }
-
auto* pathname_component =
- CompilePattern(pathname, "pathname", pathname_encode, PathnameOptions(),
- exception_state);
+ Component::Compile(pathname, Component::Type::kPathname,
+ protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
- auto* search_component =
- CompilePattern(search, "search", SearchEncodeCallback, DefaultOptions(),
- exception_state);
+ auto* search_component = Component::Compile(
+ search, Component::Type::kSearch, protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
- auto* hash_component = CompilePattern(hash, "hash", HashEncodeCallback,
- DefaultOptions(), exception_state);
+ auto* hash_component = Component::Compile(
+ hash, Component::Type::kHash, protocol_component, exception_state);
if (exception_state.HadException())
return nullptr;
@@ -836,32 +330,20 @@ URLPattern::URLPattern(Component* protocol,
hash_(hash) {}
bool URLPattern::test(
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const V8URLPatternInput* input,
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- const USVStringOrURLPatternInit& input,
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const String& base_url,
ExceptionState& exception_state) const {
return Match(input, base_url, /*result=*/nullptr, exception_state);
}
bool URLPattern::test(
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const V8URLPatternInput* input,
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- const USVStringOrURLPatternInit& input,
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
ExceptionState& exception_state) const {
return test(input, /*base_url=*/String(), exception_state);
}
URLPatternResult* URLPattern::exec(
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const V8URLPatternInput* input,
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- const USVStringOrURLPatternInit& input,
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const String& base_url,
ExceptionState& exception_state) const {
URLPatternResult* result = URLPatternResult::Create();
@@ -871,69 +353,71 @@ URLPatternResult* URLPattern::exec(
}
URLPatternResult* URLPattern::exec(
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const V8URLPatternInput* input,
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- const USVStringOrURLPatternInit& input,
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
ExceptionState& exception_state) const {
return exec(input, /*base_url=*/String(), exception_state);
}
String URLPattern::protocol() const {
- if (!protocol_)
- return kDefaultPattern;
- std::string result = protocol_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return protocol_->GeneratePatternString();
}
String URLPattern::username() const {
- if (!username_)
- return kDefaultPattern;
- std::string result = username_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return username_->GeneratePatternString();
}
String URLPattern::password() const {
- if (!password_)
- return kDefaultPattern;
- std::string result = password_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return password_->GeneratePatternString();
}
String URLPattern::hostname() const {
- if (!hostname_)
- return kDefaultPattern;
- std::string result = hostname_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return hostname_->GeneratePatternString();
}
String URLPattern::port() const {
- if (!port_)
- return kDefaultPattern;
- std::string result = port_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return port_->GeneratePatternString();
}
String URLPattern::pathname() const {
- if (!pathname_)
- return kDefaultPattern;
- std::string result = pathname_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return pathname_->GeneratePatternString();
}
String URLPattern::search() const {
- if (!search_)
- return kDefaultPattern;
- std::string result = search_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return search_->GeneratePatternString();
}
String URLPattern::hash() const {
- if (!hash_)
- return kDefaultPattern;
- std::string result = hash_->pattern.GeneratePatternString();
- return String::FromUTF8(result);
+ return hash_->GeneratePatternString();
+}
+
+// static
+int URLPattern::compareComponent(const V8URLPatternComponent& component,
+ const URLPattern* left,
+ const URLPattern* right) {
+ switch (component.AsEnum()) {
+ case V8URLPatternComponent::Enum::kProtocol:
+ return url_pattern::Component::Compare(*left->protocol_,
+ *right->protocol_);
+ case V8URLPatternComponent::Enum::kUsername:
+ return url_pattern::Component::Compare(*left->username_,
+ *right->username_);
+ case V8URLPatternComponent::Enum::kPassword:
+ return url_pattern::Component::Compare(*left->password_,
+ *right->password_);
+ case V8URLPatternComponent::Enum::kHostname:
+ return url_pattern::Component::Compare(*left->hostname_,
+ *right->hostname_);
+ case V8URLPatternComponent::Enum::kPort:
+ return url_pattern::Component::Compare(*left->port_, *right->port_);
+ case V8URLPatternComponent::Enum::kPathname:
+ return url_pattern::Component::Compare(*left->pathname_,
+ *right->pathname_);
+ case V8URLPatternComponent::Enum::kSearch:
+ return url_pattern::Component::Compare(*left->search_, *right->search_);
+ case V8URLPatternComponent::Enum::kHash:
+ return url_pattern::Component::Compare(*left->hash_, *right->hash_);
+ }
+ NOTREACHED();
}
void URLPattern::Trace(Visitor* visitor) const {
@@ -948,87 +432,8 @@ void URLPattern::Trace(Visitor* visitor) const {
ScriptWrappable::Trace(visitor);
}
-// static
-URLPattern::Component* URLPattern::CompilePattern(
- const String& pattern,
- StringView component,
- liburlpattern::EncodeCallback encode_callback,
- const liburlpattern::Options& options,
- ExceptionState& exception_state) {
- // If the pattern is null then optimize by not compiling a pattern. Instead,
- // a nullptr Component is interpreted as matching any input value.
- if (pattern.IsNull())
- return nullptr;
-
- // Parse the pattern.
- StringUTF8Adaptor utf8(pattern);
- auto parse_result =
- liburlpattern::Parse(absl::string_view(utf8.data(), utf8.size()),
- std::move(encode_callback), options);
- if (!parse_result.ok()) {
- exception_state.ThrowTypeError("Invalid " + component + " pattern '" +
- pattern + "'.");
- return nullptr;
- }
-
- // Extract a regular expression string from the parsed pattern.
- std::vector<std::string> name_list;
- std::string regexp_string =
- parse_result.value().GenerateRegexString(&name_list);
-
- // Compile the regular expression to verify it is valid.
- auto case_sensitive = options.sensitive ? WTF::kTextCaseSensitive
- : WTF::kTextCaseASCIIInsensitive;
- DCHECK(base::IsStringASCII(regexp_string));
- ScriptRegexp* regexp = MakeGarbageCollected<ScriptRegexp>(
- String(regexp_string.data(), regexp_string.size()), case_sensitive,
- kMultilineDisabled, ScriptRegexp::UTF16);
- if (!regexp->IsValid()) {
- // The regular expression failed to compile. This means that some
- // custom regexp group within the pattern is illegal. Attempt to
- // compile each regexp group individually in order to identify the
- // culprit.
- for (auto& part : parse_result.value().PartList()) {
- if (part.type != liburlpattern::PartType::kRegex)
- continue;
- DCHECK(base::IsStringASCII(part.value));
- String group_value(part.value.data(), part.value.size());
- regexp = MakeGarbageCollected<ScriptRegexp>(
- group_value, case_sensitive, kMultilineDisabled, ScriptRegexp::UTF16);
- if (regexp->IsValid())
- continue;
- exception_state.ThrowTypeError("Invalid " + component + " pattern '" +
- pattern +
- "'. Custom regular expression group '" +
- group_value + "' is invalid.");
- return nullptr;
- }
- // We couldn't find a bad regexp group, but we still have an overall
- // error. This shouldn't happen, but we handle it anyway.
- exception_state.ThrowTypeError("Invalid " + component + " pattern '" +
- pattern +
- "'. An unexpected error has occurred.");
- return nullptr;
- }
-
- Vector<String> wtf_name_list;
- wtf_name_list.ReserveInitialCapacity(
- static_cast<wtf_size_t>(name_list.size()));
- for (const auto& name : name_list) {
- wtf_name_list.push_back(String::FromUTF8(name.data(), name.size()));
- }
-
- return MakeGarbageCollected<URLPattern::Component>(
- std::move(parse_result.value()), std::move(regexp),
- std::move(wtf_name_list));
-}
-
bool URLPattern::Match(
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const V8URLPatternInput* input,
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- const USVStringOrURLPatternInit& input,
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
const String& base_url,
URLPatternResult* result,
ExceptionState& exception_state) const {
@@ -1043,84 +448,71 @@ bool URLPattern::Match(
String search(g_empty_string);
String hash(g_empty_string);
- HeapVector<USVStringOrURLPatternInit> inputs;
-
- bool is_init =
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input->GetContentType() ==
- V8URLPatternInput::ContentType::kURLPatternInit;
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input.IsURLPatternInit();
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
-
- if (is_init) {
- if (base_url) {
- exception_state.ThrowTypeError(
- "Invalid second argument baseURL '" + base_url +
- "' provided with a URLPatternInit input. Use the "
- "URLPatternInit.baseURL property instead.");
- return false;
- }
+ HeapVector<Member<V8URLPatternInput>> inputs;
- URLPatternInit* init =
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input->GetAsURLPatternInit();
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input.GetAsURLPatternInit();
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
-
- inputs.push_back(USVStringOrURLPatternInit::FromURLPatternInit(init));
-
- // Layer the URLPatternInit values on top of the default empty strings.
- ApplyInit(init, ValueType::kURL, protocol, username, password, hostname,
- port, pathname, search, hash, exception_state);
- if (exception_state.HadException()) {
- // Treat exceptions simply as a failure to match.
- exception_state.ClearException();
- return false;
- }
- } else {
- KURL parsed_base_url(base_url);
- if (base_url && !parsed_base_url.IsValid()) {
- // Treat as failure to match, but don't throw an exception.
- return false;
- }
+ switch (input->GetContentType()) {
+ case V8URLPatternInput::ContentType::kURLPatternInit: {
+ if (base_url) {
+ exception_state.ThrowTypeError(
+ "Invalid second argument baseURL '" + base_url +
+ "' provided with a URLPatternInit input. Use the "
+ "URLPatternInit.baseURL property instead.");
+ return false;
+ }
+
+ URLPatternInit* init = input->GetAsURLPatternInit();
+
+ inputs.push_back(MakeGarbageCollected<V8URLPatternInput>(init));
- const String& input_string =
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input->GetAsUSVString();
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- input.GetAsUSVString();
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
-
- inputs.push_back(USVStringOrURLPatternInit::FromUSVString(input_string));
- if (base_url)
- inputs.push_back(USVStringOrURLPatternInit::FromUSVString(base_url));
-
- // The compile the input string as a fully resolved URL.
- KURL url(parsed_base_url, input_string);
- if (!url.IsValid() || url.IsEmpty()) {
- // Treat as failure to match, but don't throw an exception.
- return false;
+ // Layer the URLPatternInit values on top of the default empty strings.
+ ApplyInit(init, ValueType::kURL, protocol, username, password, hostname,
+ port, pathname, search, hash, exception_state);
+ if (exception_state.HadException()) {
+ // Treat exceptions simply as a failure to match.
+ exception_state.ClearException();
+ return false;
+ }
+ break;
}
+ case V8URLPatternInput::ContentType::kUSVString: {
+ KURL parsed_base_url(base_url);
+ if (base_url && !parsed_base_url.IsValid()) {
+ // Treat as failure to match, but don't throw an exception.
+ return false;
+ }
+
+ const String& input_string = input->GetAsUSVString();
- // Apply the parsed URL components on top of our defaults.
- if (url.Protocol())
- protocol = url.Protocol();
- if (url.User())
- username = url.User();
- if (url.Pass())
- password = url.Pass();
- if (url.Host())
- hostname = url.Host();
- if (url.Port() > 0)
- port = String::Number(url.Port());
- if (url.GetPath())
- pathname = url.GetPath();
- if (url.Query())
- search = url.Query();
- if (url.FragmentIdentifier())
- hash = url.FragmentIdentifier();
+ inputs.push_back(MakeGarbageCollected<V8URLPatternInput>(input_string));
+ if (base_url)
+ inputs.push_back(MakeGarbageCollected<V8URLPatternInput>(base_url));
+
+ // Then parse the input string as a fully resolved URL.
+ KURL url(parsed_base_url, input_string);
+ if (!url.IsValid() || url.IsEmpty()) {
+ // Treat as failure to match, but don't throw an exception.
+ return false;
+ }
+
+ // Apply the parsed URL components on top of our defaults.
+ if (url.Protocol())
+ protocol = url.Protocol();
+ if (url.User())
+ username = url.User();
+ if (url.Pass())
+ password = url.Pass();
+ if (url.Host())
+ hostname = url.Host();
+ if (url.Port() > 0)
+ port = String::Number(url.Port());
+ if (url.GetPath())
+ pathname = url.GetPath();
+ if (url.Query())
+ search = url.Query();
+ if (url.FragmentIdentifier())
+ hash = url.FragmentIdentifier();
+ break;
+ }
}
Vector<String> protocol_group_list;
@@ -1143,18 +535,25 @@ bool URLPattern::Match(
auto* search_group_list_ref = result ? &search_group_list : nullptr;
auto* hash_group_list_ref = result ? &hash_group_list : nullptr;
+ CHECK(protocol_);
+ CHECK(username_);
+ CHECK(password_);
+ CHECK(hostname_);
+ CHECK(port_);
+ CHECK(pathname_);
+ CHECK(search_);
+ CHECK(hash_);
+
// Each component of the pattern must match the corresponding component of
- // the input. If a pattern Component is nullptr, then it matches any
- // input and we can avoid running a real regular expression match.
- bool matched =
- (!protocol_ || protocol_->Match(protocol, protocol_group_list_ref)) &&
- (!username_ || username_->Match(username, username_group_list_ref)) &&
- (!password_ || password_->Match(password, password_group_list_ref)) &&
- (!hostname_ || hostname_->Match(hostname, hostname_group_list_ref)) &&
- (!port_ || port_->Match(port, port_group_list_ref)) &&
- (!pathname_ || pathname_->Match(pathname, pathname_group_list_ref)) &&
- (!search_ || search_->Match(search, search_group_list_ref)) &&
- (!hash_ || hash_->Match(hash, hash_group_list_ref));
+ // the input.
+ bool matched = protocol_->Match(protocol, protocol_group_list_ref) &&
+ username_->Match(username, username_group_list_ref) &&
+ password_->Match(password, password_group_list_ref) &&
+ hostname_->Match(hostname, hostname_group_list_ref) &&
+ port_->Match(port, port_group_list_ref) &&
+ pathname_->Match(pathname, pathname_group_list_ref) &&
+ search_->Match(search, search_group_list_ref) &&
+ hash_->Match(hash, hash_group_list_ref);
if (!matched || !result)
return matched;
@@ -1162,55 +561,32 @@ bool URLPattern::Match(
result->setInputs(std::move(inputs));
result->setProtocol(
- MakeComponentResult(protocol_, protocol, protocol_group_list));
+ MakeURLPatternComponentResult(protocol_, protocol, protocol_group_list));
result->setUsername(
- MakeComponentResult(username_, username, username_group_list));
+ MakeURLPatternComponentResult(username_, username, username_group_list));
result->setPassword(
- MakeComponentResult(password_, password, password_group_list));
+ MakeURLPatternComponentResult(password_, password, password_group_list));
result->setHostname(
- MakeComponentResult(hostname_, hostname, hostname_group_list));
- result->setPort(MakeComponentResult(port_, port, port_group_list));
+ MakeURLPatternComponentResult(hostname_, hostname, hostname_group_list));
+ result->setPort(MakeURLPatternComponentResult(port_, port, port_group_list));
result->setPathname(
- MakeComponentResult(pathname_, pathname, pathname_group_list));
- result->setSearch(MakeComponentResult(search_, search, search_group_list));
- result->setHash(MakeComponentResult(hash_, hash, hash_group_list));
+ MakeURLPatternComponentResult(pathname_, pathname, pathname_group_list));
+ result->setSearch(
+ MakeURLPatternComponentResult(search_, search, search_group_list));
+ result->setHash(MakeURLPatternComponentResult(hash_, hash, hash_group_list));
return true;
}
// static
-URLPatternComponentResult* URLPattern::MakeComponentResult(
+URLPatternComponentResult* URLPattern::MakeURLPatternComponentResult(
Component* component,
const String& input,
- const Vector<String>& group_list) {
- Vector<std::pair<String, String>> groups;
- if (!component) {
- // When there is not Component we must act as if there was a default
- // wildcard pattern with a group. The group includes the entire input.
- groups.emplace_back("0", input);
- } else {
- DCHECK_EQ(component->name_list.size(), group_list.size());
- for (wtf_size_t i = 0; i < group_list.size(); ++i) {
- groups.emplace_back(component->name_list[i], group_list[i]);
- }
- }
-
+ const Vector<String>& group_values) {
auto* result = URLPatternComponentResult::Create();
result->setInput(input);
- result->setGroups(groups);
+ result->setGroups(component->MakeGroupList(group_values));
return result;
}
-bool URLPattern::ShouldTreatAsStandardURL(Component* protocol) {
- if (!protocol)
- return true;
- const auto protocol_matches = [&](const std::string& scheme) {
- DCHECK(base::IsStringASCII(scheme));
- return protocol->Match(
- StringView(scheme.data(), static_cast<unsigned>(scheme.size())),
- /*group_list=*/nullptr);
- };
- return base::ranges::any_of(url::GetStandardSchemes(), protocol_matches);
-}
-
} // namespace blink
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.h b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.h
index 0609081ea0b..4dd1affda57 100644
--- a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.h
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.h
@@ -1,4 +1,3 @@
-// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -7,27 +6,36 @@
#include "base/types/pass_key.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_typedefs.h"
+#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.h"
+#include "third_party/blink/renderer/modules/modules_export.h"
#include "third_party/blink/renderer/platform/bindings/script_wrappable.h"
#include "third_party/liburlpattern/parse.h"
-namespace liburlpattern {
-struct Options;
-} // namespace liburlpattern
-
namespace blink {
class ExceptionState;
class URLPatternComponentResult;
class URLPatternInit;
class URLPatternResult;
-class USVStringOrURLPatternInit;
-class URLPattern : public ScriptWrappable {
+namespace url_pattern {
+class Component;
+} // namespace url_pattern
+
+class MODULES_EXPORT URLPattern : public ScriptWrappable {
DEFINE_WRAPPERTYPEINFO();
- class Component;
+ using Component = url_pattern::Component;
public:
+ static URLPattern* Create(const V8URLPatternInput* input,
+ const String& base_url,
+ ExceptionState& exception_state);
+
+ static URLPattern* Create(const V8URLPatternInput* input,
+ ExceptionState& exception_state);
+
static URLPattern* Create(const URLPatternInit* init,
+ Component* precomputed_protocol_component,
ExceptionState& exception_state);
URLPattern(Component* protocol,
@@ -40,33 +48,17 @@ class URLPattern : public ScriptWrappable {
Component* hash,
base::PassKey<URLPattern> key);
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
bool test(const V8URLPatternInput* input,
const String& base_url,
ExceptionState& exception_state) const;
bool test(const V8URLPatternInput* input,
ExceptionState& exception_state) const;
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- bool test(const USVStringOrURLPatternInit& input,
- const String& base_url,
- ExceptionState& exception_state) const;
- bool test(const USVStringOrURLPatternInit& input,
- ExceptionState& exception_state) const;
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
URLPatternResult* exec(const V8URLPatternInput* input,
const String& base_url,
ExceptionState& exception_state) const;
URLPatternResult* exec(const V8URLPatternInput* input,
ExceptionState& exception_state) const;
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- URLPatternResult* exec(const USVStringOrURLPatternInit& input,
- const String& base_url,
- ExceptionState& exception_state) const;
- URLPatternResult* exec(const USVStringOrURLPatternInit& input,
- ExceptionState& exception_state) const;
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
String protocol() const;
String username() const;
@@ -77,51 +69,29 @@ class URLPattern : public ScriptWrappable {
String search() const;
String hash() const;
+ static int compareComponent(const V8URLPatternComponent& component,
+ const URLPattern* left,
+ const URLPattern* right);
+
void Trace(Visitor* visitor) const override;
private:
- // A utility function that takes a given |pattern| and compiles it into a
- // Component structure. If the |pattern| matches the given |default_pattern|
- // then nullptr may be returned without throwing an exception. In this case
- // the Component is not constructed and the nullptr value should be treated as
- // matching any input value for the component. The |component| string is used
- // for exception messages. The |encode_callback| will be used to validate and
- // encode plain text within the pattern during compilation. |options| control
- // how the pattern is compiled.
- static Component* CompilePattern(
- const String& pattern,
- StringView component,
- liburlpattern::EncodeCallback encode_callback,
- const liburlpattern::Options& options,
- ExceptionState& exception_state);
-
// A utility function to determine if a given |input| matches the pattern
// or not. Returns |true| if there is a match and |false| otherwise. If
// |result| is not nullptr then the URLPatternResult contents will be filled.
-#if defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
bool Match(const V8URLPatternInput* input,
const String& base_url,
URLPatternResult* result,
ExceptionState& exception_state) const;
-#else // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
- bool Match(const USVStringOrURLPatternInit& input,
- const String& base_url,
- URLPatternResult* result,
- ExceptionState& exception_state) const;
-#endif // defined(USE_BLINK_V8_BINDING_NEW_IDL_UNION)
// A utility function that constructs a URLPatternComponentResult for
- // a given |component|, |input|, and |group_list|. The |component| may
- // be nullptr.
- static URLPatternComponentResult* MakeComponentResult(
+ // a given |component|, |input|, and |group_values|.
+ static URLPatternComponentResult* MakeURLPatternComponentResult(
Component* component,
const String& input,
- const Vector<String>& group_list);
-
- static bool ShouldTreatAsStandardURL(Component* protocol);
+ const Vector<String>& group_values);
- // The compiled patterns for each URL component. If a Component member is
- // nullptr then it should be treated as a wildcard matching any input.
+ // The compiled patterns for each URL component.
Member<Component> protocol_;
Member<Component> username_;
Member<Component> password_;
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.idl b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.idl
index f3a619f43fd..e2722f0119f 100644
--- a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.idl
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern.idl
@@ -4,19 +4,22 @@
typedef (USVString or URLPatternInit) URLPatternInput;
+enum URLPatternComponent { "protocol", "username", "password", "hostname",
+ "port", "pathname", "search", "hash" };
+
// https://wicg.github.io/urlpattern/
[
- SecureContext,
Exposed=(Window,Worker),
RuntimeEnabled=URLPattern
] interface URLPattern {
- [RaisesException] constructor(URLPatternInit init);
+ [RaisesException, Measure]
+ constructor(URLPatternInput input, optional USVString baseURL);
- [RaisesException]
+ [RaisesException, Measure]
boolean test(URLPatternInput input, optional USVString baseURL);
- [RaisesException]
- URLPatternResult exec(URLPatternInput input, optional USVString baseURL);
+ [RaisesException, Measure]
+ URLPatternResult? exec(URLPatternInput input, optional USVString baseURL);
readonly attribute USVString protocol;
readonly attribute USVString username;
@@ -26,4 +29,8 @@ typedef (USVString or URLPatternInit) URLPatternInput;
readonly attribute USVString pathname;
readonly attribute USVString search;
readonly attribute USVString hash;
+
+ [RuntimeEnabled=URLPatternCompareComponent, Measure]
+ static short compareComponent(URLPatternComponent component,
+ URLPattern left, URLPattern right);
};
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.cc b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.cc
new file mode 100644
index 00000000000..d05ff0666e7
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.cc
@@ -0,0 +1,464 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h"
+
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_component.h"
+#include "third_party/blink/renderer/platform/bindings/exception_state.h"
+#include "third_party/blink/renderer/platform/weborigin/security_origin.h"
+#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
+#include "url/url_canon.h"
+#include "url/url_util.h"
+
+namespace blink {
+namespace url_pattern {
+
+namespace {
+
+// Returns `value` with one leading occurrence of `prefix` removed. If `value`
+// does not start with `prefix` it is returned unchanged.
+String MaybeStripPrefix(const String& value, StringView prefix) {
+  if (!value.StartsWith(prefix))
+    return value;
+  // Remove exactly as many characters as `prefix` contains rather than
+  // assuming the prefix is a single character.
+  return value.Substring(prefix.length());
+}
+
+// Returns `value` with one trailing occurrence of `suffix` removed. If `value`
+// does not end with `suffix` it is returned unchanged.
+String MaybeStripSuffix(const String& value, StringView suffix) {
+  if (!value.EndsWith(suffix))
+    return value;
+  // Remove exactly as many characters as `suffix` contains rather than
+  // assuming the suffix is a single character.
+  return value.Substring(0, value.length() - suffix.length());
+}
+
+// Builds a String from the UTF-8 bytes of `output` that are delimited by
+// `component` (its `begin` offset and `len` length).
+String StringFromCanonOutput(const url::CanonOutput& output,
+                             const url::Component& component) {
+  const auto* first_byte = output.data() + component.begin;
+  return String::FromUTF8(first_byte, component.len);
+}
+
+// Copies the bytes of `output` that are delimited by `component` (its `begin`
+// offset and `len` length) into a std::string.
+std::string StdStringFromCanonOutput(const url::CanonOutput& output,
+                                     const url::Component& component) {
+  const auto* first_byte = output.data() + component.begin;
+  return std::string(first_byte, component.len);
+}
+
+} // anonymous namespace
+
+// Encode callback for protocol patterns. Runs `input` through
+// url::CanonicalizeScheme() and returns the canonical text. An empty input
+// yields an empty string; a rejected input yields an InvalidArgumentError.
+absl::StatusOr<std::string> ProtocolEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+
+  if (url::CanonicalizeScheme(input.data(), input_range, &buffer,
+                              &canon_range)) {
+    return StdStringFromCanonOutput(buffer, canon_range);
+  }
+
+  return absl::InvalidArgumentError("Invalid protocol '" + std::string(input) +
+                                    "'.");
+}
+
+// Encode callback for username patterns. Runs `input` through
+// url::CanonicalizeUserInfo() as the username, paired with an empty password,
+// and returns the canonical username text. An empty input yields an empty
+// string; a rejected input yields an InvalidArgumentError.
+absl::StatusOr<std::string> UsernameEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component username_range(0, static_cast<int>(input.size()));
+  const url::Component empty_password_range(0, 0);
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_username;
+  url::Component canon_password;
+
+  const bool ok = url::CanonicalizeUserInfo(
+      input.data(), username_range, "", empty_password_range, &buffer,
+      &canon_username, &canon_password);
+  if (ok)
+    return StdStringFromCanonOutput(buffer, canon_username);
+
+  return absl::InvalidArgumentError("Invalid username pattern '" +
+                                    std::string(input) + "'.");
+}
+
+// Encode callback for password patterns. Runs `input` through
+// url::CanonicalizeUserInfo() as the password, paired with an empty username,
+// and returns the canonical password text. An empty input yields an empty
+// string; a rejected input yields an InvalidArgumentError.
+absl::StatusOr<std::string> PasswordEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component empty_username_range(0, 0);
+  const url::Component password_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_username;
+  url::Component canon_password;
+
+  const bool ok = url::CanonicalizeUserInfo(
+      "", empty_username_range, input.data(), password_range, &buffer,
+      &canon_username, &canon_password);
+  if (ok)
+    return StdStringFromCanonOutput(buffer, canon_password);
+
+  return absl::InvalidArgumentError("Invalid password pattern '" +
+                                    std::string(input) + "'.");
+}
+
+// Encode callback for hostname patterns. Runs `input` through
+// url::CanonicalizeHost() and returns the canonical text. An empty input
+// yields an empty string; a rejected input yields an InvalidArgumentError.
+absl::StatusOr<std::string> HostnameEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+
+  if (url::CanonicalizeHost(input.data(), input_range, &buffer,
+                            &canon_range)) {
+    return StdStringFromCanonOutput(buffer, canon_range);
+  }
+
+  return absl::InvalidArgumentError("Invalid hostname pattern '" +
+                                    std::string(input) + "'.");
+}
+
+// Encode callback for port patterns. Runs `input` through
+// url::CanonicalizePort() with no default port specified and returns the
+// canonical text. An empty input yields an empty string; a rejected input
+// yields an InvalidArgumentError.
+absl::StatusOr<std::string> PortEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+
+  if (url::CanonicalizePort(input.data(), input_range, url::PORT_UNSPECIFIED,
+                            &buffer, &canon_range)) {
+    return StdStringFromCanonOutput(buffer, canon_range);
+  }
+
+  return absl::InvalidArgumentError("Invalid port pattern '" +
+                                    std::string(input) + "'.");
+}
+
+// Encode callback for pathname patterns of "standard" URLs. Runs `input`
+// through url::CanonicalizePartialPath() and returns the canonical text. An
+// empty input yields an empty string; a rejected input yields an
+// InvalidArgumentError.
+absl::StatusOr<std::string> StandardURLPathnameEncodeCallback(
+    absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+
+  if (url::CanonicalizePartialPath(input.data(), input_range, &buffer,
+                                   &canon_range)) {
+    return StdStringFromCanonOutput(buffer, canon_range);
+  }
+
+  return absl::InvalidArgumentError("Invalid pathname pattern '" +
+                                    std::string(input) + "'.");
+}
+
+// Encode callback for pathname patterns of "path" URLs. Runs `input` through
+// url::CanonicalizePathURLPath() and returns the canonical text. This
+// callback never reports an error; an empty input yields an empty string.
+absl::StatusOr<std::string> PathURLPathnameEncodeCallback(
+    absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  url::CanonicalizePathURLPath(input.data(), input_range, &buffer,
+                               &canon_range);
+
+  return StdStringFromCanonOutput(buffer, canon_range);
+}
+
+// Encode callback for search patterns. Runs `input` through
+// url::CanonicalizeQuery() without a charset converter and returns the
+// canonical text. This callback never reports an error; an empty input
+// yields an empty string.
+absl::StatusOr<std::string> SearchEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  url::CanonicalizeQuery(input.data(), input_range, /*converter=*/nullptr,
+                         &buffer, &canon_range);
+
+  return StdStringFromCanonOutput(buffer, canon_range);
+}
+
+// Encode callback for hash patterns. Runs `input` through
+// url::CanonicalizeRef() and returns the canonical text. This callback never
+// reports an error; an empty input yields an empty string.
+absl::StatusOr<std::string> HashEncodeCallback(absl::string_view input) {
+  if (input.empty())
+    return std::string();
+
+  const url::Component input_range(0, static_cast<int>(input.size()));
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  url::CanonicalizeRef(input.data(), input_range, &buffer, &canon_range);
+
+  return StdStringFromCanonOutput(buffer, canon_range);
+}
+
+// Canonicalizes a protocol value. A single trailing ":" is removed for both
+// value types. Pattern values are then returned as-is; URL values are run
+// through url::CanonicalizeScheme(), throwing a TypeError and returning a
+// null String when the scheme is rejected.
+String CanonicalizeProtocol(const String& input,
+                            ValueType type,
+                            ExceptionState& exception_state) {
+  // The protocol input may optionally carry a ":" suffix. It is stripped
+  // for URL and pattern protocols alike.
+  const String protocol = MaybeStripSuffix(input, ":");
+
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return protocol;
+
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  bool ok = false;
+  if (!protocol.Is8Bit()) {
+    ok = url::CanonicalizeScheme(protocol.Characters16(),
+                                 url::Component(0, protocol.length()), &buffer,
+                                 &canon_range);
+  } else {
+    StringUTF8Adaptor utf8(protocol);
+    ok = url::CanonicalizeScheme(utf8.data(), url::Component(0, utf8.size()),
+                                 &buffer, &canon_range);
+  }
+
+  if (ok)
+    return StringFromCanonOutput(buffer, canon_range);
+
+  exception_state.ThrowTypeError("Invalid protocol '" + protocol + "'.");
+  return String();
+}
+
+// Canonicalizes `username` and `password` together with
+// url::CanonicalizeUserInfo() and stores the results in `username_out` and
+// `password_out`.
+//
+// For ValueType::kPattern both inputs are copied to the outputs unchanged.
+// For ValueType::kURL a TypeError is thrown on `exception_state` when
+// canonicalization fails, and the outputs are left untouched. On success an
+// output is only assigned when the matching canonical component has a length
+// other than -1.
+void CanonicalizeUsernameAndPassword(const String& username,
+ const String& password,
+ ValueType type,
+ String& username_out,
+ String& password_out,
+ ExceptionState& exception_state) {
+ if (type == ValueType::kPattern) {
+ // Canonicalization for patterns is handled during compilation via
+ // encoding callbacks.
+ username_out = username;
+ password_out = password;
+ return;
+ }
+
+ bool result = false;
+ url::RawCanonOutputT<char> canon_output;
+ url::Component username_component;
+ url::Component password_component;
+
+ // The 8-bit overload is only used when both strings are non-null and
+ // 8-bit. The null checks come first so Is8Bit() is never reached for a
+ // null string.
+ if (username && password && username.Is8Bit() && password.Is8Bit()) {
+ StringUTF8Adaptor username_utf8(username);
+ StringUTF8Adaptor password_utf8(password);
+ result = url::CanonicalizeUserInfo(
+ username_utf8.data(), url::Component(0, username_utf8.size()),
+ password_utf8.data(), url::Component(0, password_utf8.size()),
+ &canon_output, &username_component, &password_component);
+
+ } else {
+ // Otherwise work on local copies that are forced to 16-bit so both
+ // values can be handed to the 16-bit overload.
+ String username16(username);
+ String password16(password);
+ username16.Ensure16Bit();
+ password16.Ensure16Bit();
+ result = url::CanonicalizeUserInfo(
+ username16.Characters16(), url::Component(0, username16.length()),
+ password16.Characters16(), url::Component(0, password16.length()),
+ &canon_output, &username_component, &password_component);
+ }
+
+ if (!result) {
+ exception_state.ThrowTypeError("Invalid username '" + username +
+ "' and/or password '" + password + "'.");
+ return;
+ }
+
+ // A length of -1 leaves the corresponding output parameter unmodified.
+ if (username_component.len != -1)
+ username_out = StringFromCanonOutput(canon_output, username_component);
+ if (password_component.len != -1)
+ password_out = StringFromCanonOutput(canon_output, password_component);
+}
+
+// Canonicalizes a hostname value. Pattern values are returned as-is; URL
+// values are run through SecurityOrigin::CanonicalizeHost(), throwing a
+// TypeError and returning a null String when the host is rejected.
+String CanonicalizeHostname(const String& input,
+                            ValueType type,
+                            ExceptionState& exception_state) {
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return input;
+
+  bool ok = false;
+  String canonical_host = SecurityOrigin::CanonicalizeHost(input, &ok);
+  if (ok)
+    return canonical_host;
+
+  exception_state.ThrowTypeError("Invalid hostname '" + input + "'.");
+  return String();
+}
+
+// Canonicalizes a port value. Pattern values are returned as-is; URL values
+// are run through url::CanonicalizePort(), throwing a TypeError and returning
+// a null String when the port is rejected. `protocol` is used to look up the
+// scheme's default port when `input` is not empty.
+String CanonicalizePort(const String& input,
+                        ValueType type,
+                        const String& protocol,
+                        ExceptionState& exception_state) {
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return input;
+
+  int default_port = url::PORT_UNSPECIFIED;
+  if (!input.IsEmpty()) {
+    StringUTF8Adaptor protocol_utf8(protocol);
+    default_port =
+        url::DefaultPortForScheme(protocol_utf8.data(), protocol_utf8.size());
+  }
+
+  // Ports consist only of digits, so no encoding should be required and the
+  // UTF8 flavor of CanonicalizePort() can be used directly.
+  StringUTF8Adaptor port_utf8(input);
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  const bool ok = url::CanonicalizePort(
+      port_utf8.data(), url::Component(0, port_utf8.size()), default_port,
+      &buffer, &canon_range);
+  if (!ok) {
+    exception_state.ThrowTypeError("Invalid port '" + input + "'.");
+    return String();
+  }
+
+  // A length of -1 is reported as the empty string.
+  if (canon_range.len == -1)
+    return g_empty_string;
+  return StringFromCanonOutput(buffer, canon_range);
+}
+
+// Canonicalizes a pathname value. Pattern values are returned as-is. URL
+// values are canonicalized either as a "standard" URL path or as a "path" URL
+// path depending on `protocol`; a TypeError is thrown and a null String is
+// returned when canonicalization fails.
+String CanonicalizePathname(const String& protocol,
+                            const String& input,
+                            ValueType type,
+                            ExceptionState& exception_state) {
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return input;
+
+  // Decide between "standard" and "path" URL canonicalization for the
+  // pathname. In spec terms the "path" URL behavior corresponds to
+  // "cannot-be-a-base" URLs. The decision is based on the protocol string
+  // because, unlike the URL spec, we cannot look at the number of slashes
+  // between components. If this is inadequate the developer can use the
+  // baseURL property to get more strict URL behavior.
+  //
+  // An empty protocol defaults to "standard" URL behavior. This matches how
+  // an empty protocol string in the URLPattern constructor results in the
+  // pathname pattern getting "standard" URL canonicalization.
+  bool treat_as_standard = true;
+  if (!protocol.IsEmpty()) {
+    if (protocol.Is8Bit()) {
+      StringUTF8Adaptor utf8(protocol);
+      treat_as_standard =
+          url::IsStandard(utf8.data(), url::Component(0, utf8.size()));
+    } else {
+      treat_as_standard = url::IsStandard(
+          protocol.Characters16(), url::Component(0, protocol.length()));
+    }
+  }
+
+  // Absolute pathnames are not enforced here since that cannot be enforced
+  // consistently in the URLPattern constructor. This allows a match to be
+  // produced when the exact same fixed pathname string is passed to both the
+  // constructor and test()/exec(). For the same reason
+  // url::CanonicalizePartialPath() is used below instead of
+  // url::CanonicalizePath(), which avoids pre-pending a slash at the start of
+  // the string.
+
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+
+  // Runs the selected canonicalizer over `length` characters at `chars`.
+  // The "path" URL canonicalizer reports no status, so it always succeeds.
+  const auto run_canonicalizer = [&](const auto* chars, int length) -> bool {
+    const url::Component range(0, length);
+    if (!treat_as_standard) {
+      url::CanonicalizePathURLPath(chars, range, &buffer, &canon_range);
+      return true;
+    }
+    return url::CanonicalizePartialPath(chars, range, &buffer, &canon_range);
+  };
+
+  bool ok = false;
+  if (!input.Is8Bit()) {
+    ok = run_canonicalizer(input.Characters16(), input.length());
+  } else {
+    StringUTF8Adaptor utf8(input);
+    ok = run_canonicalizer(utf8.data(), utf8.size());
+  }
+
+  if (ok)
+    return StringFromCanonOutput(buffer, canon_range);
+
+  exception_state.ThrowTypeError("Invalid pathname '" + input + "'.");
+  return String();
+}
+
+// Canonicalizes a search value. A single leading "?" is removed for both
+// value types. Pattern values are then returned as-is; URL values are run
+// through url::CanonicalizeQuery() without a charset converter. No exception
+// is thrown by this function.
+String CanonicalizeSearch(const String& input,
+                          ValueType type,
+                          ExceptionState& exception_state) {
+  // The search input may optionally carry a "?" prefix. It is stripped for
+  // URL and pattern search values alike.
+  const String search = MaybeStripPrefix(input, "?");
+
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return search;
+
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  if (!search.Is8Bit()) {
+    url::CanonicalizeQuery(search.Characters16(),
+                           url::Component(0, search.length()),
+                           /*converter=*/nullptr, &buffer, &canon_range);
+  } else {
+    StringUTF8Adaptor utf8(search);
+    url::CanonicalizeQuery(utf8.data(), url::Component(0, utf8.size()),
+                           /*converter=*/nullptr, &buffer, &canon_range);
+  }
+
+  return StringFromCanonOutput(buffer, canon_range);
+}
+
+// Canonicalizes a hash value. A single leading "#" is removed for both value
+// types. Pattern values are then returned as-is; URL values are run through
+// url::CanonicalizeRef(). No exception is thrown by this function.
+String CanonicalizeHash(const String& input,
+                        ValueType type,
+                        ExceptionState& exception_state) {
+  // The hash input may optionally carry a "#" prefix. It is stripped for
+  // URL and pattern hash values alike.
+  const String hash = MaybeStripPrefix(input, "#");
+
+  // Patterns are canonicalized later, during compilation, by the encoding
+  // callbacks.
+  if (type == ValueType::kPattern)
+    return hash;
+
+  url::RawCanonOutputT<char> buffer;
+  url::Component canon_range;
+  if (!hash.Is8Bit()) {
+    url::CanonicalizeRef(hash.Characters16(), url::Component(0, hash.length()),
+                         &buffer, &canon_range);
+  } else {
+    StringUTF8Adaptor utf8(hash);
+    url::CanonicalizeRef(utf8.data(), url::Component(0, utf8.size()), &buffer,
+                         &canon_range);
+  }
+
+  return StringFromCanonOutput(buffer, canon_range);
+}
+
+} // namespace url_pattern
+} // namespace blink
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h
new file mode 100644
index 00000000000..eb6f3b38582
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h
@@ -0,0 +1,83 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_CANON_H_
+#define THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_CANON_H_
+
+#include "third_party/abseil-cpp/absl/status/statusor.h"
+#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
+
+namespace blink {
+
+class ExceptionState;
+
+namespace url_pattern {
+
+// An enum indicating whether the associated component values to be operated
+// on are for patterns or URLs. Validation and canonicalization will
+// do different things depending on the type.
+enum class ValueType {
+ // The value is a pattern string. The Canonicalize*() functions pass it
+ // through; encoding happens later via the encode callbacks.
+ kPattern,
+ // The value is a URL component string. The Canonicalize*() functions
+ // validate and canonicalize it.
+ kURL,
+};
+
+// The following functions are callbacks that may be passed to the
+// liburlpattern::Parse() method. Each performs validation and encoding for
+// a different URL component.
+//
+// Note that there are two different pathname callbacks for "standard" URLs
+// like `https://foo` vs "path" URLs like `data:foo`. Select the correct
+// callback by calling `ShouldTreatAsStandardURL()`.
+absl::StatusOr<std::string> ProtocolEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> UsernameEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> PasswordEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> HostnameEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> PortEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> StandardURLPathnameEncodeCallback(
+ absl::string_view input);
+absl::StatusOr<std::string> PathURLPathnameEncodeCallback(
+ absl::string_view input);
+absl::StatusOr<std::string> SearchEncodeCallback(absl::string_view input);
+absl::StatusOr<std::string> HashEncodeCallback(absl::string_view input);
+
+// Utility functions to canonicalize different component strings. They will
+// throw an exception if the input is invalid. The canonicalization and/or
+// validation will only be applied if the `type` is kURL. These functions
+// simply pass through the value when the `type` is kPattern. Encoding for
+// patterns is handled later during compilation via the encode callbacks
+// above.
+//
+// The result is returned, except for `CanonicalizeUsernameAndPassword` which
+// uses separate out parameters for the resulting username and password.
+String CanonicalizeProtocol(const String& input,
+ ValueType type,
+ ExceptionState& exception_state);
+void CanonicalizeUsernameAndPassword(const String& username,
+ const String& password,
+ ValueType type,
+ String& username_out,
+ String& password_out,
+ ExceptionState& exception_state);
+String CanonicalizeHostname(const String& input,
+ ValueType type,
+ ExceptionState& exception_state);
+String CanonicalizePort(const String& input,
+ ValueType type,
+ const String& protocol,
+ ExceptionState& exception_state);
+String CanonicalizePathname(const String& protocol,
+ const String& input,
+ ValueType type,
+ ExceptionState& exception_state);
+String CanonicalizeSearch(const String& input,
+ ValueType type,
+ ExceptionState& exception_state);
+String CanonicalizeHash(const String& input,
+ ValueType type,
+ ExceptionState& exception_state);
+
+} // namespace url_pattern
+} // namespace blink
+
+#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_CANON_H_
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.cc b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.cc
new file mode 100644
index 00000000000..8e029755c12
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.cc
@@ -0,0 +1,390 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_component.h"
+
+#include "base/ranges/algorithm.h"
+#include "base/strings/string_util.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_canon.h"
+#include "third_party/blink/renderer/platform/bindings/exception_state.h"
+#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
+#include "url/url_util.h"
+
+namespace blink {
+namespace url_pattern {
+
+namespace {
+
+// Utility method to convert a type to a string. The returned text is the
+// lower-case component name (e.g. "protocol") and is used below when building
+// exception messages.
+StringView TypeToString(Component::Type type) {
+ switch (type) {
+ case Component::Type::kProtocol:
+ return "protocol";
+ case Component::Type::kUsername:
+ return "username";
+ case Component::Type::kPassword:
+ return "password";
+ case Component::Type::kHostname:
+ return "hostname";
+ case Component::Type::kPort:
+ return "port";
+ case Component::Type::kPathname:
+ return "pathname";
+ case Component::Type::kSearch:
+ return "search";
+ case Component::Type::kHash:
+ return "hash";
+ }
+ // Every enumerator returns above, so this point is only reached for a value
+ // outside the enum.
+ NOTREACHED();
+}
+
+// Utility method to get the correct encoding callback for a given type.
+// `protocol_component` is only consulted for Type::kPathname, where it must
+// be non-null (enforced with CHECK) and selects between the "standard" and
+// "path" URL pathname callbacks.
+liburlpattern::EncodeCallback GetEncodeCallback(Component::Type type,
+ Component* protocol_component) {
+ switch (type) {
+ case Component::Type::kProtocol:
+ return ProtocolEncodeCallback;
+ case Component::Type::kUsername:
+ return UsernameEncodeCallback;
+ case Component::Type::kPassword:
+ return PasswordEncodeCallback;
+ case Component::Type::kHostname:
+ return HostnameEncodeCallback;
+ case Component::Type::kPort:
+ return PortEncodeCallback;
+ case Component::Type::kPathname:
+ // Different types of URLs use different canonicalization for pathname.
+ // A "standard" URL flattens `.`/`..` and performs full percent encoding.
+ // A "path" URL does not flatten and uses a more lax percent encoding.
+ // The spec refers to "path" URLs as "cannot-be-a-base-URL" URLs:
+ //
+ // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+ //
+ // We prefer "standard" URL here by checking to see if the protocol
+ // pattern matches any of the known standard protocol strings. So
+ // an exact pattern of `http` will match, but so will `http{s}?` and
+ // `*`.
+ //
+ // If the protocol pattern does not match any of the known standard URL
+ // protocols then we fall back to the "path" URL behavior. This will
+ // normally be triggered by `data`, `javascript`, `about`, etc. It
+ // will also be triggered for custom protocol strings. We favor "path"
+ // behavior here because it's better to under-canonicalize since the
+ // developer can always manually canonicalize the pathname for a custom
+ // protocol.
+ //
+ // ShouldTreatAsStandardURL can be a bit expensive, so only do it if we
+ // actually have a pathname pattern to compile.
+ CHECK(protocol_component);
+ if (protocol_component->ShouldTreatAsStandardURL())
+ return StandardURLPathnameEncodeCallback;
+ else
+ return PathURLPathnameEncodeCallback;
+ case Component::Type::kSearch:
+ return SearchEncodeCallback;
+ case Component::Type::kHash:
+ return HashEncodeCallback;
+ }
+ // Every enumerator returns above, so this point is only reached for a value
+ // outside the enum.
+ NOTREACHED();
+}
+
+// Utility method to get the correct liburlpattern parse options for a given
+// type. The returned reference points at one of three function-local static
+// Options objects: one for hostnames, one for pathnames, and a default shared
+// by every other component type.
+const liburlpattern::Options& GetOptions(Component::Type type) {
+ using liburlpattern::Options;
+
+ // The liburlpattern::Options to use for most component patterns. We
+ // default to strict mode and case sensitivity. In addition, most
+ // components have no concept of a delimiter or prefix character.
+ DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, default_options,
+ ({.delimiter_list = "",
+ .prefix_list = "",
+ .sensitive = true,
+ .strict = true}));
+
+ // The liburlpattern::Options to use for hostname patterns. This uses a
+ // "." delimiter controlling how far a named group like ":bar" will match
+ // by default. Note, hostnames are case insensitive but we require case
+ // sensitivity here. This assumes that the hostname values have already
+ // been normalized to lower case as in URL().
+ DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, hostname_options,
+ ({.delimiter_list = ".",
+ .prefix_list = "",
+ .sensitive = true,
+ .strict = true}));
+
+ // The liburlpattern::Options to use for pathname patterns. This uses a
+ // "/" delimiter controlling how far a named group like ":bar" will match
+ // by default. It also configures "/" to be treated as an automatic
+ // prefix before groups.
+ DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, pathname_options,
+ ({.delimiter_list = "/",
+ .prefix_list = "/",
+ .sensitive = true,
+ .strict = true}));
+
+ switch (type) {
+ case Component::Type::kHostname:
+ return hostname_options;
+ case Component::Type::kPathname:
+ return pathname_options;
+ // All remaining component types share the default options.
+ case Component::Type::kProtocol:
+ case Component::Type::kUsername:
+ case Component::Type::kPassword:
+ case Component::Type::kPort:
+ case Component::Type::kSearch:
+ case Component::Type::kHash:
+ return default_options;
+ }
+ NOTREACHED();
+}
+
+// Utility function to return a statically allocated Part list. The list
+// holds exactly one Part: a kFullWildcard with empty name, prefix, value and
+// suffix and no modifier. Component::Compare() uses it to stand in for a
+// Component that has no pattern.
+const std::vector<liburlpattern::Part>& GetWildcardOnlyPartList() {
+ using liburlpattern::Modifier;
+ using liburlpattern::Part;
+ using liburlpattern::PartType;
+ DEFINE_THREAD_SAFE_STATIC_LOCAL(
+ std::vector<Part>, instance,
+ ({Part(PartType::kFullWildcard,
+ /*name=*/"",
+ /*prefix=*/"", /*value=*/"", /*suffix=*/"", Modifier::kNone)}));
+ return instance;
+}
+
+// Orders two Parts. Returns -1 when `lh` sorts before `rh`, 0 when they are
+// equal, and 1 when `lh` sorts after `rh`.
+int ComparePart(const liburlpattern::Part& lh, const liburlpattern::Part& rh) {
+  // PartType is the most significant key so that fixed text is favored. The
+  // type ordering is:
+  //
+  //  kFixed > kRegex > kSegmentWildcard > kFullWildcard.
+  //
+  // kRegex is considered greater than the wildcards because it is likely to
+  // be used for imposing some constraint and not just duplicating wildcard
+  // behavior.
+  //
+  // This comparison depends on the PartType enum in liburlpattern having the
+  // correct corresponding numeric values.
+  //
+  // The Modifier is the next key:
+  //
+  //  kNone > kOneOrMore > kOptional > kZeroOrMore.
+  //
+  // The rationale is that requiring the match group to exist is more
+  // restrictive than making it optional, and requiring an exact count is more
+  // restrictive than repeating.
+  //
+  // This comparison depends on the Modifier enum in liburlpattern having the
+  // correct corresponding numeric values.
+  //
+  // The remaining keys are the text components, compared lexicographically
+  // from left to right: `prefix`, `value`, and `suffix`. It's ok to depend on
+  // simple byte-wise string comparison here because the values have all been
+  // URL encoded. This guarantees the strings contain only ASCII.
+  const auto lh_key =
+      std::tie(lh.type, lh.modifier, lh.prefix, lh.value, lh.suffix);
+  const auto rh_key =
+      std::tie(rh.type, rh.modifier, rh.prefix, rh.value, rh.suffix);
+  if (lh_key == rh_key)
+    return 0;
+  return lh_key < rh_key ? -1 : 1;
+}
+
+// Utility method to compare two part lists. Returns the ComparePart() result
+// of the first position at which the lists differ, or 0 when they are equal.
+int ComparePartList(const std::vector<liburlpattern::Part>& lh,
+                    const std::vector<liburlpattern::Part>& rh) {
+  using liburlpattern::Modifier;
+  using liburlpattern::Part;
+  using liburlpattern::PartType;
+
+  // Walk both lists in lock step and stop at the first pair of Parts that
+  // are not equal.
+  size_t index = 0;
+  while (index < lh.size() && index < rh.size()) {
+    const int order = ComparePart(lh[index], rh[index]);
+    if (order != 0)
+      return order;
+    ++index;
+  }
+
+  // At least one list is exhausted here. If both are, no differences were
+  // found and the lists are equal.
+  const bool lh_exhausted = index == lh.size();
+  const bool rh_exhausted = index == rh.size();
+  if (lh_exhausted && rh_exhausted)
+    return 0;
+
+  // One list is longer than the other. Compare the next Part of the longer
+  // list to a synthetically created empty kFixed Part. This is necessary in
+  // order for "/foo/" to be considered more restrictive, and therefore
+  // greater, than "/foo/*".
+  const Part empty_fixed_part(PartType::kFixed, "", Modifier::kNone);
+  if (lh_exhausted)
+    return ComparePart(empty_fixed_part, rh[index]);
+  return ComparePart(lh[index], empty_fixed_part);
+}
+
+} // anonymous namespace
+
+// Compiles `pattern` into a Component for the URL component named by `type`.
+// A null `pattern` produces a pattern-less Component. Otherwise the pattern
+// is parsed with liburlpattern using the options and encode callback chosen
+// for `type`, converted to a regular expression, and compiled. On any failure
+// a TypeError is thrown on `exception_state` and nullptr is returned.
+// `protocol_component` is forwarded to GetEncodeCallback().
+// static
+Component* Component::Compile(const String& pattern,
+ Type type,
+ Component* protocol_component,
+ ExceptionState& exception_state) {
+ // If the pattern is null then return a special Component object that matches
+ // any input as if the pattern was `*`.
+ if (pattern.IsNull()) {
+ return MakeGarbageCollected<Component>(type, base::PassKey<Component>());
+ }
+
+ const liburlpattern::Options& options = GetOptions(type);
+
+ // Parse the pattern.
+ StringUTF8Adaptor utf8(pattern);
+ auto parse_result = liburlpattern::Parse(
+ absl::string_view(utf8.data(), utf8.size()),
+ GetEncodeCallback(type, protocol_component), options);
+ if (!parse_result.ok()) {
+ // Surface the parser's status message as part of the TypeError text.
+ exception_state.ThrowTypeError(
+ "Invalid " + TypeToString(type) + " pattern '" + pattern + "'. " +
+ String::FromUTF8(parse_result.status().message().data(),
+ parse_result.status().message().size()));
+ return nullptr;
+ }
+
+ // Extract a regular expression string from the parsed pattern.
+ // `name_list` receives the group names via the out parameter.
+ std::vector<std::string> name_list;
+ std::string regexp_string =
+ parse_result.value().GenerateRegexString(&name_list);
+
+ // Compile the regular expression to verify it is valid.
+ auto case_sensitive = options.sensitive ? WTF::kTextCaseSensitive
+ : WTF::kTextCaseASCIIInsensitive;
+ DCHECK(base::IsStringASCII(regexp_string));
+ ScriptRegexp* regexp = MakeGarbageCollected<ScriptRegexp>(
+ String(regexp_string.data(), regexp_string.size()), case_sensitive,
+ kMultilineDisabled, ScriptRegexp::UTF16);
+ if (!regexp->IsValid()) {
+ // The regular expression failed to compile. This means that some
+ // custom regexp group within the pattern is illegal. Attempt to
+ // compile each regexp group individually in order to identify the
+ // culprit.
+ for (auto& part : parse_result.value().PartList()) {
+ if (part.type != liburlpattern::PartType::kRegex)
+ continue;
+ DCHECK(base::IsStringASCII(part.value));
+ String group_value(part.value.data(), part.value.size());
+ regexp = MakeGarbageCollected<ScriptRegexp>(
+ group_value, case_sensitive, kMultilineDisabled, ScriptRegexp::UTF16);
+ if (regexp->IsValid())
+ continue;
+ exception_state.ThrowTypeError("Invalid " + TypeToString(type) +
+ " pattern '" + pattern +
+ "'. Custom regular expression group '" +
+ group_value + "' is invalid.");
+ return nullptr;
+ }
+ // We couldn't find a bad regexp group, but we still have an overall
+ // error. This shouldn't happen, but we handle it anyway.
+ exception_state.ThrowTypeError("Invalid " + TypeToString(type) +
+ " pattern '" + pattern +
+ "'. An unexpected error has occurred.");
+ return nullptr;
+ }
+
+ // Convert the std::string group names into WTF Strings for storage on the
+ // Component.
+ Vector<String> wtf_name_list;
+ wtf_name_list.ReserveInitialCapacity(
+ static_cast<wtf_size_t>(name_list.size()));
+ for (const auto& name : name_list) {
+ wtf_name_list.push_back(String::FromUTF8(name.data(), name.size()));
+ }
+
+ return MakeGarbageCollected<Component>(
+ type, std::move(parse_result.value()), std::move(regexp),
+ std::move(wtf_name_list), base::PassKey<Component>());
+}
+
+// Orders two Components by comparing their part lists. Returns a negative
+// value, zero, or a positive value as produced by ComparePartList().
+// static
+int Component::Compare(const Component& lh, const Component& rh) {
+  using liburlpattern::Modifier;
+  using liburlpattern::Part;
+  using liburlpattern::PartType;
+
+  const bool lh_has_pattern = lh.pattern_.has_value();
+  const bool rh_has_pattern = rh.pattern_.has_value();
+
+  // Two empty components are both wildcards and therefore effectively equal.
+  if (!lh_has_pattern && !rh_has_pattern)
+    return 0;
+
+  // A component without a real pattern is represented by a part list holding
+  // a single full wildcard. Otherwise the pattern's own part list is used.
+  const std::vector<Part>& lh_parts =
+      lh_has_pattern ? lh.pattern_->PartList() : GetWildcardOnlyPartList();
+  const std::vector<Part>& rh_parts =
+      rh_has_pattern ? rh.pattern_->PartList() : GetWildcardOnlyPartList();
+
+  return ComparePartList(lh_parts, rh_parts);
+}
+
+// Constructs a Component backed by a parsed `pattern`, its compiled `regexp`,
+// and the `name_list` of its groups. `key` is not used in the body.
+// NOTE(review): presumably `key` restricts construction to Component itself
+// (pass-key idiom) - confirm against the header.
+Component::Component(Type type,
+ liburlpattern::Pattern pattern,
+ ScriptRegexp* regexp,
+ Vector<String> name_list,
+ base::PassKey<Component> key)
+ : type_(type),
+ pattern_(std::move(pattern)),
+ regexp_(regexp),
+ name_list_(std::move(name_list)) {}
+
+// Constructs a Component with no pattern and no regexp. Its name list holds
+// the single group name "0". `key` is not used in the body.
+Component::Component(Type type, base::PassKey<Component> key)
+ : type_(type), name_list_({"0"}) {}
+
+// Returns true when `input` matches this component. When `group_list` is
+// non-null it receives the matched group values.
+bool Component::Match(StringView input, Vector<String>* group_list) const {
+  // Without a regexp every input matches, and the whole input is reported as
+  // the only group.
+  if (!regexp_) {
+    if (group_list)
+      group_list->push_back(input.ToString());
+    return true;
+  }
+
+  // The input only counts as matched when the regexp matches at position 0.
+  const int match_position =
+      regexp_->Match(input, /*start_from=*/0, /*match_length=*/nullptr,
+                     group_list);
+  return match_position == 0;
+}
+
+// Returns the pattern string for this component. A component without a
+// pattern is reported as "*".
+String Component::GeneratePatternString() const {
+  if (!pattern_.has_value())
+    return "*";
+  return String::FromUTF8(pattern_->GeneratePatternString());
+}
+
+// Pairs each entry of `group_values` with the group name at the same index
+// in `name_list_`. Both lists are expected to have the same size.
+Vector<std::pair<String, String>> Component::MakeGroupList(
+    const Vector<String>& group_values) const {
+  DCHECK_EQ(name_list_.size(), group_values.size());
+  const wtf_size_t group_count = group_values.size();
+  Vector<std::pair<String, String>> named_groups;
+  named_groups.ReserveInitialCapacity(group_count);
+  for (wtf_size_t index = 0; index < group_count; ++index)
+    named_groups.emplace_back(name_list_[index], group_values[index]);
+  return named_groups;
+}
+
+// Returns true when this protocol component should get "standard" URL
+// treatment: either it has no pattern, or its pattern matches at least one
+// of the schemes returned by url::GetStandardSchemes(). Must only be called
+// on a Type::kProtocol component.
+bool Component::ShouldTreatAsStandardURL() const {
+  DCHECK(type_ == Type::kProtocol);
+  if (!pattern_.has_value())
+    return true;
+
+  // Stop at the first standard scheme this protocol pattern matches.
+  for (const std::string& scheme : url::GetStandardSchemes()) {
+    DCHECK(base::IsStringASCII(scheme));
+    const StringView scheme_view(scheme.data(),
+                                 static_cast<unsigned>(scheme.size()));
+    if (Match(scheme_view, /*group_list=*/nullptr))
+      return true;
+  }
+  return false;
+}
+
+// Reports the `regexp_` member to `visitor`.
+void Component::Trace(Visitor* visitor) const {
+ visitor->Trace(regexp_);
+}
+
+} // namespace url_pattern
+} // namespace blink
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.h b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.h
new file mode 100644
index 00000000000..31ca11e1f62
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_component.h
@@ -0,0 +1,113 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_COMPONENT_H_
+#define THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_COMPONENT_H_
+
+#include "base/types/pass_key.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "third_party/blink/renderer/bindings/core/v8/script_regexp.h"
+#include "third_party/blink/renderer/platform/heap/heap.h"
+#include "third_party/blink/renderer/platform/heap/member.h"
+#include "third_party/blink/renderer/platform/heap/trace_traits.h"
+#include "third_party/blink/renderer/platform/wtf/vector.h"
+#include "third_party/liburlpattern/parse.h"
+#include "third_party/liburlpattern/pattern.h"
+
+namespace blink {
+
+class ExceptionState;
+
+namespace url_pattern {
+
+// A class representing all the information needed to match a particular
+// component of a URL.
+class Component final : public GarbageCollected<Component> {
+ public:
+ // Enumeration defining the different types of components. Each component
+ // type uses a slightly different kind of character encoding. In addition,
+ // different component types use different liburlpattern parse options.
+ enum class Type {
+ kProtocol,
+ kUsername,
+ kPassword,
+ kHostname,
+ kPort,
+ kPathname,
+ kSearch,
+ kHash,
+ };
+
+ // A utility function that takes a given `pattern` and compiles it into a
+ // Component structure. If the `pattern` is null then nullptr
+ // may be returned without throwing an exception. In this case the
+ // Component is not constructed and the nullptr value should be
+ // treated as matching any input value for the component. The `type`
+ // specifies which URL component the pattern is being compiled for. This
+ // will select the correct encoding callback, liburlpattern options, and
+ // populate error messages with the correct component string.
+ // NOTE(review): `protocol_component` is presumably the already-compiled
+ // protocol Component, if any; nullptr is passed when compiling the protocol
+ // itself. Confirm its exact use against the definition of Compile().
+ static Component* Compile(const String& pattern,
+ Type type,
+ Component* protocol_component,
+ ExceptionState& exception_state);
+
+ // Compare the pattern strings in the two given components. This provides a
+ // mostly lexicographical ordering based on fixed text in the patterns.
+ // Matching groups and modifiers are treated such that more restrictive
+ // patterns are greater in value. Group names are not considered in the
+ // comparison.
+ static int Compare(const Component& lh, const Component& rh);
+
+ // Constructs a Component with a real `pattern` that compiled to the given
+ // `regexp`.
+ Component(Type type,
+ liburlpattern::Pattern pattern,
+ ScriptRegexp* regexp,
+ Vector<String> name_list,
+ base::PassKey<Component> key);
+
+ // Constructs an empty Component that matches any input as if it had the
+ // pattern `*`.
+ Component(Type type, base::PassKey<Component> key);
+
+ // Match the given `input` against the component pattern. Returns `true`
+ // if there is a match. If `group_list` is not nullptr, then it will be
+ // populated with group values captured by the pattern.
+ bool Match(StringView input, Vector<String>* group_list) const;
+
+ // Convert the compiled component pattern back into a pattern string. This
+ // will be functionally equivalent to the original, but may differ based on
+ // canonicalization that occurred during parsing.
+ String GeneratePatternString() const;
+
+ // Combines the given list of group values with the group names specified in
+ // the original pattern. The return result is a vector of name:value tuples.
+ Vector<std::pair<String, String>> MakeGroupList(
+ const Vector<String>& group_values) const;
+
+ // Method to determine if the URL associated with this component should be
+ // treated as a "standard" URL like `https://foo` vs a "path" URL like
+ // `data:foo`. This should only be called for kProtocol components.
+ bool ShouldTreatAsStandardURL() const;
+
+ void Trace(Visitor* visitor) const;
+
+ private:
+ const Type type_;
+
+ // The parsed pattern.
+ const absl::optional<liburlpattern::Pattern> pattern_;
+
+ // The pattern compiled down to a js regular expression.
+ const Member<ScriptRegexp> regexp_;
+
+ // The names to be applied to the regular expression capture groups. Note,
+ // liburlpattern regular expressions do not use named capture groups directly.
+ const Vector<String> name_list_;
+};
+
+} // namespace url_pattern
+} // namespace blink
+
+#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_COMPONENT_H_
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_fuzzer.cc b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_fuzzer.cc
new file mode 100644
index 00000000000..9760a6d6039
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_fuzzer.cc
@@ -0,0 +1,29 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/bindings/modules/v8/v8_union_urlpatterninit_usvstring.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern.h"
+#include "third_party/blink/renderer/platform/bindings/exception_state.h"
+#include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
+#include "third_party/blink/renderer/platform/testing/blink_fuzzer_test_support.h"
+#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
+
+namespace blink {
+
+// Fuzzer body: interprets the raw `data` bytes as UTF-8, wraps the resulting
+// string in a V8URLPatternInput, attempts to construct a URLPattern from it,
+// and then requests a full garbage collection. Always returns 0.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ // Function-local static, so the test support object is set up only once.
+ static BlinkFuzzerTestSupport test_support = BlinkFuzzerTestSupport();
+ DummyExceptionStateForTesting exception_state;
+ auto* input = MakeGarbageCollected<V8URLPatternInput>(
+ String::FromUTF8(reinterpret_cast<const char*>(data), size));
+ // The created pattern is discarded; only the construction path is exercised.
+ URLPattern::Create(input, exception_state);
+ V8PerIsolateData::MainThreadIsolate()->RequestGarbageCollectionForTesting(
+ v8::Isolate::kFullGarbageCollection);
+ return 0;
+}
+
+} // namespace blink
+
+// C-linkage entry point that forwards to the implementation in namespace
+// blink.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ return blink::LLVMFuzzerTestOneInput(data, size);
+}
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.cc b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.cc
new file mode 100644
index 00000000000..22a34bb3b0d
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.cc
@@ -0,0 +1,453 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h"
+
+#include "base/notreached.h"
+#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_init.h"
+#include "third_party/blink/renderer/modules/url_pattern/url_pattern_component.h"
+#include "third_party/blink/renderer/platform/bindings/exception_state.h"
+#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
+#include "third_party/liburlpattern/tokenize.h"
+
+namespace blink {
+namespace url_pattern {
+
+// Stores `input` and initializes `utf8_` from the same string; Parse() reads
+// `utf8_` when tokenizing.
+Parser::Parser(const String& input) : input_(input), utf8_(input) {}
+
+// Tokenizes the constructor string and walks the tokens through the
+// StringParseState machine, storing each component's pattern text in
+// `result_`. Throws a TypeError on `exception_state` if tokenizing fails, and
+// returns early if compiling the protocol component raised an exception.
+void Parser::Parse(ExceptionState& exception_state) {
+ DCHECK_EQ(state_, StringParseState::kInit);
+ DCHECK_EQ(token_index_, 0u);
+
+ auto tokenize_result =
+ liburlpattern::Tokenize(absl::string_view(utf8_.data(), utf8_.size()),
+ liburlpattern::TokenizePolicy::kLenient);
+ if (!tokenize_result.ok()) {
+ // This should not happen with kLenient mode, but we handle it anyway.
+ exception_state.ThrowTypeError("Invalid input string '" + input_ +
+ "'. It unexpectedly fails to tokenize.");
+ return;
+ }
+
+ token_list_ = std::move(tokenize_result.value());
+ result_ = MakeGarbageCollected<URLPatternInit>();
+
+ // When constructing a pattern using structured input like
+ // `new URLPattern({ pathname: 'foo' })` any missing components will be
+ // defaulted to wildcards. In the constructor string case, however, all
+ // components are precisely defined as either empty string or a longer
+ // value. This is due to there being no way to simply "leave out" a
+ // component when writing a URL. The behavior also matches the URL
+ // constructor.
+ //
+ // To implement this we initialize components to the empty string in advance.
+ //
+ // We can't, however, do this immediately. We want to allow the baseURL to
+ // provide information for relative URLs, so we only want to set the default
+ // empty string values for components following the first component in the
+ // relative URL.
+ //
+ // We therefore wait to set the default component values until after we exit
+ // the kInit state and have determined if we are in relative or absolute mode.
+
+ // Iterate through the list of tokens and update our state machine as we go.
+ for (; token_index_ < token_list_.size(); token_index_ += token_increment_) {
+ // Reset back to our default `token_increment_` value.
+ token_increment_ = 1;
+
+ // All states must respect the end of the token list. The liburlpattern
+ // tokenizer guarantees that the last token will have the type `kEnd`.
+ if (token_list_[token_index_].type == liburlpattern::TokenType::kEnd) {
+ // If we failed to find a protocol terminator then we are still in
+ // relative mode. We now need to determine the first component of the
+ // relative URL.
+ if (state_ == StringParseState::kInit) {
+ // Reset back to the start of the input string.
+ Rewind();
+
+ // If the string begins with `?` then it's a relative search component.
+ // If it starts with `#` then it's a relative hash component. Otherwise
+ // it's a relative pathname.
+ //
+ // In each case we initialize any components following the initial
+ // component to be empty string.
+ if (IsHashPrefix()) {
+ ChangeState(StringParseState::kHash, Skip(1));
+ } else if (IsSearchPrefix()) {
+ ChangeState(StringParseState::kSearch, Skip(1));
+ result_->setHash(g_empty_string);
+ } else {
+ ChangeState(StringParseState::kPathname, Skip(0));
+ result_->setSearch(g_empty_string);
+ result_->setHash(g_empty_string);
+ }
+ continue;
+ }
+
+ // If we failed to find an `@`, then there is no username and password.
+ // We should rewind and process the data as a hostname.
+ else if (state_ == StringParseState::kAuthority) {
+ RewindAndSetState(StringParseState::kHostname);
+ continue;
+ }
+
+ ChangeState(StringParseState::kDone, Skip(0));
+ break;
+ }
+
+ // In addition, all states must handle pattern groups. We do not permit
+ // a component to end in the middle of a pattern group. Therefore we skip
+ // past any tokens that are within `{` and `}`. Note, the tokenizer
+ // handles grouping `(` and `)` and `:foo` groups for us automatically, so
+ // we don't need special code for them here.
+ if (group_depth_ > 0) {
+ if (IsGroupClose())
+ group_depth_ -= 1;
+ else
+ continue;
+ }
+
+ if (IsGroupOpen()) {
+ group_depth_ += 1;
+ continue;
+ }
+
+ switch (state_) {
+ case StringParseState::kInit:
+ if (IsProtocolSuffix()) {
+ // We are in absolute mode and we know values will not be inherited
+ // from a base URL. Therefore initialize the rest of the components
+ // to the empty string.
+ result_->setUsername(g_empty_string);
+ result_->setPassword(g_empty_string);
+ result_->setHostname(g_empty_string);
+ result_->setPort(g_empty_string);
+ result_->setPathname(g_empty_string);
+ result_->setSearch(g_empty_string);
+ result_->setHash(g_empty_string);
+
+ // Update the state to expect the start of an absolute URL.
+ RewindAndSetState(StringParseState::kProtocol);
+ }
+ break;
+
+ case StringParseState::kProtocol:
+ // If we find the end of the protocol component...
+ if (IsProtocolSuffix()) {
+ // First we eagerly compile the protocol pattern and use it to
+ // compute if this entire URLPattern should be treated as a
+ // "standard" URL. If any of the special schemes, like `https`,
+ // match the protocol pattern then we treat it as standard.
+ ComputeShouldTreatAsStandardURL(exception_state);
+ if (exception_state.HadException())
+ return;
+
+ // Standard URLs default to `/` for the pathname.
+ if (should_treat_as_standard_url_)
+ result_->setPathname("/");
+
+ // By default we treat this as a "cannot-be-a-base-URL" or what chrome
+ // calls a "path" URL. In this case we go straight to the pathname
+ // component. The hostname and port are left with their default
+ // empty string values.
+ StringParseState next_state = StringParseState::kPathname;
+ Skip skip = Skip(1);
+
+ // If there are authority slashes, like `https://`, then
+ // we must transition to the authority section of the URLPattern.
+ if (NextIsAuthoritySlashes()) {
+ next_state = StringParseState::kAuthority;
+ skip = Skip(3);
+ }
+
+ // If there are no authority slashes, but the protocol is special
+ // then we still go to the authority section as this is a "standard"
+ // URL. This differs from the above case since we don't need to skip
+ // the extra slashes.
+ else if (should_treat_as_standard_url_) {
+ next_state = StringParseState::kAuthority;
+ }
+
+ ChangeState(next_state, skip);
+ }
+ break;
+
+ case StringParseState::kAuthority:
+ // Before going to the hostname state we must see if there is an
+ // identity of the form:
+ //
+ // <username>:<password>@<hostname>
+ //
+ // We check for this by looking for the `@` character. The username
+ // and password are themselves each optional, so the `:` may not be
+ // present. If we see the `@` we just go to the username state
+ // and let it proceed until it hits either the password separator
+ // or the `@` terminator.
+ if (IsIdentityTerminator())
+ RewindAndSetState(StringParseState::kUsername);
+
+ // Stop searching for the `@` character if we see the beginning
+ // of the pathname, search, or hash components.
+ else if (IsPathnameStart() || IsSearchPrefix() || IsHashPrefix())
+ RewindAndSetState(StringParseState::kHostname);
+ break;
+
+ case StringParseState::kUsername:
+ // If we find a `:` then transition to the password component state.
+ if (IsPasswordPrefix())
+ ChangeState(StringParseState::kPassword, Skip(1));
+
+ // If we find a `@` then transition to the hostname component state.
+ else if (IsIdentityTerminator())
+ ChangeState(StringParseState::kHostname, Skip(1));
+ break;
+
+ case StringParseState::kPassword:
+ // If we find a `@` then transition to the hostname component state.
+ if (IsIdentityTerminator())
+ ChangeState(StringParseState::kHostname, Skip(1));
+ break;
+
+ case StringParseState::kHostname:
+ // If we find a `:` then we transition to the port component state.
+ if (IsPortPrefix())
+ ChangeState(StringParseState::kPort, Skip(1));
+
+ // If we find a `/` then we transition to the pathname component state.
+ else if (IsPathnameStart())
+ ChangeState(StringParseState::kPathname, Skip(0));
+
+ // If we find a `?` then we transition to the search component state.
+ else if (IsSearchPrefix())
+ ChangeState(StringParseState::kSearch, Skip(1));
+
+ // If we find a `#` then we transition to the hash component state.
+ else if (IsHashPrefix())
+ ChangeState(StringParseState::kHash, Skip(1));
+ break;
+
+ case StringParseState::kPort:
+ // If we find a `/` then we transition to the pathname component state.
+ if (IsPathnameStart())
+ ChangeState(StringParseState::kPathname, Skip(0));
+ // If we find a `?` then we transition to the search component state.
+ else if (IsSearchPrefix())
+ ChangeState(StringParseState::kSearch, Skip(1));
+ // If we find a `#` then we transition to the hash component state.
+ else if (IsHashPrefix())
+ ChangeState(StringParseState::kHash, Skip(1));
+ break;
+ case StringParseState::kPathname:
+ // If we find a `?` then we transition to the search component state.
+ if (IsSearchPrefix())
+ ChangeState(StringParseState::kSearch, Skip(1));
+ // If we find a `#` then we transition to the hash component state.
+ else if (IsHashPrefix())
+ ChangeState(StringParseState::kHash, Skip(1));
+ break;
+ case StringParseState::kSearch:
+ // If we find a `#` then we transition to the hash component state.
+ if (IsHashPrefix())
+ ChangeState(StringParseState::kHash, Skip(1));
+ break;
+ case StringParseState::kHash:
+ // Nothing to do here as we are just looking for the end.
+ break;
+ case StringParseState::kDone:
+ NOTREACHED();
+ break;
+ };
+ }
+}
+
+// Leaves the current `state_` for `new_state`. The text covered by the tokens
+// from `component_start_` up to `token_index_` is first stored into the
+// `result_` property that corresponds to the state being left; the actual
+// transition is then delegated to ChangeStateWithoutSettingComponent().
+void Parser::ChangeState(StringParseState new_state, Skip skip) {
+  switch (state_) {
+    case StringParseState::kInit:
+    case StringParseState::kAuthority:
+      // These states do not correspond to a component, so nothing is stored.
+      break;
+    case StringParseState::kProtocol:
+      result_->setProtocol(MakeComponentString());
+      break;
+    case StringParseState::kUsername:
+      result_->setUsername(MakeComponentString());
+      break;
+    case StringParseState::kPassword:
+      result_->setPassword(MakeComponentString());
+      break;
+    case StringParseState::kHostname:
+      result_->setHostname(MakeComponentString());
+      break;
+    case StringParseState::kPort:
+      result_->setPort(MakeComponentString());
+      break;
+    case StringParseState::kPathname:
+      result_->setPathname(MakeComponentString());
+      break;
+    case StringParseState::kSearch:
+      result_->setSearch(MakeComponentString());
+      break;
+    case StringParseState::kHash:
+      result_->setHash(MakeComponentString());
+      break;
+    case StringParseState::kDone:
+      // No transition is expected out of the terminal state.
+      NOTREACHED();
+      break;
+  }
+
+  ChangeStateWithoutSettingComponent(new_state, skip);
+}
+
+// Switches to `new_state` without storing any component text. `skip` is the
+// number of tokens to step over to reach the first token of the next
+// component.
+void Parser::ChangeStateWithoutSettingComponent(StringParseState new_state,
+                                                Skip skip) {
+  // Step over the skipped tokens. The token we land on is both the next token
+  // to examine and the first token of the new component.
+  token_index_ += skip.value();
+  component_start_ = token_index_;
+
+  // The index has already been advanced, so the parse loop must not add its
+  // usual increment on this turn.
+  token_increment_ = 0;
+
+  state_ = new_state;
+}
+
+// Moves `token_index_` back to `component_start_` and zeroes
+// `token_increment_` so the parse loop's next increment leaves it there.
+void Parser::Rewind() {
+ token_index_ = component_start_;
+ token_increment_ = 0;
+}
+
+// Rewinds to the start of the current component and switches to `new_state`
+// without storing any component text in `result_`.
+void Parser::RewindAndSetState(StringParseState new_state) {
+ Rewind();
+ state_ = new_state;
+}
+
+// Bounds-tolerant token lookup: an `index` past the end of `token_list_`
+// yields the final token, which is expected to be the `kEnd` token.
+const liburlpattern::Token& Parser::SafeToken(size_t index) const {
+  const bool out_of_range = index >= token_list_.size();
+  if (out_of_range) {
+    DCHECK(!token_list_.empty());
+    DCHECK(token_list_.back().type == liburlpattern::TokenType::kEnd);
+    return token_list_.back();
+  }
+  return token_list_[index];
+}
+
+// True when the token at `index` carries exactly the text `value` and its type
+// is one of kChar, kEscapedChar, or kInvalidChar.
+bool Parser::IsNonSpecialPatternChar(size_t index, const char* value) const {
+  const liburlpattern::Token& candidate = SafeToken(index);
+  if (candidate.value != value)
+    return false;
+
+  switch (candidate.type) {
+    case liburlpattern::TokenType::kChar:
+    case liburlpattern::TokenType::kEscapedChar:
+    case liburlpattern::TokenType::kInvalidChar:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// True when the current token is a non-special `:` character.
+bool Parser::IsProtocolSuffix() const {
+ return IsNonSpecialPatternChar(token_index_, ":");
+}
+
+// True when the two tokens immediately after the current token are both
+// non-special `/` characters.
+bool Parser::NextIsAuthoritySlashes() const {
+ return IsNonSpecialPatternChar(token_index_ + 1, "/") &&
+ IsNonSpecialPatternChar(token_index_ + 2, "/");
+}
+
+// True when the current token is a non-special `@` character.
+bool Parser::IsIdentityTerminator() const {
+ return IsNonSpecialPatternChar(token_index_, "@");
+}
+
+// True when the current token is a non-special `:` character.
+bool Parser::IsPasswordPrefix() const {
+ return IsNonSpecialPatternChar(token_index_, ":");
+}
+
+// True when the current token is a non-special `:` character.
+bool Parser::IsPortPrefix() const {
+ return IsNonSpecialPatternChar(token_index_, ":");
+}
+
+// True when the current token is a non-special `/` character.
+bool Parser::IsPathnameStart() const {
+ return IsNonSpecialPatternChar(token_index_, "/");
+}
+
+// Decides whether the current token is the `?` that begins the search
+// component, as opposed to a `?` acting as an optional-group modifier.
+bool Parser::IsSearchPrefix() const {
+  // A `?` that is a plain, escaped, or invalid character always qualifies.
+  if (IsNonSpecialPatternChar(token_index_, "?"))
+    return true;
+
+  // Any other token can only qualify if its text is `?`.
+  if (token_list_[token_index_].value != "?")
+    return false;
+
+  // A `?` that is not a normal character must have been tokenized as an
+  // optional group modifier.
+  DCHECK_EQ(SafeToken(token_index_).type,
+            liburlpattern::TokenType::kOtherModifier);
+
+  // The `?` was tokenized as a modifier. It is only treated as the search
+  // prefix when it could not be a valid modifier in a liburlpattern string.
+  // Since a modifier must follow a matching group, the preceding token
+  // decides the outcome.
+  //
+  // For the string:
+  //
+  // https://example.com/foo?bar
+  //
+  // the result is true because the previous token is an `o` of type kChar.
+  // For the string:
+  //
+  // https://example.com/:name?bar
+  //
+  // the result is false because the previous token is `:name` of type kName.
+  // A developer who intends a search prefix there has to escape the question
+  // mark, like `:name\\?bar`.
+  //
+  // Note, when `token_index_` is zero the subtraction wraps around and
+  // `SafeToken()` hands back the kEnd token. That yields true, which is
+  // correct as a pattern cannot normally begin with an unescaped `?`.
+  switch (SafeToken(token_index_ - 1).type) {
+    case liburlpattern::TokenType::kName:
+    case liburlpattern::TokenType::kRegex:
+    case liburlpattern::TokenType::kClose:
+    case liburlpattern::TokenType::kAsterisk:
+      return false;
+    default:
+      return true;
+  }
+}
+
+// True when the current token is a non-special `#` character.
+bool Parser::IsHashPrefix() const {
+ return IsNonSpecialPatternChar(token_index_, "#");
+}
+
+// True when the current token has type kOpen. Indexes `token_list_` directly,
+// so `token_index_` must be in range.
+bool Parser::IsGroupOpen() const {
+ return token_list_[token_index_].type == liburlpattern::TokenType::kOpen;
+}
+
+// True when the current token has type kClose. Indexes `token_list_` directly,
+// so `token_index_` must be in range.
+bool Parser::IsGroupClose() const {
+ return token_list_[token_index_].type == liburlpattern::TokenType::kClose;
+}
+
+// Extracts the slice of the UTF-8 input that starts at the token at
+// `component_start_` and ends just before the current token, and returns it as
+// a String.
+String Parser::MakeComponentString() const {
+  DCHECK_LT(token_index_, token_list_.size());
+  const liburlpattern::Token& current = token_list_[token_index_];
+
+  // Offsets into `utf8_` taken from the tokens' `index` fields.
+  const size_t begin_offset = SafeToken(component_start_).index;
+  const auto end_offset = current.index;
+
+  DCHECK_LE(begin_offset, utf8_.size());
+  DCHECK_GE(end_offset, begin_offset);
+  // Only the kEnd token may sit exactly at the end of the input.
+  DCHECK(end_offset < utf8_.size() ||
+         (end_offset == utf8_.size() &&
+          current.type == liburlpattern::TokenType::kEnd));
+
+  return String::FromUTF8(utf8_.data() + begin_offset,
+                          end_offset - begin_offset);
+}
+
+// Compiles the protocol text gathered so far into `protocol_component_` and
+// sets `should_treat_as_standard_url_` when that component reports that it
+// should be treated as a standard URL. Must be called in the kProtocol state.
+// Any compile error is reported through `exception_state`.
+void Parser::ComputeShouldTreatAsStandardURL(ExceptionState& exception_state) {
+  DCHECK_EQ(state_, StringParseState::kProtocol);
+
+  protocol_component_ =
+      Component::Compile(MakeComponentString(), Component::Type::kProtocol,
+                         /*protocol_component=*/nullptr, exception_state);
+
+  // Compile() may hand back nullptr; the flag is then left untouched.
+  if (!protocol_component_)
+    return;
+
+  if (protocol_component_->ShouldTreatAsStandardURL())
+    should_treat_as_standard_url_ = true;
+}
+
+} // namespace url_pattern
+} // namespace blink
diff --git a/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h
new file mode 100644
index 00000000000..c3392018b79
--- /dev/null
+++ b/chromium/third_party/blink/renderer/modules/url_pattern/url_pattern_parser.h
@@ -0,0 +1,192 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_PARSER_H_
+#define THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_PARSER_H_
+
+#include <vector>
+
+#include "base/types/strong_alias.h"
+#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
+#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
+#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
+
+namespace liburlpattern {
+struct Token;
+} // namespace liburlpattern
+
+namespace blink {
+
+class ExceptionState;
+class URLPatternInit;
+
+namespace url_pattern {
+
+class Component;
+
+// A helper class to parse the first string passed to the URLPattern
+// constructor. In general the parser works by using the liburlpattern
+// tokenizer to first split up the input into pattern tokens. It can
+// then look through the tokens to find non-special characters that match
+// the different URL component separators. Each component is then split
+// off and stored in a `URLPatternInit` object that can be accessed via
+// `GetResult()`. The intent is that this init object should then be
+// processed as if it was passed into the constructor itself.
+class Parser final {
+ STACK_ALLOCATED();
+
+ public:
+ explicit Parser(const String& input);
+
+ // Attempt to parse the input string used to construct the Parser object.
+ // This method may only be called once. Any errors will be thrown on the
+ // given `exception_state`. Retrieve the parse result by calling
+ // `GetResult()`. A protocol component will also be eagerly compiled for
+ // absolute pattern strings. It is not compiled for relative pattern strings.
+ // The compiled protocol Component can be accessed by calling
+ // `GetProtocolComponent()`.
+ void Parse(ExceptionState& exception_state);
+
+ // Return the parse result. Should only be called after `Parse()` succeeds.
+ URLPatternInit* GetResult() const { return result_; }
+
+ // Return the protocol component if it was compiled as part of parsing the
+ // input string. This should only be called after `Parse()` succeeds.
+ // This will return nullptr if the input was a relative pattern string.
+ Component* GetProtocolComponent() const { return protocol_component_; }
+
+ private:
+ enum class StringParseState {
+ kInit,
+ kProtocol,
+ kAuthority,
+ kUsername,
+ kPassword,
+ kHostname,
+ kPort,
+ kPathname,
+ kSearch,
+ kHash,
+ kDone,
+ };
+
+ using Skip = base::StrongAlias<class SkipTag, int>;
+
+ // A utility function to move from the current `state_` to `new_state`. This
+ // method will populate the component string in `result_` corresponding to the
+ // current `state_` automatically. It will also set `component_start_` and
+ // `token_index_` to point to the first token of the next section based on how
+ // many tokens the `skip` argument indicates should be ignored.
+ void ChangeState(StringParseState new_state, Skip skip);
+
+ // A utility function to move to `new_state`. This is like `ChangeState()`,
+ // but does not automatically set the component string for the current state.
+ void ChangeStateWithoutSettingComponent(StringParseState new_state,
+ Skip skip);
+
+ // Rewind the `token_index_` back to the current `component_start_`.
+ void Rewind();
+
+ // Like `Rewind()`, but also sets the state. This is used for cases where
+ // the parser needs to "look ahead" to determine what parse state to enter.
+ void RewindAndSetState(StringParseState new_state);
+
+ // Attempt to access the Token at the given `index`. If the `index` is out
+ // of bounds for the `token_list_`, then the last Token in the list is
+ // returned. This will always be a `TokenType::kEnd` token.
+ const liburlpattern::Token& SafeToken(size_t index) const;
+
+ // Returns true if the token at the given `index` is not a special pattern
+ // character and if it matches the given `value`. This simply checks that the
+ // token type is kChar, kEscapedChar, or kInvalidChar.
+ bool IsNonSpecialPatternChar(size_t index, const char* value) const;
+
+ // Returns true if the current token is the protocol component suffix;
+ // e.g. ':'.
+ bool IsProtocolSuffix() const;
+
+ // Returns true if the next two tokens are slashes; e.g. `//`.
+ bool NextIsAuthoritySlashes() const;
+
+ // Returns true if the current token is the `@` character used to separate
+ // username and password from the hostname.
+ bool IsIdentityTerminator() const;
+
+ // Returns true if the current token is the password prefix; e.g. `:`.
+ bool IsPasswordPrefix() const;
+
+ // Returns true if the current token is the port prefix; e.g. `:`.
+ bool IsPortPrefix() const;
+
+ // Returns true if the current token is the start of the pathname; e.g. `/`.
+ bool IsPathnameStart() const;
+
+ // Returns true if the current token is the search component prefix; e.g. `?`.
+ // This also takes into account if this could be a valid pattern modifier by
+ // looking at the preceding tokens.
+ bool IsSearchPrefix() const;
+
+ // Returns true if the current token is the hash component prefix; e.g. `#`.
+ bool IsHashPrefix() const;
+
+ // These methods indicate if the current token is opening or closing a pattern
+ // grouping; e.g. `{` or `}`.
+ bool IsGroupOpen() const;
+ bool IsGroupClose() const;
+
+ // This method returns a String consisting of the tokens between
+ // `component_start_` and the current `token_index_`.
+ String MakeComponentString() const;
+
+ // Compiles the protocol component and records whether this URL should be
+ // treated as a "standard URL". These URLs automatically append a `/` for the
+ // pathname if one is not specified. Returns nothing; errors are reported via
+ // `exception_state`.
+ void ComputeShouldTreatAsStandardURL(ExceptionState& exception_state);
+
+ // The input string to the parser.
+ const String input_;
+
+ // UTF8 representation of `input_`.
+ const StringUTF8Adaptor utf8_;
+
+ // As we parse the input string we populate a `URLPatternInit` dictionary
+ // with each component pattern. This is then the final result of the parse.
+ URLPatternInit* result_ = nullptr;
+
+ // The compiled Component for the protocol. This is generated for absolute
+ // strings where we need to determine if the value should be treated as
+ // a "standard" URL.
+ Component* protocol_component_ = nullptr;
+
+ // The list of Tokens produced by calling `liburlpattern::Tokenize()` on
+ // `input_`.
+ std::vector<liburlpattern::Token> token_list_;
+
+ // The index of the first Token to include in the component string.
+ size_t component_start_ = 0;
+
+ // The index of the current Token being considered.
+ size_t token_index_ = 0;
+
+ // The value to add to `token_index_` on each turn through the parse
+ // loop. While typically this is `1`, it is also set to `0` at times for
+ // things like state transitions, etc. It is automatically reset back to
+ // `1` at the top of the parse loop.
+ size_t token_increment_ = 1;
+
+ // The current nesting depth of `{ }` pattern groupings.
+ int group_depth_ = 0;
+
+ // The current parse state. This should only be changed via `ChangeState()`
+ // or `RewindAndSetState()`.
+ StringParseState state_ = StringParseState::kInit;
+
+ // True if we should apply parse rules as if this is a "standard" URL. If
+ // false then this is treated as a "not a base URL" or "path" URL.
+ bool should_treat_as_standard_url_ = false;
+};
+
+} // namespace url_pattern
+} // namespace blink
+
+#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_URL_PATTERN_URL_PATTERN_PARSER_H_