diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2016-06-02 10:15:40 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-06-02 08:41:08 +0000 |
commit | b92421879c003a0857b2074f7e05b3bbbb326569 (patch) | |
tree | bdfd21ad74690ae4069e4a055191844994027b78 /chromium/extensions | |
parent | 980b784afe75be22158126ac6a639c19459d3427 (diff) | |
download | qtwebengine-chromium-b92421879c003a0857b2074f7e05b3bbbb326569.tar.gz |
BASELINE: Update Chromium to 51.0.2704.79
Also adds a few files for url_parsing in extensions.
Change-Id: Ie4820c3da75f0a56b3cc86dccc077d671227077b
Reviewed-by: Joerg Bornemann <joerg.bornemann@qt.io>
Diffstat (limited to 'chromium/extensions')
-rw-r--r-- | chromium/extensions/common/constants.cc | 84 | ||||
-rw-r--r-- | chromium/extensions/common/constants.h | 215 | ||||
-rw-r--r-- | chromium/extensions/common/url_pattern.cc | 621 | ||||
-rw-r--r-- | chromium/extensions/common/url_pattern.h | 262 |
4 files changed, 1182 insertions, 0 deletions
diff --git a/chromium/extensions/common/constants.cc b/chromium/extensions/common/constants.cc new file mode 100644 index 00000000000..cfd00c117e3 --- /dev/null +++ b/chromium/extensions/common/constants.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extensions/common/constants.h" + +namespace extensions { + +const char kExtensionScheme[] = "chrome-extension"; +const char kExtensionResourceScheme[] = "chrome-extension-resource"; + +const base::FilePath::CharType kManifestFilename[] = + FILE_PATH_LITERAL("manifest.json"); +const base::FilePath::CharType kLocaleFolder[] = + FILE_PATH_LITERAL("_locales"); +const base::FilePath::CharType kMessagesFilename[] = + FILE_PATH_LITERAL("messages.json"); +const base::FilePath::CharType kPlatformSpecificFolder[] = + FILE_PATH_LITERAL("_platform_specific"); +const base::FilePath::CharType kMetadataFolder[] = + FILE_PATH_LITERAL("_metadata"); +const base::FilePath::CharType kVerifiedContentsFilename[] = + FILE_PATH_LITERAL("verified_contents.json"); +const base::FilePath::CharType kComputedHashesFilename[] = + FILE_PATH_LITERAL("computed_hashes.json"); + +const char kInstallDirectoryName[] = "Extensions"; + +const char kTempExtensionName[] = "CRX_INSTALL"; + +const char kDecodedImagesFilename[] = "DECODED_IMAGES"; + +const char kDecodedMessageCatalogsFilename[] = "DECODED_MESSAGE_CATALOGS"; + +const char kGeneratedBackgroundPageFilename[] = + "_generated_background_page.html"; + +const char kModulesDir[] = "_modules"; + +const base::FilePath::CharType kExtensionFileExtension[] = + FILE_PATH_LITERAL(".crx"); +const base::FilePath::CharType kExtensionKeyFileExtension[] = + FILE_PATH_LITERAL(".pem"); + +// If auto-updates are turned on, default to running every 5 hours. 
+const int kDefaultUpdateFrequencySeconds = 60 * 60 * 5; + +const char kLocalAppSettingsDirectoryName[] = "Local App Settings"; +const char kLocalExtensionSettingsDirectoryName[] = "Local Extension Settings"; +const char kSyncAppSettingsDirectoryName[] = "Sync App Settings"; +const char kSyncExtensionSettingsDirectoryName[] = "Sync Extension Settings"; +const char kManagedSettingsDirectoryName[] = "Managed Extension Settings"; +const char kStateStoreName[] = "Extension State"; +const char kRulesStoreName[] = "Extension Rules"; +const char kWebStoreAppId[] = "ahfgeienlihckogmohjhadlkjgocpleb"; + +const char kMimeTypeJpeg[] = "image/jpeg"; +const char kMimeTypePng[] = "image/png"; + +} // namespace extensions + +namespace extension_misc { + +const char kPdfExtensionId[] = "mhjfbmdgcfjbbpaeojofohoefgiehjai"; +const char kQuickOfficeComponentExtensionId[] = + "bpmcpldpdmajfigpchkicefoigmkfalc"; +const char kQuickOfficeInternalExtensionId[] = + "ehibbfinohgbchlgdbfpikodjaojhccn"; +const char kQuickOfficeExtensionId[] = "gbkeegbaiigmenfmjfclcdgdpimamgkj"; +const char kMimeHandlerPrivateTestExtensionId[] = + "oickdpebdnfbgkcaoklfcdhjniefkcji"; + +const char kProdHangoutsExtensionId[] = "nckgahadagoaajjgafhacjanaoiihapd"; +const char* const kHangoutsExtensionIds[6] = { + kProdHangoutsExtensionId, + "ljclpkphhpbpinifbeabbhlfddcpfdde", // Debug. + "ppleadejekpmccmnpjdimmlfljlkdfej", // Alpha. + "eggnbpckecmjlblplehfpjjdhhidfdoj", // Beta. + "jfjjdfefebklmdbmenmlehlopoocnoeh", // Packaged App Debug. + "knipolnnllmklapflnccelgolnpehhpl" // Packaged App Prod. + // Keep in sync with _api_features.json and _manifest_features.json. +}; + +} // namespace extension_misc diff --git a/chromium/extensions/common/constants.h b/chromium/extensions/common/constants.h new file mode 100644 index 00000000000..d2bbb435384 --- /dev/null +++ b/chromium/extensions/common/constants.h @@ -0,0 +1,215 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef EXTENSIONS_COMMON_CONSTANTS_H_ +#define EXTENSIONS_COMMON_CONSTANTS_H_ + +#include "base/files/file_path.h" +#include "ui/base/layout.h" + +namespace extensions { + +// Scheme we serve extension content from. +extern const char kExtensionScheme[]; + +// Canonical schemes you can use as input to GURL.SchemeIs(). +extern const char kExtensionResourceScheme[]; + +// The name of the manifest inside an extension. +extern const base::FilePath::CharType kManifestFilename[]; + +// The name of locale folder inside an extension. +extern const base::FilePath::CharType kLocaleFolder[]; + +// The name of the messages file inside an extension. +extern const base::FilePath::CharType kMessagesFilename[]; + +// The base directory for subdirectories with platform-specific code. +extern const base::FilePath::CharType kPlatformSpecificFolder[]; + +// A directory reserved for metadata, generated either by the webstore +// or chrome. +extern const base::FilePath::CharType kMetadataFolder[]; + +// Name of the verified contents file within the metadata folder. +extern const base::FilePath::CharType kVerifiedContentsFilename[]; + +// Name of the computed hashes file within the metadata folder. +extern const base::FilePath::CharType kComputedHashesFilename[]; + +// The name of the directory inside the profile where extensions are +// installed to. +extern const char kInstallDirectoryName[]; + +// The name of a temporary directory to install an extension into for +// validation before finalizing install. +extern const char kTempExtensionName[]; + +// The file to write our decoded images to, relative to the extension_path. +extern const char kDecodedImagesFilename[]; + +// The file to write our decoded message catalogs to, relative to the +// extension_path. 
+extern const char kDecodedMessageCatalogsFilename[]; + +// The filename to use for a background page generated from +// background.scripts. +extern const char kGeneratedBackgroundPageFilename[]; + +// Path to imported modules. +extern const char kModulesDir[]; + +// The file extension (.crx) for extensions. +extern const base::FilePath::CharType kExtensionFileExtension[]; + +// The file extension (.pem) for private key files. +extern const base::FilePath::CharType kExtensionKeyFileExtension[]; + +// Default frequency for auto updates, if turned on. +extern const int kDefaultUpdateFrequencySeconds; + +// The name of the directory inside the profile where per-app local settings +// are stored. +extern const char kLocalAppSettingsDirectoryName[]; + +// The name of the directory inside the profile where per-extension local +// settings are stored. +extern const char kLocalExtensionSettingsDirectoryName[]; + +// The name of the directory inside the profile where per-app synced settings +// are stored. +extern const char kSyncAppSettingsDirectoryName[]; + +// The name of the directory inside the profile where per-extension synced +// settings are stored. +extern const char kSyncExtensionSettingsDirectoryName[]; + +// The name of the directory inside the profile where per-extension persistent +// managed settings are stored. +extern const char kManagedSettingsDirectoryName[]; + +// The name of the database inside the profile where chrome-internal +// extension state resides. +extern const char kStateStoreName[]; + +// The name of the database inside the profile where declarative extension +// rules are stored. +extern const char kRulesStoreName[]; + +// The URL query parameter key corresponding to multi-login user index. +extern const char kAuthUserQueryKey[]; + +// Mime type strings +extern const char kMimeTypeJpeg[]; +extern const char kMimeTypePng[]; + +// The extension id of the Web Store component application. 
+extern const char kWebStoreAppId[]; + +// Enumeration of possible app launch sources. +// Note the enumeration is used in UMA histogram so entries +// should not be re-ordered or removed. +enum AppLaunchSource { + SOURCE_UNTRACKED = 0, + SOURCE_APP_LAUNCHER, + SOURCE_NEW_TAB_PAGE, + SOURCE_RELOAD, + SOURCE_RESTART, + SOURCE_LOAD_AND_LAUNCH, + SOURCE_COMMAND_LINE, + SOURCE_FILE_HANDLER, + SOURCE_URL_HANDLER, + SOURCE_SYSTEM_TRAY, + SOURCE_ABOUT_PAGE, + SOURCE_KEYBOARD, + SOURCE_EXTENSIONS_PAGE, + SOURCE_MANAGEMENT_API, + SOURCE_EPHEMERAL_APP_DEPRECATED, + SOURCE_BACKGROUND, + SOURCE_KIOSK, + SOURCE_CHROME_INTERNAL, + SOURCE_TEST, + + NUM_APP_LAUNCH_SOURCES +}; + +// This enum is used for the launch type the user wants to use for an +// application. +// Do not remove items or re-order this enum as it is used in preferences +// and histograms. +enum LaunchType { + LAUNCH_TYPE_INVALID = -1, + LAUNCH_TYPE_FIRST = 0, + LAUNCH_TYPE_PINNED = LAUNCH_TYPE_FIRST, + LAUNCH_TYPE_REGULAR = 1, + LAUNCH_TYPE_FULLSCREEN = 2, + LAUNCH_TYPE_WINDOW = 3, + NUM_LAUNCH_TYPES, + + // Launch an app in the way a click on the NTP would, + // if no user pref were set. Update this constant to change + // the default for the NTP and chrome.management.launchApp(). + LAUNCH_TYPE_DEFAULT = LAUNCH_TYPE_REGULAR +}; + +// Don't remove items or change the order of this enum. It's used in +// histograms and preferences. +enum LaunchContainer { + LAUNCH_CONTAINER_WINDOW, + LAUNCH_CONTAINER_PANEL, + LAUNCH_CONTAINER_TAB, + // For platform apps, which don't actually have a container (they just get a + // "onLaunched" event). + LAUNCH_CONTAINER_NONE, + NUM_LAUNCH_CONTAINERS +}; + +} // namespace extensions + +namespace extension_misc { + +// Matches chrome.windows.WINDOW_ID_NONE. +const int kUnknownWindowId = -1; + +// Matches chrome.windows.WINDOW_ID_CURRENT. +const int kCurrentWindowId = -2; + +// NOTE: If you change this list, you should also change kExtensionIconSizes +// in cc file. 
+enum ExtensionIcons { + EXTENSION_ICON_GIGANTOR = 512, + EXTENSION_ICON_EXTRA_LARGE = 256, + EXTENSION_ICON_LARGE = 128, + EXTENSION_ICON_MEDIUM = 48, + EXTENSION_ICON_SMALL = 32, + EXTENSION_ICON_SMALLISH = 24, + EXTENSION_ICON_ACTION = 19, + EXTENSION_ICON_BITTY = 16, + EXTENSION_ICON_INVALID = 0, +}; + +// The extension id of the PDF extension. +extern const char kPdfExtensionId[]; + +// The extension id of the Office Viewer component extension. +extern const char kQuickOfficeComponentExtensionId[]; + +// The extension id of the Office Viewer extension on the internal webstore. +extern const char kQuickOfficeInternalExtensionId[]; + +// The extension id of the Office Viewer extension. +extern const char kQuickOfficeExtensionId[]; + +// The extension id used for testing mimeHandlerPrivate. +extern const char kMimeHandlerPrivateTestExtensionId[]; + +// The extension id for the production version of Hangouts. +extern const char kProdHangoutsExtensionId[]; + +// Extension ids used by Hangouts. +extern const char* const kHangoutsExtensionIds[6]; + +} // namespace extension_misc + +#endif // EXTENSIONS_COMMON_CONSTANTS_H_ diff --git a/chromium/extensions/common/url_pattern.cc b/chromium/extensions/common/url_pattern.cc new file mode 100644 index 00000000000..a74c864c8e0 --- /dev/null +++ b/chromium/extensions/common/url_pattern.cc @@ -0,0 +1,621 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "extensions/common/url_pattern.h" + +#include <stddef.h> + +#include <ostream> + +#include "base/macros.h" +#include "base/strings/pattern.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "base/strings/stringprintf.h" +#include "content/public/common/url_constants.h" +#include "extensions/common/constants.h" +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" +#include "url/gurl.h" +#include "url/url_util.h" + +const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; + +namespace { + +// TODO(aa): What about more obscure schemes like data: and javascript: ? +// Note: keep this array in sync with kValidSchemeMasks. +const char* kValidSchemes[] = { + url::kHttpScheme, + url::kHttpsScheme, + url::kFileScheme, + url::kFtpScheme, + content::kChromeUIScheme, + extensions::kExtensionScheme, + url::kFileSystemScheme, +}; + +const int kValidSchemeMasks[] = { + URLPattern::SCHEME_HTTP, + URLPattern::SCHEME_HTTPS, + URLPattern::SCHEME_FILE, + URLPattern::SCHEME_FTP, + URLPattern::SCHEME_CHROMEUI, + URLPattern::SCHEME_EXTENSION, + URLPattern::SCHEME_FILESYSTEM, +}; + +static_assert(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), + "must keep these arrays in sync"); + +const char kParseSuccess[] = "Success."; +const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; +const char kParseErrorInvalidScheme[] = "Invalid scheme."; +const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; +const char kParseErrorEmptyHost[] = "Host can not be empty."; +const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; +const char kParseErrorEmptyPath[] = "Empty path."; +const char kParseErrorInvalidPort[] = "Invalid port."; +const char kParseErrorInvalidHost[] = "Invalid host."; + +// Message explaining each URLPattern::ParseResult. 
+const char* const kParseResultMessages[] = { + kParseSuccess, + kParseErrorMissingSchemeSeparator, + kParseErrorInvalidScheme, + kParseErrorWrongSchemeType, + kParseErrorEmptyHost, + kParseErrorInvalidHostWildcard, + kParseErrorEmptyPath, + kParseErrorInvalidPort, + kParseErrorInvalidHost, +}; + +static_assert(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), + "must add message for each parse result"); + +const char kPathSeparator[] = "/"; + +bool IsStandardScheme(const std::string& scheme) { + // "*" gets the same treatment as a standard scheme. + if (scheme == "*") + return true; + + return url::IsStandard(scheme.c_str(), + url::Component(0, static_cast<int>(scheme.length()))); +} + +bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { + if (port == "*") + return true; + + // Only accept non-wildcard ports if the scheme uses ports. + if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) == + url::PORT_UNSPECIFIED) { + return false; + } + + int parsed_port = url::PORT_UNSPECIFIED; + if (!base::StringToInt(port, &parsed_port)) + return false; + return (parsed_port >= 0) && (parsed_port < 65536); +} + +// Returns |path| with the trailing wildcard stripped if one existed. +// +// The functions that rely on this (OverlapsWith and Contains) are only +// called for the patterns inside URLPatternSet. In those cases, we know that +// the path will have only a single wildcard at the end. This makes figuring +// out overlap much easier. It seems like there is probably a computer-sciency +// way to solve the general case, but we don't need that yet. +std::string StripTrailingWildcard(const std::string& path) { + size_t wildcard_index = path.find('*'); + size_t path_last = path.size() - 1; + return wildcard_index == path_last ? 
path.substr(0, path_last) : path; +} + +} // namespace + +// static +bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) { + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i]) + return true; + } + return false; +} + +URLPattern::URLPattern() + : valid_schemes_(SCHEME_NONE), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes) + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes, const std::string& pattern) + // Strict error checking is used, because this constructor is only + // appropriate when we know |pattern| is valid. + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") { + ParseResult result = Parse(pattern); + if (PARSE_SUCCESS != result) + NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result; +} + +URLPattern::URLPattern(const URLPattern& other) = default; + +URLPattern::~URLPattern() { +} + +bool URLPattern::operator<(const URLPattern& other) const { + return GetAsString() < other.GetAsString(); +} + +bool URLPattern::operator>(const URLPattern& other) const { + return GetAsString() > other.GetAsString(); +} + +bool URLPattern::operator==(const URLPattern& other) const { + return GetAsString() == other.GetAsString(); +} + +std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) { + return out << '"' << url_pattern.GetAsString() << '"'; +} + +URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { + spec_.clear(); + SetMatchAllURLs(false); + SetMatchSubdomains(false); + SetPort("*"); + + // Special case pattern to match every valid URL. + if (pattern == kAllUrlsPattern) { + SetMatchAllURLs(true); + return PARSE_SUCCESS; + } + + // Parse out the scheme. 
+ size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator); + bool has_standard_scheme_separator = true; + + // Some urls also use ':' alone as the scheme separator. + if (scheme_end_pos == std::string::npos) { + scheme_end_pos = pattern.find(':'); + has_standard_scheme_separator = false; + } + + if (scheme_end_pos == std::string::npos) + return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; + + if (!SetScheme(pattern.substr(0, scheme_end_pos))) + return PARSE_ERROR_INVALID_SCHEME; + + bool standard_scheme = IsStandardScheme(scheme_); + if (standard_scheme != has_standard_scheme_separator) + return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; + + // Advance past the scheme separator. + scheme_end_pos += + (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1); + if (scheme_end_pos >= pattern.size()) + return PARSE_ERROR_EMPTY_HOST; + + // Parse out the host and path. + size_t host_start_pos = scheme_end_pos; + size_t path_start_pos = 0; + + if (!standard_scheme) { + path_start_pos = host_start_pos; + } else if (scheme_ == url::kFileScheme) { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + if (host_end_pos == std::string::npos) { + // Allow hostname omission. + // e.g. file://* is interpreted as file:///*, + // file://foo* is interpreted as file:///foo*. + path_start_pos = host_start_pos - 1; + } else { + // Ignore hostname if scheme is file://. + // e.g. file://localhost/foo is equal to file:///foo. + path_start_pos = host_end_pos; + } + } else { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + + // Host is required. + if (host_start_pos == host_end_pos) + return PARSE_ERROR_EMPTY_HOST; + + if (host_end_pos == std::string::npos) + return PARSE_ERROR_EMPTY_PATH; + + host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); + + // The first component can optionally be '*' to match all subdomains. 
+ std::vector<std::string> host_components = base::SplitString( + host_, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); + + // Could be empty if the host only consists of whitespace characters. + if (host_components.empty() || + (host_components.size() == 1 && host_components[0].empty())) + return PARSE_ERROR_EMPTY_HOST; + + if (host_components[0] == "*") { + match_subdomains_ = true; + host_components.erase(host_components.begin(), + host_components.begin() + 1); + } + host_ = base::JoinString(host_components, "."); + + path_start_pos = host_end_pos; + } + + SetPath(pattern.substr(path_start_pos)); + + size_t port_pos = host_.find(':'); + if (port_pos != std::string::npos) { + if (!SetPort(host_.substr(port_pos + 1))) + return PARSE_ERROR_INVALID_PORT; + host_ = host_.substr(0, port_pos); + } + + // No other '*' can occur in the host, though. This isn't necessary, but is + // done as a convenience to developers who might otherwise be confused and + // think '*' works as a glob in the host. + if (host_.find('*') != std::string::npos) + return PARSE_ERROR_INVALID_HOST_WILDCARD; + + // Null characters are not allowed in hosts. 
+ if (host_.find('\0') != std::string::npos) + return PARSE_ERROR_INVALID_HOST; + + return PARSE_SUCCESS; +} + +void URLPattern::SetValidSchemes(int valid_schemes) { + spec_.clear(); + valid_schemes_ = valid_schemes; +} + +void URLPattern::SetHost(const std::string& host) { + spec_.clear(); + host_ = host; +} + +void URLPattern::SetMatchAllURLs(bool val) { + spec_.clear(); + match_all_urls_ = val; + + if (val) { + match_subdomains_ = true; + scheme_ = "*"; + host_.clear(); + SetPath("/*"); + } +} + +void URLPattern::SetMatchSubdomains(bool val) { + spec_.clear(); + match_subdomains_ = val; +} + +bool URLPattern::SetScheme(const std::string& scheme) { + spec_.clear(); + scheme_ = scheme; + if (scheme_ == "*") { + valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); + } else if (!IsValidScheme(scheme_)) { + return false; + } + return true; +} + +bool URLPattern::IsValidScheme(const std::string& scheme) const { + if (valid_schemes_ == SCHEME_ALL) + return true; + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) + return true; + } + + return false; +} + +void URLPattern::SetPath(const std::string& path) { + spec_.clear(); + path_ = path; + path_escaped_ = path_; + base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); + base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); +} + +bool URLPattern::SetPort(const std::string& port) { + spec_.clear(); + if (IsValidPortForScheme(scheme_, port)) { + port_ = port; + return true; + } + return false; +} + +bool URLPattern::MatchesURL(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. 
+ test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + std::string path_for_request = test.PathForRequest(); + if (has_inner_url) + path_for_request = test_url->path() + path_for_request; + + return MatchesSecurityOriginHelper(*test_url) && + MatchesPath(path_for_request); +} + +bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + return MatchesSecurityOriginHelper(*test_url); +} + +bool URLPattern::MatchesScheme(const std::string& test) const { + if (!IsValidScheme(test)) + return false; + + return scheme_ == "*" || test == scheme_; +} + +bool URLPattern::MatchesHost(const std::string& host) const { + std::string test(url::kHttpScheme); + test += url::kStandardSchemeSeparator; + test += host; + test += "/"; + return MatchesHost(GURL(test)); +} + +bool URLPattern::MatchesHost(const GURL& test) const { + // If the hosts are exactly equal, we have a match. + if (test.host() == host_) + return true; + + // If we're matching subdomains, and we have no host in the match pattern, + // that means that we're matching all hosts, which means we have a match no + // matter what the test host is. + if (match_subdomains_ && host_.empty()) + return true; + + // Otherwise, we can only match if our match pattern matches subdomains. + if (!match_subdomains_) + return false; + + // We don't do subdomain matching against IP addresses, so we can give up now + // if the test host is an IP address. + if (test.HostIsIPAddress()) + return false; + + // Check if the test host is a subdomain of our host. 
+ if (test.host().length() <= (host_.length() + 1)) + return false; + + if (test.host().compare(test.host().length() - host_.length(), + host_.length(), host_) != 0) + return false; + + return test.host()[test.host().length() - host_.length() - 1] == '.'; +} + +bool URLPattern::ImpliesAllHosts() const { + // Check if it matches all urls or is a pattern like http://*/*. + if (match_all_urls_ || + (match_subdomains_ && host_.empty() && port_ == "*" && path_ == "/*")) { + return true; + } + + // If this doesn't even match subdomains, it can't possibly imply all hosts. + if (!match_subdomains_) + return false; + + // If |host_| is a recognized TLD, this will be 0. We don't include private + // TLDs, so that, e.g., *.appspot.com does not imply all hosts. + size_t registry_length = net::registry_controlled_domains::GetRegistryLength( + host_, + net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, + net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + // If there was more than just a TLD in the host (e.g., *.foobar.com), it + // doesn't imply all hosts. + if (registry_length > 0) + return false; + + // At this point the host could either be just a TLD ("com") or some unknown + // TLD-like string ("notatld"). To disambiguate between them construct a + // fake URL, and check the registry. This returns 0 if the TLD is + // unrecognized, or the length of the recognized TLD. + registry_length = net::registry_controlled_domains::GetRegistryLength( + base::StringPrintf("foo.%s", host_.c_str()), + net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, + net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + // If we recognized this TLD, then this is a pattern like *.com, and it + // should imply all hosts. Otherwise, this doesn't imply all hosts. + return registry_length > 0; +} + +bool URLPattern::MatchesSingleOrigin() const { + // Strictly speaking, the port is part of the origin, but in URLPattern it + // defaults to *. 
It's not very interesting anyway, so leave it out. + return !ImpliesAllHosts() && scheme_ != "*" && !match_subdomains_; +} + +bool URLPattern::MatchesPath(const std::string& test) const { + // Make the behaviour of OverlapsWith consistent with MatchesURL, which is + // needed so that hosted apps on e.g. 'google.com' also run on 'google.com/'. + if (test + "/*" == path_escaped_) + return true; + + return base::MatchPattern(test, path_escaped_); +} + +const std::string& URLPattern::GetAsString() const { + if (!spec_.empty()) + return spec_; + + if (match_all_urls_) { + spec_ = kAllUrlsPattern; + return spec_; + } + + bool standard_scheme = IsStandardScheme(scheme_); + + std::string spec = scheme_ + + (standard_scheme ? url::kStandardSchemeSeparator : ":"); + + if (scheme_ != url::kFileScheme && standard_scheme) { + if (match_subdomains_) { + spec += "*"; + if (!host_.empty()) + spec += "."; + } + + if (!host_.empty()) + spec += host_; + + if (port_ != "*") { + spec += ":"; + spec += port_; + } + } + + if (!path_.empty()) + spec += path_; + + spec_ = spec; + return spec_; +} + +bool URLPattern::OverlapsWith(const URLPattern& other) const { + if (match_all_urls() || other.match_all_urls()) + return true; + return (MatchesAnyScheme(other.GetExplicitSchemes()) || + other.MatchesAnyScheme(GetExplicitSchemes())) + && (MatchesHost(other.host()) || other.MatchesHost(host())) + && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) + && (MatchesPath(StripTrailingWildcard(other.path())) || + other.MatchesPath(StripTrailingWildcard(path()))); +} + +bool URLPattern::Contains(const URLPattern& other) const { + if (match_all_urls()) + return true; + return MatchesAllSchemes(other.GetExplicitSchemes()) && + MatchesHost(other.host()) && + (!other.match_subdomains_ || match_subdomains_) && + MatchesPortPattern(other.port()) && + MatchesPath(StripTrailingWildcard(other.path())); +} + +bool URLPattern::MatchesAnyScheme( + const std::vector<std::string>& schemes) const 
{ + for (std::vector<std::string>::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (MatchesScheme(*i)) + return true; + } + + return false; +} + +bool URLPattern::MatchesAllSchemes( + const std::vector<std::string>& schemes) const { + for (std::vector<std::string>::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (!MatchesScheme(*i)) + return false; + } + + return true; +} + +bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { + // Ignore hostname if scheme is file://. + if (scheme_ != url::kFileScheme && !MatchesHost(test)) + return false; + + if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort()))) + return false; + + return true; +} + +bool URLPattern::MatchesPortPattern(const std::string& port) const { + return port_ == "*" || port_ == port; +} + +std::vector<std::string> URLPattern::GetExplicitSchemes() const { + std::vector<std::string> result; + + if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { + result.push_back(scheme_); + return result; + } + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (MatchesScheme(kValidSchemes[i])) { + result.push_back(kValidSchemes[i]); + } + } + + return result; +} + +std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { + std::vector<std::string> explicit_schemes = GetExplicitSchemes(); + std::vector<URLPattern> result; + + for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); + i != explicit_schemes.end(); ++i) { + URLPattern temp = *this; + temp.SetScheme(*i); + temp.SetMatchAllURLs(false); + result.push_back(temp); + } + + return result; +} + +// static +const char* URLPattern::GetParseResultString( + URLPattern::ParseResult parse_result) { + return kParseResultMessages[parse_result]; +} diff --git a/chromium/extensions/common/url_pattern.h b/chromium/extensions/common/url_pattern.h new file mode 100644 index 00000000000..bcdc7f65d5d --- /dev/null +++ 
b/chromium/extensions/common/url_pattern.h @@ -0,0 +1,262 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_ +#define EXTENSIONS_COMMON_URL_PATTERN_H_ + +#include <functional> +#include <iosfwd> +#include <string> +#include <vector> + +class GURL; + +// A pattern that can be used to match URLs. A URLPattern is a very restricted +// subset of URL syntax: +// +// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>' +// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' | +// 'chrome-extension' | 'filesystem' +// <host> := '*' | '*.' <anychar except '/' and '*'>+ +// <port> := [':' ('*' | <port number between 0 and 65535>)] +// <path> := '/' <any chars> +// +// * Host is not used when the scheme is 'file'. +// * The path can have embedded '*' characters which act as glob wildcards. +// * '<all_urls>' is a special pattern that matches any URL that contains a +// valid scheme (as specified by valid_schemes_). +// * The '*' scheme pattern excludes file URLs. +// +// Examples of valid patterns: +// - http://*/* +// - http://*/foo* +// - https://*.google.com/foo*bar +// - file://monkey* +// - http://127.0.0.1/* +// +// Examples of invalid patterns: +// - http://* -- path not specified +// - http://*foo/bar -- * not allowed as substring of host component +// - http://foo.*.bar/baz -- * must be first component +// - http:/bar -- scheme separator not found +// - foo://* -- invalid scheme +// - chrome:// -- we don't support chrome internal URLs +class URLPattern { + public: + // A collection of scheme bitmasks for use with valid_schemes. + enum SchemeMasks { + SCHEME_NONE = 0, + SCHEME_HTTP = 1 << 0, + SCHEME_HTTPS = 1 << 1, + SCHEME_FILE = 1 << 2, + SCHEME_FTP = 1 << 3, + SCHEME_CHROMEUI = 1 << 4, + SCHEME_EXTENSION = 1 << 5, + SCHEME_FILESYSTEM = 1 << 6, + + // IMPORTANT! 
+ // SCHEME_ALL will match every scheme, including chrome://, chrome- + // extension://, about:, etc. Because this has lots of security + // implications, third-party extensions should usually not be able to get + // access to URL patterns initialized this way. If there is a reason + // for violating this general rule, document why this is safe. + SCHEME_ALL = -1, + }; + + // Error codes returned from Parse(). + enum ParseResult { + PARSE_SUCCESS = 0, + PARSE_ERROR_MISSING_SCHEME_SEPARATOR, + PARSE_ERROR_INVALID_SCHEME, + PARSE_ERROR_WRONG_SCHEME_SEPARATOR, + PARSE_ERROR_EMPTY_HOST, + PARSE_ERROR_INVALID_HOST_WILDCARD, + PARSE_ERROR_EMPTY_PATH, + PARSE_ERROR_INVALID_PORT, + PARSE_ERROR_INVALID_HOST, + NUM_PARSE_RESULTS + }; + + // The <all_urls> string pattern. + static const char kAllUrlsPattern[]; + + // Returns true if the given |scheme| is considered valid for extensions. + static bool IsValidSchemeForExtensions(const std::string& scheme); + + explicit URLPattern(int valid_schemes); + + // Convenience to construct a URLPattern from a string. If the string is not + // known ahead of time, use Parse() instead, which returns success or failure. + URLPattern(int valid_schemes, const std::string& pattern); + + URLPattern(); + URLPattern(const URLPattern& other); + ~URLPattern(); + + bool operator<(const URLPattern& other) const; + bool operator>(const URLPattern& other) const; + bool operator==(const URLPattern& other) const; + + // Initializes this instance by parsing the provided string. Returns + // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On + // failure, this instance will have some intermediate values and is in an + // invalid state. + ParseResult Parse(const std::string& pattern_str); + + // Gets the bitmask of valid schemes. + int valid_schemes() const { return valid_schemes_; } + void SetValidSchemes(int valid_schemes); + + // Gets the host the pattern matches. 
This can be an empty string if the + // pattern matches all hosts (the input was <scheme>://*/<whatever>). + const std::string& host() const { return host_; } + void SetHost(const std::string& host); + + // Gets whether to match subdomains of host(). + bool match_subdomains() const { return match_subdomains_; } + void SetMatchSubdomains(bool val); + + // Gets the path the pattern matches with the leading slash. This can have + // embedded asterisks which are interpreted using glob rules. + const std::string& path() const { return path_; } + void SetPath(const std::string& path); + + // Returns true if this pattern matches all urls. + bool match_all_urls() const { return match_all_urls_; } + void SetMatchAllURLs(bool val); + + // Sets the scheme for pattern matches. This can be a single '*' if the + // pattern matches all valid schemes (as defined by the valid_schemes_ + // property). Returns false on failure (if the scheme is not valid). + bool SetScheme(const std::string& scheme); + // Note: You should use MatchesScheme() instead of this getter unless you + // absolutely need the exact scheme. This is exposed for testing. + const std::string& scheme() const { return scheme_; } + + // Returns true if the specified scheme can be used in this URL pattern, and + // false otherwise. Uses valid_schemes_ to determine validity. + bool IsValidScheme(const std::string& scheme) const; + + // Returns true if this instance matches the specified URL. + bool MatchesURL(const GURL& test) const; + + // Returns true if this instance matches the specified security origin. + bool MatchesSecurityOrigin(const GURL& test) const; + + // Returns true if |test| matches our scheme. + // Note that if test is "filesystem", this may fail whereas MatchesURL + // may succeed. MatchesURL is smart enough to look at the inner_url instead + // of the outer "filesystem:" part. + bool MatchesScheme(const std::string& test) const; + + // Returns true if |test| matches our host. 
+ bool MatchesHost(const std::string& test) const; + bool MatchesHost(const GURL& test) const; + + // Returns true if |test| matches our path. + bool MatchesPath(const std::string& test) const; + + // Returns true if the pattern is vague enough that it implies all hosts, + // such as *://*/*. + // This is an expensive method, and should be used sparingly! + // You should probably use URLPatternSet::ShouldWarnAllHosts(), which is + // cached. + bool ImpliesAllHosts() const; + + // Returns true if the pattern only matches a single origin. The pattern may + // include a path. + bool MatchesSingleOrigin() const; + + // Sets the port. Returns false if the port is invalid. + bool SetPort(const std::string& port); + const std::string& port() const { return port_; } + + // Returns a string representing this instance. + const std::string& GetAsString() const; + + // Determines whether there is a URL that would match this instance and + // another instance. This method is symmetrical: Calling + // other.OverlapsWith(this) would result in the same answer. + bool OverlapsWith(const URLPattern& other) const; + + // Returns true if this pattern matches all possible URLs that |other| can + // match. For example, http://*.google.com encompasses http://www.google.com. + bool Contains(const URLPattern& other) const; + + // Converts this URLPattern into an equivalent set of URLPatterns that don't + // use a wildcard in the scheme component. If this URLPattern doesn't use a + // wildcard scheme, then the returned set will contain one element that is + // equivalent to this instance. + std::vector<URLPattern> ConvertToExplicitSchemes() const; + + // Compares by host_: returns false if both patterns are <all_urls> patterns, + // otherwise returns whether a.host_ sorts before b.host_. + static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) { + if (a.match_all_urls_ && b.match_all_urls_) + return false; + return a.host_.compare(b.host_) < 0; + } + + // Used for origin comparisons in a std::set. 
+ class EffectiveHostCompareFunctor { + public: + bool operator()(const URLPattern& a, const URLPattern& b) const { + return EffectiveHostCompare(a, b); + } + }; + + // Get an error string for a ParseResult. + static const char* GetParseResultString(URLPattern::ParseResult parse_result); + + private: + // Returns true if any of the |schemes| items matches our scheme. + bool MatchesAnyScheme(const std::vector<std::string>& schemes) const; + + // Returns true if all of the |schemes| items match our scheme. + bool MatchesAllSchemes(const std::vector<std::string>& schemes) const; + + // Returns true if |test|'s host (skipped when scheme_ is the file scheme) + // and effective port match this pattern. + bool MatchesSecurityOriginHelper(const GURL& test) const; + + // Returns true if port_ is "*" or is string-equal to |port|. A |port| of "*" + // therefore only matches when port_ is also "*". + bool MatchesPortPattern(const std::string& port) const; + + // If the URLPattern contains a wildcard scheme, returns a list of + // equivalent literal schemes, otherwise returns the current scheme. + std::vector<std::string> GetExplicitSchemes() const; + + // A bitmask containing the schemes which are considered valid for this + // pattern. Parse() uses this to decide whether a pattern contains a valid + // scheme. + int valid_schemes_; + + // True if this is a special-case "<all_urls>" pattern. + bool match_all_urls_; + + // The scheme for the pattern. + std::string scheme_; + + // The host without any leading "*" components. + std::string host_; + + // Whether we should match subdomains of the host. This is true if the first + // component of the pattern's host was "*". + bool match_subdomains_; + + // The port. + std::string port_; + + // The path to match. This is everything after the host of the URL, or + // everything after the scheme in the case of file:// URLs. + std::string path_; + + // The path with "?" and "\" characters escaped for use with the + // MatchPattern() function. + std::string path_escaped_; + + // A string representing this URLPattern. 
+ mutable std::string spec_; +}; + +std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern); + +typedef std::vector<URLPattern> URLPatternList; + +#endif // EXTENSIONS_COMMON_URL_PATTERN_H_ |