// Copyright 2016 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_ #define COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_ // A class that implements the stateless methods used by the GetHashUpdate and // GetFullHash stubby calls made by Chrome using the SafeBrowsing V4 protocol. #include #include #include #include #include #include #include #include "base/containers/flat_set.h" #include "base/gtest_prod_util.h" #include "base/strings/string_piece.h" #include "components/safe_browsing_db/safebrowsing.pb.h" #include "net/url_request/url_request_status.h" #include "url/gurl.h" namespace net { class HttpRequestHeaders; class IPAddress; } // namespace net namespace safe_browsing { // The size of the hash prefix, in bytes. It should be between 4 to 32 (full // hash). using PrefixSize = size_t; // The minimum expected size (in bytes) of a hash-prefix. const PrefixSize kMinHashPrefixLength = 4; // The maximum expected size (in bytes) of a hash-prefix. This represents the // length of a SHA256 hash. const PrefixSize kMaxHashPrefixLength = 32; // A hash prefix sent by the SafeBrowsing PVer4 service. using HashPrefix = std::string; // A full SHA256 hash. using FullHash = HashPrefix; using ListUpdateRequest = FetchThreatListUpdatesRequest::ListUpdateRequest; using ListUpdateResponse = FetchThreatListUpdatesResponse::ListUpdateResponse; // Config passed to the constructor of a V4 protocol manager. struct V4ProtocolConfig { // The safe browsing client name sent in each request. std::string client_name; // Disable auto-updates using a command line switch. bool disable_auto_update; // The Google API key. std::string key_param; // Current product version sent in each request. std::string version; V4ProtocolConfig(const std::string& client_name, bool disable_auto_update, const std::string& key_param, const std::string& version); V4ProtocolConfig(const V4ProtocolConfig& other); ~V4ProtocolConfig(); private: V4ProtocolConfig() = delete; }; // Different types of threats that SafeBrowsing protects against. This is the // type that's returned to the clients of SafeBrowsing in Chromium. // GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing // GENERATED_JAVA_PREFIX_TO_STRIP: SB_THREAT_TYPE_ enum SBThreatType { // This type can be used for lists that can be checked synchronously so a // client callback isn't required, or for whitelists. SB_THREAT_TYPE_UNUSED, // No threat at all. SB_THREAT_TYPE_SAFE, // The URL is being used for phishing. SB_THREAT_TYPE_URL_PHISHING, // The URL hosts malware. SB_THREAT_TYPE_URL_MALWARE, // The URL hosts unwanted programs. SB_THREAT_TYPE_URL_UNWANTED, // The download URL is malware. SB_THREAT_TYPE_URL_BINARY_MALWARE, // Url detected by the client-side phishing model. Note that unlike the // above values, this does not correspond to a downloaded list. SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING, // The Chrome extension or app (given by its ID) is malware. SB_THREAT_TYPE_EXTENSION, // Url detected by the client-side malware IP list. This IP list is part // of the client side detection model. SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE, // Url leads to a blacklisted resource script. Note that no warnings should be // shown on this threat type, but an incident report might be sent. SB_THREAT_TYPE_BLACKLISTED_RESOURCE, // Url abuses a permission API. SB_THREAT_TYPE_API_ABUSE, // Activation patterns for the Subresource Filter. SB_THREAT_TYPE_SUBRESOURCE_FILTER, // CSD Phishing whitelist. This "threat" means a URL matched the whitelist. SB_THREAT_TYPE_CSD_WHITELIST, // Url detected by password protection service. SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING, }; using SBThreatTypeSet = base::flat_set; // Return true if |set| only contains types that are valid for CheckBrowseUrl(). // Intended for use in DCHECK(). bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set); // Shorthand for creating an SBThreatTypeSet from a list of SBThreatTypes. Use // like CreateSBThreatTypeSet({SB_THREAT_TYPE_URL_PHISHING, // SB_THREAT_TYPE_URL_MALWARE}) inline SBThreatTypeSet CreateSBThreatTypeSet( std::initializer_list set) { return SBThreatTypeSet(set, base::KEEP_FIRST_OF_DUPES); } // The information required to uniquely identify each list the client is // interested in maintaining and downloading from the SafeBrowsing servers. // For example, for digests of Malware binaries on Windows: // platform_type = WINDOWS, // threat_entry_type = EXECUTABLE, // threat_type = MALWARE class ListIdentifier { public: ListIdentifier(PlatformType platform_type, ThreatEntryType threat_entry_type, ThreatType threat_type); explicit ListIdentifier(const ListUpdateResponse&); bool operator==(const ListIdentifier& other) const; bool operator!=(const ListIdentifier& other) const; size_t hash() const; PlatformType platform_type() const { return platform_type_; } ThreatEntryType threat_entry_type() const { return threat_entry_type_; } ThreatType threat_type() const { return threat_type_; } private: PlatformType platform_type_; ThreatEntryType threat_entry_type_; ThreatType threat_type_; ListIdentifier() = delete; }; std::ostream& operator<<(std::ostream& os, const ListIdentifier& id); PlatformType GetCurrentPlatformType(); ListIdentifier GetCertCsdDownloadWhitelistId(); ListIdentifier GetChromeExtMalwareId(); ListIdentifier GetChromeFilenameClientIncidentId(); ListIdentifier GetChromeUrlApiId(); ListIdentifier GetChromeUrlClientIncidentId(); ListIdentifier GetIpMalwareId(); ListIdentifier GetUrlCsdDownloadWhitelistId(); ListIdentifier GetUrlCsdWhitelistId(); ListIdentifier GetUrlMalBinId(); ListIdentifier GetUrlMalwareId(); ListIdentifier GetUrlSocEngId(); ListIdentifier GetUrlSubresourceFilterId(); ListIdentifier GetUrlUwsId(); // Returns the basename of the store file, without the ".store" extension. std::string GetUmaSuffixForStore(const base::FilePath& file_path); // Represents the state of each store. using StoreStateMap = std::unordered_map; // Sever response, parsed in vector form. using ParsedServerResponse = std::vector>; // Holds the hash prefix and the store that it matched in. struct StoreAndHashPrefix { public: ListIdentifier list_id; HashPrefix hash_prefix; StoreAndHashPrefix(ListIdentifier list_id, const HashPrefix& hash_prefix); ~StoreAndHashPrefix(); bool operator==(const StoreAndHashPrefix& other) const; bool operator!=(const StoreAndHashPrefix& other) const; size_t hash() const; private: StoreAndHashPrefix() = delete; }; // Used to track the hash prefix and the store in which a full hash's prefix // matched. using StoreAndHashPrefixes = std::vector; // Enumerate failures for histogramming purposes. DO NOT CHANGE THE // ORDERING OF THESE VALUES. enum V4OperationResult { // 200 response code means that the server recognized the request. STATUS_200 = 0, // Subset of successful responses where the response body wasn't parsable. PARSE_ERROR = 1, // Operation request failed (network error). NETWORK_ERROR = 2, // Operation request returned HTTP result code other than 200. HTTP_ERROR = 3, // Operation attempted during error backoff, no request sent. BACKOFF_ERROR = 4, // Operation attempted before min wait duration elapsed, no request sent. MIN_WAIT_DURATION_ERROR = 5, // Identical operation already pending. ALREADY_PENDING_ERROR = 6, // Memory space for histograms is determined by the max. ALWAYS // ADD NEW VALUES BEFORE THIS ONE. OPERATION_RESULT_MAX = 7 }; // A class that provides static methods related to the Pver4 protocol. class V4ProtocolManagerUtil { public: // Canonicalizes url as per Google Safe Browsing Specification. // See: https://developers.google.com/safe-browsing/v4/urls-hashing static void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, std::string* canonicalized_path, std::string* canonicalized_query); // This method returns the host suffix combinations from the hostname in the // URL, as described here: // https://developers.google.com/safe-browsing/v4/urls-hashing static void GenerateHostVariantsToCheck(const std::string& host, std::vector* hosts); // This method returns the path prefix combinations from the path in the // URL, as described here: // https://developers.google.com/safe-browsing/v4/urls-hashing static void GeneratePathVariantsToCheck(const std::string& path, const std::string& query, std::vector* paths); // Given a URL, returns all the patterns we need to check. static void GeneratePatternsToCheck(const GURL& url, std::vector* urls); // Generates a Pver4 request URL and sets the appropriate header values. // |request_base64| is the serialized request protocol buffer encoded in // base 64. // |method_name| is the name of the method to call, as specified in the proto, // |config| is an instance of V4ProtocolConfig that stores the client config, // |gurl| is set to the value of the PVer4 request URL, // |headers| is populated with the appropriate header values. static void GetRequestUrlAndHeaders(const std::string& request_base64, const std::string& method_name, const V4ProtocolConfig& config, GURL* gurl, net::HttpRequestHeaders* headers); // Worker function for calculating the backoff times. // |multiplier| is doubled for each consecutive error after the // first, and |error_count| is incremented with each call. // Backoff interval is MIN(((2^(n-1))*15 minutes) * (RAND + 1), 24 hours) // where n is the number of consecutive errors. static base::TimeDelta GetNextBackOffInterval(size_t* error_count, size_t* multiplier); // Record HTTP response code when there's no error in fetching an HTTP // request, and the error code, when there is. // |metric_name| is the name of the UMA metric to record the response code or // error code against, |status| represents the status of the HTTP request, and // |response code| represents the HTTP response code received from the server. static void RecordHttpResponseOrErrorCode(const char* metric_name, const net::URLRequestStatus& status, int response_code); // Generate the set of FullHashes to check for |url|. static void UrlToFullHashes(const GURL& url, std::vector* full_hashes); static bool FullHashToHashPrefix(const FullHash& full_hash, PrefixSize prefix_size, HashPrefix* hash_prefix); static bool FullHashToSmallestHashPrefix(const FullHash& full_hash, HashPrefix* hash_prefix); static bool FullHashMatchesHashPrefix(const FullHash& full_hash, const HashPrefix& hash_prefix); static void SetClientInfoFromConfig(ClientInfo* client_info, const V4ProtocolConfig& config); static bool GetIPV6AddressFromString(const std::string& ip_address, net::IPAddress* address); // Converts a IPV4 or IPV6 address in |ip_address| to the SHA1 hash of the // corresponding packed IPV6 address in |hashed_encoded_ip|, and adds an // extra byte containing the value 128 at the end. This is done to match the // server implementation for calculating the hash prefix of an IP address. static bool IPAddressToEncodedIPV6Hash(const std::string& ip_address, FullHash* hashed_encoded_ip); private: V4ProtocolManagerUtil() {} FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, TestBackOffLogic); FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, TestGetRequestUrlAndUpdateHeaders); FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, UrlParsing); FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, CanonicalizeUrl); // Composes a URL using |prefix|, |method| (e.g.: encodedFullHashes). // |request_base64|, |client_id|, |version| and |key_param|. |prefix| // should contain the entire url prefix including scheme, host and path. static std::string ComposeUrl(const std::string& prefix, const std::string& method, const std::string& request_base64, const std::string& key_param); // Sets the HTTP headers expected by a standard PVer4 request. static void UpdateHeaders(net::HttpRequestHeaders* headers); // Given a URL, returns all the hosts we need to check. They are returned // in order of size (i.e. b.c is first, then a.b.c). static void GenerateHostsToCheck(const GURL& url, std::vector* hosts); // Given a URL, returns all the paths we need to check. static void GeneratePathsToCheck(const GURL& url, std::vector* paths); static std::string RemoveConsecutiveChars(base::StringPiece str, const char c); DISALLOW_COPY_AND_ASSIGN(V4ProtocolManagerUtil); }; using StoresToCheck = std::unordered_set; } // namespace safe_browsing namespace std { template <> struct hash { std::size_t operator()(const safe_browsing::PlatformType& p) const { return std::hash()(p); } }; template <> struct hash { std::size_t operator()(const safe_browsing::ThreatEntryType& tet) const { return std::hash()(tet); } }; template <> struct hash { std::size_t operator()(const safe_browsing::ThreatType& tt) const { return std::hash()(tt); } }; template <> struct hash { std::size_t operator()(const safe_browsing::ListIdentifier& id) const { return id.hash(); } }; } // namespace std #endif // COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_