diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2016-05-09 14:22:11 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-05-09 15:11:45 +0000 |
commit | 2ddb2d3e14eef3de7dbd0cef553d669b9ac2361c (patch) | |
tree | e75f511546c5fd1a173e87c1f9fb11d7ac8d1af3 /chromium/components/safe_browsing_db | |
parent | a4f3d46271c57e8155ba912df46a05559d14726e (diff) | |
download | qtwebengine-chromium-2ddb2d3e14eef3de7dbd0cef553d669b9ac2361c.tar.gz |
BASELINE: Update Chromium to 51.0.2704.41
Also adds in all smaller components by reversing logic for exclusion.
Change-Id: Ibf90b506e7da088ea2f65dcf23f2b0992c504422
Reviewed-by: Joerg Bornemann <joerg.bornemann@theqtcompany.com>
Diffstat (limited to 'chromium/components/safe_browsing_db')
38 files changed, 6994 insertions, 0 deletions
diff --git a/chromium/components/safe_browsing_db/BUILD.gn b/chromium/components/safe_browsing_db/BUILD.gn new file mode 100644 index 00000000000..dc5c80a7286 --- /dev/null +++ b/chromium/components/safe_browsing_db/BUILD.gn @@ -0,0 +1,269 @@ +# Copyright 2015 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//third_party/protobuf/proto_library.gni") + +proto_library("proto") { + sources = [ + "safebrowsing.proto", + ] +} + +# GYP version: components/safe_browsing_db.gypi:safe_browsing_metadata_proto +proto_library("metadata_proto") { + sources = [ + "metadata.proto", + ] +} + +group("safe_browsing_db_shared") { + deps = [ + ":database_manager", + ":hit_report", + ":prefix_set", + ":proto", + ":util", + ] +} + +group("safe_browsing_db") { + deps = [ + ":safe_browsing_db_shared", + ":v4_update_protocol_manager", + ] +} + +group("safe_browsing_db_mobile") { + deps = [ + ":remote_database_manager", + ":safe_browsing_api_handler", + ":safe_browsing_api_handler_util", + ":safe_browsing_db_shared", + ] +} + +source_set("database_manager") { + sources = [ + "database_manager.cc", + "database_manager.h", + ] + deps = [ + ":hit_report", + ":util", + ":v4_get_hash_protocol_manager", + "//base:base", + "//content/public/browser", + "//content/public/common", + "//net", + "//url:url", + ] +} + +source_set("hit_report") { + sources = [ + "hit_report.cc", + "hit_report.h", + ] + deps = [ + ":util", + "//components/metrics:metrics", + "//url:url", + ] +} + +source_set("prefix_set") { + sources = [ + "prefix_set.cc", + "prefix_set.h", + ] + deps = [ + ":util", + "//base", + ] +} + +source_set("remote_database_manager") { + sources = [ + "remote_database_manager.cc", + "remote_database_manager.h", + ] + deps = [ + ":database_manager", + ":safe_browsing_api_handler", + ":v4_get_hash_protocol_manager", + "//base:base", + "//components/variations", + "//content/public/browser", 
+ "//net", + "//url:url", + ] +} + +source_set("safe_browsing_api_handler") { + sources = [ + "safe_browsing_api_handler.cc", + "safe_browsing_api_handler.h", + ] + deps = [ + ":util", + "//base:base", + "//url:url", + ] +} + +source_set("safe_browsing_api_handler_util") { + sources = [ + "safe_browsing_api_handler_util.cc", + "safe_browsing_api_handler_util.h", + ] + deps = [ + ":metadata_proto", + ":util", + "//base:base", + ] +} + +source_set("test_database_manager") { + sources = [ + "test_database_manager.cc", + "test_database_manager.h", + ] + deps = [ + ":database_manager", + "//base:base", + "//net", + ] +} + +source_set("util") { + sources = [ + "util.cc", + "util.h", + ] + deps = [ + "//base", + "//base:base", + "//crypto", + "//net", + "//url:url", + ] + if (is_win) { + # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. + cflags = [ "/wd4267" ] # Conversion from size_t to 'type'. + } +} + +source_set("v4_get_hash_protocol_manager") { + sources = [ + "v4_get_hash_protocol_manager.cc", + "v4_get_hash_protocol_manager.h", + ] + public_deps = [ + ":proto", + ] + deps = [ + ":util", + ":v4_protocol_manager_util", + "//base", + "//net", + "//url:url", + ] +} + +source_set("v4_local_database_manager") { + sources = [ + "v4_local_database_manager.cc", + "v4_local_database_manager.h", + ] + deps = [ + ":database_manager", + ":hit_report", + ":v4_protocol_manager_util", + ":v4_update_protocol_manager", + "//base", + "//content/public/browser", + "//net", + "//url:url", + ] +} + +source_set("v4_protocol_manager_util") { + sources = [ + "v4_protocol_manager_util.cc", + "v4_protocol_manager_util.h", + ] + deps = [ + ":proto", + "//base", + "//net", + "//url:url", + ] +} + +source_set("v4_update_protocol_manager") { + sources = [ + "v4_update_protocol_manager.cc", + "v4_update_protocol_manager.h", + ] + deps = [ + ":proto", + ":util", + ":v4_protocol_manager_util", + "//base", + "//net", + "//url:url", + ] +} + +source_set("unit_tests") { + testonly = 
true + sources = [ + "prefix_set_unittest.cc", + "util_unittest.cc", + "v4_get_hash_protocol_manager_unittest.cc", + "v4_protocol_manager_util_unittest.cc", + "v4_update_protocol_manager_unittest.cc", + ] + deps = [ + ":prefix_set", + ":proto", + ":util", + ":v4_get_hash_protocol_manager", + ":v4_local_database_manager", + ":v4_protocol_manager_util", + ":v4_update_protocol_manager", + "//base", + "//net", + "//net:test_support", + "//testing/gtest", + "//url", + ] + if (is_win) { + # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. + cflags = [ "/wd4267" ] # Conversion from size_t to 'type'. + } +} + +source_set("unit_tests_mobile") { + testonly = true + sources = [ + "remote_database_manager_unittest.cc", + "safe_browsing_api_handler_unittest.cc", + ] + deps = [ + ":metadata_proto", + ":remote_database_manager", + ":safe_browsing_api_handler", + ":safe_browsing_api_handler_util", + ":util", + "//base", + "//components/variations", + "//testing/gtest", + "//url", + ] + if (is_win) { + # TODO(jschuh): crbug.com/167187 fix size_t to int truncations. + cflags = [ "/wd4267" ] # Conversion from size_t to 'type'. 
+ } +} diff --git a/chromium/components/safe_browsing_db/DEPS b/chromium/components/safe_browsing_db/DEPS new file mode 100644 index 00000000000..8bf7b58fa57 --- /dev/null +++ b/chromium/components/safe_browsing_db/DEPS @@ -0,0 +1,7 @@ +include_rules = [ + "+components/variations", + "+content/public/browser", + "+content/public/common", + "+crypto", + "+net", +] diff --git a/chromium/components/safe_browsing_db/OWNERS b/chromium/components/safe_browsing_db/OWNERS new file mode 100644 index 00000000000..6426bab6780 --- /dev/null +++ b/chromium/components/safe_browsing_db/OWNERS @@ -0,0 +1,2 @@ +mattm@chromium.org +nparker@chromium.org diff --git a/chromium/components/safe_browsing_db/README b/chromium/components/safe_browsing_db/README new file mode 100644 index 00000000000..569c2076937 --- /dev/null +++ b/chromium/components/safe_browsing_db/README @@ -0,0 +1,3 @@ +safe_browsing_db/ will contain the database-related code from existing +safe_browsing/ directories, and will house the new Pver4 database+update +implementation. diff --git a/chromium/components/safe_browsing_db/database_manager.cc b/chromium/components/safe_browsing_db/database_manager.cc new file mode 100644 index 00000000000..b8998b4431f --- /dev/null +++ b/chromium/components/safe_browsing_db/database_manager.cc @@ -0,0 +1,52 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/safe_browsing_db/database_manager.h" + +#include "components/safe_browsing_db/v4_get_hash_protocol_manager.h" +#include "content/public/browser/browser_thread.h" +#include "net/url_request/url_request_context_getter.h" +#include "url/gurl.h" + +using content::BrowserThread; + +namespace safe_browsing { + +SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager() + : v4_get_hash_protocol_manager_(NULL) { +} + +SafeBrowsingDatabaseManager::~SafeBrowsingDatabaseManager() { + DCHECK(v4_get_hash_protocol_manager_ == NULL); +} + +void SafeBrowsingDatabaseManager::StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + if (request_context_getter) { + // Instantiate a V4GetHashProtocolManager. + v4_get_hash_protocol_manager_ = V4GetHashProtocolManager::Create( + request_context_getter, config); + } +} + +// |shutdown| not used. Destroys the v4 protocol managers. This may be called +// multiple times during the life of the DatabaseManager. +// Must be called on IO thread. +void SafeBrowsingDatabaseManager::StopOnIOThread(bool shutdown) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + // This cancels all in-flight GetHash requests. + if (v4_get_hash_protocol_manager_) { + delete v4_get_hash_protocol_manager_; + v4_get_hash_protocol_manager_ = NULL; + } +} + +void SafeBrowsingDatabaseManager::CheckApiBlacklistUrl(const GURL& url, + Client* client) { + // TODO(kcarattini): Implement this. +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/database_manager.h b/chromium/components/safe_browsing_db/database_manager.h new file mode 100644 index 00000000000..12d893d3f9d --- /dev/null +++ b/chromium/components/safe_browsing_db/database_manager.h @@ -0,0 +1,184 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// The Safe Browsing service is responsible for downloading anti-phishing and +// anti-malware tables and checking urls against them. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_DATABASE_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_DATABASE_MANAGER_H_ + +#include <deque> +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/memory/ref_counted.h" +#include "components/safe_browsing_db/hit_report.h" +#include "components/safe_browsing_db/util.h" +#include "content/public/common/resource_type.h" +#include "url/gurl.h" + +namespace net { +class URLRequestContextGetter; +} // namespace net + +namespace safe_browsing { + +struct V4ProtocolConfig; +class V4GetHashProtocolManager; + +// Base class to either the locally-managed or a remotely-managed database. +class SafeBrowsingDatabaseManager + : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager> { + public: + // Callers requesting a result should derive from this class. + // The destructor should call db_manager->CancelCheck(client) if a + // request is still pending. + class Client { + public: + virtual ~Client() {} + + // Called when the result of checking a browse URL is known. + virtual void OnCheckBrowseUrlResult(const GURL& url, + SBThreatType threat_type, + const ThreatMetadata& metadata) {} + + // Called when the result of checking a download URL is known. + virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain, + SBThreatType threat_type) {} + + // Called when the result of checking a set of extensions is known. + virtual void OnCheckExtensionsResult( + const std::set<std::string>& threats) {} + + // Called when the result of checking the API blacklist is known. + virtual void OnCheckApiBlacklistUrlResult(const GURL& url, + const ThreatMetadata& metadata) {} + + // Called when the result of checking the resource blacklist is known. 
+ virtual void OnCheckResourceUrlResult(const GURL& url, + SBThreatType threat_type, + const std::string& threat_hash) {} + }; + + + // Returns true if URL-checking is supported on this build+device. + // If false, calls to CheckBrowseUrl may dcheck-fail. + virtual bool IsSupported() const = 0; + + // Returns the ThreatSource for this implementation. + virtual ThreatSource GetThreatSource() const = 0; + + // Returns true if checks are never done synchronously, and therefore + // always have some latency. + virtual bool ChecksAreAlwaysAsync() const = 0; + + // Returns true if this resource type should be checked. + virtual bool CanCheckResourceType( + content::ResourceType resource_type) const = 0; + + // Returns true if the url's scheme can be checked. + virtual bool CanCheckUrl(const GURL& url) const = 0; + + // Returns whether download protection is enabled. + virtual bool IsDownloadProtectionEnabled() const = 0; + + // Called on the IO thread to check if the given url is safe or not. If we + // can synchronously determine that the url is safe, CheckUrl returns true. + // Otherwise it returns false, and "client" is called asynchronously with the + // result when it is ready. + virtual bool CheckBrowseUrl(const GURL& url, Client* client) = 0; + + // Check if the prefix for |url| is in safebrowsing download add lists. + // Result will be passed to callback in |client|. + virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain, + Client* client) = 0; + + // Check which prefixes in |extension_ids| are in the safebrowsing blacklist. + // Returns true if not, false if further checks need to be made in which case + // the result will be passed to |client|. + virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids, + Client* client) = 0; + + // Check if |url| is in the resources blacklist. Returns true if not, false + // if further checks need to be made in which case the result will be passed + // to callback in |client|. 
+ virtual bool CheckResourceUrl(const GURL& url, Client* client) = 0; + + // Check if the |url| matches any of the full-length hashes from the client- + // side phishing detection whitelist. Returns true if there was a match and + // false otherwise. To make sure we are conservative we will return true if + // an error occurs. This method must be called on the IO thread. + virtual bool MatchCsdWhitelistUrl(const GURL& url) = 0; + + // Check if the given IP address (either IPv4 or IPv6) matches the malware + // IP blacklist. + virtual bool MatchMalwareIP(const std::string& ip_address) = 0; + + // Check if the |url| matches any of the full-length hashes from the download + // whitelist. Returns true if there was a match and false otherwise. To make + // sure we are conservative we will return true if an error occurs. This + // method must be called on the IO thread. + virtual bool MatchDownloadWhitelistUrl(const GURL& url) = 0; + + // Check if |str| matches any of the full-length hashes from the download + // whitelist. Returns true if there was a match and false otherwise. To make + // sure we are conservative we will return true if an error occurs. This + // method must be called on the IO thread. + virtual bool MatchDownloadWhitelistString(const std::string& str) = 0; + + // Check if the |url| matches any of the full-length hashes from the off- + // domain inclusion whitelist. Returns true if there was a match and false + // otherwise. To make sure we are conservative, we will return true if an + // error occurs. This method must be called on the IO thread. + virtual bool MatchInclusionWhitelistUrl(const GURL& url) = 0; + + // Check if |str|, a lowercase DLL file name, matches any of the full-length + // hashes from the module whitelist. Returns true if there was a match and + // false otherwise. To make sure we are conservative we will return true if + // an error occurs. This method must be called on the IO thread. 
+ virtual bool MatchModuleWhitelistString(const std::string& str) = 0; + + // Check if the CSD malware IP matching kill switch is turned on. + virtual bool IsMalwareKillSwitchOn() = 0; + + // Check if the CSD whitelist kill switch is turned on. + virtual bool IsCsdWhitelistKillSwitchOn() = 0; + + // Called on the IO thread to cancel a pending check if the result is no + // longer needed. Also called after the result has been handled. + virtual void CancelCheck(Client* client) = 0; + + // Called on the IO thread to check if the given url has blacklisted APIs. + // "client" is called asynchronously with the result when it is ready. + // This method has the same implementation for both the local and remote + // database managers since it pings Safe Browsing servers directly without + // accessing the database at all. + virtual void CheckApiBlacklistUrl(const GURL& url, Client* client); + + // Called to initialize objects that are used on the io_thread, such as the + // v4 protocol manager. This may be called multiple times during the life of + // the DatabaseManager. Must be called on IO thread. + virtual void StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config); + + // Called to stop or shutdown operations on the io_thread. + virtual void StopOnIOThread(bool shutdown); + + protected: + SafeBrowsingDatabaseManager(); + + virtual ~SafeBrowsingDatabaseManager(); + + friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>; + + // Created and destroyed via StartonIOThread/StopOnIOThread. 
+ V4GetHashProtocolManager* v4_get_hash_protocol_manager_; +}; // class SafeBrowsingDatabaseManager + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_DATABASE_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/hit_report.cc b/chromium/components/safe_browsing_db/hit_report.cc new file mode 100644 index 00000000000..cd14044e114 --- /dev/null +++ b/chromium/components/safe_browsing_db/hit_report.cc @@ -0,0 +1,15 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/hit_report.h" + +namespace safe_browsing { + +HitReport::HitReport() {} + +HitReport::HitReport(const HitReport& other) = default; + +HitReport::~HitReport() {} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/hit_report.h b/chromium/components/safe_browsing_db/hit_report.h new file mode 100644 index 00000000000..158ec983d69 --- /dev/null +++ b/chromium/components/safe_browsing_db/hit_report.h @@ -0,0 +1,52 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Datastructures that hold details of a Safe Browsing hit for reporting. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_HIT_REPORT_H_ +#define COMPONENTS_SAFE_BROWSING_DB_HIT_REPORT_H_ + +#include "components/safe_browsing_db/util.h" +#include "url/gurl.h" + +namespace safe_browsing { + +// What service classified this threat as unsafe. +enum class ThreatSource { + UNKNOWN, + DATA_SAVER, // From the Data Reduction service. 
+ LOCAL_PVER3, // From LocalSafeBrowingDatabaseManager, protocol v3 + LOCAL_PVER4, // From LocalSafeBrowingDatabaseManager, protocol v4 + REMOTE, // From RemoteSafeBrowingDatabaseManager + CLIENT_SIDE_DETECTION, // From ClientSideDetectionHost +}; + +// Data to report about the contents of a particular threat (malware, phishing, +// unsafe download URL). If post_data is non-empty, the request will be +// sent as a POST instead of a GET. +struct HitReport { + HitReport(); + HitReport(const HitReport& other); + ~HitReport(); + + GURL malicious_url; + GURL page_url; + GURL referrer_url; + + bool is_subresource; + SBThreatType threat_type; + ThreatSource threat_source; + + // Opaque string used for tracking Pver4-based experiments + std::string population_id; + + bool is_extended_reporting; + bool is_metrics_reporting_active; + + std::string post_data; +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_HIT_REPORT_H_ diff --git a/chromium/components/safe_browsing_db/metadata.proto b/chromium/components/safe_browsing_db/metadata.proto new file mode 100644 index 00000000000..a0635de3d1c --- /dev/null +++ b/chromium/components/safe_browsing_db/metadata.proto @@ -0,0 +1,23 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +syntax = "proto2"; + +option optimize_for = LITE_RUNTIME; + +package safe_browsing; + +// Everything below this comment was copied from the page +// <https://developers.google.com/safe-browsing/developers_guide_v3>, +// section "Full Hash Metadata". + +// Metadata for the goog-malware-shavar list. 
+message MalwarePatternType { + enum PATTERN_TYPE { + LANDING = 1; + DISTRIBUTION = 2; + } + + required PATTERN_TYPE pattern_type = 1; +} diff --git a/chromium/components/safe_browsing_db/prefix_set.cc b/chromium/components/safe_browsing_db/prefix_set.cc new file mode 100644 index 00000000000..ede92233ebf --- /dev/null +++ b/chromium/components/safe_browsing_db/prefix_set.cc @@ -0,0 +1,453 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/prefix_set.h" + +#include <limits.h> + +#include <algorithm> +#include <utility> + +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/md5.h" +#include "base/metrics/histogram.h" +#include "base/metrics/sparse_histogram.h" + +namespace safe_browsing { + +namespace { + +// |kMagic| should be reasonably unique, and not match itself across +// endianness changes. I generated this value with: +// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 +static uint32_t kMagic = 0x864088dd; + +// Version history: +// Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 +// Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 +// Version 3: dd07faf5/r268145 by shess@chromium.org on 2014-05-05 + +// Version 2 layout is identical to version 1. The sort order of |index_| +// changed from |int32_t| to |uint32_t| to match the change of |SBPrefix|. +// Version 3 adds storage for full hashes. +static uint32_t kVersion = 3; +static uint32_t kDeprecatedVersion = 2; // And lower. 
+ +typedef struct { + uint32_t magic; + uint32_t version; + uint32_t index_size; + uint32_t deltas_size; + uint32_t full_hashes_size; +} FileHeader; + +// Common std::vector<> implementations add capacity by multiplying from the +// current size (usually either by 2 or 1.5) to satisfy push_back() running in +// amortized constant time. This is not necessary for insert() at end(), but +// AFAICT it seems true for some implementations. SBPrefix values should +// uniformly cover the 32-bit space, so the final size can be estimated given a +// subset of the input. +// +// |kEstimateThreshold| is when estimates start converging. Results are strong +// starting around 1<<27. 1<<30 is chosen to prevent the degenerate case of +// resizing capacity from >50% to 100%. +// +// TODO(shess): I'm sure there is math in the world to describe good settings +// for estimating the size of a uniformly-distributed set of integers from a +// sorted subset. I do not have such math in me, so I assumed that my current +// organic database of prefixes was scale-free, and wrote a script to see how +// often given slop values would always suffice for given strides. At 1<<30, +// .5% slop was sufficient to cover all cases (though the code below uses 1%). +// +// TODO(shess): A smaller threshold uses less transient space in reallocation. +// 1<<30 uses between 125% and 150%, 1<<29 between 112% and 125%, etc. The cost +// is that a smaller threshold needs more slop (locked down for the long term). +// 1<<29 worked well with 1%, 1<<27 worked well with 2%. +const SBPrefix kEstimateThreshold = 1 << 30; +size_t EstimateFinalCount(SBPrefix current_prefix, size_t current_count) { + // estimated_count / current_count == estimated_max / current_prefix + // For large input sets, estimated_max of 2^32 is close enough. 
+ const size_t estimated_prefix_count = static_cast<size_t>( + (static_cast<uint64_t>(current_count) << 32) / current_prefix); + + // The estimate has an error bar, if the final total is below the estimate, no + // harm, but if it is above a capacity resize will happen at nearly 100%. Add + // some slop to make sure all cases are covered. + return estimated_prefix_count + estimated_prefix_count / 100; +} + +} // namespace + +// For |std::upper_bound()| to find a prefix w/in a vector of pairs. +// static +bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { + return a.first < b.first; +} + +PrefixSet::PrefixSet() { +} + +PrefixSet::PrefixSet(IndexVector* index, + std::vector<uint16_t>* deltas, + std::vector<SBFullHash>* full_hashes) { + DCHECK(index && deltas && full_hashes); + index_.swap(*index); + deltas_.swap(*deltas); + full_hashes_.swap(*full_hashes); +} + +PrefixSet::~PrefixSet() {} + +bool PrefixSet::PrefixExists(SBPrefix prefix) const { + if (index_.empty()) + return false; + + // Find the first position after |prefix| in |index_|. + IndexVector::const_iterator iter = + std::upper_bound(index_.begin(), index_.end(), + IndexPair(prefix, 0), PrefixLess); + + // |prefix| comes before anything that's in the set. + if (iter == index_.begin()) + return false; + + // Capture the upper bound of our target entry's deltas. + const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second); + + // Back up to the entry our target is in. + --iter; + + // All prefixes in |index_| are in the set. + SBPrefix current = iter->first; + if (current == prefix) + return true; + + // Scan forward accumulating deltas while a match is possible. 
+ for (size_t di = iter->second; di < bound && current < prefix; ++di) { + current += deltas_[di]; + } + + return current == prefix; +} + +bool PrefixSet::Exists(const SBFullHash& hash) const { + if (std::binary_search(full_hashes_.begin(), full_hashes_.end(), + hash, SBFullHashLess)) { + return true; + } + return PrefixExists(hash.prefix); +} + +void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const { + prefixes->reserve(index_.size() + deltas_.size()); + + for (size_t ii = 0; ii < index_.size(); ++ii) { + // The deltas for this |index_| entry run to the next index entry, + // or the end of the deltas. + const size_t deltas_end = + (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size(); + + SBPrefix current = index_[ii].first; + prefixes->push_back(current); + for (size_t di = index_[ii].second; di < deltas_end; ++di) { + current += deltas_[di]; + prefixes->push_back(current); + } + } +} + +// static +scoped_ptr<const PrefixSet> PrefixSet::LoadFile( + const base::FilePath& filter_name) { + int64_t size_64; + if (!base::GetFileSize(filter_name, &size_64)) + return nullptr; + using base::MD5Digest; + if (size_64 < static_cast<int64_t>(sizeof(FileHeader) + sizeof(MD5Digest))) + return nullptr; + + base::ScopedFILE file(base::OpenFile(filter_name, "rb")); + if (!file.get()) + return nullptr; + + FileHeader header; + size_t read = fread(&header, sizeof(header), 1, file.get()); + if (read != 1) + return nullptr; + + // The file looks valid, start building the digest. + base::MD5Context context; + base::MD5Init(&context); + base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), + sizeof(header))); + + if (header.magic != kMagic) + return nullptr; + + // Track version read to inform removal of support for older versions. 
+ UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); + + if (header.version <= kDeprecatedVersion) { + return nullptr; + } else if (header.version != kVersion) { + return nullptr; + } + + IndexVector index; + const size_t index_bytes = sizeof(index[0]) * header.index_size; + + std::vector<uint16_t> deltas; + const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; + + std::vector<SBFullHash> full_hashes; + const size_t full_hashes_bytes = + sizeof(full_hashes[0]) * header.full_hashes_size; + + // Check for bogus sizes before allocating any space. + const size_t expected_bytes = sizeof(header) + + index_bytes + deltas_bytes + full_hashes_bytes + sizeof(MD5Digest); + if (static_cast<int64_t>(expected_bytes) != size_64) + return nullptr; + + // Read the index vector. Herb Sutter indicates that vectors are + // guaranteed to be contiuguous, so reading to where element 0 lives + // is valid. + if (header.index_size) { + index.resize(header.index_size); + read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); + if (read != index.size()) + return nullptr; + base::MD5Update(&context, + base::StringPiece(reinterpret_cast<char*>(&(index[0])), + index_bytes)); + } + + // Read vector of deltas. + if (header.deltas_size) { + deltas.resize(header.deltas_size); + read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); + if (read != deltas.size()) + return nullptr; + base::MD5Update(&context, + base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), + deltas_bytes)); + } + + // Read vector of full hashes. 
+ if (header.full_hashes_size) { + full_hashes.resize(header.full_hashes_size); + read = fread(&(full_hashes[0]), sizeof(full_hashes[0]), full_hashes.size(), + file.get()); + if (read != full_hashes.size()) + return nullptr; + base::MD5Update(&context, + base::StringPiece( + reinterpret_cast<char*>(&(full_hashes[0])), + full_hashes_bytes)); + } + + base::MD5Digest calculated_digest; + base::MD5Final(&calculated_digest, &context); + + base::MD5Digest file_digest; + read = fread(&file_digest, sizeof(file_digest), 1, file.get()); + if (read != 1) + return nullptr; + + if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) + return nullptr; + + // Steals vector contents using swap(). + return make_scoped_ptr( + new PrefixSet(&index, &deltas, &full_hashes)); +} + +bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { + FileHeader header; + header.magic = kMagic; + header.version = kVersion; + header.index_size = static_cast<uint32_t>(index_.size()); + header.deltas_size = static_cast<uint32_t>(deltas_.size()); + header.full_hashes_size = static_cast<uint32_t>(full_hashes_.size()); + + // Sanity check that the 32-bit values never mess things up. + if (static_cast<size_t>(header.index_size) != index_.size() || + static_cast<size_t>(header.deltas_size) != deltas_.size() || + static_cast<size_t>(header.full_hashes_size) != full_hashes_.size()) { + NOTREACHED(); + return false; + } + + base::ScopedFILE file(base::OpenFile(filter_name, "wb")); + if (!file.get()) + return false; + + base::MD5Context context; + base::MD5Init(&context); + + // TODO(shess): The I/O code in safe_browsing_store_file.cc would + // sure be useful about now. 
+ size_t written = fwrite(&header, sizeof(header), 1, file.get()); + if (written != 1) + return false; + base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), + sizeof(header))); + + // As for reads, the standard guarantees the ability to access the + // contents of the vector by a pointer to an element. + if (index_.size()) { + const size_t index_bytes = sizeof(index_[0]) * index_.size(); + written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(), + file.get()); + if (written != index_.size()) + return false; + base::MD5Update(&context, + base::StringPiece( + reinterpret_cast<const char*>(&(index_[0])), + index_bytes)); + } + + if (deltas_.size()) { + const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size(); + written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), + file.get()); + if (written != deltas_.size()) + return false; + base::MD5Update(&context, + base::StringPiece( + reinterpret_cast<const char*>(&(deltas_[0])), + deltas_bytes)); + } + + if (full_hashes_.size()) { + const size_t elt_size = sizeof(full_hashes_[0]); + const size_t elts = full_hashes_.size(); + const size_t full_hashes_bytes = elt_size * elts; + written = fwrite(&(full_hashes_[0]), elt_size, elts, file.get()); + if (written != elts) + return false; + base::MD5Update(&context, + base::StringPiece( + reinterpret_cast<const char*>(&(full_hashes_[0])), + full_hashes_bytes)); + } + + base::MD5Digest digest; + base::MD5Final(&digest, &context); + written = fwrite(&digest, sizeof(digest), 1, file.get()); + if (written != 1) + return false; + + // TODO(shess): Can this code check that the close was successful? + file.reset(); + + return true; +} + +void PrefixSet::AddRun(SBPrefix index_prefix, + const uint16_t* run_begin, + const uint16_t* run_end) { + // Preempt organic capacity decisions for |delta_| once strong estimates can + // be made. 
+ if (index_prefix > kEstimateThreshold && + deltas_.capacity() < deltas_.size() + (run_end - run_begin)) { + deltas_.reserve(EstimateFinalCount(index_prefix, deltas_.size())); + } + + index_.push_back( + std::make_pair(index_prefix, static_cast<uint32_t>(deltas_.size()))); + deltas_.insert(deltas_.end(), run_begin, run_end); +} + +PrefixSetBuilder::PrefixSetBuilder() + : prefix_set_(new PrefixSet()) { +} + +PrefixSetBuilder::PrefixSetBuilder(const std::vector<SBPrefix>& prefixes) + : prefix_set_(new PrefixSet()) { + for (size_t i = 0; i < prefixes.size(); ++i) { + AddPrefix(prefixes[i]); + } +} + +PrefixSetBuilder::~PrefixSetBuilder() { +} + +scoped_ptr<const PrefixSet> PrefixSetBuilder::GetPrefixSet( + const std::vector<SBFullHash>& hashes) { + DCHECK(prefix_set_.get()); + + // Flush runs until buffered data is gone. + while (!buffer_.empty()) { + EmitRun(); + } + + // Precisely size |index_| for read-only. It's 50k-60k, so minor savings, but + // they're almost free. + PrefixSet::IndexVector(prefix_set_->index_).swap(prefix_set_->index_); + + prefix_set_->full_hashes_ = hashes; + std::sort(prefix_set_->full_hashes_.begin(), prefix_set_->full_hashes_.end(), + SBFullHashLess); + + return std::move(prefix_set_); +} + +scoped_ptr<const PrefixSet> PrefixSetBuilder::GetPrefixSetNoHashes() { + return GetPrefixSet(std::vector<SBFullHash>()); +} + +void PrefixSetBuilder::EmitRun() { + DCHECK(prefix_set_.get()); + + SBPrefix prev_prefix = buffer_[0]; + uint16_t run[PrefixSet::kMaxRun]; + size_t run_pos = 0; + + size_t i; + for (i = 1; i < buffer_.size() && run_pos < PrefixSet::kMaxRun; ++i) { + // Calculate the delta. |unsigned| is mandatory, because the + // sorted_prefixes could be more than INT_MAX apart. + DCHECK_GT(buffer_[i], prev_prefix); + const unsigned delta = buffer_[i] - prev_prefix; + const uint16_t delta16 = static_cast<uint16_t>(delta); + + // Break the run if the delta doesn't fit. 
+ if (delta != static_cast<unsigned>(delta16)) + break; + + // Continue the run of deltas. + run[run_pos++] = delta16; + DCHECK_EQ(static_cast<unsigned>(run[run_pos - 1]), delta); + + prev_prefix = buffer_[i]; + } + prefix_set_->AddRun(buffer_[0], run, run + run_pos); + buffer_.erase(buffer_.begin(), buffer_.begin() + i); +} + +void PrefixSetBuilder::AddPrefix(SBPrefix prefix) { + DCHECK(prefix_set_.get()); + + if (buffer_.empty()) { + DCHECK(prefix_set_->index_.empty()); + DCHECK(prefix_set_->deltas_.empty()); + } else { + // Drop duplicates. + if (buffer_.back() == prefix) + return; + + DCHECK_LT(buffer_.back(), prefix); + } + buffer_.push_back(prefix); + + // Flush buffer when a run can be constructed. +1 for the index item, and +1 + // to leave at least one item in the buffer for dropping duplicates. + if (buffer_.size() > PrefixSet::kMaxRun + 2) + EmitRun(); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/prefix_set.h b/chromium/components/safe_browsing_db/prefix_set.h new file mode 100644 index 00000000000..3230341fb6e --- /dev/null +++ b/chromium/components/safe_browsing_db/prefix_set.h @@ -0,0 +1,191 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// A read-only set implementation for |SBPrefix| items. Prefixes are +// sorted and stored as 16-bit deltas from the previous prefix. An +// index structure provides quick random access, and also handles +// cases where 16 bits cannot encode a delta. +// +// For example, the sequence {20, 25, 41, 65432, 150000, 160000} would +// be stored as: +// A pair {20, 0} in |index_|. +// 5, 16, 65391 in |deltas_|. +// A pair {150000, 3} in |index_|. +// 10000 in |deltas_|. +// |index_.size()| will be 2, |deltas_.size()| will be 4. +// +// This structure is intended for storage of sparse uniform sets of +// prefixes of a certain size. 
As of this writing, my safe-browsing +// database contains: +// 653132 add prefixes +// 6446 are duplicates (from different chunks) +// 24301 w/in 2^8 of the prior prefix +// 622337 w/in 2^16 of the prior prefix +// 47 further than 2^16 from the prior prefix +// For this input, the memory usage is approximately 2 bytes per +// prefix, a bit over 1.2M. The bloom filter used 25 bits per prefix, +// a bit over 1.9M on this data. +// +// Experimenting with random selections of the above data, storage +// size drops almost linearly as prefix count drops, until the index +// overhead starts to become a problem a bit under 200k prefixes. The +// memory footprint gets worse than storing the raw prefix data around +// 75k prefixes. Fortunately, the actual memory footprint also falls. +// If the prefix count increases the memory footprint should increase +// approximately linearly. The worst-case would be 2^16 items all +// 2^16 apart, which would need 512k (versus 256k to store the raw +// data). +// +// The on-disk format looks like: +// 4 byte magic number +// 4 byte version number +// 4 byte |index_.size()| +// 4 byte |deltas_.size()| +// n * 8 byte |&index_[0]..&index_[n]| +// m * 2 byte |&deltas_[0]..&deltas_[m]| +// 16 byte digest + +#ifndef COMPONENTS_SAFE_BROWSING_DB_PREFIX_SET_H_ +#define COMPONENTS_SAFE_BROWSING_DB_PREFIX_SET_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <utility> +#include <vector> + +#include "base/gtest_prod_util.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "components/safe_browsing_db/util.h" + +namespace base { +class FilePath; +} + +namespace safe_browsing { + +class PrefixSet { + public: + ~PrefixSet(); + + // |true| if |hash| is in the hashes passed to the set's builder, or if + // |hash.prefix| is one of the prefixes passed to the set's builder. + bool Exists(const SBFullHash& hash) const; + + // Persist the set on disk. 
+ static scoped_ptr<const PrefixSet> LoadFile( + const base::FilePath& filter_name); + bool WriteFile(const base::FilePath& filter_name) const; + + private: + friend class PrefixSetBuilder; + + friend class PrefixSetTest; + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, AllBig); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, EdgeCases); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, Empty); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, FullHashBuild); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, IntMinMax); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, OneElement); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, ReadWrite); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, ReadWriteSigned); + FRIEND_TEST_ALL_PREFIXES(PrefixSetTest, Version3); + + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, BasicStore); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, DeleteChunks); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, DetectsCorruption); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, Empty); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, PrefixMinMax); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, SubKnockout); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, Version7); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingStoreFileTest, Version8); + + // Maximum number of consecutive deltas to encode before generating + // a new index entry. This helps keep the worst-case performance + // for |Exists()| under control. + static const size_t kMaxRun = 100; + + // Helpers to make |index_| easier to deal with. + typedef std::pair<SBPrefix, uint32_t> IndexPair; + typedef std::vector<IndexPair> IndexVector; + static bool PrefixLess(const IndexPair& a, const IndexPair& b); + + // Helper to let |PrefixSetBuilder| add a run of data. |index_prefix| is + // added to |index_|, with the other elements added into |deltas_|. + void AddRun(SBPrefix index_prefix, + const uint16_t* run_begin, + const uint16_t* run_end); + + // |true| if |prefix| is one of the prefixes passed to the set's builder. 
+ // Provided for testing purposes. + bool PrefixExists(SBPrefix prefix) const; + + // Regenerate the vector of prefixes passed to the constructor into + // |prefixes|. Prefixes will be added in sorted order. Useful for testing. + void GetPrefixes(std::vector<SBPrefix>* prefixes) const; + + // Used by |PrefixSetBuilder|. + PrefixSet(); + + // Helper for |LoadFile()|. Steals vector contents using |swap()|. + PrefixSet(IndexVector* index, + std::vector<uint16_t>* deltas, + std::vector<SBFullHash>* full_hashes); + + // Top-level index of prefix to offset in |deltas_|. Each pair + // indicates a base prefix and where the deltas from that prefix + // begin in |deltas_|. The deltas for a pair end at the next pair's + // index into |deltas_|. + IndexVector index_; + + // Deltas which are added to the prefix in |index_| to generate + // prefixes. Deltas are only valid between consecutive items from + // |index_|, or the end of |deltas_| for the last |index_| pair. + std::vector<uint16_t> deltas_; + + // Full hashes ordered by SBFullHashLess. + std::vector<SBFullHash> full_hashes_; + + DISALLOW_COPY_AND_ASSIGN(PrefixSet); +}; + +// Helper to incrementally build a PrefixSet from a stream of sorted prefixes. +class PrefixSetBuilder { + public: + PrefixSetBuilder(); + ~PrefixSetBuilder(); + + // Helper for unit tests and format conversion. + explicit PrefixSetBuilder(const std::vector<SBPrefix>& prefixes); + + // Add a prefix to the set. Prefixes must arrive in ascending order. + // Duplicate prefixes are dropped. + void AddPrefix(SBPrefix prefix); + + // Flush any buffered prefixes, and return the final PrefixSet instance. + // |hashes| are sorted and stored in |full_hashes_|. Any call other than the + // destructor is illegal after this call. + scoped_ptr<const PrefixSet> GetPrefixSet( + const std::vector<SBFullHash>& hashes); + + // Helper for clients which only track prefixes. Calls GetPrefixSet() with + // empty hash vector. 
+ scoped_ptr<const PrefixSet> GetPrefixSetNoHashes(); + + private: + // Encode a run of deltas for |AddRun()|. The run is broken by a too-large + // delta, or kMaxRun, whichever comes first. + void EmitRun(); + + // Buffers prefixes until enough are avaliable to emit a run. + std::vector<SBPrefix> buffer_; + + // The PrefixSet being built. + scoped_ptr<PrefixSet> prefix_set_; +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_PREFIX_SET_H_ diff --git a/chromium/components/safe_browsing_db/prefix_set_unittest.cc b/chromium/components/safe_browsing_db/prefix_set_unittest.cc new file mode 100644 index 00000000000..0185afa43a6 --- /dev/null +++ b/chromium/components/safe_browsing_db/prefix_set_unittest.cc @@ -0,0 +1,734 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/prefix_set.h" + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <algorithm> +#include <iterator> +#include <set> +#include <string> + +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/files/scoped_temp_dir.h" +#include "base/logging.h" +#include "base/md5.h" +#include "base/memory/scoped_ptr.h" +#include "base/path_service.h" +#include "base/rand_util.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" +#include "build/build_config.h" +#include "components/safe_browsing_db/util.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +namespace safe_browsing { + +namespace { + +const SBPrefix kHighBitClear = 1000u * 1000u * 1000u; +const SBPrefix kHighBitSet = 3u * 1000u * 1000u * 1000u; + +} // namespace + +class PrefixSetTest : public PlatformTest { + protected: + // Constants for the v1 format. 
+ static const size_t kMagicOffset = 0 * sizeof(uint32_t); + static const size_t kVersionOffset = 1 * sizeof(uint32_t); + static const size_t kIndexSizeOffset = 2 * sizeof(uint32_t); + static const size_t kDeltasSizeOffset = 3 * sizeof(uint32_t); + static const size_t kFullHashesSizeOffset = 4 * sizeof(uint32_t); + static const size_t kPayloadOffset = 5 * sizeof(uint32_t); + + // Generate a set of random prefixes to share between tests. For + // most tests this generation was a large fraction of the test time. + // + // The set should contain sparse areas where adjacent items are more + // than 2^16 apart, and dense areas where adjacent items are less + // than 2^16 apart. + static void SetUpTestCase() { + // Distribute clusters of prefixes. + for (size_t i = 0; i < 250; ++i) { + // Unsigned for overflow characteristics. + const uint32_t base = static_cast<uint32_t>(base::RandUint64()); + for (size_t j = 0; j < 10; ++j) { + const uint32_t delta = + static_cast<uint32_t>(base::RandUint64() & 0xFFFF); + const SBPrefix prefix = static_cast<SBPrefix>(base + delta); + shared_prefixes_.push_back(prefix); + } + } + + // Lay down a sparsely-distributed layer. + const size_t count = shared_prefixes_.size(); + for (size_t i = 0; i < count; ++i) { + const SBPrefix prefix = static_cast<SBPrefix>(base::RandUint64()); + shared_prefixes_.push_back(prefix); + } + + // Sort for use with PrefixSet constructor. + std::sort(shared_prefixes_.begin(), shared_prefixes_.end()); + } + + // Check that all elements of |prefixes| are in |prefix_set|, and + // that nearby elements are not (for lack of a more sensible set of + // items to check for absence). + static void CheckPrefixes(const PrefixSet& prefix_set, + const std::vector<SBPrefix> &prefixes) { + // The set can generate the prefixes it believes it has, so that's + // a good starting point. 
+ std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + EXPECT_EQ(prefixes_copy.size(), check.size()); + EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); + + for (size_t i = 0; i < prefixes.size(); ++i) { + EXPECT_TRUE(prefix_set.PrefixExists(prefixes[i])); + + const SBPrefix left_sibling = prefixes[i] - 1; + if (check.count(left_sibling) == 0) + EXPECT_FALSE(prefix_set.PrefixExists(left_sibling)); + + const SBPrefix right_sibling = prefixes[i] + 1; + if (check.count(right_sibling) == 0) + EXPECT_FALSE(prefix_set.PrefixExists(right_sibling)); + } + } + + // Generate a |PrefixSet| file from |shared_prefixes_|, store it in + // a temporary file, and return the filename in |filenamep|. + // Returns |true| on success. + bool GetPrefixSetFile(base::FilePath* filenamep) { + if (!temp_dir_.IsValid() && !temp_dir_.CreateUniqueTempDir()) + return false; + + base::FilePath filename = temp_dir_.path().AppendASCII("PrefixSetTest"); + + PrefixSetBuilder builder(shared_prefixes_); + if (!builder.GetPrefixSetNoHashes()->WriteFile(filename)) + return false; + + *filenamep = filename; + return true; + } + + // Helper function to read the uint32_t value at |offset|, increment it + // by |inc|, and write it back in place. |fp| should be opened in + // r+ mode. + static void IncrementIntAt(FILE* fp, long offset, int inc) { + uint32_t value = 0; + + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fread(&value, sizeof(value), 1, fp)); + + value += inc; + + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&value, sizeof(value), 1, fp)); + } + + // Helper function to re-generated |fp|'s checksum to be correct for + // the file's contents. |fp| should be opened in r+ mode. 
+ static void CleanChecksum(FILE* fp) { + base::MD5Context context; + base::MD5Init(&context); + + ASSERT_NE(-1, fseek(fp, 0, SEEK_END)); + long file_size = ftell(fp); + + using base::MD5Digest; + size_t payload_size = static_cast<size_t>(file_size) - sizeof(MD5Digest); + size_t digested_size = 0; + ASSERT_NE(-1, fseek(fp, 0, SEEK_SET)); + while (digested_size < payload_size) { + char buf[1024]; + size_t nitems = std::min(payload_size - digested_size, sizeof(buf)); + ASSERT_EQ(nitems, fread(buf, 1, nitems, fp)); + base::MD5Update(&context, base::StringPiece(buf, nitems)); + digested_size += nitems; + } + ASSERT_EQ(digested_size, payload_size); + ASSERT_EQ(static_cast<long>(digested_size), ftell(fp)); + + base::MD5Digest new_digest; + base::MD5Final(&new_digest, &context); + ASSERT_NE(-1, fseek(fp, digested_size, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&new_digest, sizeof(new_digest), 1, fp)); + ASSERT_EQ(file_size, ftell(fp)); + } + + // Open |filename| and increment the uint32_t at |offset| by |inc|. + // Then re-generate the checksum to account for the new contents. + void ModifyAndCleanChecksum(const base::FilePath& filename, long offset, + int inc) { + int64_t size_64; + ASSERT_TRUE(base::GetFileSize(filename, &size_64)); + + base::ScopedFILE file(base::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), offset, inc); + CleanChecksum(file.get()); + file.reset(); + + int64_t new_size_64; + ASSERT_TRUE(base::GetFileSize(filename, &new_size_64)); + ASSERT_EQ(new_size_64, size_64); + } + + base::FilePath TestFilePath() { + base::FilePath path; + PathService::Get(base::DIR_SOURCE_ROOT, &path); + return path.AppendASCII("components") + .AppendASCII("test") + .AppendASCII("data") + .AppendASCII("SafeBrowsingDb"); + } + + // Fill |prefixes| with values read from a reference file. The reference file + // was generated from a specific |shared_prefixes_|. 
+ bool ReadReferencePrefixes(std::vector<SBPrefix>* prefixes) { + const char kRefname[] = "PrefixSetRef"; + base::FilePath ref_path = TestFilePath(); + ref_path = ref_path.AppendASCII(kRefname); + + base::ScopedFILE file(base::OpenFile(ref_path, "r")); + if (!file.get()) + return false; + char buf[1024]; + while (fgets(buf, sizeof(buf), file.get())) { + std::string trimmed; + if (base::TRIM_TRAILING != + base::TrimWhitespaceASCII(buf, base::TRIM_ALL, &trimmed)) + return false; + unsigned prefix; + if (!base::StringToUint(trimmed, &prefix)) + return false; + prefixes->push_back(prefix); + } + return true; + } + + // Tests should not modify this shared resource. + static std::vector<SBPrefix> shared_prefixes_; + + base::ScopedTempDir temp_dir_; +}; + +std::vector<SBPrefix> PrefixSetTest::shared_prefixes_; + +// Test that a small sparse random input works. +TEST_F(PrefixSetTest, Baseline) { + PrefixSetBuilder builder(shared_prefixes_); + CheckPrefixes(*builder.GetPrefixSetNoHashes(), shared_prefixes_); +} + +// Test that the empty set doesn't appear to have anything in it. +TEST_F(PrefixSetTest, Empty) { + const std::vector<SBPrefix> empty; + PrefixSetBuilder builder(empty); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSetNoHashes(); + for (size_t i = 0; i < shared_prefixes_.size(); ++i) { + EXPECT_FALSE(prefix_set->PrefixExists(shared_prefixes_[i])); + } +} + +// Single-element set should work fine. +TEST_F(PrefixSetTest, OneElement) { + const std::vector<SBPrefix> prefixes(100, 0u); + PrefixSetBuilder builder(prefixes); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSetNoHashes(); + EXPECT_FALSE(prefix_set->PrefixExists(static_cast<SBPrefix>(-1))); + EXPECT_TRUE(prefix_set->PrefixExists(prefixes[0])); + EXPECT_FALSE(prefix_set->PrefixExists(1u)); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. 
+ std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + EXPECT_EQ(1U, prefixes_copy.size()); + EXPECT_EQ(prefixes[0], prefixes_copy[0]); +} + +// Edges of the 32-bit integer range. +TEST_F(PrefixSetTest, IntMinMax) { + std::vector<SBPrefix> prefixes; + + // Using bit patterns rather than portable constants because this + // really is testing how the entire 32-bit integer range is handled. + prefixes.push_back(0x00000000); + prefixes.push_back(0x0000FFFF); + prefixes.push_back(0x7FFF0000); + prefixes.push_back(0x7FFFFFFF); + prefixes.push_back(0x80000000); + prefixes.push_back(0x8000FFFF); + prefixes.push_back(0xFFFF0000); + prefixes.push_back(0xFFFFFFFF); + + std::sort(prefixes.begin(), prefixes.end()); + PrefixSetBuilder builder(prefixes); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSetNoHashes(); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. + std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + ASSERT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); +} + +// A range with only large deltas. +TEST_F(PrefixSetTest, AllBig) { + std::vector<SBPrefix> prefixes; + + const unsigned kDelta = 10 * 1000 * 1000; + for (SBPrefix prefix = kHighBitClear; + prefix < kHighBitSet; prefix += kDelta) { + prefixes.push_back(prefix); + } + + std::sort(prefixes.begin(), prefixes.end()); + PrefixSetBuilder builder(prefixes); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSetNoHashes(); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. 
+ std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + EXPECT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); +} + +// Use artificial inputs to test various edge cases in PrefixExists(). Items +// before the lowest item aren't present. Items after the largest item aren't +// present. Create a sequence of items with deltas above and below 2^16, and +// make sure they're all present. Create a very long sequence with deltas below +// 2^16 to test crossing |kMaxRun|. +TEST_F(PrefixSetTest, EdgeCases) { + std::vector<SBPrefix> prefixes; + + // Put in a high-bit prefix. + SBPrefix prefix = kHighBitSet; + prefixes.push_back(prefix); + + // Add a sequence with very large deltas. + unsigned delta = 100 * 1000 * 1000; + for (int i = 0; i < 10; ++i) { + prefix += delta; + prefixes.push_back(prefix); + } + + // Add a sequence with deltas that start out smaller than the + // maximum delta, and end up larger. Also include some duplicates. + delta = 256 * 256 - 100; + for (int i = 0; i < 200; ++i) { + prefix += delta; + prefixes.push_back(prefix); + prefixes.push_back(prefix); + delta++; + } + + // Add a long sequence with deltas smaller than the maximum delta, + // so a new index item will be injected. + delta = 256 * 256 - 1; + prefix = kHighBitClear - delta * 1000; + prefixes.push_back(prefix); + for (int i = 0; i < 1000; ++i) { + prefix += delta; + prefixes.push_back(prefix); + delta--; + } + + std::sort(prefixes.begin(), prefixes.end()); + PrefixSetBuilder builder(prefixes); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSetNoHashes(); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. 
+ std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + EXPECT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); + + // Items before and after the set are not present, and don't crash. + EXPECT_FALSE(prefix_set->PrefixExists(kHighBitSet - 100)); + EXPECT_FALSE(prefix_set->PrefixExists(kHighBitClear + 100)); + + // Check that the set correctly flags all of the inputs, and also + // check items just above and below the inputs to make sure they + // aren't present. + for (size_t i = 0; i < prefixes.size(); ++i) { + EXPECT_TRUE(prefix_set->PrefixExists(prefixes[i])); + + EXPECT_FALSE(prefix_set->PrefixExists(prefixes[i] - 1)); + EXPECT_FALSE(prefix_set->PrefixExists(prefixes[i] + 1)); + } +} + +// Test writing a prefix set to disk and reading it back in. +TEST_F(PrefixSetTest, ReadWrite) { + base::FilePath filename; + + // Write the sample prefix set out, read it back in, and check all + // the prefixes. Leaves the path in |filename|. + { + ASSERT_TRUE(GetPrefixSetFile(&filename)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_TRUE(prefix_set.get()); + CheckPrefixes(*prefix_set, shared_prefixes_); + } + + // Test writing and reading a very sparse set containing no deltas. + { + std::vector<SBPrefix> prefixes; + prefixes.push_back(kHighBitClear); + prefixes.push_back(kHighBitSet); + + PrefixSetBuilder builder(prefixes); + ASSERT_TRUE(builder.GetPrefixSetNoHashes()->WriteFile(filename)); + + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_TRUE(prefix_set.get()); + CheckPrefixes(*prefix_set, prefixes); + } + + // Test writing and reading an empty set. 
+ { + std::vector<SBPrefix> prefixes; + PrefixSetBuilder builder(prefixes); + ASSERT_TRUE(builder.GetPrefixSetNoHashes()->WriteFile(filename)); + + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_TRUE(prefix_set.get()); + CheckPrefixes(*prefix_set, prefixes); + } + + // Test that full hashes are persisted. + { + std::vector<SBFullHash> hashes; + hashes.push_back(SBFullHashForString("one")); + hashes.push_back(SBFullHashForString("two")); + hashes.push_back(SBFullHashForString("three")); + + std::vector<SBPrefix> prefixes(shared_prefixes_); + + // Remove any collisions from the prefixes. + for (size_t i = 0; i < hashes.size(); ++i) { + std::vector<SBPrefix>::iterator iter = + std::lower_bound(prefixes.begin(), prefixes.end(), hashes[i].prefix); + if (iter != prefixes.end() && *iter == hashes[i].prefix) + prefixes.erase(iter); + } + + PrefixSetBuilder builder(prefixes); + ASSERT_TRUE(builder.GetPrefixSet(hashes)->WriteFile(filename)); + + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_TRUE(prefix_set.get()); + CheckPrefixes(*prefix_set, prefixes); + + EXPECT_TRUE(prefix_set->Exists(hashes[0])); + EXPECT_TRUE(prefix_set->Exists(hashes[1])); + EXPECT_TRUE(prefix_set->Exists(hashes[2])); + EXPECT_FALSE(prefix_set->PrefixExists(hashes[0].prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(hashes[1].prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(hashes[2].prefix)); + } +} + +// Check that |CleanChecksum()| makes an acceptable checksum. +TEST_F(PrefixSetTest, CorruptionHelpers) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // This will modify data in |index_|, which will fail the digest check. 
+ base::ScopedFILE file(base::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), kPayloadOffset, 1); + file.reset(); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); + + // Fix up the checksum and it will read successfully (though the + // data will be wrong). + file.reset(base::OpenFile(filename, "r+b")); + CleanChecksum(file.get()); + file.reset(); + prefix_set = PrefixSet::LoadFile(filename); + ASSERT_TRUE(prefix_set.get()); +} + +// Bad magic is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionMagic) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kMagicOffset, 1)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad version is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionVersion) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kVersionOffset, 10)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionIndexSize) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kIndexSizeOffset, 1)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |deltas_| size is caught by the sanity check. 
+TEST_F(PrefixSetTest, CorruptionDeltasSize) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kDeltasSizeOffset, 1)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |full_hashes_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionFullHashesSize) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kFullHashesSizeOffset, 1)); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test that the digest catches corruption in the middle of the file +// (in the payload between the header and the digest). +TEST_F(PrefixSetTest, CorruptionPayload) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + base::ScopedFILE file(base::OpenFile(filename, "r+b")); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), 666, 1)); + file.reset(); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test corruption in the digest itself. +TEST_F(PrefixSetTest, CorruptionDigest) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + int64_t size_64; + ASSERT_TRUE(base::GetFileSize(filename, &size_64)); + base::ScopedFILE file(base::OpenFile(filename, "r+b")); + long digest_offset = static_cast<long>(size_64 - sizeof(base::MD5Digest)); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), digest_offset, 1)); + file.reset(); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test excess data after the digest (fails the size test). +TEST_F(PrefixSetTest, CorruptionExcess) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // Add some junk to the trunk. 
+ base::ScopedFILE file(base::OpenFile(filename, "ab")); + const char buf[] = "im in ur base, killing ur d00dz."; + ASSERT_EQ(strlen(buf), fwrite(buf, 1, strlen(buf), file.get())); + file.reset(); + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test that files which had 64-bit size_t are discarded. +TEST_F(PrefixSetTest, SizeTRecovery) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // Open the file for rewrite. + base::ScopedFILE file(base::OpenFile(filename, "r+b")); + + // Leave existing magic and version. + ASSERT_NE(-1, fseek(file.get(), sizeof(uint32_t) * 2, SEEK_SET)); + + // Indicate two index values and two deltas. + uint32_t val = 2; + ASSERT_EQ(sizeof(val), fwrite(&val, 1, sizeof(val), file.get())); + ASSERT_EQ(sizeof(val), fwrite(&val, 1, sizeof(val), file.get())); + + // Write two index values with 64-bit "size_t". + std::pair<SBPrefix, uint64_t> item; + memset(&item, 0, sizeof(item)); // Includes any padding. + item.first = 17; + item.second = 0; + ASSERT_EQ(sizeof(item), fwrite(&item, 1, sizeof(item), file.get())); + item.first = 100042; + item.second = 1; + ASSERT_EQ(sizeof(item), fwrite(&item, 1, sizeof(item), file.get())); + + // Write two delta values. + uint16_t delta = 23; + ASSERT_EQ(sizeof(delta), fwrite(&delta, 1, sizeof(delta), file.get())); + ASSERT_EQ(sizeof(delta), fwrite(&delta, 1, sizeof(delta), file.get())); + + // Leave space for the digest at the end, and regenerate it. + base::MD5Digest dummy = { { 0 } }; + ASSERT_EQ(sizeof(dummy), fwrite(&dummy, 1, sizeof(dummy), file.get())); + ASSERT_TRUE(base::TruncateFile(file.get())); + CleanChecksum(file.get()); + file.reset(); // Flush updates. + + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test Exists() against full hashes passed to builder. 
+TEST_F(PrefixSetTest, FullHashBuild) { + const SBFullHash kHash1 = SBFullHashForString("one"); + const SBFullHash kHash2 = SBFullHashForString("two"); + const SBFullHash kHash3 = SBFullHashForString("three"); + const SBFullHash kHash4 = SBFullHashForString("four"); + const SBFullHash kHash5 = SBFullHashForString("five"); + const SBFullHash kHash6 = SBFullHashForString("six"); + + std::vector<SBPrefix> prefixes; + prefixes.push_back(kHash1.prefix); + prefixes.push_back(kHash2.prefix); + std::sort(prefixes.begin(), prefixes.end()); + + std::vector<SBFullHash> hashes; + hashes.push_back(kHash4); + hashes.push_back(kHash5); + + PrefixSetBuilder builder(prefixes); + scoped_ptr<const PrefixSet> prefix_set = builder.GetPrefixSet(hashes); + + EXPECT_TRUE(prefix_set->Exists(kHash1)); + EXPECT_TRUE(prefix_set->Exists(kHash2)); + EXPECT_FALSE(prefix_set->Exists(kHash3)); + EXPECT_TRUE(prefix_set->Exists(kHash4)); + EXPECT_TRUE(prefix_set->Exists(kHash5)); + EXPECT_FALSE(prefix_set->Exists(kHash6)); + + EXPECT_TRUE(prefix_set->PrefixExists(kHash1.prefix)); + EXPECT_TRUE(prefix_set->PrefixExists(kHash2.prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash3.prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash4.prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash5.prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash6.prefix)); +} + +// Test that a version 1 file is discarded on read. +TEST_F(PrefixSetTest, ReadSigned) { + base::FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // Open the file for rewrite. + base::ScopedFILE file(base::OpenFile(filename, "r+b")); + + // Leave existing magic. + ASSERT_NE(-1, fseek(file.get(), sizeof(uint32_t), SEEK_SET)); + + // Version 1. + uint32_t version = 1; + ASSERT_EQ(sizeof(version), fwrite(&version, 1, sizeof(version), file.get())); + + // Indicate two index values and two deltas. 
+ uint32_t val = 2; + ASSERT_EQ(sizeof(val), fwrite(&val, 1, sizeof(val), file.get())); + ASSERT_EQ(sizeof(val), fwrite(&val, 1, sizeof(val), file.get())); + + std::pair<int32_t, uint32_t> item; + memset(&item, 0, sizeof(item)); // Includes any padding. + item.first = -1000; + item.second = 0; + ASSERT_EQ(sizeof(item), fwrite(&item, 1, sizeof(item), file.get())); + item.first = 1000; + item.second = 1; + ASSERT_EQ(sizeof(item), fwrite(&item, 1, sizeof(item), file.get())); + + // Write two delta values. + uint16_t delta = 23; + ASSERT_EQ(sizeof(delta), fwrite(&delta, 1, sizeof(delta), file.get())); + ASSERT_EQ(sizeof(delta), fwrite(&delta, 1, sizeof(delta), file.get())); + + // Leave space for the digest at the end, and regenerate it. + base::MD5Digest dummy = { { 0 } }; + ASSERT_EQ(sizeof(dummy), fwrite(&dummy, 1, sizeof(dummy), file.get())); + ASSERT_TRUE(base::TruncateFile(file.get())); + CleanChecksum(file.get()); + file.reset(); // Flush updates. + + scoped_ptr<const PrefixSet> prefix_set = PrefixSet::LoadFile(filename); + ASSERT_FALSE(prefix_set.get()); +} + +// Test that a golden v2 file is discarded on read. All platforms generating v2 +// files are little-endian, so there is no point to testing this transition +// if/when a big-endian port is added. +#if defined(ARCH_CPU_LITTLE_ENDIAN) +TEST_F(PrefixSetTest, Version2) { + std::vector<SBPrefix> ref_prefixes; + ASSERT_TRUE(ReadReferencePrefixes(&ref_prefixes)); + + const char kBasename[] = "PrefixSetVersion2"; + base::FilePath golden_path = TestFilePath(); + golden_path = golden_path.AppendASCII(kBasename); + + scoped_ptr<const PrefixSet> prefix_set(PrefixSet::LoadFile(golden_path)); + ASSERT_FALSE(prefix_set.get()); +} +#endif + +// Test that a golden v3 file can be read by the current code. All platforms +// generating v3 files are little-endian, so there is no point to testing this +// transition if/when a big-endian port is added. 
+#if defined(ARCH_CPU_LITTLE_ENDIAN) +TEST_F(PrefixSetTest, Version3) { + std::vector<SBPrefix> ref_prefixes; + ASSERT_TRUE(ReadReferencePrefixes(&ref_prefixes)); + + const char kBasename[] = "PrefixSetVersion3"; + base::FilePath golden_path = TestFilePath(); + golden_path = golden_path.AppendASCII(kBasename); + + scoped_ptr<const PrefixSet> prefix_set(PrefixSet::LoadFile(golden_path)); + ASSERT_TRUE(prefix_set.get()); + CheckPrefixes(*prefix_set, ref_prefixes); + + const SBFullHash kHash1 = SBFullHashForString("www.evil.com/malware.html"); + const SBFullHash kHash2 = SBFullHashForString("www.evil.com/phishing.html"); + + EXPECT_TRUE(prefix_set->Exists(kHash1)); + EXPECT_TRUE(prefix_set->Exists(kHash2)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash1.prefix)); + EXPECT_FALSE(prefix_set->PrefixExists(kHash2.prefix)); +} +#endif + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/remote_database_manager.cc b/chromium/components/safe_browsing_db/remote_database_manager.cc new file mode 100644 index 00000000000..a67e72d2889 --- /dev/null +++ b/chromium/components/safe_browsing_db/remote_database_manager.cc @@ -0,0 +1,308 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/safe_browsing_db/remote_database_manager.h" + +#include <vector> + +#include "base/metrics/histogram_macros.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_split.h" +#include "base/timer/elapsed_timer.h" +#include "components/safe_browsing_db/safe_browsing_api_handler.h" +#include "components/safe_browsing_db/v4_get_hash_protocol_manager.h" +#include "components/variations/variations_associated_data.h" +#include "content/public/browser/browser_thread.h" + +using content::BrowserThread; + +namespace net { +class URLRequestContextGetter; +} // namespace net + +namespace { + +// Android field trial for controlling types_to_check. +const char kAndroidFieldExperiment[] = "SafeBrowsingAndroid"; +const char kAndroidTypesToCheckParam[] = "types_to_check"; + +} // namespace + +namespace safe_browsing { + +// +// RemoteSafeBrowsingDatabaseManager::ClientRequest methods +// +class RemoteSafeBrowsingDatabaseManager::ClientRequest { + public: + ClientRequest(Client* client, + RemoteSafeBrowsingDatabaseManager* db_manager, + const GURL& url); + + static void OnRequestDoneWeak(const base::WeakPtr<ClientRequest>& req, + SBThreatType matched_threat_type, + const ThreatMetadata& metadata); + void OnRequestDone(SBThreatType matched_threat_type, + const ThreatMetadata& metadata); + + // Accessors + Client* client() const { return client_; } + const GURL& url() const { return url_; } + base::WeakPtr<ClientRequest> GetWeakPtr() { + return weak_factory_.GetWeakPtr(); + } + + private: + Client* client_; + RemoteSafeBrowsingDatabaseManager* db_manager_; + GURL url_; + base::ElapsedTimer timer_; + base::WeakPtrFactory<ClientRequest> weak_factory_; +}; + +RemoteSafeBrowsingDatabaseManager::ClientRequest::ClientRequest( + Client* client, + RemoteSafeBrowsingDatabaseManager* db_manager, + const GURL& url) + : client_(client), + db_manager_(db_manager), + url_(url), + weak_factory_(this) {} + +// Static +void 
RemoteSafeBrowsingDatabaseManager::ClientRequest::OnRequestDoneWeak( + const base::WeakPtr<ClientRequest>& req, + SBThreatType matched_threat_type, + const ThreatMetadata& metadata) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + if (!req) + return; // Previously canceled + req->OnRequestDone(matched_threat_type, metadata); +} + +void RemoteSafeBrowsingDatabaseManager::ClientRequest::OnRequestDone( + SBThreatType matched_threat_type, + const ThreatMetadata& metadata) { + DVLOG(1) << "OnRequestDone took " << timer_.Elapsed().InMilliseconds() + << " ms for client " << client_ << " and URL " << url_; + client_->OnCheckBrowseUrlResult(url_, matched_threat_type, metadata); + UMA_HISTOGRAM_TIMES("SB2.RemoteCall.Elapsed", timer_.Elapsed()); + // CancelCheck() will delete *this. + db_manager_->CancelCheck(client_); +} + +// +// RemoteSafeBrowsingDatabaseManager methods +// + +// TODO(nparker): Add more tests for this class +RemoteSafeBrowsingDatabaseManager::RemoteSafeBrowsingDatabaseManager() + : enabled_(false) { + // Decide which resource types to check. These two are the minimum. + resource_types_to_check_.insert(content::RESOURCE_TYPE_MAIN_FRAME); + resource_types_to_check_.insert(content::RESOURCE_TYPE_SUB_FRAME); + + // The param is expected to be a comma-separated list of ints + // corresponding to the enum types. We're keeping this finch + // control around so we can add back types if they later become dangerous. + const std::string ints_str = variations::GetVariationParamValue( + kAndroidFieldExperiment, kAndroidTypesToCheckParam); + if (ints_str.empty()) { + // By default, we check all types except a few. 
+ static_assert(content::RESOURCE_TYPE_LAST_TYPE == + content::RESOURCE_TYPE_PLUGIN_RESOURCE + 1, + "Decide if new resource type should be skipped on mobile."); + for (int t_int = 0; t_int < content::RESOURCE_TYPE_LAST_TYPE; t_int++) { + content::ResourceType t = static_cast<content::ResourceType>(t_int); + switch (t) { + case content::RESOURCE_TYPE_STYLESHEET: + case content::RESOURCE_TYPE_IMAGE: + case content::RESOURCE_TYPE_FONT_RESOURCE: + case content::RESOURCE_TYPE_FAVICON: + break; + default: + resource_types_to_check_.insert(t); + } + } + } else { + // Use the finch param. + for (const std::string& val_str : base::SplitString( + ints_str, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL)) { + int i; + if (base::StringToInt(val_str, &i) && i >= 0 && + i < content::RESOURCE_TYPE_LAST_TYPE) { + resource_types_to_check_.insert(static_cast<content::ResourceType>(i)); + } + } + } +} + +RemoteSafeBrowsingDatabaseManager::~RemoteSafeBrowsingDatabaseManager() { + DCHECK(!enabled_); +} + +bool RemoteSafeBrowsingDatabaseManager::IsSupported() const { + return SafeBrowsingApiHandler::GetInstance() != nullptr; +} + +safe_browsing::ThreatSource RemoteSafeBrowsingDatabaseManager::GetThreatSource() + const { + return safe_browsing::ThreatSource::REMOTE; +} + +bool RemoteSafeBrowsingDatabaseManager::ChecksAreAlwaysAsync() const { + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::CanCheckResourceType( + content::ResourceType resource_type) const { + return resource_types_to_check_.count(resource_type) > 0; +} + +bool RemoteSafeBrowsingDatabaseManager::CanCheckUrl(const GURL& url) const { + return url.SchemeIs(url::kHttpsScheme) || url.SchemeIs(url::kHttpScheme) || + url.SchemeIs(url::kFtpScheme); +} + +bool RemoteSafeBrowsingDatabaseManager::IsDownloadProtectionEnabled() const { + return false; +} + +bool RemoteSafeBrowsingDatabaseManager::CheckDownloadUrl( + const std::vector<GURL>& url_chain, + Client* client) { + NOTREACHED(); + return true; +} + +bool 
RemoteSafeBrowsingDatabaseManager::CheckExtensionIDs( + const std::set<std::string>& extension_ids, + Client* client) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchMalwareIP( + const std::string& ip_address) { + NOTREACHED(); + return false; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchCsdWhitelistUrl(const GURL& url) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchDownloadWhitelistUrl( + const GURL& url) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchDownloadWhitelistString( + const std::string& str) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchInclusionWhitelistUrl( + const GURL& url) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::MatchModuleWhitelistString( + const std::string& str) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::CheckResourceUrl(const GURL& url, + Client* client) { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::IsMalwareKillSwitchOn() { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::IsCsdWhitelistKillSwitchOn() { + NOTREACHED(); + return true; +} + +bool RemoteSafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, + Client* client) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + if (!enabled_) + return true; + + bool can_check_url = CanCheckUrl(url); + UMA_HISTOGRAM_BOOLEAN("SB2.RemoteCall.CanCheckUrl", can_check_url); + if (!can_check_url) + return true; // Safe, continue right away. + + scoped_ptr<ClientRequest> req(new ClientRequest(client, this, url)); + std::vector<SBThreatType> threat_types; // Not currently used. + + DVLOG(1) << "Checking for client " << client << " and URL " << url; + SafeBrowsingApiHandler* api_handler = SafeBrowsingApiHandler::GetInstance(); + // This shouldn't happen since SafeBrowsingResourceThrottle checks + // IsSupported() earlier. 
+ DCHECK(api_handler) << "SafeBrowsingApiHandler was never constructed"; + api_handler->StartURLCheck( + base::Bind(&ClientRequest::OnRequestDoneWeak, req->GetWeakPtr()), url, + threat_types); + + UMA_HISTOGRAM_COUNTS_10000("SB2.RemoteCall.ChecksPending", + current_requests_.size()); + current_requests_.push_back(req.release()); + + // Defer the resource load. + return false; +} + +void RemoteSafeBrowsingDatabaseManager::CancelCheck(Client* client) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + DCHECK(enabled_); + for (auto itr = current_requests_.begin(); itr != current_requests_.end(); + ++itr) { + if ((*itr)->client() == client) { + DVLOG(2) << "Canceling check for URL " << (*itr)->url(); + delete *itr; + current_requests_.erase(itr); + return; + } + } + NOTREACHED(); +} + +void RemoteSafeBrowsingDatabaseManager::StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) { + VLOG(1) << "RemoteSafeBrowsingDatabaseManager starting"; + SafeBrowsingDatabaseManager::StartOnIOThread(request_context_getter, config); + enabled_ = true; +} + +void RemoteSafeBrowsingDatabaseManager::StopOnIOThread(bool shutdown) { + // |shutdown| is not used. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + DVLOG(1) << "RemoteSafeBrowsingDatabaseManager stopping"; + + // Call back and delete any remaining clients. OnRequestDone() modifies + // |current_requests_|, so we make a copy first. 
+ std::vector<ClientRequest*> to_callback(current_requests_); + for (auto req : to_callback) { + DVLOG(1) << "Stopping: Invoking unfinished req for URL " << req->url(); + req->OnRequestDone(SB_THREAT_TYPE_SAFE, ThreatMetadata()); + } + enabled_ = false; + + SafeBrowsingDatabaseManager::StopOnIOThread(shutdown); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/remote_database_manager.h b/chromium/components/safe_browsing_db/remote_database_manager.h new file mode 100644 index 00000000000..ffe2dbc8176 --- /dev/null +++ b/chromium/components/safe_browsing_db/remote_database_manager.h @@ -0,0 +1,90 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Implementation of the SafeBrowsingDatabaseManager that sends URLs +// via IPC to a database that chromium doesn't manage locally. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_REMOTE_DATABASE_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_REMOTE_DATABASE_MANAGER_H_ + +#include <set> +#include <string> +#include <vector> + +#include "base/macros.h" +#include "base/memory/ref_counted.h" +#include "base/memory/weak_ptr.h" +#include "components/safe_browsing_db/database_manager.h" +#include "url/gurl.h" + +namespace net { +class URLRequestContextGetter; +} + +namespace safe_browsing { + +struct V4ProtocolConfig; + +// An implementation that proxies requests to a service outside of Chromium. +// Does not manage a local database. +class RemoteSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager { + public: + // Construct RemoteSafeBrowsingDatabaseManager. + // Must be initialized by calling StartOnIOThread() before using. 
+ RemoteSafeBrowsingDatabaseManager(); + + // + // SafeBrowsingDatabaseManager implementation + // + + bool IsSupported() const override; + safe_browsing::ThreatSource GetThreatSource() const override; + bool ChecksAreAlwaysAsync() const override; + bool CanCheckResourceType(content::ResourceType resource_type) const override; + bool CanCheckUrl(const GURL& url) const override; + bool IsDownloadProtectionEnabled() const override; + bool CheckBrowseUrl(const GURL& url, Client* client) override; + void CancelCheck(Client* client) override; + void StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) override; + void StopOnIOThread(bool shutdown) override; + + // These will fail with DCHECK() since their functionality isn't implemented. + // We may later add support for a subset of them. + bool CheckDownloadUrl(const std::vector<GURL>& url_chain, + Client* client) override; + bool CheckExtensionIDs(const std::set<std::string>& extension_ids, + Client* client) override; + bool MatchCsdWhitelistUrl(const GURL& url) override; + bool MatchMalwareIP(const std::string& ip_address) override; + bool MatchDownloadWhitelistUrl(const GURL& url) override; + bool MatchDownloadWhitelistString(const std::string& str) override; + bool MatchInclusionWhitelistUrl(const GURL& url) override; + bool MatchModuleWhitelistString(const std::string& str) override; + bool CheckResourceUrl(const GURL& url, Client* client) override; + bool IsMalwareKillSwitchOn() override; + bool IsCsdWhitelistKillSwitchOn() override; + + // + // RemoteSafeBrowsingDatabaseManager implementation + // + + private: + ~RemoteSafeBrowsingDatabaseManager() override; + class ClientRequest; // Per-request tracker. + + // Requests currently outstanding. This owns the ptrs. 
+ std::vector<ClientRequest*> current_requests_; + bool enabled_; + + std::set<content::ResourceType> resource_types_to_check_; + + friend class base::RefCountedThreadSafe<RemoteSafeBrowsingDatabaseManager>; + DISALLOW_COPY_AND_ASSIGN(RemoteSafeBrowsingDatabaseManager); +}; // class RemoteSafeBrowsingDatabaseManager + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_REMOTE_DATABASE_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/remote_database_manager_unittest.cc b/chromium/components/safe_browsing_db/remote_database_manager_unittest.cc new file mode 100644 index 00000000000..d91baa53d9f --- /dev/null +++ b/chromium/components/safe_browsing_db/remote_database_manager_unittest.cc @@ -0,0 +1,103 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/metrics/field_trial.h" +#include "base/strings/stringprintf.h" +#include "base/time/time.h" +#include "components/safe_browsing_db/remote_database_manager.h" +#include "components/safe_browsing_db/safe_browsing_api_handler.h" +#include "components/variations/variations_associated_data.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace safe_browsing { + +namespace { + +class TestSafeBrowsingApiHandler : public SafeBrowsingApiHandler { + public: + void StartURLCheck(const URLCheckCallbackMeta& callback, + const GURL& url, + const std::vector<SBThreatType>& threat_types) override {} +}; + +} // namespace + +class RemoteDatabaseManagerTest : public testing::Test { + protected: + RemoteDatabaseManagerTest() : field_trials_(new base::FieldTrialList(NULL)) {} + + void SetUp() override { + SafeBrowsingApiHandler::SetInstance(&api_handler_); + db_ = new RemoteSafeBrowsingDatabaseManager(); + } + + // Setup the two field trial params. These are read in db_'s ctor. 
+ void SetFieldTrialParams(const std::string types_to_check_val) { + // Destroy the existing FieldTrialList before creating a new one to avoid + // a DCHECK. + field_trials_.reset(); + field_trials_.reset(new base::FieldTrialList(NULL)); + variations::testing::ClearAllVariationIDs(); + variations::testing::ClearAllVariationParams(); + + const std::string group_name = "GroupFoo"; // Value not used + const std::string experiment_name = "SafeBrowsingAndroid"; + ASSERT_TRUE( + base::FieldTrialList::CreateFieldTrial(experiment_name, group_name)); + + std::map<std::string, std::string> params; + if (!types_to_check_val.empty()) + params["types_to_check"] = types_to_check_val; + + ASSERT_TRUE(variations::AssociateVariationParams(experiment_name, + group_name, params)); + } + + scoped_ptr<base::FieldTrialList> field_trials_; + TestSafeBrowsingApiHandler api_handler_; + scoped_refptr<RemoteSafeBrowsingDatabaseManager> db_; +}; + +TEST_F(RemoteDatabaseManagerTest, DisabledViaNull) { + EXPECT_TRUE(db_->IsSupported()); + + SafeBrowsingApiHandler::SetInstance(nullptr); + EXPECT_FALSE(db_->IsSupported()); +} + +TEST_F(RemoteDatabaseManagerTest, TypesToCheckDefault) { + // Most are true, a few are false. 
+ for (int t_int = 0; t_int < content::RESOURCE_TYPE_LAST_TYPE; t_int++) { + content::ResourceType t = static_cast<content::ResourceType>(t_int); + switch (t) { + case content::RESOURCE_TYPE_STYLESHEET: + case content::RESOURCE_TYPE_IMAGE: + case content::RESOURCE_TYPE_FONT_RESOURCE: + case content::RESOURCE_TYPE_FAVICON: + EXPECT_FALSE(db_->CanCheckResourceType(t)); + break; + default: + EXPECT_TRUE(db_->CanCheckResourceType(t)); + break; + } + } +} + +TEST_F(RemoteDatabaseManagerTest, TypesToCheckFromTrial) { + SetFieldTrialParams("1,2,blah, 9"); + db_ = new RemoteSafeBrowsingDatabaseManager(); + EXPECT_TRUE(db_->CanCheckResourceType( + content::RESOURCE_TYPE_MAIN_FRAME)); // defaulted + EXPECT_TRUE(db_->CanCheckResourceType(content::RESOURCE_TYPE_SUB_FRAME)); + EXPECT_TRUE(db_->CanCheckResourceType(content::RESOURCE_TYPE_STYLESHEET)); + EXPECT_FALSE(db_->CanCheckResourceType(content::RESOURCE_TYPE_SCRIPT)); + EXPECT_FALSE(db_->CanCheckResourceType(content::RESOURCE_TYPE_IMAGE)); + // ... + EXPECT_FALSE(db_->CanCheckResourceType(content::RESOURCE_TYPE_MEDIA)); + EXPECT_TRUE(db_->CanCheckResourceType(content::RESOURCE_TYPE_WORKER)); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/safe_browsing_api_handler.cc b/chromium/components/safe_browsing_db/safe_browsing_api_handler.cc new file mode 100644 index 00000000000..528dccbf8cb --- /dev/null +++ b/chromium/components/safe_browsing_db/safe_browsing_api_handler.cc @@ -0,0 +1,22 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/bind.h" +#include "components/safe_browsing_db/safe_browsing_api_handler.h" + +namespace safe_browsing { + +SafeBrowsingApiHandler* SafeBrowsingApiHandler::instance_ = NULL; + +// static +void SafeBrowsingApiHandler::SetInstance(SafeBrowsingApiHandler* instance) { + instance_ = instance; +} + +// static +SafeBrowsingApiHandler* SafeBrowsingApiHandler::GetInstance() { + return instance_; +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/safe_browsing_api_handler.h b/chromium/components/safe_browsing_db/safe_browsing_api_handler.h new file mode 100644 index 00000000000..fa01a6077af --- /dev/null +++ b/chromium/components/safe_browsing_db/safe_browsing_api_handler.h @@ -0,0 +1,44 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Glue to pass Safe Browsing API requests between +// RemoteSafeBrowsingDatabaseManager and Java-based API to check URLs. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_H_ + +#include <string> +#include <vector> + +#include "base/callback.h" +#include "components/safe_browsing_db/util.h" +#include "url/gurl.h" + +namespace safe_browsing { + +class SafeBrowsingApiHandler { + public: + // Singleton interface. + static void SetInstance(SafeBrowsingApiHandler* instance); + static SafeBrowsingApiHandler* GetInstance(); + + typedef base::Callback<void(SBThreatType sb_threat_type, + const ThreatMetadata& metadata)> + URLCheckCallbackMeta; + + // Makes Native->Java call and invokes callback when check is done. + virtual void StartURLCheck(const URLCheckCallbackMeta& callback, + const GURL& url, + const std::vector<SBThreatType>& threat_types) = 0; + + virtual ~SafeBrowsingApiHandler() {} + + private: + // Pointer not owned. 
+ static SafeBrowsingApiHandler* instance_; +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_H_ diff --git a/chromium/components/safe_browsing_db/safe_browsing_api_handler_unittest.cc b/chromium/components/safe_browsing_db/safe_browsing_api_handler_unittest.cc new file mode 100644 index 00000000000..15b50634c57 --- /dev/null +++ b/chromium/components/safe_browsing_db/safe_browsing_api_handler_unittest.cc @@ -0,0 +1,143 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/metadata.pb.h" +#include "components/safe_browsing_db/safe_browsing_api_handler_util.h" +#include "components/safe_browsing_db/testing_util.h" +#include "components/safe_browsing_db/util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace safe_browsing { + +class SafeBrowsingApiHandlerUtilTest : public ::testing::Test { + protected: + SBThreatType threat_; + ThreatMetadata meta_; + const ThreatMetadata empty_meta_; + + UmaRemoteCallResult ResetAndParseJson(const std::string& json) { + threat_ = SB_THREAT_TYPE_EXTENSION; // Should never be seen + meta_ = ThreatMetadata(); + return ParseJsonFromGMSCore(json, &threat_, &meta_); + } + +}; + +TEST_F(SafeBrowsingApiHandlerUtilTest, BadJson) { + EXPECT_EQ(UMA_STATUS_JSON_EMPTY, ResetAndParseJson("")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_JSON_FAILED_TO_PARSE, ResetAndParseJson("{")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_JSON_FAILED_TO_PARSE, ResetAndParseJson("[]")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_JSON_FAILED_TO_PARSE, + ResetAndParseJson("{\"matches\":\"foo\"}")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); 
+ + EXPECT_EQ(UMA_STATUS_JSON_UNKNOWN_THREAT, + ResetAndParseJson("{\"matches\":[{}]}")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_JSON_UNKNOWN_THREAT, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"junk\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_JSON_UNKNOWN_THREAT, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"999\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_SAFE, threat_); + EXPECT_EQ(empty_meta_, meta_); +} + +TEST_F(SafeBrowsingApiHandlerUtilTest, BasicThreats) { + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"4\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_MALWARE, threat_); + EXPECT_EQ(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"5\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_PHISHING, threat_); + EXPECT_EQ(empty_meta_, meta_); +} + +TEST_F(SafeBrowsingApiHandlerUtilTest, MultipleThreats) { + EXPECT_EQ( + UMA_STATUS_UNSAFE, + ResetAndParseJson( + "{\"matches\":[{\"threat_type\":\"4\"}, {\"threat_type\":\"5\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_MALWARE, threat_); + EXPECT_EQ(empty_meta_, meta_); +} + +TEST_F(SafeBrowsingApiHandlerUtilTest, PhaSubType) { + ThreatMetadata expected; + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"4\", " + "\"pha_pattern_type\":\"LANDING\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_MALWARE, threat_); + expected.threat_pattern_type = ThreatPatternType::LANDING; + EXPECT_EQ(expected, meta_); + // Test the ThreatMetadata comparator for this field. 
+ EXPECT_NE(empty_meta_, meta_); + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"4\", " + "\"pha_pattern_type\":\"DISTRIBUTION\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_MALWARE, threat_); + expected.threat_pattern_type = ThreatPatternType::DISTRIBUTION; + EXPECT_EQ(expected, meta_); + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"4\", " + "\"pha_pattern_type\":\"junk\"}]}")); + EXPECT_EQ(empty_meta_, meta_); +} + +TEST_F(SafeBrowsingApiHandlerUtilTest, SocialEngineeringSubType) { + ThreatMetadata expected; + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"5\", " + "\"se_pattern_type\":\"LANDING\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_PHISHING, threat_); + expected.threat_pattern_type = ThreatPatternType::LANDING; + EXPECT_EQ(expected, meta_); + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"5\", " + "\"se_pattern_type\":\"DISTRIBUTION\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_PHISHING, threat_); + expected.threat_pattern_type = ThreatPatternType::DISTRIBUTION; + EXPECT_EQ(expected, meta_); + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"5\", " + "\"se_pattern_type\":\"junk\"}]}")); + EXPECT_EQ(empty_meta_, meta_); +} + +TEST_F(SafeBrowsingApiHandlerUtilTest, PopulationId) { + ThreatMetadata expected; + + EXPECT_EQ(UMA_STATUS_UNSAFE, + ResetAndParseJson("{\"matches\":[{\"threat_type\":\"4\", " + "\"UserPopulation\":\"foobarbazz\"}]}")); + EXPECT_EQ(SB_THREAT_TYPE_URL_MALWARE, threat_); + expected.population_id = "foobarbazz"; + EXPECT_EQ(expected, meta_); + // Test the ThreatMetadata comparator for this field. 
+ EXPECT_NE(empty_meta_, meta_); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.cc b/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.cc new file mode 100644 index 00000000000..d780dbc7ce3 --- /dev/null +++ b/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.cc @@ -0,0 +1,175 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/safe_browsing_api_handler_util.h" + +#include <stddef.h> + +#include <string> + +#include "base/json/json_reader.h" +#include "base/memory/scoped_ptr.h" +#include "base/metrics/histogram_macros.h" +#include "base/strings/string_number_conversions.h" +#include "base/values.h" +#include "components/safe_browsing_db/metadata.pb.h" +#include "components/safe_browsing_db/util.h" + +namespace safe_browsing { +namespace { + +// JSON metadata keys. These are fixed in the Java-side API. +const char kJsonKeyMatches[] = "matches"; +const char kJsonKeyThreatType[] = "threat_type"; + +// Do not reorder or delete. Make sure changes are reflected in +// SB2RemoteCallThreatSubType. +enum UmaThreatSubType { + UMA_THREAT_SUB_TYPE_NOT_SET = 0, + UMA_THREAT_SUB_TYPE_LANDING = 1, + UMA_THREAT_SUB_TYPE_DISTRIBUTION = 2, + UMA_THREAT_SUB_TYPE_UNKNOWN = 3, + UMA_THREAT_SUB_TYPE_MAX_VALUE +}; + +void ReportUmaThreatSubType(SBThreatType threat_type, + UmaThreatSubType sub_type) { + if (threat_type == SB_THREAT_TYPE_URL_MALWARE) { + UMA_HISTOGRAM_ENUMERATION( + "SB2.RemoteCall.ThreatSubType.PotentiallyHarmfulApp", sub_type, + UMA_THREAT_SUB_TYPE_MAX_VALUE); + } else { + UMA_HISTOGRAM_ENUMERATION("SB2.RemoteCall.ThreatSubType.SocialEngineering", + sub_type, UMA_THREAT_SUB_TYPE_MAX_VALUE); + } +} + +// Parse the appropriate "*_pattern_type" key from the metadata. 
+// Returns NONE if no pattern type was found. +ThreatPatternType ParseThreatSubType( + const base::DictionaryValue* match, + SBThreatType threat_type) { + std::string pattern_key; + if (threat_type == SB_THREAT_TYPE_URL_MALWARE) { + pattern_key = "pha_pattern_type"; + } else { + DCHECK(threat_type == SB_THREAT_TYPE_URL_PHISHING); + pattern_key = "se_pattern_type"; + } + + std::string pattern_type; + if (!match->GetString(pattern_key, &pattern_type)) { + ReportUmaThreatSubType(threat_type, UMA_THREAT_SUB_TYPE_NOT_SET); + return ThreatPatternType::NONE; + } + + if (pattern_type == "LANDING") { + ReportUmaThreatSubType(threat_type, UMA_THREAT_SUB_TYPE_LANDING); + return ThreatPatternType::LANDING; + } else if (pattern_type == "DISTRIBUTION") { + ReportUmaThreatSubType(threat_type, UMA_THREAT_SUB_TYPE_DISTRIBUTION); + return ThreatPatternType::DISTRIBUTION; + } else { + ReportUmaThreatSubType(threat_type, UMA_THREAT_SUB_TYPE_UNKNOWN); + return ThreatPatternType::NONE; + } +} + +// Parse the optional "UserPopulation" key from the metadata. +// Returns empty string if none was found. +std::string ParseUserPopulation(const base::DictionaryValue* match) { + std::string population_id; + if (!match->GetString("UserPopulation", &population_id)) + return std::string(); + else + return population_id; +} + +int GetThreatSeverity(int java_threat_num) { + // Assign higher numbers to more severe threats. 
+ switch (java_threat_num) { + case JAVA_THREAT_TYPE_POTENTIALLY_HARMFUL_APPLICATION: + return 2; + case JAVA_THREAT_TYPE_SOCIAL_ENGINEERING: + return 1; + default: + // Unknown threat type + return -1; + } +} + +SBThreatType JavaToSBThreatType(int java_threat_num) { + switch (java_threat_num) { + case JAVA_THREAT_TYPE_POTENTIALLY_HARMFUL_APPLICATION: + return SB_THREAT_TYPE_URL_MALWARE; + case JAVA_THREAT_TYPE_SOCIAL_ENGINEERING: + return SB_THREAT_TYPE_URL_PHISHING; + default: + // Unknown threat type + return SB_THREAT_TYPE_SAFE; + } +} + +} // namespace + +// Valid examples: +// {"matches":[{"threat_type":"5"}]} +// or +// {"matches":[{"threat_type":"4"}, +// {"threat_type":"5", "se_pattern_type":"LANDING"}]} +// or +// {"matches":[{"threat_type":"4", "UserPopulation":"YXNvZWZpbmFqO..."}]} +UmaRemoteCallResult ParseJsonFromGMSCore(const std::string& metadata_str, + SBThreatType* worst_threat, + ThreatMetadata* metadata) { + *worst_threat = SB_THREAT_TYPE_SAFE; // Default to safe. + *metadata = ThreatMetadata(); // Default values. + + if (metadata_str.empty()) + return UMA_STATUS_JSON_EMPTY; + + // Pick out the "matches" list. + scoped_ptr<base::Value> value = base::JSONReader::Read(metadata_str); + const base::ListValue* matches = nullptr; + if (!value.get() || !value->IsType(base::Value::TYPE_DICTIONARY) || + !(static_cast<base::DictionaryValue*>(value.get())) + ->GetList(kJsonKeyMatches, &matches) || + !matches) { + return UMA_STATUS_JSON_FAILED_TO_PARSE; + } + + // Go through each matched threat type and pick the most severe. 
+ int worst_threat_num = -1; + const base::DictionaryValue* worst_match = nullptr; + for (size_t i = 0; i < matches->GetSize(); i++) { + // Get the threat number + const base::DictionaryValue* match; + std::string threat_num_str; + int java_threat_num = -1; + if (!matches->GetDictionary(i, &match) || + !match->GetString(kJsonKeyThreatType, &threat_num_str) || + !base::StringToInt(threat_num_str, &java_threat_num)) { + continue; // Skip malformed list entries + } + + if (GetThreatSeverity(java_threat_num) > + GetThreatSeverity(worst_threat_num)) { + worst_threat_num = java_threat_num; + worst_match = match; + } + } + + *worst_threat = JavaToSBThreatType(worst_threat_num); + if (*worst_threat == SB_THREAT_TYPE_SAFE || !worst_match) + return UMA_STATUS_JSON_UNKNOWN_THREAT; + + // Fill in the metadata + metadata->threat_pattern_type = + ParseThreatSubType(worst_match, *worst_threat); + metadata->population_id = ParseUserPopulation(worst_match); + + return UMA_STATUS_UNSAFE; // success +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.h b/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.h new file mode 100644 index 00000000000..0cb5e579212 --- /dev/null +++ b/chromium/components/safe_browsing_db/safe_browsing_api_handler_util.h @@ -0,0 +1,60 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Helper functions for SafeBrowsingApiHandlerImpl. Separated out for tests. 
+ +#ifndef COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_UTIL_H_ +#define COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_UTIL_H_ + +#include <string> + +#include "components/safe_browsing_db/util.h" + +namespace safe_browsing { + +// These match what SafeBrowsingApiHandler.java uses for |resultStatus| +enum RemoteCallResultStatus { + RESULT_STATUS_INTERNAL_ERROR = -1, + RESULT_STATUS_SUCCESS = 0, + RESULT_STATUS_TIMEOUT = 1, +}; + +// Threat types as per the Java code. +// This must match those in GMS's SafeBrowsingThreatTypes.java. +enum JavaThreatTypes { + JAVA_THREAT_TYPE_POTENTIALLY_HARMFUL_APPLICATION = 4, + JAVA_THREAT_TYPE_SOCIAL_ENGINEERING = 5, +}; + +// Do not reorder or delete entries, and make sure changes here are reflected +// in SB2RemoteCallResult histogram. +enum UmaRemoteCallResult { + UMA_STATUS_INTERNAL_ERROR = 0, + UMA_STATUS_TIMEOUT = 1, + UMA_STATUS_SAFE = 2, + UMA_STATUS_UNSAFE = 3, + UMA_STATUS_JSON_EMPTY = 4, + UMA_STATUS_JSON_FAILED_TO_PARSE = 5, + UMA_STATUS_JSON_UNKNOWN_THREAT = 6, + UMA_STATUS_UNSUPPORTED = 7, + UMA_STATUS_MAX_VALUE +}; + +// This parses the JSON from the GMSCore API and then: +// 1) Picks the most severe threat type +// 2) Parses that threat's key/value pairs into the metadata struct. +// +// If anything fails to parse, this sets the threat to "safe". The caller +// should report the return value via UMA. +UmaRemoteCallResult ParseJsonFromGMSCore(const std::string& metadata_str, + SBThreatType* worst_threat, + ThreatMetadata* metadata); + +// DEPRECATED. Will be removed. 
+UmaRemoteCallResult ParseJsonToThreatAndPB(const std::string& metadata_str, + SBThreatType* worst_threat, + std::string* metadata_pb_str); +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_SAFE_BROWSING_API_HANDLER_UTIL_H_ diff --git a/chromium/components/safe_browsing_db/safebrowsing.proto b/chromium/components/safe_browsing_db/safebrowsing.proto new file mode 100644 index 00000000000..6fe0f53c8ab --- /dev/null +++ b/chromium/components/safe_browsing_db/safebrowsing.proto @@ -0,0 +1,473 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file includes Safe Browsing V4 API blacklist request and response +// protocol buffers. They should be kept in sync with the server implementation. + +syntax = "proto2"; + +option optimize_for = LITE_RUNTIME; + +package safe_browsing; + +message ThreatInfo { + // The threat types to be checked. + repeated ThreatType threat_types = 1; + + // The platform types to be checked. + repeated PlatformType platform_types = 2; + + // The entry types to be checked. + repeated ThreatEntryType threat_entry_types = 4; + + // The threat entries to be checked. + repeated ThreatEntry threat_entries = 3; +} + +// A match when checking a threat entry in the Safe Browsing threat lists. +message ThreatMatch { + // The threat type matching this threat. + optional ThreatType threat_type = 1; + + // The platform type matching this threat. + optional PlatformType platform_type = 2; + + // The threat entry type matching this threat. + optional ThreatEntryType threat_entry_type = 6; + + // The threat matching this threat. + optional ThreatEntry threat = 3; + + // Optional metadata associated with this threat. + optional ThreatEntryMetadata threat_entry_metadata = 4; + + // The cache lifetime for the returned match. 
Clients must not cache this + // response for more than this duration to avoid false positives. + optional Duration cache_duration = 5; +} + +// Request to check entries against lists. +message FindThreatMatchesRequest { + // The client metadata. + optional ClientInfo client = 1; + + // The lists and entries to be checked for matches. + optional ThreatInfo threat_info = 2; +} + +// Response type for requests to find threat matches. +message FindThreatMatchesResponse { + // The threat list matches. + repeated ThreatMatch matches = 1; +} + +// Describes a Safe Browsing API update request. Clients can request updates for +// multiple lists in a single request. +message FetchThreatListUpdatesRequest { + // The client metadata. + optional ClientInfo client = 1; + + // A single list update request. + message ListUpdateRequest { + // The type of threat posed by entries present in the list. + optional ThreatType threat_type = 1; + + // The type of platform at risk by entries present in the list. + optional PlatformType platform_type = 2; + + // The types of entries present in the list. + optional ThreatEntryType threat_entry_type = 5; + + // The current state of the client for the requested list (the encrypted + // ClientState that was sent to the client from the previous update + // request). + optional bytes state = 3; + + // The constraints for this update. + message Constraints { + // The maximum size in number of entries. The update will not contain more + // entries than this value. This should be a power of 2 between 2**10 and + // 2**20. If zero, no update size limit is set. + optional int32 max_update_entries = 1; + + // Sets the maximum number of entries that the client is willing to have + // in the local database. This should be a power of 2 between 2**10 and + // 2**20. If zero, no database size limit is set. + optional int32 max_database_entries = 2; + + // Requests the list for a specific geographic location.
If not set the + // server may pick that value based on the user's IP address. Expects ISO + // 3166-1 alpha-2 format. + optional string region = 3; + + // The compression types supported by the client. + repeated CompressionType supported_compressions = 4; + } + + // The constraints associated with this request. + optional Constraints constraints = 4; + } + + // The requested threat list updates. + repeated ListUpdateRequest list_update_requests = 3; +} + +// Response type for threat list update requests. +message FetchThreatListUpdatesResponse { + // An update to an individual list. + message ListUpdateResponse { + // The threat type for which data is returned. + optional ThreatType threat_type = 1; + + // The format of the threats. + optional ThreatEntryType threat_entry_type = 2; + + // The platform type for which data is returned. + optional PlatformType platform_type = 3; + + // The type of response sent to the client. + enum ResponseType { + // Unknown. + RESPONSE_TYPE_UNSPECIFIED = 0; + + // Partial updates are applied to the client's existing local database. + PARTIAL_UPDATE = 1; + + // Full updates replace the client's entire local database. This means + // that either the client was seriously out-of-date or the client is + // believed to be corrupt. + FULL_UPDATE = 2; + } + + // The type of response. This may indicate that an action is required by the + // client when the response is received. + optional ResponseType response_type = 4; + + // A set of entries to add to a local threat type's list. Repeated to allow + // for a combination of compressed and raw data to be sent in a single + // response. + repeated ThreatEntrySet additions = 5; + + // A set of entries to remove from a local threat type's list. Repeated for + // the same reason as above. + repeated ThreatEntrySet removals = 6; + + // The new client state, in encrypted format. Opaque to clients. 
+ optional bytes new_client_state = 7; + + // The expected SHA256 hash of the client state; that is, of the sorted list + // of all hashes present in the database after applying the provided update. + // If the client state doesn't match the expected state, the client must + // disregard this update and retry later. + optional Checksum checksum = 8; + } + + // The list updates requested by the clients. + repeated ListUpdateResponse list_update_responses = 1; + + // The minimum duration the client must wait before issuing any update + // request. If this field is not set clients may update as soon as they want. + optional Duration minimum_wait_duration = 2; +} + +// Request to return full hashes matched by the provided hash prefixes. +message FindFullHashesRequest { + // The client metadata. + optional ClientInfo client = 1; + + // The current client states for each of the client's local threat lists. + repeated bytes client_states = 2; + + // The lists and hashes to be checked. + optional ThreatInfo threat_info = 3; +} + +// Response type for requests to find full hashes. +message FindFullHashesResponse { + // The full hashes that matched the requested prefixes. + repeated ThreatMatch matches = 1; + + // The minimum duration the client must wait before issuing any find hashes + // request. If this field is not set, clients can issue a request as soon as + // they want. + optional Duration minimum_wait_duration = 2; + + // For requested entities that did not match the threat list, how long to + // cache the response. + optional Duration negative_cache_duration = 3; +} + +// A hit comprised of multiple resources; one is the threat list entry that was +// encountered by the client, while others give context as to how the client +// arrived at the unsafe entry. +message ThreatHit { + // The threat type reported. + optional ThreatType threat_type = 1; + + // The platform type reported. 
+ optional PlatformType platform_type = 2; + + // The threat entry responsible for the hit. Full hash should be reported for + // hash-based hits. + optional ThreatEntry entry = 3; + + // Types of resources reported by the client as part of a single hit. + enum ThreatSourceType { + // Unknown. + THREAT_SOURCE_TYPE_UNSPECIFIED = 0; + // The URL that matched the threat list (for which GetFullHash returned a + // valid hash). + MATCHING_URL = 1; + // The final top-level URL of the tab that the client was browsing when the + // match occurred. + TAB_URL = 2; + // A redirect URL that was fetched before hitting the final TAB_URL. + TAB_REDIRECT = 3; + } + + // A single resource related to a threat hit. + message ThreatSource { + // The URL of the resource. + optional string url = 1; + + // The type of source reported. + optional ThreatSourceType type = 2; + + // The remote IP of the resource in ASCII format. Either IPv4 or IPv6. + optional string remote_ip = 3; + + // Referrer of the resource. Only set if the referrer is available. + optional string referrer = 4; + } + + // The resources related to the threat hit. + repeated ThreatSource resources = 4; +} + +// Types of threats. +enum ThreatType { + // Unknown. + THREAT_TYPE_UNSPECIFIED = 0; + + // Malware threat type. + MALWARE_THREAT = 1; + + // Social engineering threat type. + SOCIAL_ENGINEERING_PUBLIC = 2; + + // Unwanted software threat type. + UNWANTED_SOFTWARE = 3; + + // Potentially harmful application threat type. + POTENTIALLY_HARMFUL_APPLICATION = 4; + + // Social engineering threat type for internal use. + SOCIAL_ENGINEERING = 5; + + // API abuse threat type. + API_ABUSE = 6; +} + +// Types of platforms. +enum PlatformType { + // Unknown platform. + PLATFORM_TYPE_UNSPECIFIED = 0; + + // Threat posed to Windows. + WINDOWS_PLATFORM = 1; + + // Threat posed to Linux. + LINUX_PLATFORM = 2; + + // Threat posed to Android. 
+ // This cannot be ANDROID because that symbol is defined for android builds + // here: build/config/android/BUILD.gn line21. + ANDROID_PLATFORM = 3; + + // Threat posed to OSX. + OSX_PLATFORM = 4; + + // Threat posed to iOS. + IOS_PLATFORM = 5; + + // Threat posed to at least one of the defined platforms. + ANY_PLATFORM = 6; + + // Threat posed to all defined platforms. + ALL_PLATFORMS = 7; + + // Threat posed to Chrome. + CHROME_PLATFORM = 8; +} + +// The client metadata associated with Safe Browsing API requests. +message ClientInfo { + // A client ID that (hopefully) uniquely identifies the client implementation + // of the Safe Browsing API. + optional string client_id = 1; + + // The version of the client implementation. + optional string client_version = 2; +} + +// The expected state of a client's local database. +message Checksum { + // The SHA256 hash of the client state; that is, of the sorted list of all + // hashes present in the database. + optional bytes sha256 = 1; +} + +// The ways in which threat entry sets can be compressed. +enum CompressionType { + // Unknown. + COMPRESSION_TYPE_UNSPECIFIED = 0; + + // Raw, uncompressed data. + RAW = 1; + + // Rice-Golomb encoded data. + RICE = 2; +} + +// An individual threat; for example, a malicious URL or its hash +// representation. Only one of these fields should be set. +message ThreatEntry { + // A variable-length SHA256 hash with size between 4 and 32 bytes inclusive. + optional bytes hash = 1; + + // A URL. + optional string url = 2; +} + +// Types of entries that pose threats. Threat lists are collections of entries +// of a single type. +enum ThreatEntryType { + // Unspecified. + THREAT_ENTRY_TYPE_UNSPECIFIED = 0; + + // A host-suffix/path-prefix URL expression; for example, "foo.bar.com/baz/". + URL_EXPRESSION = 1; + + // The digest of a binary. + BINARY_DIGEST = 2; + + // An IP range. + IP_RANGE = 3; +} + +// A set of threats that should be added or removed from a client's local +// database. 
+message ThreatEntrySet { + // The compression type for the entries in this set. + optional CompressionType compression_type = 1; + + // At most one of the following fields should be set. + + // The raw SHA256-formatted entries. + optional RawHashes raw_hashes = 2; + + // The raw removal indices for a local list. + optional RawIndices raw_indices = 3; + + // The encoded 4-byte prefixes of SHA256-formatted entries, using a + // Golomb-Rice encoding. + optional RiceDeltaEncoding rice_hashes = 4; + + // The encoded local, lexicographically-sorted list indices, using a + // Golomb-Rice encoding. Used for sending compressed removal indices. + optional RiceDeltaEncoding rice_indices = 5; +} + +// A set of raw indices to remove from a local list. +message RawIndices { + // The indices to remove from a lexicographically-sorted local list. + repeated int32 indices = 1; +} + +// The uncompressed threat entries in hash format of a particular prefix length. +// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4 +// bytes, but some hashes are lengthened if they collide with the hash of a +// popular URL. +// +// Used for sending ThreatEntrySet to clients that do not support compression, +// or when sending non-4-byte hashes to clients that do support compression. +message RawHashes { + // The number of bytes for each prefix encoded below. This field can be + // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash). + optional int32 prefix_size = 1; + + // The hashes, all concatenated into one long string. Each hash has a prefix + // size of |prefix_size| above. Hashes are sorted in lexicographic order. + optional bytes raw_hashes = 2; +} + +// The Rice-Golomb encoded data. Used for sending compressed 4-byte hashes or +// compressed removal indices. +message RiceDeltaEncoding { + // The offset of the first entry in the encoded data, or, if only a single + // integer was encoded, that single integer's value.
+ optional int64 first_value = 1; + + // The Golomb-Rice parameter which is a number between 2 and 28. This field + // is missing (that is, zero) if num_entries is zero. + optional int32 rice_parameter = 2; + + // The number of entries that are delta encoded in the encoded data. If only a + // single integer was encoded, this will be zero and the single value will be + // stored in first_value. + optional int32 num_entries = 3; + + // The encoded deltas that are encoded using the Golomb-Rice coder. + optional bytes encoded_data = 4; +} + +// The metadata associated with a specific threat entry. The client is expected +// to know the metadata key/value pairs associated with each threat type. +message ThreatEntryMetadata { + // A single metadata entry. + message MetadataEntry { + // The metadata entry key. + optional bytes key = 1; + + // The metadata entry value. + optional bytes value = 2; + } + + // The metadata entries. + repeated MetadataEntry entries = 1; +} + +// Describes an individual threat list. A list is defined by three parameters: +// the type of threat posed, the type of platform targeted by the threat, and +// the type of entries in the list. +message ThreatListDescriptor { + // The threat type posed by the list's entries. + optional ThreatType threat_type = 1; + + // The platform type targeted by the list's entries. + optional PlatformType platform_type = 2; + + // The entry types contained in the list. + optional ThreatEntryType threat_entry_type = 3; +} + +// A collection of lists available for download. +message ListThreatListsResponse { + // The lists available for download. + repeated ThreatListDescriptor threat_lists = 1; +} + +message Duration { + // Signed seconds of the span of time. Must be from -315,576,000,000 + // to +315,576,000,000 inclusive. + optional int64 seconds = 1; + + // Signed fractions of a second at nanosecond resolution of the span + // of time. 
Durations less than one second are represented with a 0 + // `seconds` field and a positive or negative `nanos` field. For durations + // of one second or more, a non-zero value for the `nanos` field must be + // of the same sign as the `seconds` field. Must be from -999,999,999 + // to +999,999,999 inclusive. + optional int32 nanos = 2; +} diff --git a/chromium/components/safe_browsing_db/test_database_manager.cc b/chromium/components/safe_browsing_db/test_database_manager.cc new file mode 100644 index 00000000000..478e0641094 --- /dev/null +++ b/chromium/components/safe_browsing_db/test_database_manager.cc @@ -0,0 +1,136 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/test_database_manager.h" + +#include <set> +#include <string> +#include <vector> + +#include "base/logging.h" +#include "net/url_request/url_request_context_getter.h" + +namespace safe_browsing { + +bool TestSafeBrowsingDatabaseManager::IsSupported() const { + NOTIMPLEMENTED(); + return false; +} + +safe_browsing::ThreatSource TestSafeBrowsingDatabaseManager::GetThreatSource() + const { + NOTIMPLEMENTED(); + return safe_browsing::ThreatSource::UNKNOWN; +} + +bool TestSafeBrowsingDatabaseManager::ChecksAreAlwaysAsync() const { + NOTIMPLEMENTED(); + return false; +} + +bool TestSafeBrowsingDatabaseManager::CanCheckResourceType( + content::ResourceType resource_type) const { + NOTIMPLEMENTED(); + return false; +} + +bool TestSafeBrowsingDatabaseManager::CanCheckUrl(const GURL& url) const { + NOTIMPLEMENTED(); + return false; +} + +bool TestSafeBrowsingDatabaseManager::IsDownloadProtectionEnabled() const { + NOTIMPLEMENTED(); + return false; +} + +bool TestSafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, + Client* client) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::CheckDownloadUrl( + const 
std::vector<GURL>& url_chain, + Client* client) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::CheckExtensionIDs( + const std::set<std::string>& extension_ids, + Client* client) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::CheckResourceUrl(const GURL& url, + Client* client) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchCsdWhitelistUrl(const GURL& url) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchMalwareIP( + const std::string& ip_address) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchDownloadWhitelistUrl( + const GURL& url) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchDownloadWhitelistString( + const std::string& str) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchInclusionWhitelistUrl( + const GURL& url) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::MatchModuleWhitelistString( + const std::string& str) { + NOTIMPLEMENTED(); + return true; +} + +bool TestSafeBrowsingDatabaseManager::IsMalwareKillSwitchOn() { + NOTIMPLEMENTED(); + return false; +} + +bool TestSafeBrowsingDatabaseManager::IsCsdWhitelistKillSwitchOn() { + NOTIMPLEMENTED(); + return false; +} + +void TestSafeBrowsingDatabaseManager::CancelCheck(Client* client) { + NOTIMPLEMENTED(); +} + +void TestSafeBrowsingDatabaseManager::CheckApiBlacklistUrl(const GURL& url, + Client* client) { + NOTIMPLEMENTED(); +} + +void TestSafeBrowsingDatabaseManager::StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) { +} + +void TestSafeBrowsingDatabaseManager::StopOnIOThread(bool shutdown) { +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/test_database_manager.h b/chromium/components/safe_browsing_db/test_database_manager.h new 
file mode 100644 index 00000000000..eb7fcfd26f5 --- /dev/null +++ b/chromium/components/safe_browsing_db/test_database_manager.h @@ -0,0 +1,62 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_TEST_DATABASE_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_TEST_DATABASE_MANAGER_H_ + +#include <set> +#include <string> +#include <vector> + +#include "components/safe_browsing_db/database_manager.h" + +namespace net { +class URLRequestContextGetter; +} + +namespace safe_browsing { + +struct V4ProtocolConfig; + +// This is a non-pure-virtual implementation of the SafeBrowsingDatabaseManager +// interface. It's used in tests by overriding only the functions that get +// called, and it'll complain if you call one that isn't overriden. +class TestSafeBrowsingDatabaseManager + : public SafeBrowsingDatabaseManager { + public: + // SafeBrowsingDatabaseManager implementation: + bool IsSupported() const override; + safe_browsing::ThreatSource GetThreatSource() const override; + bool ChecksAreAlwaysAsync() const override; + bool CanCheckResourceType(content::ResourceType resource_type) const override; + bool CanCheckUrl(const GURL& url) const override; + bool IsDownloadProtectionEnabled() const override; + bool CheckBrowseUrl(const GURL& url, Client* client) override; + bool CheckDownloadUrl(const std::vector<GURL>& url_chain, + Client* client) override; + bool CheckExtensionIDs(const std::set<std::string>& extension_ids, + Client* client) override; + bool CheckResourceUrl(const GURL& url, Client* client) override; + bool MatchCsdWhitelistUrl(const GURL& url) override; + bool MatchMalwareIP(const std::string& ip_address) override; + bool MatchDownloadWhitelistUrl(const GURL& url) override; + bool MatchDownloadWhitelistString(const std::string& str) override; + bool MatchInclusionWhitelistUrl(const GURL& url) override; + 
bool MatchModuleWhitelistString(const std::string& str) override; + bool IsMalwareKillSwitchOn() override; + bool IsCsdWhitelistKillSwitchOn() override; + void CancelCheck(Client* client) override; + void CheckApiBlacklistUrl(const GURL& url, Client* client) override; + void StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) override; + void StopOnIOThread(bool shutdown) override; + + protected: + ~TestSafeBrowsingDatabaseManager() override {}; +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_TEST_DATABASE_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/testing_util.h b/chromium/components/safe_browsing_db/testing_util.h new file mode 100644 index 00000000000..76b319464c4 --- /dev/null +++ b/chromium/components/safe_browsing_db/testing_util.h @@ -0,0 +1,36 @@ +// Copyright (c) 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Utilities to be used in tests of safe_browsing_db/ component. 
+ +#ifndef COMPONENTS_SAFE_BROWSING_DB_TESTING_UTIL_H_ +#define COMPONENTS_SAFE_BROWSING_DB_TESTING_UTIL_H_ + +#include "components/safe_browsing_db/util.h" + +#include <ostream> + +namespace safe_browsing { + +inline bool operator==(const ThreatMetadata& lhs, const ThreatMetadata& rhs) { + return lhs.threat_pattern_type == rhs.threat_pattern_type && + lhs.api_permissions == rhs.api_permissions && + lhs.population_id == rhs.population_id; +} + +inline bool operator!=(const ThreatMetadata& lhs, const ThreatMetadata& rhs) { + return !(lhs == rhs); +} + +inline std::ostream& operator<<(std::ostream& os, const ThreatMetadata& meta) { + os << "{threat_pattern_type=" << static_cast<int>(meta.threat_pattern_type) + << ", api_permissions=["; + for (auto p : meta.api_permissions) + os << p << ","; + return os << "], population_id=" << meta.population_id; +} + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_TESTING_UTIL_H_ diff --git a/chromium/components/safe_browsing_db/util.cc b/chromium/components/safe_browsing_db/util.cc new file mode 100644 index 00000000000..0b77f7f607b --- /dev/null +++ b/chromium/components/safe_browsing_db/util.cc @@ -0,0 +1,432 @@ +// Copyright (c) 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/safe_browsing_db/util.h" + +#include <stddef.h> + +#include "base/macros.h" +#include "base/strings/string_util.h" +#include "base/trace_event/trace_event.h" +#include "crypto/sha2.h" +#include "net/base/escape.h" +#include "url/gurl.h" +#include "url/url_util.h" + +namespace safe_browsing { + +// Utility functions ----------------------------------------------------------- + +namespace { +bool IsKnownList(const std::string& name) { + for (size_t i = 0; i < arraysize(kAllLists); ++i) { + if (!strcmp(kAllLists[i], name.c_str())) { + return true; + } + } + return false; +} +} // namespace + +// ThreatMetadata ------------------------------------------------------------ +ThreatMetadata::ThreatMetadata() + : threat_pattern_type(ThreatPatternType::NONE) {} + +ThreatMetadata::ThreatMetadata(const ThreatMetadata& other) = default; + +ThreatMetadata::~ThreatMetadata() {} + +// SBCachedFullHashResult ------------------------------------------------------ + +SBCachedFullHashResult::SBCachedFullHashResult() {} + +SBCachedFullHashResult::SBCachedFullHashResult( + const base::Time& in_expire_after) + : expire_after(in_expire_after) {} + +SBCachedFullHashResult::SBCachedFullHashResult( + const SBCachedFullHashResult& other) = default; + +SBCachedFullHashResult::~SBCachedFullHashResult() {} + +// Listnames that browser can process. 
+const char kMalwareList[] = "goog-malware-shavar"; +const char kPhishingList[] = "goog-phish-shavar"; +const char kBinUrlList[] = "goog-badbinurl-shavar"; +const char kCsdWhiteList[] = "goog-csdwhite-sha256"; +const char kDownloadWhiteList[] = "goog-downloadwhite-digest256"; +const char kExtensionBlacklist[] = "goog-badcrxids-digestvar"; +const char kIPBlacklist[] = "goog-badip-digest256"; +const char kUnwantedUrlList[] = "goog-unwanted-shavar"; +const char kInclusionWhitelist[] = "goog-csdinclusionwhite-sha256"; +const char kModuleWhitelist[] = "goog-whitemodule-digest256"; +const char kResourceBlacklist[] = "goog-badresource-shavar"; + +const char* kAllLists[11] = { + kMalwareList, kPhishingList, kBinUrlList, kCsdWhiteList, + kDownloadWhiteList, kExtensionBlacklist, kIPBlacklist, kUnwantedUrlList, + kInclusionWhitelist, kModuleWhitelist, kResourceBlacklist, +}; + +ListType GetListId(const base::StringPiece& name) { + ListType id; + if (name == kMalwareList) { + id = MALWARE; + } else if (name == kPhishingList) { + id = PHISH; + } else if (name == kBinUrlList) { + id = BINURL; + } else if (name == kCsdWhiteList) { + id = CSDWHITELIST; + } else if (name == kDownloadWhiteList) { + id = DOWNLOADWHITELIST; + } else if (name == kExtensionBlacklist) { + id = EXTENSIONBLACKLIST; + } else if (name == kIPBlacklist) { + id = IPBLACKLIST; + } else if (name == kUnwantedUrlList) { + id = UNWANTEDURL; + } else if (name == kInclusionWhitelist) { + id = INCLUSIONWHITELIST; + } else if (name == kModuleWhitelist) { + id = MODULEWHITELIST; + } else if (name == kResourceBlacklist) { + id = RESOURCEBLACKLIST; + } else { + id = INVALID; + } + return id; +} + +bool GetListName(ListType list_id, std::string* list) { + switch (list_id) { + case MALWARE: + *list = kMalwareList; + break; + case PHISH: + *list = kPhishingList; + break; + case BINURL: + *list = kBinUrlList; + break; + case CSDWHITELIST: + *list = kCsdWhiteList; + break; + case DOWNLOADWHITELIST: + *list = kDownloadWhiteList; 
+ break; + case EXTENSIONBLACKLIST: + *list = kExtensionBlacklist; + break; + case IPBLACKLIST: + *list = kIPBlacklist; + break; + case UNWANTEDURL: + *list = kUnwantedUrlList; + break; + case INCLUSIONWHITELIST: + *list = kInclusionWhitelist; + break; + case MODULEWHITELIST: + *list = kModuleWhitelist; + case RESOURCEBLACKLIST: + *list = kResourceBlacklist; + break; + default: + return false; + } + DCHECK(IsKnownList(*list)); + return true; +} + + +SBFullHash SBFullHashForString(const base::StringPiece& str) { + SBFullHash h; + crypto::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash)); + return h; +} + +SBFullHash StringToSBFullHash(const std::string& hash_in) { + DCHECK_EQ(crypto::kSHA256Length, hash_in.size()); + SBFullHash hash_out; + memcpy(hash_out.full_hash, hash_in.data(), crypto::kSHA256Length); + return hash_out; +} + +std::string SBFullHashToString(const SBFullHash& hash) { + DCHECK_EQ(crypto::kSHA256Length, sizeof(hash.full_hash)); + return std::string(hash.full_hash, sizeof(hash.full_hash)); +} + + +std::string Unescape(const std::string& url) { + std::string unescaped_str(url); + const int kMaxLoopIterations = 1024; + size_t old_size = 0; + int loop_var = 0; + do { + old_size = unescaped_str.size(); + unescaped_str = net::UnescapeURLComponent( + unescaped_str, + net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS | + net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | + net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); + } while (old_size != unescaped_str.size() && + ++loop_var <= kMaxLoopIterations); + + return unescaped_str; +} + +std::string Escape(const std::string& url) { + std::string escaped_str; + // The escaped string is larger so allocate double the length to reduce the + // chance of the string being grown. 
+ escaped_str.reserve(url.length() * 2); + const char* kHexString = "0123456789ABCDEF"; + for (size_t i = 0; i < url.length(); i++) { + unsigned char c = static_cast<unsigned char>(url[i]); + if (c <= ' ' || c > '~' || c == '#' || c == '%') { + escaped_str += '%'; + escaped_str += kHexString[c >> 4]; + escaped_str += kHexString[c & 0xf]; + } else { + escaped_str += c; + } + } + + return escaped_str; +} + +std::string RemoveConsecutiveChars(base::StringPiece str, const char c) { + std::string output; + // Output is at most the length of the original string. + output.reserve(str.size()); + + size_t i = 0; + while (i < str.size()) { + output.append(1, str[i++]); + if (str[i - 1] == c) { + while (i < str.size() && str[i] == c) { + i++; + } + } + } + + return output; +} + +// Canonicalizes url as per Google Safe Browsing Specification. +// See section 6.1 in +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. +void CanonicalizeUrl(const GURL& url, + std::string* canonicalized_hostname, + std::string* canonicalized_path, + std::string* canonicalized_query) { + DCHECK(url.is_valid()); + + // We only canonicalize "normal" URLs. + if (!url.IsStandard()) + return; + + // Following canonicalization steps are excluded since url parsing takes care + // of those :- + // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url. + // (Exclude escaped version of these chars). + // 2. Normalize hostname to 4 dot-separated decimal values. + // 3. Lowercase hostname. + // 4. Resolve path sequences "/../" and "/./". + + // That leaves us with the following :- + // 1. Remove fragment in URL. + GURL url_without_fragment; + GURL::Replacements f_replacements; + f_replacements.ClearRef(); + f_replacements.ClearUsername(); + f_replacements.ClearPassword(); + url_without_fragment = url.ReplaceComponents(f_replacements); + + // 2. Do URL unescaping until no more hex encoded characters exist. 
+ std::string url_unescaped_str(Unescape(url_without_fragment.spec())); + url::Parsed parsed; + url::ParseStandardURL(url_unescaped_str.data(), url_unescaped_str.length(), + &parsed); + + // 3. In hostname, remove all leading and trailing dots. + base::StringPiece host; + if (parsed.host.len > 0) + host.set(url_unescaped_str.data() + parsed.host.begin, parsed.host.len); + + base::StringPiece host_without_end_dots = + base::TrimString(host, ".", base::TrimPositions::TRIM_ALL); + + // 4. In hostname, replace consecutive dots with a single dot. + std::string host_without_consecutive_dots(RemoveConsecutiveChars( + host_without_end_dots, '.')); + + // 5. In path, replace runs of consecutive slashes with a single slash. + base::StringPiece path; + if (parsed.path.len > 0) + path.set(url_unescaped_str.data() + parsed.path.begin, parsed.path.len); + std::string path_without_consecutive_slash(RemoveConsecutiveChars(path, '/')); + + url::Replacements<char> hp_replacements; + hp_replacements.SetHost( + host_without_consecutive_dots.data(), + url::Component(0, host_without_consecutive_dots.length())); + hp_replacements.SetPath( + path_without_consecutive_slash.data(), + url::Component(0, path_without_consecutive_slash.length())); + + std::string url_unescaped_with_can_hostpath; + url::StdStringCanonOutput output(&url_unescaped_with_can_hostpath); + url::Parsed temp_parsed; + url::ReplaceComponents(url_unescaped_str.data(), + url_unescaped_str.length(), + parsed, + hp_replacements, + NULL, + &output, + &temp_parsed); + output.Complete(); + + // 6. Step needed to revert escaping done in url::ReplaceComponents. + url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath); + + // 7. After performing all above steps, percent-escape all chars in url which + // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters. 
+ std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath)); + url::Parsed final_parsed; + url::ParseStandardURL(escaped_canon_url_str.data(), + escaped_canon_url_str.length(), + &final_parsed); + + if (canonicalized_hostname && final_parsed.host.len > 0) { + *canonicalized_hostname = + escaped_canon_url_str.substr(final_parsed.host.begin, + final_parsed.host.len); + } + if (canonicalized_path && final_parsed.path.len > 0) { + *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin, + final_parsed.path.len); + } + if (canonicalized_query && final_parsed.query.len > 0) { + *canonicalized_query = escaped_canon_url_str.substr( + final_parsed.query.begin, final_parsed.query.len); + } +} + +void UrlToFullHashes(const GURL& url, + bool include_whitelist_hashes, + std::vector<SBFullHash>* full_hashes) { + // Include this function in traces because it's not cheap so it should be + // called sparingly. + TRACE_EVENT2("loader", "safe_browsing::UrlToFullHashes", "url", url.spec(), + "include_whitelist_hashes", include_whitelist_hashes); + std::vector<std::string> hosts; + if (url.HostIsIPAddress()) { + hosts.push_back(url.host()); + } else { + GenerateHostsToCheck(url, &hosts); + } + + std::vector<std::string> paths; + GeneratePathsToCheck(url, &paths); + + for (const std::string& host : hosts) { + for (const std::string& path : paths) { + full_hashes->push_back( + SBFullHashForString(host + path)); + + // We may have /foo as path-prefix in the whitelist which should + // also match with /foo/bar and /foo?bar. Hence, for every path + // that ends in '/' we also add the path without the slash. 
+ if (include_whitelist_hashes && path.size() > 1 && + path[path.size() - 1] == '/') { + full_hashes->push_back(SBFullHashForString( + host + path.substr(0, path.size() - 1))); + } + } + } +} + +void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) { + hosts->clear(); + + std::string canon_host; + CanonicalizeUrl(url, &canon_host, NULL, NULL); + + const std::string host = canon_host; // const sidesteps GCC bugs below! + if (host.empty()) + return; + + // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4 + // hostnames formed by starting with the last 5 components and successively + // removing the leading component. The last component isn't examined alone, + // since it's the TLD or a subcomponent thereof. + // + // Note that we don't need to be clever about stopping at the "real" eTLD -- + // the data on the server side has been filtered to ensure it will not + // blacklist a whole TLD, and it's not significantly slower on our side to + // just check too much. + // + // Also note that because we have a simple blacklist, not some sort of complex + // whitelist-in-blacklist or vice versa, it doesn't matter what order we check + // these in. + const size_t kMaxHostsToCheck = 4; + bool skipped_last_component = false; + for (std::string::const_reverse_iterator i(host.rbegin()); + i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) { + if (*i == '.') { + if (skipped_last_component) + hosts->push_back(std::string(i.base(), host.end())); + else + skipped_last_component = true; + } + } + hosts->push_back(host); +} + +void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) { + paths->clear(); + + std::string canon_path; + std::string canon_query; + CanonicalizeUrl(url, NULL, &canon_path, &canon_query); + + const std::string path = canon_path; // const sidesteps GCC bugs below! 
+ const std::string query = canon_query; + if (path.empty()) + return; + + // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without + // the query parameters, and also up to 4 paths formed by starting at the root + // and adding more path components. + // + // As with the hosts above, it doesn't matter what order we check these in. + const size_t kMaxPathsToCheck = 4; + for (std::string::const_iterator i(path.begin()); + i != path.end() && paths->size() < kMaxPathsToCheck; ++i) { + if (*i == '/') + paths->push_back(std::string(path.begin(), i + 1)); + } + + if (!paths->empty() && paths->back() != path) + paths->push_back(path); + + if (!query.empty()) + paths->push_back(path + "?" + query); +} + +void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls) { + std::vector<std::string> hosts, paths; + GenerateHostsToCheck(url, &hosts); + GeneratePathsToCheck(url, &paths); + for (size_t h = 0; h < hosts.size(); ++h) { + for (size_t p = 0; p < paths.size(); ++p) { + urls->push_back(hosts[h] + paths[p]); + } + } +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/util.h b/chromium/components/safe_browsing_db/util.h new file mode 100644 index 00000000000..e36ce1dad1d --- /dev/null +++ b/chromium/components/safe_browsing_db/util.h @@ -0,0 +1,215 @@ +// Copyright (c) 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Utilities for the SafeBrowsing DB code. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_UTIL_H_ +#define COMPONENTS_SAFE_BROWSING_DB_UTIL_H_ + +#include <stdint.h> + +#include <cstring> +#include <string> +#include <vector> + +#include "base/strings/string_piece.h" +#include "base/time/time.h" + +class GURL; + +namespace safe_browsing { + +// Different types of threats that SafeBrowsing protects against. +enum SBThreatType { + // No threat at all. 
+ SB_THREAT_TYPE_SAFE, + + // The URL is being used for phishing. + SB_THREAT_TYPE_URL_PHISHING, + + // The URL hosts malware. + SB_THREAT_TYPE_URL_MALWARE, + + // The URL hosts unwanted programs. + SB_THREAT_TYPE_URL_UNWANTED, + + // The download URL is malware. + SB_THREAT_TYPE_BINARY_MALWARE_URL, + + // Url detected by the client-side phishing model. Note that unlike the + // above values, this does not correspond to a downloaded list. + SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, + + // The Chrome extension or app (given by its ID) is malware. + SB_THREAT_TYPE_EXTENSION, + + // Url detected by the client-side malware IP list. This IP list is part + // of the client side detection model. + SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, + + // Url leads to a blacklisted resource script. Note that no warnings should be + // shown on this threat type, but an incident report might be sent. + SB_THREAT_TYPE_BLACKLISTED_RESOURCE, +}; + +// Metadata that indicates what kind of URL match this is. +enum class ThreatPatternType { + NONE, // Pattern type didn't appear in the metadata + LANDING, // The match is a landing page + DISTRIBUTION, // The match is a distribution page +}; + +// Metadata that was returned by a GetFullHash call. This is the parsed version +// of the PB (from Pver3, or Pver4 local) or JSON (from Pver4 via GMSCore). +// Some fields are only applicable to certain lists. +struct ThreatMetadata { + ThreatMetadata(); + ThreatMetadata(const ThreatMetadata& other); + ~ThreatMetadata(); + + // Type of blacklisted page. Used on malware and UwS lists. + // This will be NONE if it wasn't present in the response. + ThreatPatternType threat_pattern_type; + + // List of permissions blocked. Used with threat_type API_ABUSE. + // This will be empty if it wasn't present in the response. + std::vector<std::string> api_permissions; + + // Opaque base64 string used for user-population experiments in pver4. + // This will be empty if it wasn't present in the response. 
+ std::string population_id; +}; + +// A truncated hash's type. +typedef uint32_t SBPrefix; + +// A full hash. +union SBFullHash { + char full_hash[32]; + SBPrefix prefix; +}; + +// Used when we get a gethash response. +struct SBFullHashResult { + SBFullHash hash; + // TODO(shess): Refactor to allow ListType here. + int list_id; + ThreatMetadata metadata; + // Used only for V4 results. The cache lifetime for this result. The response + // must not be cached for more than this duration to avoid false positives. + base::TimeDelta cache_duration; +}; + +// Caches individual response from GETHASH request. +struct SBCachedFullHashResult { + SBCachedFullHashResult(); + explicit SBCachedFullHashResult(const base::Time& in_expire_after); + SBCachedFullHashResult(const SBCachedFullHashResult& other); + ~SBCachedFullHashResult(); + + base::Time expire_after; + std::vector<SBFullHashResult> full_hashes; +}; + +// SafeBrowsing list names. +extern const char kMalwareList[]; +extern const char kPhishingList[]; +// Binary Download list name. +extern const char kBinUrlList[]; +// SafeBrowsing client-side detection whitelist list name. +extern const char kCsdWhiteList[]; +// SafeBrowsing download whitelist list name. +extern const char kDownloadWhiteList[]; +// SafeBrowsing extension list name. +extern const char kExtensionBlacklist[]; +// SafeBrowsing csd malware IP blacklist name. +extern const char kIPBlacklist[]; +// SafeBrowsing unwanted URL list. +extern const char kUnwantedUrlList[]; +// SafeBrowsing off-domain inclusion whitelist list name. +extern const char kInclusionWhitelist[]; +// SafeBrowsing module whitelist list name. +extern const char kModuleWhitelist[]; +// Blacklisted resource URLs list name. +extern const char kResourceBlacklist[]; +/// This array must contain all Safe Browsing lists. 
+extern const char* kAllLists[11]; + +enum ListType { + INVALID = -1, + MALWARE = 0, + PHISH = 1, + BINURL = 2, + // Obsolete BINHASH = 3, + CSDWHITELIST = 4, + // SafeBrowsing lists are stored in pairs. Keep ListType 5 + // available for a potential second list that we would store in the + // csd-whitelist store file. + DOWNLOADWHITELIST = 6, + // See above comment. Leave 7 available. + EXTENSIONBLACKLIST = 8, + // See above comment. Leave 9 available. + // Obsolete SIDEEFFECTFREEWHITELIST = 10, + // See above comment. Leave 11 available. + IPBLACKLIST = 12, + // See above comment. Leave 13 available. + UNWANTEDURL = 14, + // See above comment. Leave 15 available. + INCLUSIONWHITELIST = 16, + // See above comment. Leave 17 available. + MODULEWHITELIST = 18, + // See above comment. Leave 19 available. + RESOURCEBLACKLIST = 20, + // See above comment. Leave 21 available. +}; + +inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { + return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); +} + +inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { + return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; +} + +// Generate full hash for the given string. +SBFullHash SBFullHashForString(const base::StringPiece& str); +SBFullHash StringToSBFullHash(const std::string& hash_in); +std::string SBFullHashToString(const SBFullHash& hash_out); + + +// Maps a list name to ListType. +ListType GetListId(const base::StringPiece& name); + +// Maps a ListId to list name. Return false if fails. +bool GetListName(ListType list_id, std::string* list); + +// Canonicalizes url as per Google Safe Browsing Specification. +// See section 6.1 in +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. +void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, + std::string* canonicalized_path, + std::string* canonicalized_query); + + +// Generate the set of full hashes to check for |url|. 
If +// |include_whitelist_hashes| is true we will generate additional path-prefixes +// to match against the csd whitelist. E.g., if the path-prefix /foo is on the +// whitelist it should also match /foo/bar which is not the case for all the +// other lists. We'll also always add a pattern for the empty path. +void UrlToFullHashes(const GURL& url, bool include_whitelist_hashes, + std::vector<SBFullHash>* full_hashes); + +// Given a URL, returns all the hosts we need to check. They are returned +// in order of size (i.e. b.c is first, then a.b.c). +void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); + +// Given a URL, returns all the paths we need to check. +void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); + +// Given a URL, returns all the patterns we need to check. +void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_UTIL_H_ diff --git a/chromium/components/safe_browsing_db/util_unittest.cc b/chromium/components/safe_browsing_db/util_unittest.cc new file mode 100644 index 00000000000..85f32a9146d --- /dev/null +++ b/chromium/components/safe_browsing_db/util_unittest.cc @@ -0,0 +1,378 @@ +// Copyright (c) 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> + +#include <algorithm> + +#include "base/macros.h" +#include "base/strings/stringprintf.h" +#include "components/safe_browsing_db/util.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "url/gurl.h" + +namespace safe_browsing { + +namespace { + +bool VectorContains(const std::vector<std::string>& data, + const std::string& str) { + return std::find(data.begin(), data.end(), str) != data.end(); +} + +} // namespace + +// Tests that we generate the required host/path combinations for testing +// according to the Safe Browsing spec. 
+// See section 6.2 in +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. +TEST(SafeBrowsingDbUtilTest, UrlParsing) { + std::vector<std::string> hosts, paths; + + GURL url("http://a.b.c/1/2.html?param=1"); + GenerateHostsToCheck(url, &hosts); + GeneratePathsToCheck(url, &paths); + EXPECT_EQ(hosts.size(), static_cast<size_t>(2)); + EXPECT_EQ(paths.size(), static_cast<size_t>(4)); + EXPECT_EQ(hosts[0], "b.c"); + EXPECT_EQ(hosts[1], "a.b.c"); + + EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1")); + EXPECT_TRUE(VectorContains(paths, "/1/2.html")); + EXPECT_TRUE(VectorContains(paths, "/1/")); + EXPECT_TRUE(VectorContains(paths, "/")); + + url = GURL("http://a.b.c.d.e.f.g/1.html"); + GenerateHostsToCheck(url, &hosts); + GeneratePathsToCheck(url, &paths); + EXPECT_EQ(hosts.size(), static_cast<size_t>(5)); + EXPECT_EQ(paths.size(), static_cast<size_t>(2)); + EXPECT_EQ(hosts[0], "f.g"); + EXPECT_EQ(hosts[1], "e.f.g"); + EXPECT_EQ(hosts[2], "d.e.f.g"); + EXPECT_EQ(hosts[3], "c.d.e.f.g"); + EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g"); + EXPECT_TRUE(VectorContains(paths, "/1.html")); + EXPECT_TRUE(VectorContains(paths, "/")); + + url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/"); + GeneratePathsToCheck(url, &paths); + EXPECT_EQ(paths.size(), static_cast<size_t>(3)); + EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/")); + EXPECT_TRUE(VectorContains(paths, "/saw-cgi/")); + EXPECT_TRUE(VectorContains(paths, "/")); +} + +// Tests the url canonicalization according to the Safe Browsing spec. +// See section 6.1 in +// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 
+TEST(SafeBrowsingDbUtilTest, CanonicalizeUrl) { + struct { + const char* input_url; + const char* expected_canonicalized_hostname; + const char* expected_canonicalized_path; + const char* expected_canonicalized_query; + } tests[] = { + { + "http://host/%25%32%35", + "host", + "/%25", + "" + }, { + "http://host/%25%32%35%25%32%35", + "host", + "/%25%25", + "" + }, { + "http://host/%2525252525252525", + "host", + "/%25", + "" + }, { + "http://host/asdf%25%32%35asd", + "host", + "/asdf%25asd", + "" + }, { + "http://host/%%%25%32%35asd%%", + "host", + "/%25%25%25asd%25%25", + "" + }, { + "http://host/%%%25%32%35asd%%", + "host", + "/%25%25%25asd%25%25", + "" + }, { + "http://www.google.com/", + "www.google.com", + "/", + "" + }, { + "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77" + "%77%77%2E%65%62%61%79%2E%63%6F%6D/", + "168.188.99.26", + "/.secure/www.ebay.com/", + "" + }, { + "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd" + "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/", + "195.127.0.11", + "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv" + "alidateinfoswqpcmlx=hgplmcx/", + "" + }, { + "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A" + "22%252833%252944_55%252B", + "host.com", + "/~a!b@c%23d$e%25f^00&11*22(33)44_55+", + "" + }, { + "http://3279880203/blah", + "195.127.0.11", + "/blah", + "" + }, { + "http://www.google.com/blah/..", + "www.google.com", + "/", + "" + }, { + "http://www.google.com/blah#fraq", + "www.google.com", + "/blah", + "" + }, { + "http://www.GOOgle.com/", + "www.google.com", + "/", + "" + }, { + "http://www.google.com.../", + "www.google.com", + "/", + "" + }, { + "http://www.google.com/q?", + "www.google.com", + "/q", + "" + }, { + "http://www.google.com/q?r?", + "www.google.com", + "/q", + "r?" 
+ }, { + "http://www.google.com/q?r?s", + "www.google.com", + "/q", + "r?s" + }, { + "http://evil.com/foo#bar#baz", + "evil.com", + "/foo", + "" + }, { + "http://evil.com/foo;", + "evil.com", + "/foo;", + "" + }, { + "http://evil.com/foo?bar;", + "evil.com", + "/foo", + "bar;" + }, { + "http://notrailingslash.com", + "notrailingslash.com", + "/", + "" + }, { + "http://www.gotaport.com:1234/", + "www.gotaport.com", + "/", + "" + }, { + " http://www.google.com/ ", + "www.google.com", + "/", + "" + }, { + "http:// leadingspace.com/", + "%20leadingspace.com", + "/", + "" + }, { + "http://%20leadingspace.com/", + "%20leadingspace.com", + "/", + "" + }, { + "https://www.securesite.com/", + "www.securesite.com", + "/", + "" + }, { + "http://host.com/ab%23cd", + "host.com", + "/ab%23cd", + "" + }, { + "http://host%3e.com//twoslashes?more//slashes", + "host>.com", + "/twoslashes", + "more//slashes" + }, { + "http://host.com/abc?val=xyz#anything", + "host.com", + "/abc", + "val=xyz" + }, { + "http://abc:def@host.com/xyz", + "host.com", + "/xyz", + "" + }, { + "http://host%3e.com/abc/%2e%2e%2fdef", + "host>.com", + "/def", + "" + }, { + "http://.......host...com.....//abc/////def%2F%2F%2Fxyz", + "host.com", + "/abc/def/xyz", + "" + }, { + "ftp://host.com/foo?bar", + "host.com", + "/foo", + "bar" + }, { + "data:text/html;charset=utf-8,%0D%0A", + "", + "", + "" + }, { + "javascript:alert()", + "", + "", + "" + }, { + "mailto:abc@example.com", + "", + "", + "" + }, + }; + for (size_t i = 0; i < arraysize(tests); ++i) { + SCOPED_TRACE(base::StringPrintf("Test: %s", tests[i].input_url)); + GURL url(tests[i].input_url); + + std::string canonicalized_hostname; + std::string canonicalized_path; + std::string canonicalized_query; + CanonicalizeUrl(url, &canonicalized_hostname, &canonicalized_path, + &canonicalized_query); + + EXPECT_EQ(tests[i].expected_canonicalized_hostname, canonicalized_hostname); + EXPECT_EQ(tests[i].expected_canonicalized_path, canonicalized_path); + 
EXPECT_EQ(tests[i].expected_canonicalized_query, canonicalized_query); + } +} + +TEST(SafeBrowsingDbUtilTest, UrlToFullHashes) { + std::vector<SBFullHash> results; + GURL url("http://www.evil.com/evil1/evilness.html"); + UrlToFullHashes(url, false, &results); + + EXPECT_EQ(6UL, results.size()); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("evil.com/"), + results[0])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("evil.com/evil1/"), + results[1])); + EXPECT_TRUE(SBFullHashEqual( + SBFullHashForString("evil.com/evil1/evilness.html"), results[2])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("www.evil.com/"), + results[3])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("www.evil.com/evil1/"), + results[4])); + EXPECT_TRUE(SBFullHashEqual( + SBFullHashForString("www.evil.com/evil1/evilness.html"), results[5])); + + results.clear(); + GURL url2("http://www.evil.com/evil1/evilness.html"); + UrlToFullHashes(url2, true, &results); + + EXPECT_EQ(8UL, results.size()); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("evil.com/"), + results[0])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("evil.com/evil1/"), + results[1])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("evil.com/evil1"), + results[2])); + EXPECT_TRUE(SBFullHashEqual( + SBFullHashForString("evil.com/evil1/evilness.html"), results[3])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("www.evil.com/"), + results[4])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("www.evil.com/evil1/"), + results[5])); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("www.evil.com/evil1"), + results[6])); + EXPECT_TRUE(SBFullHashEqual( + SBFullHashForString("www.evil.com/evil1/evilness.html"), results[7])); +} + +TEST(SafeBrowsingDbUtilTest, ListIdListNameConversion) { + std::string list_name; + EXPECT_FALSE(GetListName(INVALID, &list_name)); + EXPECT_TRUE(GetListName(MALWARE, &list_name)); + EXPECT_EQ(list_name, std::string(kMalwareList)); + EXPECT_EQ(MALWARE, GetListId(list_name)); + + 
EXPECT_TRUE(GetListName(PHISH, &list_name)); + EXPECT_EQ(list_name, std::string(kPhishingList)); + EXPECT_EQ(PHISH, GetListId(list_name)); + + EXPECT_TRUE(GetListName(BINURL, &list_name)); + EXPECT_EQ(list_name, std::string(kBinUrlList)); + EXPECT_EQ(BINURL, GetListId(list_name)); +} + +// Since the ids are saved in file, we need to make sure they don't change. +// Since only the last bit of each id is saved in file together with +// chunkids, this checks only last bit. +TEST(SafeBrowsingDbUtilTest, ListIdVerification) { + EXPECT_EQ(0, MALWARE % 2); + EXPECT_EQ(1, PHISH % 2); + EXPECT_EQ(0, BINURL % 2); +} + +TEST(SafeBrowsingDbUtilTest, StringToSBFullHashAndSBFullHashToString) { + // 31 chars plus the last \0 as full_hash. + const std::string hash_in = "12345678902234567890323456789012"; + SBFullHash hash_out = StringToSBFullHash(hash_in); + EXPECT_EQ(0x34333231U, hash_out.prefix); + EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash))); + + std::string hash_final = SBFullHashToString(hash_out); + EXPECT_EQ(hash_in, hash_final); +} + +TEST(SafeBrowsingDbUtilTest, FullHashOperators) { + const SBFullHash kHash1 = SBFullHashForString("one"); + const SBFullHash kHash2 = SBFullHashForString("two"); + + EXPECT_TRUE(SBFullHashEqual(kHash1, kHash1)); + EXPECT_TRUE(SBFullHashEqual(kHash2, kHash2)); + EXPECT_FALSE(SBFullHashEqual(kHash1, kHash2)); + EXPECT_FALSE(SBFullHashEqual(kHash2, kHash1)); + + EXPECT_FALSE(SBFullHashLess(kHash1, kHash2)); + EXPECT_TRUE(SBFullHashLess(kHash2, kHash1)); + + EXPECT_FALSE(SBFullHashLess(kHash1, kHash1)); + EXPECT_FALSE(SBFullHashLess(kHash2, kHash2)); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.cc b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.cc new file mode 100644 index 00000000000..b0a30bcd814 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.cc @@ -0,0 +1,348 @@ +// Copyright 2016 The 
Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/v4_get_hash_protocol_manager.h" + +#include <utility> + +#include "base/base64.h" +#include "base/macros.h" +#include "base/metrics/histogram_macros.h" +#include "base/timer/timer.h" +#include "net/base/load_flags.h" +#include "net/http/http_response_headers.h" +#include "net/http/http_status_code.h" +#include "net/url_request/url_fetcher.h" +#include "net/url_request/url_request_context_getter.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +// Enumerate parsing failures for histogramming purposes. DO NOT CHANGE +// THE ORDERING OF THESE VALUES. +enum ParseResultType { + // Error parsing the protocol buffer from a string. + PARSE_FROM_STRING_ERROR = 0, + + // A match in the response had an unexpected THREAT_ENTRY_TYPE. + UNEXPECTED_THREAT_ENTRY_TYPE_ERROR = 1, + + // A match in the response had an unexpected THREAT_TYPE. + UNEXPECTED_THREAT_TYPE_ERROR = 2, + + // A match in the response had an unexpected PLATFORM_TYPE. + UNEXPECTED_PLATFORM_TYPE_ERROR = 3, + + // A match in the response contained no metadata where metadata was + // expected. + NO_METADATA_ERROR = 4, + + // A match in the response contained a ThreatType that was inconsistent + // with the other matches. + INCONSISTENT_THREAT_TYPE_ERROR = 5, + + // Memory space for histograms is determined by the max. ALWAYS + // ADD NEW VALUES BEFORE THIS ONE. + PARSE_RESULT_TYPE_MAX = 6 +}; + +// Record parsing errors of a GetHash result. +void RecordParseGetHashResult(ParseResultType result_type) { + UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.ParseV4HashResult", result_type, + PARSE_RESULT_TYPE_MAX); +} + +// Record a GetHash result. 
+void RecordGetHashResult(safe_browsing::V4OperationResult result) { + UMA_HISTOGRAM_ENUMERATION( + "SafeBrowsing.GetV4HashResult", result, + safe_browsing::V4OperationResult::OPERATION_RESULT_MAX); +} + +} // namespace + +namespace safe_browsing { + +const char kUmaV4HashResponseMetricName[] = + "SafeBrowsing.GetV4HashHttpResponseOrErrorCode"; + +// The default V4GetHashProtocolManagerFactory. +class V4GetHashProtocolManagerFactoryImpl + : public V4GetHashProtocolManagerFactory { + public: + V4GetHashProtocolManagerFactoryImpl() {} + ~V4GetHashProtocolManagerFactoryImpl() override {} + V4GetHashProtocolManager* CreateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) override { + return new V4GetHashProtocolManager(request_context_getter, config); + } + + private: + DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManagerFactoryImpl); +}; + +// V4GetHashProtocolManager implementation -------------------------------- + +// static +V4GetHashProtocolManagerFactory* V4GetHashProtocolManager::factory_ = NULL; + +// static +V4GetHashProtocolManager* V4GetHashProtocolManager::Create( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) { + if (!factory_) + factory_ = new V4GetHashProtocolManagerFactoryImpl(); + return factory_->CreateProtocolManager(request_context_getter, config); +} + +void V4GetHashProtocolManager::ResetGetHashErrors() { + gethash_error_count_ = 0; + gethash_back_off_mult_ = 1; +} + +V4GetHashProtocolManager::V4GetHashProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) + : gethash_error_count_(0), + gethash_back_off_mult_(1), + next_gethash_time_(Time::FromDoubleT(0)), + config_(config), + request_context_getter_(request_context_getter), + url_fetcher_id_(0) { +} + +V4GetHashProtocolManager::~V4GetHashProtocolManager() { + // Delete in-progress SafeBrowsing requests. 
+ STLDeleteContainerPairFirstPointers(hash_requests_.begin(), + hash_requests_.end()); + hash_requests_.clear(); +} + +std::string V4GetHashProtocolManager::GetHashRequest( + const std::vector<SBPrefix>& prefixes, + const std::vector<PlatformType>& platforms, + ThreatType threat_type) { + // Build the request. Client info and client states are not added to the + // request protocol buffer. Client info is passed as params in the url. + FindFullHashesRequest req; + ThreatInfo* info = req.mutable_threat_info(); + info->add_threat_types(threat_type); + info->add_threat_entry_types(URL_EXPRESSION); + for (const PlatformType p : platforms) { + info->add_platform_types(p); + } + for (const SBPrefix& prefix : prefixes) { + std::string hash(reinterpret_cast<const char*>(&prefix), sizeof(SBPrefix)); + info->add_threat_entries()->set_hash(hash); + } + + // Serialize and Base64 encode. + std::string req_data, req_base64; + req.SerializeToString(&req_data); + base::Base64Encode(req_data, &req_base64); + + return req_base64; +} + +bool V4GetHashProtocolManager::ParseHashResponse( + const std::string& data, + std::vector<SBFullHashResult>* full_hashes, + base::TimeDelta* negative_cache_duration) { + FindFullHashesResponse response; + + if (!response.ParseFromString(data)) { + RecordParseGetHashResult(PARSE_FROM_STRING_ERROR); + return false; + } + + if (response.has_negative_cache_duration()) { + // Seconds resolution is good enough so we ignore the nanos field. + *negative_cache_duration = base::TimeDelta::FromSeconds( + response.negative_cache_duration().seconds()); + } + + if (response.has_minimum_wait_duration()) { + // Seconds resolution is good enough so we ignore the nanos field. + next_gethash_time_ = + Time::Now() + base::TimeDelta::FromSeconds( + response.minimum_wait_duration().seconds()); + } + + // We only expect one threat type per request, so we make sure + // the threat types are consistent between matches. 
+ ThreatType expected_threat_type = THREAT_TYPE_UNSPECIFIED; + + // Loop over the threat matches and fill in full_hashes. + for (const ThreatMatch& match : response.matches()) { + // Make sure the platform and threat entry type match. + if (!(match.has_threat_entry_type() && + match.threat_entry_type() == URL_EXPRESSION && match.has_threat())) { + RecordParseGetHashResult(UNEXPECTED_THREAT_ENTRY_TYPE_ERROR); + return false; + } + + if (!match.has_threat_type()) { + RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR); + return false; + } + + if (expected_threat_type == THREAT_TYPE_UNSPECIFIED) { + expected_threat_type = match.threat_type(); + } else if (match.threat_type() != expected_threat_type) { + RecordParseGetHashResult(INCONSISTENT_THREAT_TYPE_ERROR); + return false; + } + + // Fill in the full hash. + SBFullHashResult result; + result.hash = StringToSBFullHash(match.threat().hash()); + + if (match.has_cache_duration()) { + // Seconds resolution is good enough so we ignore the nanos field. + result.cache_duration = + base::TimeDelta::FromSeconds(match.cache_duration().seconds()); + } + + // Different threat types will handle the metadata differently. + if (match.threat_type() == API_ABUSE) { + if (match.has_platform_type() && + match.platform_type() == CHROME_PLATFORM) { + if (match.has_threat_entry_metadata()) { + // For API Abuse, store a list of the returned permissions. 
+ for (const ThreatEntryMetadata::MetadataEntry& m : + match.threat_entry_metadata().entries()) { + if (m.key() == "permission") { + result.metadata.api_permissions.push_back(m.value()); + } + } + } else { + RecordParseGetHashResult(NO_METADATA_ERROR); + return false; + } + } else { + RecordParseGetHashResult(UNEXPECTED_PLATFORM_TYPE_ERROR); + return false; + } + } else { + RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR); + return false; + } + + full_hashes->push_back(result); + } + return true; +} + +void V4GetHashProtocolManager::GetFullHashes( + const std::vector<SBPrefix>& prefixes, + const std::vector<PlatformType>& platforms, + ThreatType threat_type, + FullHashCallback callback) { + DCHECK(CalledOnValidThread()); + // We need to wait the minimum waiting duration, and if we are in backoff, + // we need to check if we're past the next allowed time. If we are, we can + // proceed with the request. If not, we are required to return empty results + // (i.e. treat the page as safe). 
+ if (Time::Now() <= next_gethash_time_) { + if (gethash_error_count_) { + RecordGetHashResult(V4OperationResult::BACKOFF_ERROR); + } else { + RecordGetHashResult(V4OperationResult::MIN_WAIT_DURATION_ERROR); + } + std::vector<SBFullHashResult> full_hashes; + callback.Run(full_hashes, base::TimeDelta()); + return; + } + + std::string req_base64 = GetHashRequest(prefixes, platforms, threat_type); + GURL gethash_url = GetHashUrl(req_base64); + + net::URLFetcher* fetcher = + net::URLFetcher::Create(url_fetcher_id_++, gethash_url, + net::URLFetcher::GET, this) + .release(); + hash_requests_[fetcher] = callback; + + fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE); + fetcher->SetRequestContext(request_context_getter_.get()); + fetcher->Start(); +} + +void V4GetHashProtocolManager::GetFullHashesWithApis( + const std::vector<SBPrefix>& prefixes, + FullHashCallback callback) { + std::vector<PlatformType> platform = {CHROME_PLATFORM}; + GetFullHashes(prefixes, platform, API_ABUSE, callback); +} + +// net::URLFetcherDelegate implementation ---------------------------------- + +// SafeBrowsing request responses are handled here. +void V4GetHashProtocolManager::OnURLFetchComplete( + const net::URLFetcher* source) { + DCHECK(CalledOnValidThread()); + + HashRequests::iterator it = hash_requests_.find(source); + DCHECK(it != hash_requests_.end()) << "Request not found"; + + // FindFullHashes response. + // Reset the scoped pointer so the fetcher gets destroyed properly. 
+ scoped_ptr<const net::URLFetcher> fetcher(it->first); + + int response_code = source->GetResponseCode(); + net::URLRequestStatus status = source->GetStatus(); + V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode( + kUmaV4HashResponseMetricName, status, response_code); + + const FullHashCallback& callback = it->second; + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta negative_cache_duration; + if (status.is_success() && response_code == net::HTTP_OK) { + RecordGetHashResult(V4OperationResult::STATUS_200); + ResetGetHashErrors(); + std::string data; + source->GetResponseAsString(&data); + if (!ParseHashResponse(data, &full_hashes, &negative_cache_duration)) { + full_hashes.clear(); + RecordGetHashResult(V4OperationResult::PARSE_ERROR); + } + } else { + HandleGetHashError(Time::Now()); + + DVLOG(1) << "SafeBrowsing GetEncodedFullHashes request for: " + << source->GetURL() << " failed with error: " << status.error() + << " and response code: " << response_code; + + if (status.status() == net::URLRequestStatus::FAILED) { + RecordGetHashResult(V4OperationResult::NETWORK_ERROR); + } else { + RecordGetHashResult(V4OperationResult::HTTP_ERROR); + } + } + + // Invoke the callback with full_hashes, even if there was a parse error or + // an error response code (in which case full_hashes will be empty). The + // caller can't be blocked indefinitely. 
+ callback.Run(full_hashes, negative_cache_duration); + + hash_requests_.erase(it); +} + +void V4GetHashProtocolManager::HandleGetHashError(const Time& now) { + DCHECK(CalledOnValidThread()); + base::TimeDelta next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &gethash_error_count_, &gethash_back_off_mult_); + next_gethash_time_ = now + next; +} + +GURL V4GetHashProtocolManager::GetHashUrl(const std::string& req_base64) const { + return V4ProtocolManagerUtil::GetRequestUrl(req_base64, "encodedFullHashes", + config_); +} + + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.h b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.h new file mode 100644 index 00000000000..407a7b6e46d --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager.h @@ -0,0 +1,187 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_ + +// A class that implements Chrome's interface with the SafeBrowsing V4 protocol. +// +// The V4GetHashProtocolManager handles formatting and making requests of, and +// handling responses from, Google's SafeBrowsing servers. The purpose of this +// class is to get full hash matches from the SB server for the given set of +// hash prefixes. 
+ +#include <string> +#include <vector> + +#include "base/gtest_prod_util.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "base/threading/non_thread_safe.h" +#include "base/time/time.h" +#include "base/timer/timer.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "components/safe_browsing_db/util.h" +#include "components/safe_browsing_db/v4_protocol_manager_util.h" +#include "net/url_request/url_fetcher_delegate.h" +#include "url/gurl.h" + +namespace net { +class URLFetcher; +class URLRequestContextGetter; +} // namespace net + +namespace safe_browsing { + +class V4GetHashProtocolManagerFactory; + +class V4GetHashProtocolManager : public net::URLFetcherDelegate, + public base::NonThreadSafe { + public: + // FullHashCallback is invoked when GetFullHashes completes. + // Parameters: + // - The vector of full hash results. If empty, indicates that there + // were no matches, and that the resource is safe. An empty vector is + // also passed when the request fails, the response cannot be parsed, or + // the request is skipped during backoff / minimum wait. + // - The negative cache duration of the result. + typedef base::Callback<void(const std::vector<SBFullHashResult>&, + const base::TimeDelta&)> + FullHashCallback; + + ~V4GetHashProtocolManager() override; + + // Makes the passed |factory| the factory used to instantiate + // a V4GetHashProtocolManager. Useful for tests. + static void RegisterFactory(V4GetHashProtocolManagerFactory* factory) { + factory_ = factory; + } + + // Create an instance of the safe browsing v4 protocol manager. + static V4GetHashProtocolManager* Create( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config); + + // net::URLFetcherDelegate interface. + void OnURLFetchComplete(const net::URLFetcher* source) override; + + // Retrieve the full hash for a set of prefixes, and invoke the callback + // argument when the results are retrieved. The callback may be invoked + // synchronously. 
+ virtual void GetFullHashes(const std::vector<SBPrefix>& prefixes, + const std::vector<PlatformType>& platforms, + ThreatType threat_type, + FullHashCallback callback); + + // Retrieve the full hash and API metadata for a set of prefixes, and invoke + // the callback argument when the results are retrieved. The callback may be + // invoked synchronously. + virtual void GetFullHashesWithApis(const std::vector<SBPrefix>& prefixes, + FullHashCallback callback); + + protected: + // Constructs a V4GetHashProtocolManager that issues + // network requests using |request_context_getter|. + V4GetHashProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config); + + private: + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashRequest); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponse); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseWrongThreatEntryType); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseSocialEngineeringThreatType); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseNonPermissionMetadata); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseInconsistentThreatTypes); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashErrorHandlingOK); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashErrorHandlingNetwork); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashErrorHandlingResponseCode); + friend class V4GetHashProtocolManagerFactoryImpl; + + GURL GetHashUrl(const std::string& request_base64) const; + + // Fills a FindFullHashesRequest protocol buffer for a request. + // Returns the serialized and base 64 encoded request as a string. 
+ std::string GetHashRequest(const std::vector<SBPrefix>& prefixes, + const std::vector<PlatformType>& platforms, + ThreatType threat_type); + + // Parses a FindFullHashesResponse protocol buffer and fills the results in + // |full_hashes| and |negative_cache_duration|. |data_base64| is a serialized + // FindFullHashesResponse protocol buffer (the implementation parses it + // directly, without base64 decoding). |negative_cache_duration| is the duration + // to cache the response for entities that did not match the threat list. + // Returns true if parsing is successful, false otherwise. + bool ParseHashResponse(const std::string& data_base64, + std::vector<SBFullHashResult>* full_hashes, + base::TimeDelta* negative_cache_duration); + + // Resets the gethash error counter and multiplier. + void ResetGetHashErrors(); + + // Updates internal state for each GetHash response error, assuming that + // the current time is |now|. + void HandleGetHashError(const base::Time& now); + + private: + // Maps each in-flight GetHash URLFetcher to the callback to run when that + // fetch completes. + typedef base::hash_map<const net::URLFetcher*, FullHashCallback> HashRequests; + + // The factory that controls the creation of V4GetHashProtocolManager. + // This is used by tests. + static V4GetHashProtocolManagerFactory* factory_; + + // Current active request (in case we need to cancel) for updates or chunks + // from the SafeBrowsing service. We can only have one of these outstanding + // at any given time unlike GetHash requests, which are tracked separately. + scoped_ptr<net::URLFetcher> request_; + + // The number of HTTP response errors since the last successful HTTP + // response, used for request backoff timing. + size_t gethash_error_count_; + + // Multiplier for the backoff error after the second. + size_t gethash_back_off_mult_; + + HashRequests hash_requests_; + + // For v4, the next gethash time is set to the backoff time if the last + // response was an error, or the minimum wait time if the last response was + // successful. 
+ base::Time next_gethash_time_; + + // The config of the client making Pver4 requests. + const V4ProtocolConfig config_; + + // The context we use to issue network requests. + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; + + // ID for URLFetchers for testing. + int url_fetcher_id_; + + DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManager); +}; + +// Interface of a factory to create V4GetHashProtocolManager. Useful for tests. +class V4GetHashProtocolManagerFactory { + public: + V4GetHashProtocolManagerFactory() {} + virtual ~V4GetHashProtocolManagerFactory() {} + virtual V4GetHashProtocolManager* CreateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManagerFactory); +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_V4_GET_HASH_PROTOCOL_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager_unittest.cc b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager_unittest.cc new file mode 100644 index 00000000000..ba766378d1d --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_get_hash_protocol_manager_unittest.cc @@ -0,0 +1,354 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <vector> + +#include "base/base64.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/stringprintf.h" +#include "base/time/time.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "components/safe_browsing_db/testing_util.h" +#include "components/safe_browsing_db/util.h" +#include "components/safe_browsing_db/v4_get_hash_protocol_manager.h" +#include "net/base/escape.h" +#include "net/base/load_flags.h" +#include "net/base/net_errors.h" +#include "net/url_request/test_url_fetcher_factory.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +const char kClient[] = "unittest"; +const char kAppVer[] = "1.0"; +const char kKeyParam[] = "test_key_param"; + +} // namespace + +namespace safe_browsing { + +class SafeBrowsingV4GetHashProtocolManagerTest : public testing::Test { + protected: + scoped_ptr<V4GetHashProtocolManager> CreateProtocolManager() { + V4ProtocolConfig config; + config.client_name = kClient; + config.version = kAppVer; + config.key_param = kKeyParam; + return scoped_ptr<V4GetHashProtocolManager>( + V4GetHashProtocolManager::Create(NULL, config)); + } + + std::string GetStockV4HashResponse() { + FindFullHashesResponse res; + res.mutable_negative_cache_duration()->set_seconds(600); + ThreatMatch* m = res.add_matches(); + m->set_threat_type(API_ABUSE); + m->set_platform_type(CHROME_PLATFORM); + m->set_threat_entry_type(URL_EXPRESSION); + m->mutable_cache_duration()->set_seconds(300); + m->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Everything's shiny, Cap'n."))); + ThreatEntryMetadata::MetadataEntry* e = + m->mutable_threat_entry_metadata()->add_entries(); + e->set_key("permission"); + e->set_value("NOTIFICATIONS"); + + // Serialize. 
+ std::string res_data; + res.SerializeToString(&res_data); + + return res_data; + } +}; + +void ValidateGetV4HashResults( + const std::vector<SBFullHashResult>& expected_full_hashes, + const base::TimeDelta& expected_cache_duration, + const std::vector<SBFullHashResult>& full_hashes, + const base::TimeDelta& cache_duration) { + EXPECT_EQ(expected_cache_duration, cache_duration); + ASSERT_EQ(expected_full_hashes.size(), full_hashes.size()); + + for (unsigned int i = 0; i < expected_full_hashes.size(); ++i) { + const SBFullHashResult& expected = expected_full_hashes[i]; + const SBFullHashResult& actual = full_hashes[i]; + EXPECT_TRUE(SBFullHashEqual(expected.hash, actual.hash)); + EXPECT_EQ(expected.metadata, actual.metadata); + EXPECT_EQ(expected.cache_duration, actual.cache_duration); + } +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashErrorHandlingNetwork) { + net::TestURLFetcherFactory factory; + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + std::vector<SBPrefix> prefixes; + std::vector<SBFullHashResult> expected_full_hashes; + base::TimeDelta expected_cache_duration; + + pm->GetFullHashesWithApis( + prefixes, base::Bind(&ValidateGetV4HashResults, expected_full_hashes, + expected_cache_duration)); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + // Failed request status should result in error, even though the response + // code is 200; the callback is expected to receive empty results. + fetcher->set_status(net::URLRequestStatus(net::URLRequestStatus::FAILED, + net::ERR_CONNECTION_RESET)); + fetcher->set_response_code(200); + fetcher->SetResponseString(GetStockV4HashResponse()); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // Should have recorded one error, but back off multiplier is unchanged. 
+ EXPECT_EQ(1ul, pm->gethash_error_count_); + EXPECT_EQ(1ul, pm->gethash_back_off_mult_); +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestGetHashErrorHandlingResponseCode) { + net::TestURLFetcherFactory factory; + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + std::vector<SBPrefix> prefixes; + std::vector<SBFullHashResult> expected_full_hashes; + base::TimeDelta expected_cache_duration; + + pm->GetFullHashesWithApis( + prefixes, base::Bind(&ValidateGetV4HashResults, expected_full_hashes, + expected_cache_duration)); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + // Response code of anything other than 200 should result in error. + fetcher->set_response_code(204); + fetcher->SetResponseString(GetStockV4HashResponse()); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // Should have recorded one error, but back off multiplier is unchanged. + EXPECT_EQ(1ul, pm->gethash_error_count_); + EXPECT_EQ(1ul, pm->gethash_back_off_mult_); +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, TestGetHashErrorHandlingOK) { + net::TestURLFetcherFactory factory; + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + std::vector<SBPrefix> prefixes; + std::vector<SBFullHashResult> expected_full_hashes; + SBFullHashResult hash_result; + hash_result.hash = SBFullHashForString("Everything's shiny, Cap'n."); + hash_result.metadata.api_permissions.push_back("NOTIFICATIONS"); + hash_result.cache_duration = base::TimeDelta::FromSeconds(300); + expected_full_hashes.push_back(hash_result); + base::TimeDelta expected_cache_duration = base::TimeDelta::FromSeconds(600); + + pm->GetFullHashesWithApis( + prefixes, base::Bind(&ValidateGetV4HashResults, expected_full_hashes, + expected_cache_duration)); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + 
fetcher->set_response_code(200); + fetcher->SetResponseString(GetStockV4HashResponse()); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // No error, back off multiplier is unchanged. + EXPECT_EQ(0ul, pm->gethash_error_count_); + EXPECT_EQ(1ul, pm->gethash_back_off_mult_); +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, TestGetHashRequest) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesRequest req; + ThreatInfo* info = req.mutable_threat_info(); + info->add_threat_types(API_ABUSE); + info->add_platform_types(CHROME_PLATFORM); + info->add_threat_entry_types(URL_EXPRESSION); + + SBPrefix one = 1u; + SBPrefix two = 2u; + SBPrefix three = 3u; + std::string hash(reinterpret_cast<const char*>(&one), sizeof(SBPrefix)); + info->add_threat_entries()->set_hash(hash); + hash.clear(); + hash.append(reinterpret_cast<const char*>(&two), sizeof(SBPrefix)); + info->add_threat_entries()->set_hash(hash); + hash.clear(); + hash.append(reinterpret_cast<const char*>(&three), sizeof(SBPrefix)); + info->add_threat_entries()->set_hash(hash); + + // Serialize and Base64 encode. 
+ std::string req_data, req_base64; + req.SerializeToString(&req_data); + base::Base64Encode(req_data, &req_base64); + + std::vector<PlatformType> platform; + platform.push_back(CHROME_PLATFORM); + std::vector<SBPrefix> prefixes; + prefixes.push_back(one); + prefixes.push_back(two); + prefixes.push_back(three); + EXPECT_EQ(req_base64, pm->GetHashRequest(prefixes, platform, API_ABUSE)); +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, TestParseHashResponse) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesResponse res; + res.mutable_negative_cache_duration()->set_seconds(600); + res.mutable_minimum_wait_duration()->set_seconds(400); + ThreatMatch* m = res.add_matches(); + m->set_threat_type(API_ABUSE); + m->set_platform_type(CHROME_PLATFORM); + m->set_threat_entry_type(URL_EXPRESSION); + m->mutable_cache_duration()->set_seconds(300); + m->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Everything's shiny, Cap'n."))); + ThreatEntryMetadata::MetadataEntry* e = + m->mutable_threat_entry_metadata()->add_entries(); + e->set_key("permission"); + e->set_value("NOTIFICATIONS"); + + // Serialize. + std::string res_data; + res.SerializeToString(&res_data); + + Time now = Time::Now(); + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta cache_lifetime; + EXPECT_TRUE(pm->ParseHashResponse(res_data, &full_hashes, &cache_lifetime)); + + EXPECT_EQ(base::TimeDelta::FromSeconds(600), cache_lifetime); + EXPECT_EQ(1ul, full_hashes.size()); + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("Everything's shiny, Cap'n."), + full_hashes[0].hash)); + EXPECT_EQ(1ul, full_hashes[0].metadata.api_permissions.size()); + EXPECT_EQ("NOTIFICATIONS", full_hashes[0].metadata.api_permissions[0]); + EXPECT_EQ(base::TimeDelta::FromSeconds(300), full_hashes[0].cache_duration); + EXPECT_LE(now + base::TimeDelta::FromSeconds(400), pm->next_gethash_time_); +} + +// Adds an entry with an unexpected ThreatEntryType; parsing is expected to fail. 
+TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseWrongThreatEntryType) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesResponse res; + res.mutable_negative_cache_duration()->set_seconds(600); + res.add_matches()->set_threat_entry_type(BINARY_DIGEST); + + // Serialize. + std::string res_data; + res.SerializeToString(&res_data); + + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta cache_lifetime; + EXPECT_FALSE(pm->ParseHashResponse(res_data, &full_hashes, &cache_lifetime)); + + EXPECT_EQ(base::TimeDelta::FromSeconds(600), cache_lifetime); + // There should be no hash results. + EXPECT_EQ(0ul, full_hashes.size()); +} + +// Adds an entry with a SOCIAL_ENGINEERING threat type; parsing is expected to +// fail. +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseSocialEngineeringThreatType) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesResponse res; + res.mutable_negative_cache_duration()->set_seconds(600); + ThreatMatch* m = res.add_matches(); + m->set_threat_type(SOCIAL_ENGINEERING); + m->set_platform_type(CHROME_PLATFORM); + m->set_threat_entry_type(URL_EXPRESSION); + m->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Not to fret."))); + ThreatEntryMetadata::MetadataEntry* e = + m->mutable_threat_entry_metadata()->add_entries(); + e->set_key("permission"); + e->set_value("IGNORED"); + + // Serialize. + std::string res_data; + res.SerializeToString(&res_data); + + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta cache_lifetime; + EXPECT_FALSE(pm->ParseHashResponse(res_data, &full_hashes, &cache_lifetime)); + + EXPECT_EQ(base::TimeDelta::FromSeconds(600), cache_lifetime); + EXPECT_EQ(0ul, full_hashes.size()); +} + +// Adds metadata with a key that is not "permission"; the match is still +// returned, but without any API permissions. 
+TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseNonPermissionMetadata) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesResponse res; + res.mutable_negative_cache_duration()->set_seconds(600); + ThreatMatch* m = res.add_matches(); + m->set_threat_type(API_ABUSE); + m->set_platform_type(CHROME_PLATFORM); + m->set_threat_entry_type(URL_EXPRESSION); + m->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Not to fret."))); + ThreatEntryMetadata::MetadataEntry* e = + m->mutable_threat_entry_metadata()->add_entries(); + e->set_key("notpermission"); + e->set_value("NOTGEOLOCATION"); + + // Serialize. + std::string res_data; + res.SerializeToString(&res_data); + + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta cache_lifetime; + EXPECT_TRUE(pm->ParseHashResponse(res_data, &full_hashes, &cache_lifetime)); + + EXPECT_EQ(base::TimeDelta::FromSeconds(600), cache_lifetime); + EXPECT_EQ(1ul, full_hashes.size()); + + EXPECT_TRUE(SBFullHashEqual(SBFullHashForString("Not to fret."), + full_hashes[0].hash)); + // Metadata should be empty. 
+ EXPECT_EQ(0ul, full_hashes[0].metadata.api_permissions.size()); + EXPECT_EQ(base::TimeDelta::FromSeconds(0), full_hashes[0].cache_duration); +} + +TEST_F(SafeBrowsingV4GetHashProtocolManagerTest, + TestParseHashResponseInconsistentThreatTypes) { + scoped_ptr<V4GetHashProtocolManager> pm(CreateProtocolManager()); + + FindFullHashesResponse res; + ThreatMatch* m1 = res.add_matches(); + m1->set_threat_type(API_ABUSE); + m1->set_platform_type(CHROME_PLATFORM); + m1->set_threat_entry_type(URL_EXPRESSION); + m1->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Everything's shiny, Cap'n."))); + m1->mutable_threat_entry_metadata()->add_entries(); + ThreatMatch* m2 = res.add_matches(); + m2->set_threat_type(MALWARE_THREAT); + m2->set_threat_entry_type(URL_EXPRESSION); + m2->mutable_threat()->set_hash( + SBFullHashToString(SBFullHashForString("Not to fret."))); + + // Serialize. The second match has a different threat type than the first, + // so parsing is expected to fail. + std::string res_data; + res.SerializeToString(&res_data); + + std::vector<SBFullHashResult> full_hashes; + base::TimeDelta cache_lifetime; + EXPECT_FALSE(pm->ParseHashResponse(res_data, &full_hashes, &cache_lifetime)); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_local_database_manager.cc b/chromium/components/safe_browsing_db/v4_local_database_manager.cc new file mode 100644 index 00000000000..bc6c2c5d78f --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_local_database_manager.cc @@ -0,0 +1,172 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/safe_browsing_db/v4_local_database_manager.h" + +#include <vector> + +#include "content/public/browser/browser_thread.h" + +using content::BrowserThread; + +namespace safe_browsing { + +V4LocalDatabaseManager::V4LocalDatabaseManager() : enabled_(false) {} + +V4LocalDatabaseManager::~V4LocalDatabaseManager() { + // The manager must have been stopped (or never started) before destruction. + DCHECK(!enabled_); +} + +bool V4LocalDatabaseManager::IsSupported() const { + return true; +} + +safe_browsing::ThreatSource V4LocalDatabaseManager::GetThreatSource() const { + return safe_browsing::ThreatSource::LOCAL_PVER4; +} + +bool V4LocalDatabaseManager::ChecksAreAlwaysAsync() const { + return false; +} + +bool V4LocalDatabaseManager::CanCheckResourceType( + content::ResourceType resource_type) const { + // We check all types since most checks are fast. + return true; +} + +bool V4LocalDatabaseManager::CanCheckUrl(const GURL& url) const { + return url.SchemeIs(url::kHttpsScheme) || url.SchemeIs(url::kHttpScheme) || + url.SchemeIs(url::kFtpScheme); +} + +bool V4LocalDatabaseManager::IsDownloadProtectionEnabled() const { + // TODO(vakh): Investigate the possibility of using a command line switch for + // this instead. + return true; +} + +bool V4LocalDatabaseManager::CheckDownloadUrl( + const std::vector<GURL>& url_chain, + Client* client) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + // TODO(vakh): Implement this skeleton. + return true; +} + +bool V4LocalDatabaseManager::CheckExtensionIDs( + const std::set<std::string>& extension_ids, + Client* client) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::MatchMalwareIP(const std::string& ip_address) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return false; +} + +bool V4LocalDatabaseManager::MatchCsdWhitelistUrl(const GURL& url) { + // TODO(vakh): Implement this skeleton. 
+ DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::MatchDownloadWhitelistUrl(const GURL& url) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::MatchDownloadWhitelistString( + const std::string& str) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::MatchInclusionWhitelistUrl(const GURL& url) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::MatchModuleWhitelistString( + const std::string& str) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::CheckResourceUrl(const GURL& url, Client* client) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::IsMalwareKillSwitchOn() { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::IsCsdWhitelistKillSwitchOn() { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + return true; +} + +bool V4LocalDatabaseManager::CheckBrowseUrl(const GURL& url, Client* client) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + if (!enabled_) + return true; + + // Don't defer the resource load. + return true; +} + +void V4LocalDatabaseManager::CancelCheck(Client* client) { + // TODO(vakh): Implement this skeleton. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + DCHECK(enabled_); +} + +void V4LocalDatabaseManager::StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) { + // TODO(vakh): Implement this skeleton. 
+ // Like every other entry point of this class, starting must happen on the IO + // thread: it flips |enabled_| and binds an unretained pointer to |this|. + DCHECK_CURRENTLY_ON(BrowserThread::IO); + VLOG(1) << "V4LocalDatabaseManager starting"; + SafeBrowsingDatabaseManager::StartOnIOThread(request_context_getter, config); + + // NOTE(review): base::Unretained(this) presumably relies on + // |v4_update_protocol_manager_| being reset in StopOnIOThread() before this + // object goes away - confirm the callback cannot outlive |this|. + V4UpdateCallback callback = base::Bind( + &V4LocalDatabaseManager::UpdateRequestCompleted, base::Unretained(this)); + v4_update_protocol_manager_ = V4UpdateProtocolManager::Create( + request_context_getter, config, current_list_states_, callback); + + enabled_ = true; +} + +void V4LocalDatabaseManager::StopOnIOThread(bool shutdown) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + DVLOG(1) << "V4LocalDatabaseManager stopping"; + + // Delete the V4UpdateProtocolManager. + // This cancels any in-flight update request. + if (v4_update_protocol_manager_.get()) { + v4_update_protocol_manager_.reset(); + } + + enabled_ = false; + SafeBrowsingDatabaseManager::StopOnIOThread(shutdown); +} + +void V4LocalDatabaseManager::UpdateRequestCompleted( + const std::vector<ListUpdateResponse>& responses) { + // TODO(vakh): Updates downloaded. Store them on disk and record new state. +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_local_database_manager.h b/chromium/components/safe_browsing_db/v4_local_database_manager.h new file mode 100644 index 00000000000..bf1a7d03376 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_local_database_manager.h @@ -0,0 +1,81 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_LOCAL_DATABASE_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_V4_LOCAL_DATABASE_MANAGER_H_ + +// A class that provides the interface between the SafeBrowsing protocol manager +// and database that holds the downloaded updates. 
+ +#include "components/safe_browsing_db/database_manager.h" +#include "components/safe_browsing_db/hit_report.h" +#include "components/safe_browsing_db/v4_protocol_manager_util.h" +#include "components/safe_browsing_db/v4_update_protocol_manager.h" +#include "url/gurl.h" + +using content::ResourceType; + +namespace safe_browsing { + +// Manages the local, on-disk database of updates downloaded from the +// SafeBrowsing service and interfaces with the protocol manager. +class V4LocalDatabaseManager : public SafeBrowsingDatabaseManager { + public: + // Construct V4LocalDatabaseManager. + // Must be initialized by calling StartOnIOThread() before using. + V4LocalDatabaseManager(); + + // + // SafeBrowsingDatabaseManager implementation + // + + bool IsSupported() const override; + safe_browsing::ThreatSource GetThreatSource() const override; + bool ChecksAreAlwaysAsync() const override; + bool CanCheckResourceType(content::ResourceType resource_type) const override; + bool CanCheckUrl(const GURL& url) const override; + bool IsDownloadProtectionEnabled() const override; + bool CheckBrowseUrl(const GURL& url, Client* client) override; + void CancelCheck(Client* client) override; + void StartOnIOThread( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config) override; + void StopOnIOThread(bool shutdown) override; + bool CheckDownloadUrl(const std::vector<GURL>& url_chain, + Client* client) override; + bool CheckExtensionIDs(const std::set<std::string>& extension_ids, + Client* client) override; + bool MatchCsdWhitelistUrl(const GURL& url) override; + bool MatchMalwareIP(const std::string& ip_address) override; + bool MatchDownloadWhitelistUrl(const GURL& url) override; + bool MatchDownloadWhitelistString(const std::string& str) override; + bool MatchInclusionWhitelistUrl(const GURL& url) override; + bool MatchModuleWhitelistString(const std::string& str) override; + bool CheckResourceUrl(const GURL& url, Client* client) override; + bool 
IsMalwareKillSwitchOn() override; + bool IsCsdWhitelistKillSwitchOn() override; + + private: + ~V4LocalDatabaseManager() override; + + // The callback called each time the protocol manager downloads updates + // successfully. + void UpdateRequestCompleted(const std::vector<ListUpdateResponse>& responses); + + bool enabled_; + + // Stores the current status of the lists to download from the SafeBrowsing + // servers. + base::hash_map<UpdateListIdentifier, std::string> current_list_states_; + + // The protocol manager that downloads the hash prefix updates. + scoped_ptr<V4UpdateProtocolManager> v4_update_protocol_manager_; + + friend class base::RefCountedThreadSafe<V4LocalDatabaseManager>; + DISALLOW_COPY_AND_ASSIGN(V4LocalDatabaseManager); +}; // class V4LocalDatabaseManager + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_V4_LOCAL_DATABASE_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/v4_protocol_manager_util.cc b/chromium/components/safe_browsing_db/v4_protocol_manager_util.cc new file mode 100644 index 00000000000..ec2de4cc672 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_protocol_manager_util.cc @@ -0,0 +1,110 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/safe_browsing_db/v4_protocol_manager_util.h" + +#include "base/base64.h" +#include "base/metrics/sparse_histogram.h" +#include "base/rand_util.h" +#include "base/strings/stringprintf.h" +#include "net/base/escape.h" + +using base::Time; +using base::TimeDelta; + +namespace safe_browsing { + +// The Safe Browsing V4 server URL prefix. 
+const char kSbV4UrlPrefix[] = "https://safebrowsing.googleapis.com/v4"; + +bool UpdateListIdentifier::operator==(const UpdateListIdentifier& other) const { + return platform_type == other.platform_type && + threat_entry_type == other.threat_entry_type && + threat_type == other.threat_type; +} + +bool UpdateListIdentifier::operator!=(const UpdateListIdentifier& other) const { + return !operator==(other); +} + +size_t UpdateListIdentifier::hash() const { + std::size_t first = std::hash<unsigned int>()(platform_type); + std::size_t second = std::hash<unsigned int>()(threat_entry_type); + std::size_t third = std::hash<unsigned int>()(threat_type); + + std::size_t interim = base::HashInts(first, second); + return base::HashInts(interim, third); +} + +V4ProtocolConfig::V4ProtocolConfig() : disable_auto_update(false) {} + +V4ProtocolConfig::V4ProtocolConfig(const V4ProtocolConfig& other) = default; + +V4ProtocolConfig::~V4ProtocolConfig() {} + +// static +// Backoff interval is MIN(((2^(n-1))*15 minutes) * (RAND + 1), 24 hours) where +// n is the number of consecutive errors. +base::TimeDelta V4ProtocolManagerUtil::GetNextBackOffInterval( + size_t* error_count, + size_t* multiplier) { + DCHECK(multiplier && error_count); + (*error_count)++; + if (*error_count > 1 && *error_count < 9) { + // With error count 9 and above we will hit the 24 hour max interval. + // Cap the multiplier here to prevent integer overflow errors. + *multiplier *= 2; + } + base::TimeDelta next = + base::TimeDelta::FromMinutes(*multiplier * (1 + base::RandDouble()) * 15); + + base::TimeDelta day = base::TimeDelta::FromHours(24); + + if (next < day) + return next; + else + return day; +} + +// static +void V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode( + const char* metric_name, + const net::URLRequestStatus& status, + int response_code) { + UMA_HISTOGRAM_SPARSE_SLOWLY( + metric_name, status.is_success() ? 
response_code : status.error()); +} + +// static +// The API hash call uses the pver4 Safe Browsing server. +GURL V4ProtocolManagerUtil::GetRequestUrl(const std::string& request_base64, + const std::string& method_name, + const V4ProtocolConfig& config) { + std::string url = + ComposeUrl(kSbV4UrlPrefix, method_name, request_base64, + config.client_name, config.version, config.key_param); + return GURL(url); +} + +// static +std::string V4ProtocolManagerUtil::ComposeUrl(const std::string& prefix, + const std::string& method, + const std::string& request_base64, + const std::string& client_id, + const std::string& version, + const std::string& key_param) { + DCHECK(!prefix.empty() && !method.empty() && !client_id.empty() && + !version.empty()); + std::string url = + base::StringPrintf("%s/%s/%s?alt=proto&client_id=%s&client_version=%s", + prefix.c_str(), method.c_str(), request_base64.c_str(), + client_id.c_str(), version.c_str()); + if (!key_param.empty()) { + base::StringAppendF(&url, "&key=%s", + net::EscapeQueryParamValue(key_param, true).c_str()); + } + return url; +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_protocol_manager_util.h b/chromium/components/safe_browsing_db/v4_protocol_manager_util.h new file mode 100644 index 00000000000..0d552675498 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_protocol_manager_util.h @@ -0,0 +1,142 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_ +#define COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_ + +// A class that implements the stateless methods used by the GetHashUpdate and +// GetFullHash stubby calls made by Chrome using the SafeBrowsing V4 protocol. 
+ +#include <string> + +#include "base/gtest_prod_util.h" +#include "base/hash.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "net/url_request/url_request_status.h" +#include "url/gurl.h" + +namespace safe_browsing { +// Config passed to the constructor of a V4 protocol manager. +struct V4ProtocolConfig { + // The safe browsing client name sent in each request. + std::string client_name; + + // Current product version sent in each request. + std::string version; + + // The Google API key. + std::string key_param; + + // Disable auto-updates using a command line switch? + bool disable_auto_update; + + V4ProtocolConfig(); + V4ProtocolConfig(const V4ProtocolConfig& other); + ~V4ProtocolConfig(); +}; + +// The information required to uniquely identify each list the client is +// interested in maintaining and downloading from the SafeBrowsing servers. +// For example, for digests of Malware binaries on Windows: +// platform_type = WINDOWS, +// threat_entry_type = BINARY_DIGEST, +// threat_type = MALWARE +struct UpdateListIdentifier { + PlatformType platform_type; + ThreatEntryType threat_entry_type; + ThreatType threat_type; + + bool operator==(const UpdateListIdentifier& other) const; + bool operator!=(const UpdateListIdentifier& other) const; + size_t hash() const; +}; + +// Enumerate failures for histogramming purposes. DO NOT CHANGE THE +// ORDERING OF THESE VALUES. +enum V4OperationResult { + // 200 response code means that the server recognized the request. + STATUS_200 = 0, + + // Subset of successful responses where the response body wasn't parsable. + PARSE_ERROR = 1, + + // Operation request failed (network error). + NETWORK_ERROR = 2, + + // Operation request returned HTTP result code other than 200. + HTTP_ERROR = 3, + + // Operation attempted during error backoff, no request sent. + BACKOFF_ERROR = 4, + + // Operation attempted before min wait duration elapsed, no request sent. 
+ MIN_WAIT_DURATION_ERROR = 5, + + // Identical operation already pending. + ALREADY_PENDING_ERROR = 6, + + // Memory space for histograms is determined by the max. ALWAYS + // ADD NEW VALUES BEFORE THIS ONE. + OPERATION_RESULT_MAX = 7 +}; + +// A class that provides static methods related to the Pver4 protocol. +class V4ProtocolManagerUtil { + public: + // Record HTTP response code when there's no error in fetching an HTTP + // request, and the error code, when there is. + // |metric_name| is the name of the UMA metric to record the response code or + // error code against, |status| represents the status of the HTTP request, and + // |response code| represents the HTTP response code received from the server. + static void RecordHttpResponseOrErrorCode(const char* metric_name, + const net::URLRequestStatus& status, + int response_code); + + // Generates a Pver4 request URL. + // |request_base64| is the serialized request protocol buffer encoded in + // base 64. + // |method_name| is the name of the method to call, as specified in the proto, + // |config| is an instance of V4ProtocolConfig that stores the client config. + static GURL GetRequestUrl(const std::string& request_base64, + const std::string& method_name, + const V4ProtocolConfig& config); + + // Worker function for calculating the backoff times. + // |multiplier| is doubled for each consecutive error after the + // first, and |error_count| is incremented with each call. + static base::TimeDelta GetNextBackOffInterval(size_t* error_count, + size_t* multiplier); + + private: + V4ProtocolManagerUtil(){}; + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4ProtocolManagerUtilTest, + TestBackOffLogic); + FRIEND_TEST_ALL_PREFIXES(SafeBrowsingV4ProtocolManagerUtilTest, + TestGetRequestUrl); + + // Composes a URL using |prefix|, |method| (e.g.: encodedFullHashes). + // |request_base64|, |client_id|, |version| and |key_param|. |prefix| + // should contain the entire url prefix including scheme, host and path. 
+ static std::string ComposeUrl(const std::string& prefix, + const std::string& method, + const std::string& request_base64, + const std::string& client_id, + const std::string& version, + const std::string& key_param); + + DISALLOW_COPY_AND_ASSIGN(V4ProtocolManagerUtil); +}; + +} // namespace safe_browsing + +namespace std { +template <> +struct hash<safe_browsing::UpdateListIdentifier> { + std::size_t operator()(const safe_browsing::UpdateListIdentifier& s) const { + return s.hash(); + } +}; +} + +#endif // COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_ diff --git a/chromium/components/safe_browsing_db/v4_protocol_manager_util_unittest.cc b/chromium/components/safe_browsing_db/v4_protocol_manager_util_unittest.cc new file mode 100644 index 00000000000..fd128516d64 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_protocol_manager_util_unittest.cc @@ -0,0 +1,122 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <vector> + +#include "base/base64.h" +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "components/safe_browsing_db/v4_protocol_manager_util.h" +#include "net/base/escape.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +const char kClient[] = "unittest"; +const char kAppVer[] = "1.0"; +const char kKeyParam[] = "test_key_param"; + +} // namespace + +namespace safe_browsing { + +class SafeBrowsingV4ProtocolManagerUtilTest : public testing::Test { + protected: + void PopulateV4ProtocolConfig(V4ProtocolConfig* config) { + config->client_name = kClient; + config->version = kAppVer; + config->key_param = kKeyParam; + } +}; + +TEST_F(SafeBrowsingV4ProtocolManagerUtilTest, TestBackOffLogic) { + size_t error_count = 0, back_off_multiplier = 1; + + // 1 error. 
+ base::TimeDelta next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(1U, error_count); + EXPECT_EQ(1U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(15), next); + EXPECT_GE(TimeDelta::FromMinutes(30), next); + + // 2 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(2U, error_count); + EXPECT_EQ(2U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(30), next); + EXPECT_GE(TimeDelta::FromMinutes(60), next); + + // 3 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(3U, error_count); + EXPECT_EQ(4U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(60), next); + EXPECT_GE(TimeDelta::FromMinutes(120), next); + + // 4 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(4U, error_count); + EXPECT_EQ(8U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(120), next); + EXPECT_GE(TimeDelta::FromMinutes(240), next); + + // 5 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(5U, error_count); + EXPECT_EQ(16U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(240), next); + EXPECT_GE(TimeDelta::FromMinutes(480), next); + + // 6 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(6U, error_count); + EXPECT_EQ(32U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(480), next); + EXPECT_GE(TimeDelta::FromMinutes(960), next); + + // 7 errors. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(7U, error_count); + EXPECT_EQ(64U, back_off_multiplier); + EXPECT_LE(TimeDelta::FromMinutes(960), next); + EXPECT_GE(TimeDelta::FromMinutes(1920), next); + + // 8 errors, reached max backoff. 
+ next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(8U, error_count); + EXPECT_EQ(128U, back_off_multiplier); + EXPECT_EQ(TimeDelta::FromHours(24), next); + + // 9 errors, reached max backoff and multiplier capped. + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &error_count, &back_off_multiplier); + EXPECT_EQ(9U, error_count); + EXPECT_EQ(128U, back_off_multiplier); + EXPECT_EQ(TimeDelta::FromHours(24), next); +} + +TEST_F(SafeBrowsingV4ProtocolManagerUtilTest, TestGetRequestUrl) { + V4ProtocolConfig config; + PopulateV4ProtocolConfig(&config); + + std::string expectedUrl = + "https://safebrowsing.googleapis.com/v4/someMethod/request_base64?" + "alt=proto&client_id=unittest&client_version=1.0&key=test_key_param"; + EXPECT_EQ(expectedUrl, V4ProtocolManagerUtil::GetRequestUrl( + "request_base64", "someMethod", config) + .spec()); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_update_protocol_manager.cc b/chromium/components/safe_browsing_db/v4_update_protocol_manager.cc new file mode 100644 index 00000000000..91bf26eb046 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_update_protocol_manager.cc @@ -0,0 +1,318 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/safe_browsing_db/v4_update_protocol_manager.h" + +#include <utility> + +#include "base/base64.h" +#include "base/macros.h" +#include "base/metrics/histogram_macros.h" +#include "base/rand_util.h" +#include "base/timer/timer.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "net/base/load_flags.h" +#include "net/http/http_response_headers.h" +#include "net/http/http_status_code.h" +#include "net/url_request/url_fetcher.h" +#include "net/url_request/url_request_context_getter.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +// Enumerate parsing failures for histogramming purposes. DO NOT CHANGE +// THE ORDERING OF THESE VALUES. +enum ParseResultType { + // Error parsing the protocol buffer from a string. + PARSE_FROM_STRING_ERROR = 0, + + // No platform_type set in the response. + NO_PLATFORM_TYPE_ERROR = 1, + + // No threat_entry_type set in the response. + NO_THREAT_ENTRY_TYPE_ERROR = 2, + + // No threat_type set in the response. + NO_THREAT_TYPE_ERROR = 3, + + // No state set in the response for one or more lists. + NO_STATE_ERROR = 4, + + // Memory space for histograms is determined by the max. ALWAYS + // ADD NEW VALUES BEFORE THIS ONE. + PARSE_RESULT_TYPE_MAX = 5 +}; + +// Record parsing errors of an update result. +void RecordParseUpdateResult(ParseResultType result_type) { + UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.ParseV4UpdateResult", result_type, + PARSE_RESULT_TYPE_MAX); +} + +void RecordUpdateResult(safe_browsing::V4OperationResult result) { + UMA_HISTOGRAM_ENUMERATION( + "SafeBrowsing.V4UpdateResult", result, + safe_browsing::V4OperationResult::OPERATION_RESULT_MAX); +} + +} // namespace + +namespace safe_browsing { + +// Minimum time, in seconds, from start up before we must issue an update query. +static const int kV4TimerStartIntervalSecMin = 60; + +// Maximum time, in seconds, from start up before we must issue an update query. 
+static const int kV4TimerStartIntervalSecMax = 300; + +// The default V4UpdateProtocolManagerFactory. +class V4UpdateProtocolManagerFactoryImpl + : public V4UpdateProtocolManagerFactory { + public: + V4UpdateProtocolManagerFactoryImpl() {} + ~V4UpdateProtocolManagerFactoryImpl() override {} + scoped_ptr<V4UpdateProtocolManager> CreateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback) override { + return scoped_ptr<V4UpdateProtocolManager>(new V4UpdateProtocolManager( + request_context_getter, config, current_list_states, callback)); + } + + private: + DISALLOW_COPY_AND_ASSIGN(V4UpdateProtocolManagerFactoryImpl); +}; + +// V4UpdateProtocolManager implementation -------------------------------- + +// static +V4UpdateProtocolManagerFactory* V4UpdateProtocolManager::factory_ = NULL; + +// static +scoped_ptr<V4UpdateProtocolManager> V4UpdateProtocolManager::Create( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback) { + if (!factory_) + factory_ = new V4UpdateProtocolManagerFactoryImpl(); + return factory_->CreateProtocolManager(request_context_getter, config, + current_list_states, callback); +} + +void V4UpdateProtocolManager::ResetUpdateErrors() { + update_error_count_ = 0; + update_back_off_mult_ = 1; +} + +V4UpdateProtocolManager::V4UpdateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback) + : current_list_states_(current_list_states), + update_error_count_(0), + update_back_off_mult_(1), + next_update_interval_(base::TimeDelta::FromSeconds( + base::RandInt(kV4TimerStartIntervalSecMin, + 
kV4TimerStartIntervalSecMax))), + config_(config), + request_context_getter_(request_context_getter), + url_fetcher_id_(0), + callback_(callback) { + ScheduleNextUpdate(false /* no back off */); +} + +V4UpdateProtocolManager::~V4UpdateProtocolManager() {} + +bool V4UpdateProtocolManager::IsUpdateScheduled() const { + return update_timer_.IsRunning(); +} + +void V4UpdateProtocolManager::ScheduleNextUpdate(bool back_off) { + // TODO(vakh): Set disable_auto_update correctly using the command line + // switch. + if (config_.disable_auto_update) { + DCHECK(!IsUpdateScheduled()); + return; + } + + // Reschedule with the new update. + base::TimeDelta next_update_interval = GetNextUpdateInterval(back_off); + ScheduleNextUpdateAfterInterval(next_update_interval); +} + +// According to section 5 of the SafeBrowsing protocol specification, we must +// back off after a certain number of errors. +base::TimeDelta V4UpdateProtocolManager::GetNextUpdateInterval(bool back_off) { + DCHECK(CalledOnValidThread()); + DCHECK(next_update_interval_ > base::TimeDelta()); + base::TimeDelta next = next_update_interval_; + if (back_off) { + next = V4ProtocolManagerUtil::GetNextBackOffInterval( + &update_error_count_, &update_back_off_mult_); + } + return next; +} + +void V4UpdateProtocolManager::ScheduleNextUpdateAfterInterval( + base::TimeDelta interval) { + DCHECK(CalledOnValidThread()); + DCHECK(interval >= base::TimeDelta()); + // Unschedule any current timer. + update_timer_.Stop(); + update_timer_.Start(FROM_HERE, interval, this, + &V4UpdateProtocolManager::IssueUpdateRequest); +} + +std::string V4UpdateProtocolManager::GetBase64SerializedUpdateRequestProto( + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states) { + // Build the request. Client info and client states are not added to the + // request protocol buffer. Client info is passed as params in the url. 
+ FetchThreatListUpdatesRequest request; + for (const auto& entry : current_list_states) { + const auto& list_to_update = entry.first; + const auto& state = entry.second; + ListUpdateRequest* list_update_request = request.add_list_update_requests(); + list_update_request->set_platform_type(list_to_update.platform_type); + list_update_request->set_threat_entry_type( + list_to_update.threat_entry_type); + list_update_request->set_threat_type(list_to_update.threat_type); + + if (!state.empty()) { + list_update_request->set_state(state); + } + } + + // Serialize and Base64 encode. + std::string req_data, req_base64; + request.SerializeToString(&req_data); + base::Base64Encode(req_data, &req_base64); + + return req_base64; +} + +bool V4UpdateProtocolManager::ParseUpdateResponse( + const std::string& data, + std::vector<ListUpdateResponse>* list_update_responses) { + FetchThreatListUpdatesResponse response; + + if (!response.ParseFromString(data)) { + RecordParseUpdateResult(PARSE_FROM_STRING_ERROR); + return false; + } + + if (response.has_minimum_wait_duration()) { + // Seconds resolution is good enough so we ignore the nanos field. + int64_t minimum_wait_duration_seconds = + response.minimum_wait_duration().seconds(); + + // Do not let the next_update_interval_ to be too low. + if (minimum_wait_duration_seconds < kV4TimerStartIntervalSecMin) { + minimum_wait_duration_seconds = kV4TimerStartIntervalSecMin; + } + next_update_interval_ = + base::TimeDelta::FromSeconds(minimum_wait_duration_seconds); + } + + // TODO(vakh): Do something useful with this response. 
+ for (const ListUpdateResponse& list_update_response : + response.list_update_responses()) { + if (!list_update_response.has_platform_type()) { + RecordParseUpdateResult(NO_PLATFORM_TYPE_ERROR); + } else if (!list_update_response.has_threat_entry_type()) { + RecordParseUpdateResult(NO_THREAT_ENTRY_TYPE_ERROR); + } else if (!list_update_response.has_threat_type()) { + RecordParseUpdateResult(NO_THREAT_TYPE_ERROR); + } else if (!list_update_response.has_new_client_state()) { + RecordParseUpdateResult(NO_STATE_ERROR); + } else { + list_update_responses->push_back(list_update_response); + } + } + return true; +} + +void V4UpdateProtocolManager::IssueUpdateRequest() { + DCHECK(CalledOnValidThread()); + + // If an update request is already pending, record and return silently. + if (request_.get()) { + RecordUpdateResult(V4OperationResult::ALREADY_PENDING_ERROR); + return; + } + + std::string req_base64 = GetBase64SerializedUpdateRequestProto( + current_list_states_); + GURL update_url = GetUpdateUrl(req_base64); + + request_.reset(net::URLFetcher::Create(url_fetcher_id_++, update_url, + net::URLFetcher::GET, this) + .release()); + + request_->SetLoadFlags(net::LOAD_DISABLE_CACHE); + request_->SetRequestContext(request_context_getter_.get()); + request_->Start(); + // TODO(vakh): Handle request timeout. +} + +// net::URLFetcherDelegate implementation ---------------------------------- + +// SafeBrowsing request responses are handled here. 
+void V4UpdateProtocolManager::OnURLFetchComplete( + const net::URLFetcher* source) { + DCHECK(CalledOnValidThread()); + + int response_code = source->GetResponseCode(); + net::URLRequestStatus status = source->GetStatus(); + V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode( + "SafeBrowsing.V4UpdateHttpResponseOrErrorCode", status, response_code); + + std::vector<ListUpdateResponse> list_update_responses; + bool back_off; + if (status.is_success() && response_code == net::HTTP_OK) { + back_off = false; + RecordUpdateResult(V4OperationResult::STATUS_200); + ResetUpdateErrors(); + std::string data; + source->GetResponseAsString(&data); + if (!ParseUpdateResponse(data, &list_update_responses)) { + list_update_responses.clear(); + RecordUpdateResult(V4OperationResult::PARSE_ERROR); + } + // Invoke the callback with list_update_responses. + // The caller should update its state now, based on list_update_responses. + callback_.Run(list_update_responses); + } else { + back_off = true; + DVLOG(1) << "SafeBrowsing GetEncodedUpdates request for: " + << source->GetURL() << " failed with error: " << status.error() + << " and response code: " << response_code; + + if (status.status() == net::URLRequestStatus::FAILED) { + RecordUpdateResult(V4OperationResult::NETWORK_ERROR); + } else { + RecordUpdateResult(V4OperationResult::HTTP_ERROR); + } + // TODO(vakh): Figure out whether it is just a network error vs backoff vs + // another condition and RecordUpdateResult more accurately. 
+ } + request_.reset(); + ScheduleNextUpdate(back_off); +} + +GURL V4UpdateProtocolManager::GetUpdateUrl( + const std::string& req_base64) const { + return V4ProtocolManagerUtil::GetRequestUrl(req_base64, "encodedUpdates", + config_); +} + +} // namespace safe_browsing diff --git a/chromium/components/safe_browsing_db/v4_update_protocol_manager.h b/chromium/components/safe_browsing_db/v4_update_protocol_manager.h new file mode 100644 index 00000000000..be91bc57438 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_update_protocol_manager.h @@ -0,0 +1,198 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_UPDATE_PROTOCOL_MANAGER_H_ +#define COMPONENTS_SAFE_BROWSING_DB_V4_UPDATE_PROTOCOL_MANAGER_H_ + +// A class that implements Chrome's interface with the SafeBrowsing V4 update +// protocol. +// +// The V4UpdateProtocolManager handles formatting and making requests of, and +// handling responses from, Google's SafeBrowsing servers. The purpose of this +// class is to get hash prefixes from the SB server for the given set of lists. 
+ +#include <string> +#include <vector> + +#include "base/gtest_prod_util.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "base/threading/non_thread_safe.h" +#include "base/time/time.h" +#include "base/timer/timer.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "components/safe_browsing_db/util.h" +#include "components/safe_browsing_db/v4_protocol_manager_util.h" +#include "net/url_request/url_fetcher_delegate.h" +#include "url/gurl.h" + +namespace net { +class URLFetcher; +class URLRequestContextGetter; +} // namespace net + +namespace safe_browsing { + +class V4UpdateProtocolManagerFactory; + +typedef FetchThreatListUpdatesRequest::ListUpdateRequest ListUpdateRequest; +typedef FetchThreatListUpdatesResponse::ListUpdateResponse ListUpdateResponse; + +// V4UpdateCallback is invoked when a scheduled update completes. +// Parameters: +// - The vector of update response protobufs received from the server for +// each list type. +typedef base::Callback<void(const std::vector<ListUpdateResponse>&)> + V4UpdateCallback; + +class V4UpdateProtocolManager : public net::URLFetcherDelegate, + public base::NonThreadSafe { + public: + ~V4UpdateProtocolManager() override; + + // Makes the passed |factory| the factory used to instantiate + // a V4UpdateProtocolManager. Useful for tests. + static void RegisterFactory(V4UpdateProtocolManagerFactory* factory) { + factory_ = factory; + } + + // Create an instance of the safe browsing v4 protocol manager. + static scoped_ptr<V4UpdateProtocolManager> Create( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback); + + // net::URLFetcherDelegate interface. + void OnURLFetchComplete(const net::URLFetcher* source) override; + + protected: + // Constructs a V4UpdateProtocolManager that issues network requests using + // |request_context_getter|. 
+ // Schedules an update to get the hash prefixes for the lists in + // |current_list_states|, and invoke |callback| when the results + // are retrieved. The callback may be invoked synchronously. + V4UpdateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback); + + private: + FRIEND_TEST_ALL_PREFIXES(V4UpdateProtocolManagerTest, + TestGetUpdatesErrorHandlingNetwork); + FRIEND_TEST_ALL_PREFIXES(V4UpdateProtocolManagerTest, + TestGetUpdatesErrorHandlingResponseCode); + FRIEND_TEST_ALL_PREFIXES(V4UpdateProtocolManagerTest, TestGetUpdatesNoError); + FRIEND_TEST_ALL_PREFIXES(V4UpdateProtocolManagerTest, + TestGetUpdatesWithOneBackoff); + friend class V4UpdateProtocolManagerFactoryImpl; + + // The method to generate the URL for the request to be sent to the server. + // |request_base64| is the base64 encoded form of an instance of the protobuf + // FetchThreatListUpdatesRequest. + GURL GetUpdateUrl(const std::string& request_base64) const; + + // Fills a FetchThreatListUpdatesRequest protocol buffer for a request. + // Returns the serialized and base 64 encoded request as a string. + std::string GetBase64SerializedUpdateRequestProto( + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states); + + // Parses the base64 encoded response received from the server as a + // FetchThreatListUpdatesResponse protobuf and returns each of the + // ListUpdateResponse protobufs contained in it as a vector. + // Returns true if parsing is successful, false otherwise. + bool ParseUpdateResponse( + const std::string& data_base64, + std::vector<ListUpdateResponse>* list_update_responses); + + // Resets the update error counter and multiplier. + void ResetUpdateErrors(); + + // Updates internal update and backoff state for each update response error, + // assuming that the current time is |now|. 
+ void HandleUpdateError(const base::Time& now); + + // Generates the URL for the update request and issues the request for the + // lists passed to the constructor. + void IssueUpdateRequest(); + + // Returns whether another update is currently scheduled. + bool IsUpdateScheduled() const; + + // Schedule the next update, considering whether we are in backoff. + void ScheduleNextUpdate(bool back_off); + + // Schedule the next update, after the given interval. + void ScheduleNextUpdateAfterInterval(base::TimeDelta interval); + + // Get the next update interval, considering whether we are in backoff. + base::TimeDelta GetNextUpdateInterval(bool back_off); + + // The factory that controls the creation of V4UpdateProtocolManager. + // This is used by tests. + static V4UpdateProtocolManagerFactory* factory_; + + // The last known state of the lists. + // At init, this is read from the disk or is empty for no prior state. + // Each successful update from the server contains a new state for each + // requested list. + base::hash_map<UpdateListIdentifier, std::string> current_list_states_; + + // The number of HTTP response errors since the last successful HTTP + // response, used for request backoff timing. + size_t update_error_count_; + + // Multiplier for the backoff error after the second. + size_t update_back_off_mult_; + + // The time delta after which the next update request may be sent. + // It is set to a random interval between 60 and 300 seconds at start. + // The server can set it by setting the minimum_wait_duration. + base::TimeDelta next_update_interval_; + + // The config of the client making Pver4 requests. + const V4ProtocolConfig config_; + + // The context we use to issue network requests. + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; + + // ID for URLFetchers for testing. + int url_fetcher_id_; + + // The callback that's called when GetUpdates completes. + V4UpdateCallback callback_; + + // The pending update request. 
The request must be canceled when the object is + // destroyed. + scoped_ptr<net::URLFetcher> request_; + + // Timer to setup the next update request. + base::OneShotTimer update_timer_; + + DISALLOW_COPY_AND_ASSIGN(V4UpdateProtocolManager); +}; + +// Interface of a factory to create V4UpdateProtocolManager. Useful for tests. +class V4UpdateProtocolManagerFactory { + public: + V4UpdateProtocolManagerFactory() {} + virtual ~V4UpdateProtocolManagerFactory() {} + virtual scoped_ptr<V4UpdateProtocolManager> CreateProtocolManager( + net::URLRequestContextGetter* request_context_getter, + const V4ProtocolConfig& config, + const base::hash_map<UpdateListIdentifier, std::string>& + current_list_states, + V4UpdateCallback callback) = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(V4UpdateProtocolManagerFactory); +}; + +} // namespace safe_browsing + +#endif // COMPONENTS_SAFE_BROWSING_DB_V4_UPDATE_PROTOCOL_MANAGER_H_ diff --git a/chromium/components/safe_browsing_db/v4_update_protocol_manager_unittest.cc b/chromium/components/safe_browsing_db/v4_update_protocol_manager_unittest.cc new file mode 100644 index 00000000000..c7665cd8fa0 --- /dev/null +++ b/chromium/components/safe_browsing_db/v4_update_protocol_manager_unittest.cc @@ -0,0 +1,300 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <vector> + +#include "base/base64.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/stringprintf.h" +#include "base/test/test_simple_task_runner.h" +#include "base/thread_task_runner_handle.h" +#include "base/time/time.h" +#include "components/safe_browsing_db/safebrowsing.pb.h" +#include "components/safe_browsing_db/util.h" +#include "components/safe_browsing_db/v4_update_protocol_manager.h" +#include "net/base/escape.h" +#include "net/base/load_flags.h" +#include "net/base/net_errors.h" +#include "net/url_request/test_url_fetcher_factory.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::Time; +using base::TimeDelta; + +namespace { + +const char kClient[] = "unittest"; +const char kAppVer[] = "1.0"; +const char kKeyParam[] = "test_key_param"; + +} // namespace + +namespace safe_browsing { + +class V4UpdateProtocolManagerTest : public testing::Test { + protected: + void ValidateGetUpdatesResults( + const std::vector<ListUpdateResponse>& expected_lurs, + const std::vector<ListUpdateResponse>& list_update_responses) { + // The callback should never be called if expect_callback_to_be_called_ is + // false. + EXPECT_TRUE(expect_callback_to_be_called_); + ASSERT_EQ(expected_lurs.size(), list_update_responses.size()); + + for (unsigned int i = 0; i < list_update_responses.size(); ++i) { + const ListUpdateResponse& expected = expected_lurs[i]; + const ListUpdateResponse& actual = list_update_responses[i]; + + EXPECT_EQ(expected.platform_type(), actual.platform_type()); + EXPECT_EQ(expected.response_type(), actual.response_type()); + EXPECT_EQ(expected.threat_entry_type(), actual.threat_entry_type()); + EXPECT_EQ(expected.threat_type(), actual.threat_type()); + EXPECT_EQ(expected.new_client_state(), actual.new_client_state()); + + // TODO(vakh): Test more fields from the proto. 
+ } + } + + scoped_ptr<V4UpdateProtocolManager> CreateProtocolManager( + const base::hash_map<UpdateListIdentifier, std::string> + current_list_states, + const std::vector<ListUpdateResponse>& expected_lurs) { + V4ProtocolConfig config; + config.client_name = kClient; + config.version = kAppVer; + config.key_param = kKeyParam; + config.disable_auto_update = false; + return V4UpdateProtocolManager::Create( + NULL, config, current_list_states, + base::Bind(&V4UpdateProtocolManagerTest::ValidateGetUpdatesResults, + base::Unretained(this), expected_lurs)); + } + + void SetupCurrentListStates( + base::hash_map<UpdateListIdentifier, std::string>* current_list_states) { + UpdateListIdentifier list_identifier; + list_identifier.platform_type = WINDOWS_PLATFORM; + list_identifier.threat_entry_type = URL_EXPRESSION; + list_identifier.threat_type = MALWARE_THREAT; + current_list_states->insert({list_identifier, "initial_state_1"}); + + list_identifier.platform_type = WINDOWS_PLATFORM; + list_identifier.threat_entry_type = URL_EXPRESSION; + list_identifier.threat_type = UNWANTED_SOFTWARE; + current_list_states->insert({list_identifier, "initial_state_2"}); + + list_identifier.platform_type = WINDOWS_PLATFORM; + list_identifier.threat_entry_type = BINARY_DIGEST; + list_identifier.threat_type = MALWARE_THREAT; + current_list_states->insert({list_identifier, "initial_state_3"}); + } + + void SetupExpectedListUpdateResponse( + std::vector<ListUpdateResponse>* expected_lurs) { + ListUpdateResponse lur; + lur.set_platform_type(WINDOWS_PLATFORM); + lur.set_response_type(ListUpdateResponse::PARTIAL_UPDATE); + lur.set_threat_entry_type(URL_EXPRESSION); + lur.set_threat_type(MALWARE_THREAT); + lur.set_new_client_state("new_state_1"); + expected_lurs->push_back(lur); + + lur.set_platform_type(WINDOWS_PLATFORM); + lur.set_response_type(ListUpdateResponse::PARTIAL_UPDATE); + lur.set_threat_entry_type(URL_EXPRESSION); + lur.set_threat_type(UNWANTED_SOFTWARE); + 
lur.set_new_client_state("new_state_2"); + expected_lurs->push_back(lur); + + lur.set_platform_type(WINDOWS_PLATFORM); + lur.set_response_type(ListUpdateResponse::FULL_UPDATE); + lur.set_threat_entry_type(BINARY_DIGEST); + lur.set_threat_type(MALWARE_THREAT); + lur.set_new_client_state("new_state_3"); + expected_lurs->push_back(lur); + } + + std::string GetExpectedV4UpdateResponse( + std::vector<ListUpdateResponse>& expected_lurs) const { + FetchThreatListUpdatesResponse response; + + for (const auto& expected_lur : expected_lurs) { + ListUpdateResponse* lur = response.add_list_update_responses(); + lur->set_new_client_state(expected_lur.new_client_state()); + lur->set_platform_type(expected_lur.platform_type()); + lur->set_response_type(expected_lur.response_type()); + lur->set_threat_entry_type(expected_lur.threat_entry_type()); + lur->set_threat_type(expected_lur.threat_type()); + } + + // Serialize. + std::string res_data; + response.SerializeToString(&res_data); + + return res_data; + } + + bool expect_callback_to_be_called_; +}; + +// TODO(vakh): Add many more tests. +TEST_F(V4UpdateProtocolManagerTest, TestGetUpdatesErrorHandlingNetwork) { + scoped_refptr<base::TestSimpleTaskRunner> runner( + new base::TestSimpleTaskRunner()); + base::ThreadTaskRunnerHandle runner_handler(runner); + net::TestURLFetcherFactory factory; + const base::hash_map<UpdateListIdentifier, std::string> current_list_states; + const std::vector<ListUpdateResponse> expected_lurs; + scoped_ptr<V4UpdateProtocolManager> pm( + CreateProtocolManager(current_list_states, expected_lurs)); + runner->ClearPendingTasks(); + + // Initial state. No errors. 
+ EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + expect_callback_to_be_called_ = false; + pm->IssueUpdateRequest(); + + EXPECT_FALSE(pm->IsUpdateScheduled()); + + runner->RunPendingTasks(); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + // Failed request status should result in error. + fetcher->set_status(net::URLRequestStatus(net::URLRequestStatus::FAILED, + net::ERR_CONNECTION_RESET)); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // Should have recorded one error, but back off multiplier is unchanged. + EXPECT_EQ(1ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + EXPECT_TRUE(pm->IsUpdateScheduled()); +} + +TEST_F(V4UpdateProtocolManagerTest, TestGetUpdatesErrorHandlingResponseCode) { + scoped_refptr<base::TestSimpleTaskRunner> runner( + new base::TestSimpleTaskRunner()); + base::ThreadTaskRunnerHandle runner_handler(runner); + net::TestURLFetcherFactory factory; + const std::vector<ListUpdateResponse> expected_lurs; + const base::hash_map<UpdateListIdentifier, std::string> current_list_states; + scoped_ptr<V4UpdateProtocolManager> pm( + CreateProtocolManager(current_list_states, expected_lurs)); + runner->ClearPendingTasks(); + + // Initial state. No errors. + EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + expect_callback_to_be_called_ = false; + pm->IssueUpdateRequest(); + + EXPECT_FALSE(pm->IsUpdateScheduled()); + + runner->RunPendingTasks(); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + // Response code of anything other than 200 should result in error. + fetcher->set_response_code(net::HTTP_NO_CONTENT); + fetcher->SetResponseString(""); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // Should have recorded one error, but back off multiplier is unchanged. 
+ EXPECT_EQ(1ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + EXPECT_TRUE(pm->IsUpdateScheduled()); +} + +TEST_F(V4UpdateProtocolManagerTest, TestGetUpdatesNoError) { + scoped_refptr<base::TestSimpleTaskRunner> runner( + new base::TestSimpleTaskRunner()); + base::ThreadTaskRunnerHandle runner_handler(runner); + net::TestURLFetcherFactory factory; + std::vector<ListUpdateResponse> expected_lurs; + SetupExpectedListUpdateResponse(&expected_lurs); + base::hash_map<UpdateListIdentifier, std::string> current_list_states; + SetupCurrentListStates(&current_list_states); + scoped_ptr<V4UpdateProtocolManager> pm( + CreateProtocolManager(current_list_states, expected_lurs)); + runner->ClearPendingTasks(); + + // Initial state. No errors. + EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + expect_callback_to_be_called_ = true; + pm->IssueUpdateRequest(); + + EXPECT_FALSE(pm->IsUpdateScheduled()); + + runner->RunPendingTasks(); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + fetcher->set_response_code(net::HTTP_OK); + fetcher->SetResponseString(GetExpectedV4UpdateResponse(expected_lurs)); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // No error, back off multiplier is unchanged. 
+ EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + EXPECT_TRUE(pm->IsUpdateScheduled()); +} + +TEST_F(V4UpdateProtocolManagerTest, TestGetUpdatesWithOneBackoff) { + scoped_refptr<base::TestSimpleTaskRunner> runner( + new base::TestSimpleTaskRunner()); + base::ThreadTaskRunnerHandle runner_handler(runner); + net::TestURLFetcherFactory factory; + std::vector<ListUpdateResponse> expected_lurs; + SetupExpectedListUpdateResponse(&expected_lurs); + base::hash_map<UpdateListIdentifier, std::string> current_list_states; + SetupCurrentListStates(&current_list_states); + scoped_ptr<V4UpdateProtocolManager> pm( + CreateProtocolManager(current_list_states, expected_lurs)); + runner->ClearPendingTasks(); + + // Initial state. No errors. + EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + expect_callback_to_be_called_ = false; + pm->IssueUpdateRequest(); + + EXPECT_FALSE(pm->IsUpdateScheduled()); + + runner->RunPendingTasks(); + + net::TestURLFetcher* fetcher = factory.GetFetcherByID(0); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + // Response code of anything other than 200 should result in error. + fetcher->set_response_code(net::HTTP_NO_CONTENT); + fetcher->SetResponseString(""); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // Should have recorded one error, but back off multiplier is unchanged. + EXPECT_EQ(1ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + EXPECT_TRUE(pm->IsUpdateScheduled()); + + // Retry, now no backoff. + expect_callback_to_be_called_ = true; + runner->RunPendingTasks(); + + fetcher = factory.GetFetcherByID(1); + DCHECK(fetcher); + fetcher->set_status(net::URLRequestStatus()); + fetcher->set_response_code(net::HTTP_OK); + fetcher->SetResponseString(GetExpectedV4UpdateResponse(expected_lurs)); + fetcher->delegate()->OnURLFetchComplete(fetcher); + + // No error, back off multiplier is unchanged. 
+ EXPECT_EQ(0ul, pm->update_error_count_); + EXPECT_EQ(1ul, pm->update_back_off_mult_); + EXPECT_TRUE(pm->IsUpdateScheduled()); +} + +} // namespace safe_browsing |