summaryrefslogtreecommitdiff
path: root/chromium/components/safe_browsing_db/v4_protocol_manager_util.h
blob: 9c083542bdda53abe98b71c060e34ee3196f564c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_
#define COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_

// A class that implements the stateless methods used by the GetHashUpdate and
// GetFullHash stubby calls made by Chrome using the SafeBrowsing V4 protocol.

#include <initializer_list>
#include <memory>
#include <ostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/strings/string_piece.h"
#include "components/safe_browsing_db/safebrowsing.pb.h"
#include "net/url_request/url_request_status.h"
#include "url/gurl.h"

// Forward declarations: this header only refers to these net types through
// pointers, so their full definitions are not needed here.
namespace net {
class HttpRequestHeaders;
class IPAddress;
}  // namespace net

namespace safe_browsing {

// Length of a hash prefix, measured in bytes. Expected lengths run from
// kMinHashPrefixLength up to kMaxHashPrefixLength (a full hash).
using PrefixSize = size_t;

// Smallest hash-prefix length, in bytes, that is expected.
constexpr PrefixSize kMinHashPrefixLength = 4;

// Largest hash-prefix length, in bytes, that is expected. This equals the
// length of a SHA256 hash.
constexpr PrefixSize kMaxHashPrefixLength = 32;

// A hash prefix as sent by the SafeBrowsing PVer4 service.
using HashPrefix = std::string;

// A complete SHA256 hash. It shares its representation with HashPrefix.
using FullHash = HashPrefix;

// Shorthand for the ListUpdateRequest / ListUpdateResponse types nested in
// FetchThreatListUpdatesRequest and FetchThreatListUpdatesResponse.
using ListUpdateRequest = FetchThreatListUpdatesRequest::ListUpdateRequest;
using ListUpdateResponse = FetchThreatListUpdatesResponse::ListUpdateResponse;

// Client configuration handed to the constructor of a V4 protocol manager.
struct V4ProtocolConfig {
  // A config must always be built from explicit values, so there is no
  // default constructor.
  V4ProtocolConfig() = delete;
  V4ProtocolConfig(const std::string& client_name,
                   bool disable_auto_update,
                   const std::string& key_param,
                   const std::string& version);
  V4ProtocolConfig(const V4ProtocolConfig& other);
  ~V4ProtocolConfig();

  // Name of the safe browsing client; sent in each request.
  std::string client_name;

  // Set when auto-updates are disabled using a command line switch.
  bool disable_auto_update;

  // The Google API key.
  std::string key_param;

  // Current product version; sent in each request.
  std::string version;
};

// The kinds of threat that SafeBrowsing protects against. This is the type
// handed back to the clients of SafeBrowsing in Chromium. Values are numbered
// explicitly so the numeric value of each entry is visible at a glance.
// GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing
// GENERATED_JAVA_PREFIX_TO_STRIP: SB_THREAT_TYPE_
enum SBThreatType {
  // For lists that can be checked synchronously (so no client callback is
  // required), or for whitelists.
  SB_THREAT_TYPE_UNUSED = 0,

  // Not a threat.
  SB_THREAT_TYPE_SAFE = 1,

  // The URL is used for phishing.
  SB_THREAT_TYPE_URL_PHISHING = 2,

  // Malware is hosted at the URL.
  SB_THREAT_TYPE_URL_MALWARE = 3,

  // Unwanted programs are hosted at the URL.
  SB_THREAT_TYPE_URL_UNWANTED = 4,

  // The download URL is malware.
  SB_THREAT_TYPE_URL_BINARY_MALWARE = 5,

  // The URL was flagged by the client-side phishing model. Unlike the values
  // above, this one does not correspond to a downloaded list.
  SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING = 6,

  // A Chrome extension or app (identified by its ID) is malware.
  SB_THREAT_TYPE_EXTENSION = 7,

  // The URL was flagged by the client-side malware IP list, which is part of
  // the client side detection model.
  SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE = 8,

  // The URL leads to a blacklisted resource script. No warning should be
  // shown for this threat type, although an incident report might be sent.
  SB_THREAT_TYPE_BLACKLISTED_RESOURCE = 9,

  // The URL abuses a permission API.
  SB_THREAT_TYPE_API_ABUSE = 10,

  // Activation patterns for the Subresource Filter.
  SB_THREAT_TYPE_SUBRESOURCE_FILTER = 11,

  // CSD Phishing whitelist. This "threat" means a URL matched the whitelist.
  SB_THREAT_TYPE_CSD_WHITELIST = 12,

  // The URL was flagged by the password protection service.
  SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING = 13,
};

// A set of SBThreatType values, stored in a base::flat_set.
using SBThreatTypeSet = base::flat_set<SBThreatType>;

// Return true if |set| only contains types that are valid for CheckBrowseUrl().
// Intended for use in DCHECK().
bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set);

// Convenience factory that builds an SBThreatTypeSet from a braced list of
// SBThreatTypes, e.g.
//   CreateSBThreatTypeSet({SB_THREAT_TYPE_URL_PHISHING,
//                          SB_THREAT_TYPE_URL_MALWARE})
// Duplicate entries in |set| are resolved with base::KEEP_FIRST_OF_DUPES.
inline SBThreatTypeSet CreateSBThreatTypeSet(
    std::initializer_list<SBThreatType> set) {
  SBThreatTypeSet threat_types(set, base::KEEP_FIRST_OF_DUPES);
  return threat_types;
}

// Uniquely identifies one list that the client is interested in maintaining
// and downloading from the SafeBrowsing servers. An identifier is made of
// three components; for example, digests of Malware binaries on Windows are
// identified by:
//   platform_type = WINDOWS,
//   threat_entry_type = EXECUTABLE,
//   threat_type = MALWARE
class ListIdentifier {
 public:
  // An identifier always carries all three components, so it cannot be
  // default-constructed.
  ListIdentifier() = delete;
  ListIdentifier(PlatformType platform_type,
                 ThreatEntryType threat_entry_type,
                 ThreatType threat_type);
  explicit ListIdentifier(const ListUpdateResponse& response);

  // Read-only access to the three components.
  PlatformType platform_type() const { return platform_type_; }
  ThreatEntryType threat_entry_type() const { return threat_entry_type_; }
  ThreatType threat_type() const { return threat_type_; }

  // Hash of this identifier; the std::hash<ListIdentifier> specialization in
  // this header forwards to it.
  size_t hash() const;

  bool operator==(const ListIdentifier& other) const;
  bool operator!=(const ListIdentifier& other) const;

 private:
  PlatformType platform_type_;
  ThreatEntryType threat_entry_type_;
  ThreatType threat_type_;
};

// Stream-insertion overload for ListIdentifier. Only the declaration lives in
// this header.
std::ostream& operator<<(std::ostream& os, const ListIdentifier& id);

// Platform and list-identifier getters. Only declarations appear in this
// header.
// NOTE(review): judging by the names, GetCurrentPlatformType() reports the
// PlatformType of the running build and each Get*Id() returns the
// ListIdentifier of one specific SafeBrowsing list -- confirm against the
// definitions.
PlatformType GetCurrentPlatformType();
ListIdentifier GetCertCsdDownloadWhitelistId();
ListIdentifier GetChromeExtMalwareId();
ListIdentifier GetChromeFilenameClientIncidentId();
ListIdentifier GetChromeUrlApiId();
ListIdentifier GetChromeUrlClientIncidentId();
ListIdentifier GetIpMalwareId();
ListIdentifier GetUrlCsdDownloadWhitelistId();
ListIdentifier GetUrlCsdWhitelistId();
ListIdentifier GetUrlMalBinId();
ListIdentifier GetUrlMalwareId();
ListIdentifier GetUrlSocEngId();
ListIdentifier GetUrlSubresourceFilterId();
ListIdentifier GetUrlUwsId();

// Returns the basename of the store file, without the ".store" extension.
// NOTE(review): base::FilePath is neither included nor forward-declared
// directly in this header; it presumably arrives through one of the other
// includes -- confirm, or add an explicit declaration.
std::string GetUmaSuffixForStore(const base::FilePath& file_path);

// Represents the state of each store.
// Keyed by ListIdentifier, so it relies on the std::hash<ListIdentifier>
// specialization declared at the end of this header.
using StoreStateMap = std::unordered_map<ListIdentifier, std::string>;

// Server response, parsed in vector form.
using ParsedServerResponse = std::vector<std::unique_ptr<ListUpdateResponse>>;

// Pairs a hash prefix with the identifier of the store that it matched in.
struct StoreAndHashPrefix {
  // Both fields are required, so default construction is disallowed.
  StoreAndHashPrefix() = delete;
  StoreAndHashPrefix(ListIdentifier list_id, const HashPrefix& hash_prefix);
  ~StoreAndHashPrefix();

  size_t hash() const;
  bool operator==(const StoreAndHashPrefix& other) const;
  bool operator!=(const StoreAndHashPrefix& other) const;

  // The store in which the prefix matched.
  ListIdentifier list_id;
  // The hash prefix that matched.
  HashPrefix hash_prefix;
};

// A list of StoreAndHashPrefix entries. Used to track the hash prefix and the
// store in which a full hash's prefix matched.
using StoreAndHashPrefixes = std::vector<StoreAndHashPrefix>;

// Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
// ORDERING OF THESE VALUES.
// Every value is numbered explicitly. New values go immediately before
// OPERATION_RESULT_MAX, which is currently one past the last real value.
enum V4OperationResult {
  // 200 response code means that the server recognized the request.
  STATUS_200 = 0,

  // Subset of successful responses where the response body wasn't parsable.
  PARSE_ERROR = 1,

  // Operation request failed (network error).
  NETWORK_ERROR = 2,

  // Operation request returned HTTP result code other than 200.
  HTTP_ERROR = 3,

  // Operation attempted during error backoff, no request sent.
  BACKOFF_ERROR = 4,

  // Operation attempted before min wait duration elapsed, no request sent.
  MIN_WAIT_DURATION_ERROR = 5,

  // Identical operation already pending.
  ALREADY_PENDING_ERROR = 6,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  OPERATION_RESULT_MAX = 7
};

// A class that provides static methods related to the Pver4 protocol.
// Every member function is static and the constructor is private, so callers
// use the class purely as a collection of helpers.
class V4ProtocolManagerUtil {
 public:
  // Canonicalizes url as per Google Safe Browsing Specification.
  // See: https://developers.google.com/safe-browsing/v4/urls-hashing
  // The canonical pieces are returned through |canonicalized_hostname|,
  // |canonicalized_path| and |canonicalized_query|.
  // NOTE(review): whether any of the output pointers may be null is not
  // visible from this header -- confirm against the definition.
  static void CanonicalizeUrl(const GURL& url,
                              std::string* canonicalized_hostname,
                              std::string* canonicalized_path,
                              std::string* canonicalized_query);

  // This method returns the host suffix combinations from the hostname in the
  // URL, as described here:
  // https://developers.google.com/safe-browsing/v4/urls-hashing
  // The combinations are returned through |hosts|.
  static void GenerateHostVariantsToCheck(const std::string& host,
                                          std::vector<std::string>* hosts);

  // This method returns the path prefix combinations from the path in the
  // URL, as described here:
  // https://developers.google.com/safe-browsing/v4/urls-hashing
  // The combinations are returned through |paths|.
  static void GeneratePathVariantsToCheck(const std::string& path,
                                          const std::string& query,
                                          std::vector<std::string>* paths);

  // Given a URL, returns all the patterns we need to check (through |urls|).
  static void GeneratePatternsToCheck(const GURL& url,
                                      std::vector<std::string>* urls);

  // Generates a Pver4 request URL and sets the appropriate header values.
  // |request_base64| is the serialized request protocol buffer encoded in
  // base 64.
  // |method_name| is the name of the method to call, as specified in the proto,
  // |config| is an instance of V4ProtocolConfig that stores the client config,
  // |gurl| is set to the value of the PVer4 request URL,
  // |headers| is populated with the appropriate header values.
  static void GetRequestUrlAndHeaders(const std::string& request_base64,
                                      const std::string& method_name,
                                      const V4ProtocolConfig& config,
                                      GURL* gurl,
                                      net::HttpRequestHeaders* headers);

  // Worker function for calculating the backoff times.
  // |multiplier| is doubled for each consecutive error after the
  // first, and |error_count| is incremented with each call.
  // Backoff interval is MIN(((2^(n-1))*15 minutes) * (RAND + 1), 24 hours)
  // where n is the number of consecutive errors.
  // NOTE(review): base::TimeDelta is not declared by any include listed
  // directly in this header; it presumably arrives transitively -- confirm.
  static base::TimeDelta GetNextBackOffInterval(size_t* error_count,
                                                size_t* multiplier);

  // Record HTTP response code when there's no error in fetching an HTTP
  // request, and the error code, when there is.
  // |metric_name| is the name of the UMA metric to record the response code or
  // error code against, |status| represents the status of the HTTP request, and
  // |response_code| represents the HTTP response code received from the server.
  static void RecordHttpResponseOrErrorCode(const char* metric_name,
                                            const net::URLRequestStatus& status,
                                            int response_code);

  // Generate the set of FullHashes to check for |url|. The hashes are returned
  // through |full_hashes|.
  static void UrlToFullHashes(const GURL& url,
                              std::vector<FullHash>* full_hashes);

  // NOTE(review): presumably writes the leading |prefix_size| bytes of
  // |full_hash| to |hash_prefix| and returns whether that succeeded -- confirm
  // against the definition.
  static bool FullHashToHashPrefix(const FullHash& full_hash,
                                   PrefixSize prefix_size,
                                   HashPrefix* hash_prefix);

  // NOTE(review): presumably the same as FullHashToHashPrefix() with the
  // smallest supported prefix size (see kMinHashPrefixLength) -- confirm.
  static bool FullHashToSmallestHashPrefix(const FullHash& full_hash,
                                           HashPrefix* hash_prefix);

  // NOTE(review): presumably returns true when |hash_prefix| is a prefix of
  // |full_hash| -- confirm against the definition.
  static bool FullHashMatchesHashPrefix(const FullHash& full_hash,
                                        const HashPrefix& hash_prefix);

  // NOTE(review): presumably fills |client_info| from the fields of |config|
  // -- confirm which fields are copied.
  static void SetClientInfoFromConfig(ClientInfo* client_info,
                                      const V4ProtocolConfig& config);

  // NOTE(review): presumably parses |ip_address| and stores its IPv6 form in
  // |address|, returning false if parsing fails -- confirm.
  static bool GetIPV6AddressFromString(const std::string& ip_address,
                                       net::IPAddress* address);

  // Converts a IPV4 or IPV6 address in |ip_address| to the SHA1 hash of the
  // corresponding packed IPV6 address in |hashed_encoded_ip|, and adds an
  // extra byte containing the value 128 at the end. This is done to match the
  // server implementation for calculating the hash prefix of an IP address.
  static bool IPAddressToEncodedIPV6Hash(const std::string& ip_address,
                                         FullHash* hashed_encoded_ip);

 private:
  // Private: the class only offers static methods.
  V4ProtocolManagerUtil() {}

  // Tests that are granted access to the private helpers below.
  FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, TestBackOffLogic);
  FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest,
                           TestGetRequestUrlAndUpdateHeaders);
  FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, UrlParsing);
  FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, CanonicalizeUrl);

  // Composes a URL using |prefix|, |method| (e.g.: encodedFullHashes),
  // |request_base64| and |key_param|. |prefix| should contain the entire url
  // prefix including scheme, host and path.
  static std::string ComposeUrl(const std::string& prefix,
                                const std::string& method,
                                const std::string& request_base64,
                                const std::string& key_param);

  // Sets the HTTP headers expected by a standard PVer4 request.
  static void UpdateHeaders(net::HttpRequestHeaders* headers);

  // Given a URL, returns all the hosts we need to check.  They are returned
  // in order of size (i.e. b.c is first, then a.b.c).
  static void GenerateHostsToCheck(const GURL& url,
                                   std::vector<std::string>* hosts);

  // Given a URL, returns all the paths we need to check.
  static void GeneratePathsToCheck(const GURL& url,
                                   std::vector<std::string>* paths);

  // NOTE(review): presumably returns a copy of |str| in which runs of the
  // character |c| are collapsed to a single |c| -- confirm.
  static std::string RemoveConsecutiveChars(base::StringPiece str,
                                            const char c);

  DISALLOW_COPY_AND_ASSIGN(V4ProtocolManagerUtil);
};

// A set of list identifiers. Hashing of the elements relies on the
// std::hash<ListIdentifier> specialization declared later in this header.
using StoresToCheck = std::unordered_set<ListIdentifier>;

}  // namespace safe_browsing

namespace std {

// Allows safe_browsing::PlatformType to be hashed by the standard library:
// the enum value is converted to unsigned int and hashed as such.
template <>
struct hash<safe_browsing::PlatformType> {
  std::size_t operator()(const safe_browsing::PlatformType& p) const {
    const unsigned int as_unsigned = static_cast<unsigned int>(p);
    return std::hash<unsigned int>{}(as_unsigned);
  }
};

// Allows safe_browsing::ThreatEntryType to be hashed by the standard library:
// the enum value is converted to unsigned int and hashed as such.
template <>
struct hash<safe_browsing::ThreatEntryType> {
  std::size_t operator()(const safe_browsing::ThreatEntryType& tet) const {
    const unsigned int as_unsigned = static_cast<unsigned int>(tet);
    return std::hash<unsigned int>{}(as_unsigned);
  }
};

// Allows safe_browsing::ThreatType to be hashed by the standard library: the
// enum value is converted to unsigned int and hashed as such.
template <>
struct hash<safe_browsing::ThreatType> {
  std::size_t operator()(const safe_browsing::ThreatType& tt) const {
    const unsigned int as_unsigned = static_cast<unsigned int>(tt);
    return std::hash<unsigned int>{}(as_unsigned);
  }
};

// Allows safe_browsing::ListIdentifier to be used as a key in unordered
// containers by forwarding to ListIdentifier::hash().
template <>
struct hash<safe_browsing::ListIdentifier> {
  std::size_t operator()(const safe_browsing::ListIdentifier& id) const {
    const std::size_t digest = id.hash();
    return digest;
  }
};

}  // namespace std

#endif  // COMPONENTS_SAFE_BROWSING_DB_V4_PROTOCOL_MANAGER_UTIL_H_