// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_STORE_H_
#define COMPONENTS_SAFE_BROWSING_DB_V4_STORE_H_

#include "base/files/file_path.h"
#include "base/memory/ref_counted.h"
#include "base/sequenced_task_runner.h"
#include "base/single_thread_task_runner.h"
#include "components/safe_browsing_db/v4_protocol_manager_util.h"

namespace safe_browsing {

class V4Store;

typedef base::Callback<void(std::unique_ptr<V4Store> new_store)>
    UpdatedStoreReadyCallback;

// The sorted list of hash prefixes, stored as a single concatenated string.
typedef std::string HashPrefixes;

// Stores the list of sorted hash prefixes, by size.
// For instance: {4: ["abcd", "bcde", "cdef", "gggg"], 5: ["fffff"]}
typedef base::hash_map<PrefixSize, HashPrefixes> HashPrefixMap;
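// An illustrative sketch (not normative) of how the example above would be
// represented: each bucket holds the concatenation of its equal-sized, sorted
// prefixes in a single HashPrefixes string.
//
//   HashPrefixMap prefix_map;
//   prefix_map[4] = "abcdbcdecdefgggg";  // Four 4-byte prefixes, sorted.
//   prefix_map[5] = "fffff";             // One 5-byte prefix.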

// Stores the iterator to the last element merged from the HashPrefixMap for a
// given prefix size.
// For instance: {4:iter(3), 5:iter(1)} means that we have already merged
// 3 hash prefixes of length 4, and 1 hash prefix of length 5.
typedef base::hash_map<PrefixSize, HashPrefixes::const_iterator> IteratorMap;
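// An illustrative sketch (assumed, simplified) of how the iterator map tracks
// merge progress for the |prefix_map| example above:
//
//   IteratorMap iterator_map;
//   iterator_map[4] = prefix_map[4].begin();
//   iterator_map[5] = prefix_map[5].begin();
//   // Each time a 4-byte prefix is merged, iterator_map[4] is advanced by 4
//   // characters; once it reaches prefix_map[4].end(), that bucket is done.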

// Enumerate different failure events while parsing the file read from disk for
// histogramming purposes.  DO NOT CHANGE THE ORDERING OF THESE VALUES.
enum StoreReadResult {
  // No errors.
  READ_SUCCESS = 0,

  // Reserved for errors in parsing this enum.
  UNEXPECTED_READ_FAILURE = 1,

  // The contents of the file could not be read.
  FILE_UNREADABLE_FAILURE = 2,

  // The file was found to be empty.
  FILE_EMPTY_FAILURE = 3,

  // The contents of the file could not be interpreted as a valid
  // V4StoreFileFormat proto.
  PROTO_PARSING_FAILURE = 4,

  // The magic number didn't match. We're most likely trying to read a file
  // that doesn't contain hash prefixes.
  UNEXPECTED_MAGIC_NUMBER_FAILURE = 5,

  // The version of the file is different from the expected version, and
  // Chromium doesn't know how to interpret this version of the file.
  FILE_VERSION_INCOMPATIBLE_FAILURE = 6,

  // The rest of the file could not be parsed as a ListUpdateResponse protobuf.
  // This can happen if the machine crashed before the file was fully written to
  // disk or if there was disk corruption.
  HASH_PREFIX_INFO_MISSING_FAILURE = 7,

  // Unable to generate the hash prefix map from the updates on disk.
  HASH_PREFIX_MAP_GENERATION_FAILURE = 8,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  STORE_READ_RESULT_MAX
};

// Enumerate different failure events while writing the file to disk after
// applying updates for histogramming purposes.
// DO NOT CHANGE THE ORDERING OF THESE VALUES.
enum StoreWriteResult {
  // No errors.
  WRITE_SUCCESS = 0,

  // Reserved for errors in parsing this enum.
  UNEXPECTED_WRITE_FAILURE = 1,

  // The proto being written to disk wasn't a FULL_UPDATE proto.
  INVALID_RESPONSE_TYPE_FAILURE = 2,

  // Number of bytes written to disk was different from the size of the proto.
  UNEXPECTED_BYTES_WRITTEN_FAILURE = 3,

  // Renaming the temporary file to store file failed.
  UNABLE_TO_RENAME_FAILURE = 4,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  STORE_WRITE_RESULT_MAX
};

// Enumerate different events while applying the update fetched from the server
// for histogramming purposes.
// DO NOT CHANGE THE ORDERING OF THESE VALUES.
enum ApplyUpdateResult {
  // No errors.
  APPLY_UPDATE_SUCCESS = 0,

  // Reserved for errors in parsing this enum.
  UNEXPECTED_APPLY_UPDATE_FAILURE = 1,

  // Prefix size smaller than 4 (which is the lowest expected).
  PREFIX_SIZE_TOO_SMALL_FAILURE = 2,

  // Prefix size larger than 32 (length of a full SHA256 hash).
  PREFIX_SIZE_TOO_LARGE_FAILURE = 3,

  // The number of bytes in additions isn't a multiple of prefix size.
  ADDITIONS_SIZE_UNEXPECTED_FAILURE = 4,

  // The update received from the server contains a prefix that's already
  // present in the map.
  ADDITIONS_HAS_EXISTING_PREFIX_FAILURE = 5,

  // The server sent a response_type that the client did not expect.
  UNEXPECTED_RESPONSE_TYPE_FAILURE = 6,

  // One or more indices in the removals field of the response are greater
  // than the number of hash prefixes currently in the (old) store.
  REMOVALS_INDEX_TOO_LARGE_FAILURE = 7,

  // Failed to decode the Rice-encoded additions/removals field.
  RICE_DECODING_FAILURE = 8,

  // Compression type other than RAW and RICE for additions.
  UNEXPECTED_COMPRESSION_TYPE_ADDITIONS_FAILURE = 9,

  // Compression type other than RAW and RICE for removals.
  UNEXPECTED_COMPRESSION_TYPE_REMOVALS_FAILURE = 10,

  // The state of the store did not match the expected checksum sent by the
  // server.
  CHECKSUM_MISMATCH_FAILURE = 11,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  APPLY_UPDATE_RESULT_MAX
};

// Factory for creating V4Store objects. Tests implement this factory to
// create fake stores.
class V4StoreFactory {
 public:
  virtual ~V4StoreFactory() {}
  virtual V4Store* CreateV4Store(
      const scoped_refptr<base::SequencedTaskRunner>& task_runner,
      const base::FilePath& store_path);
};

class V4Store {
 public:
  // The |task_runner| is used to ensure that the operations in this file are
  // performed on the correct thread. |store_path| specifies the location on
  // disk for this file. The constructor doesn't read the store file from disk.
  // If the store is being created to apply an update to the old store, then
  // |old_file_size| is the size of the existing file on disk for this store;
  // 0 otherwise. This is needed so that we can correctly report the size of
  // the store file on disk, even if writing the new file fails after
  // successfully applying an update.
  V4Store(const scoped_refptr<base::SequencedTaskRunner>& task_runner,
          const base::FilePath& store_path,
          const int64_t old_file_size = 0);
  virtual ~V4Store();

  const std::string& state() const { return state_; }

  const base::FilePath& store_path() const { return store_path_; }

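  // Applies the update in |response| to this store and, once the updated
  // store is ready, runs |callback| on |runner| with the new V4Store (see
  // UpdatedStoreReadyCallback above).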
  void ApplyUpdate(std::unique_ptr<ListUpdateResponse> response,
                   const scoped_refptr<base::SingleThreadTaskRunner>& runner,
                   UpdatedStoreReadyCallback callback);

  // Records the size (in kilobytes) of the file on disk for this store, using
  // |base_metric| as prefix and the filename as suffix for the metric name,
  // and returns the file size.
  int64_t RecordAndReturnFileSize(const std::string& base_metric);

  // If a hash prefix in this store matches |full_hash|, returns that hash
  // prefix; otherwise returns an empty hash prefix.
  virtual HashPrefix GetMatchingHashPrefix(const FullHash& full_hash);
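  // For example (an illustrative sketch; |canonicalized_url| and the hashing
  // call only show the expected inputs):
  //
  //   FullHash full_hash = crypto::SHA256HashString(canonicalized_url);
  //   HashPrefix prefix = store->GetMatchingHashPrefix(full_hash);
  //   if (!prefix.empty()) {
  //     // Some prefix of |full_hash| is in the local store; the full hash
  //     // must still be confirmed with the server.
  //   }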

  std::string DebugString() const;

  // Schedules the destruction of the V4Store object pointed to by |v4_store|
  // on the task runner.
  static void Destroy(std::unique_ptr<V4Store> v4_store);

  // Reads the store file from disk and populates the in-memory representation
  // of the hash prefixes.
  void Initialize();
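
  // A minimal usage sketch (the task runner and store file name here are
  // hypothetical): constructing a store does not read the file from disk;
  // Initialize() does.
  //
  //   auto store = base::MakeUnique<V4Store>(
  //       task_runner,
  //       base::FilePath(FILE_PATH_LITERAL("AnyIpMalware.store")));
  //   store->Initialize();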

  // True if this store has valid contents, either from a successful read
  // from disk or a full update.  This does not mean the checksum was verified.
  virtual bool HasValidData() const;

  // Reset internal state.
  void Reset();

  // Scheduled after reading the store file from disk on startup. When run, it
  // ensures that the checksum of the hash prefixes, in lexicographically
  // sorted order, matches the expected value in |expected_checksum_|. Returns
  // true if it matches; false otherwise. Checksum verification can take a long
  // time, so it is performed outside of the hot path of loading the
  // SafeBrowsing database, which blocks resource loads.
  bool VerifyChecksum();
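  // A conceptual sketch of the verification (assumed, simplified):
  //
  //   std::unique_ptr<crypto::SecureHash> checksum(
  //       crypto::SecureHash::Create(crypto::SecureHash::SHA256));
  //   // Feed every hash prefix into |checksum|, smallest first across all
  //   // prefix sizes, then compare the final digest with
  //   // |expected_checksum_|.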

 private:
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromEmptyFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromAbsentFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromInvalidContentsFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromUnexpectedMagicFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromLowVersionFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromNoHashPrefixInfoFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromNoHashPrefixesFile);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestWriteNoResponseType);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestWritePartialResponseType);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestWriteFullResponseType);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestReadFromFileWithUnknownProto);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestAddUnlumpedHashesWithInvalidAddition);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestAddUnlumpedHashes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestAddUnlumpedHashesWithEmptyString);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestGetNextSmallestUnmergedPrefixWithEmptyPrefixMap);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestGetNextSmallestUnmergedPrefix);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesWithSameSizesInEachMap);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesWithDifferentSizesInEachMap);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesOldMapRunsOutFirst);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesAdditionsMapRunsOutFirst);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesFailsForRepeatedHashPrefix);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesFailsWhenRemovalsIndexTooLarge);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesRemovesOnlyElement);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesRemovesFirstElement);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesRemovesMiddleElement);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesRemovesLastElement);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesRemovesWhenOldHasDifferentSizes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestMergeUpdatesRemovesMultipleAcrossDifferentSizes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestReadFullResponseWithValidHashPrefixMap);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestReadFullResponseWithInvalidHashPrefixMap);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestHashPrefixExistsAtTheBeginning);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestHashPrefixExistsInTheMiddle);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestHashPrefixExistsAtTheEnd);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestHashPrefixExistsAtTheBeginningOfEven);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestHashPrefixExistsAtTheEndOfEven);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestHashPrefixDoesNotExistInConcatenatedList);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestFullHashExistsInMapWithSingleSize);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestFullHashExistsInMapWithDifferentSizes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestHashPrefixExistsInMapWithSingleSize);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestHashPrefixExistsInMapWithDifferentSizes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestHashPrefixDoesNotExistInMapWithDifferentSizes);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, GetMatchingHashPrefixSize32Or21);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest,
                           TestAdditionsWithRiceEncodingFailsWithInvalidInput);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestAdditionsWithRiceEncodingSucceeds);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestRemovalsWithRiceEncodingSucceeds);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestMergeUpdatesFailsChecksum);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, TestChecksumErrorOnStartup);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, WriteToDiskFails);
  FRIEND_TEST_ALL_PREFIXES(V4StoreTest, FullUpdateFailsChecksumSynchronously);
  friend class V4StoreTest;

  // If |prefix_size| is within the expected range, and |raw_hashes_length| is
  // a multiple of |prefix_size|, then it sets the string of length
  // |raw_hashes_length| starting at |raw_hashes_begin| as the value at key
  // |prefix_size| in |additions_map|.
  static ApplyUpdateResult AddUnlumpedHashes(PrefixSize prefix_size,
                                             const char* raw_hashes_begin,
                                             const size_t raw_hashes_length,
                                             HashPrefixMap* additions_map);

  // An overloaded version of AddUnlumpedHashes that allows passing in a
  // std::string object.
  static ApplyUpdateResult AddUnlumpedHashes(PrefixSize prefix_size,
                                             const std::string& raw_hashes,
                                             HashPrefixMap* additions_map);
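  // For example (illustrative):
  //
  //   AddUnlumpedHashes(4, "bbbbcccc", &additions_map)
  //
  // stores the 8-byte string "bbbbcccc" (the prefixes "bbbb" and "cccc") as
  // additions_map[4] and returns APPLY_UPDATE_SUCCESS; a |raw_hashes| length
  // that isn't a multiple of 4 is reported as
  // ADDITIONS_SIZE_UNEXPECTED_FAILURE.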

  // Get the next unmerged hash prefix in dictionary order from
  // |hash_prefix_map|. |iterator_map| is used to determine which hash prefixes
  // have been merged already. Returns true if there are any unmerged hash
  // prefixes in the list.
  static bool GetNextSmallestUnmergedPrefix(
      const HashPrefixMap& hash_prefix_map,
      const IteratorMap& iterator_map,
      HashPrefix* smallest_hash_prefix);
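  // For example (illustrative): with hash_prefix_map = {4: "bbbbcccc",
  // 5: "aaaaa"} and a freshly initialized iterator_map, this returns true and
  // sets |smallest_hash_prefix| to "aaaaa"; once that prefix has been merged
  // and iterator_map[5] advanced, the next call yields "bbbb".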

  // Returns true if |hash_prefix| exists between |begin| and |end| iterators.
  static bool HashPrefixMatches(const HashPrefix& hash_prefix,
                                const HashPrefixes::const_iterator& begin,
                                const HashPrefixes::const_iterator& end);

  // For each key in |hash_prefix_map|, sets the iterator at that key in
  // |iterator_map| to hash_prefix_map[key].begin().
  static void InitializeIteratorMap(const HashPrefixMap& hash_prefix_map,
                                    IteratorMap* iterator_map);

  // Reserve the appropriate string size so that the string size of the merged
  // list is exact. This ignores the space that would otherwise be released by
  // deletions specified in the update because it is non-trivial to calculate
  // those deletions upfront. This isn't so bad since deletions are supposed to
  // be small and infrequent.
  static void ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
                                      HashPrefixMap* prefix_map_to_update);

  // Merges the prefix map from the old store (|old_hash_prefix_map|) and the
  // update (|additions_map|) to populate the prefix map for the current store.
  // The indices in the |raw_removals| list, which may be NULL, are not merged.
  // The SHA256 checksum of the final list of hash prefixes, in
  // lexicographically sorted order, must match |expected_checksum| (if it's not
  // empty).
  ApplyUpdateResult MergeUpdate(
      const HashPrefixMap& old_hash_prefix_map,
      const HashPrefixMap& additions_map,
      const ::google::protobuf::RepeatedField<::google::protobuf::int32>*
          raw_removals,
      const std::string& expected_checksum);
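  // An illustrative walk-through (not normative): with old_hash_prefix_map =
  // {4: "aaaabbbb"}, additions_map = {4: "cccc"}, and raw_removals = [1], the
  // prefix at index 1 of the old sorted list ("bbbb") is dropped, the merged
  // map becomes {4: "aaaacccc"}, and the SHA256 of "aaaacccc" must then equal
  // |expected_checksum| if one was provided.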

  // Processes the FULL_UPDATE |response| from the server, and writes the
  // merged V4Store to disk. If processing the |response| succeeds, it returns
  // APPLY_UPDATE_SUCCESS. The UMA metrics for all interesting sub-operations
  // use the prefix |metric|.
  // This method is only called when we receive a FULL_UPDATE from the server.
  ApplyUpdateResult ProcessFullUpdateAndWriteToDisk(
      const std::string& metric,
      std::unique_ptr<ListUpdateResponse> response);

  // Processes a FULL_UPDATE |response| and updates the V4Store. If processing
  // the |response| succeeds, it returns APPLY_UPDATE_SUCCESS.
  // This method is called when we receive a FULL_UPDATE from the server, and
  // when we read a store file from disk on startup. The UMA metrics for all
  // interesting sub-operations use the prefix |metric|. Delays the checksum
  // check if |delay_checksum_check| is true.
  ApplyUpdateResult ProcessFullUpdate(
      const std::string& metric,
      const std::unique_ptr<ListUpdateResponse>& response,
      bool delay_checksum_check);

  // Merges the hash prefixes in |hash_prefix_map_old| and |response|, updates
  // the |hash_prefix_map_| and |state_| in the V4Store, and writes the merged
  // store to disk. If processing succeeds, it returns APPLY_UPDATE_SUCCESS.
  // This method is only called when we receive a PARTIAL_UPDATE from the
  // server. The UMA metrics for all interesting sub-operations use the prefix
  // |metric|.
  ApplyUpdateResult ProcessPartialUpdateAndWriteToDisk(
      const std::string& metric,
      const HashPrefixMap& hash_prefix_map_old,
      std::unique_ptr<ListUpdateResponse> response);

  // Merges the hash prefixes in |hash_prefix_map_old| and |response|, and
  // updates the |hash_prefix_map_| and |state_| in the V4Store. If processing
  // succeeds, it returns APPLY_UPDATE_SUCCESS. The UMA metrics for all
  // interesting sub-operations use the prefix |metric|. Delays the checksum
  // check if |delay_checksum_check| is true.
  ApplyUpdateResult ProcessUpdate(
      const std::string& metric,
      const HashPrefixMap& hash_prefix_map_old,
      const std::unique_ptr<ListUpdateResponse>& response,
      bool delay_checksum_check);

  // Reads the state of the store from the file on disk and returns the reason
  // for the failure or reports success.
  StoreReadResult ReadFromDisk();

  // Updates the |additions_map| with the additions received in the partial
  // update from the server. The UMA metrics for all interesting sub-operations
  // use the prefix |metric|.
  ApplyUpdateResult UpdateHashPrefixMapFromAdditions(
      const std::string& metric,
      const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions,
      HashPrefixMap* additions_map);

  // Writes the hash_prefix_map_ to disk as a V4StoreFileFormat proto.
  // |checksum| is used to set the |checksum| field in the final proto.
  StoreWriteResult WriteToDisk(const Checksum& checksum);

 protected:
  HashPrefixMap hash_prefix_map_;

 private:
  // The checksum value as read from the disk, until it is verified. Once
  // verified, it is cleared.
  std::string expected_checksum_;

  // The size of the file on disk for this store.
  int64_t file_size_;

  // True if the store file was successfully read and parsed, or the store was
  // populated from a full update.
  bool has_valid_data_;

  // The state of the store as returned by the PVer4 server in the last applied
  // update response.
  std::string state_;
  const base::FilePath store_path_;
  const scoped_refptr<base::SequencedTaskRunner> task_runner_;
};

std::ostream& operator<<(std::ostream& os, const V4Store& store);

}  // namespace safe_browsing

#endif  // COMPONENTS_SAFE_BROWSING_DB_V4_STORE_H_