summaryrefslogtreecommitdiff
path: root/chromium/content/browser/download/base_file.cc
blob: 7c05f8deac393f8141a7006a039a758341635663 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/download/base_file.h"

#include <utility>

#include "base/bind.h"
#include "base/files/file.h"
#include "base/files/file_util.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/pickle.h"
#include "base/strings/stringprintf.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "content/browser/download/download_interrupt_reasons_impl.h"
#include "content/browser/download/download_net_log_parameters.h"
#include "content/browser/download/download_stats.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/content_browser_client.h"
#include "content/public/common/quarantine.h"
#include "crypto/secure_hash.h"
#include "net/base/net_errors.h"
#include "net/log/net_log.h"
#include "net/log/net_log_event_type.h"

namespace content {

BaseFile::BaseFile(const net::NetLogWithSource& net_log) : net_log_(net_log) {}

BaseFile::~BaseFile() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  if (detached_)
    Close();
  else
    Cancel();  // Will delete the file.
}

DownloadInterruptReason BaseFile::Initialize(
    const base::FilePath& full_path,
    const base::FilePath& default_directory,
    base::File file,
    int64_t bytes_so_far,
    const std::string& hash_so_far,
    std::unique_ptr<crypto::SecureHash> hash_state,
    bool is_sparse_file) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  DCHECK(!detached_);

  if (full_path.empty()) {
    base::FilePath initial_directory(default_directory);
    base::FilePath temp_file;
    if (initial_directory.empty()) {
      initial_directory =
          GetContentClient()->browser()->GetDefaultDownloadDirectory();
    }
    // |initial_directory| can still be empty if ContentBrowserClient returned
    // an empty path for the downloads directory.
    if ((initial_directory.empty() ||
         !base::CreateTemporaryFileInDir(initial_directory, &temp_file)) &&
        !base::CreateTemporaryFile(&temp_file)) {
      return LogInterruptReason("Unable to create", 0,
                                DOWNLOAD_INTERRUPT_REASON_FILE_FAILED);
    }
    full_path_ = temp_file;
  } else {
    full_path_ = full_path;
  }

  bytes_so_far_ = bytes_so_far;
  secure_hash_ = std::move(hash_state);
  is_sparse_file_ = is_sparse_file;
  DCHECK(!is_sparse_file_ || !secure_hash_);
  file_ = std::move(file);

  return Open(hash_so_far);
}

DownloadInterruptReason BaseFile::AppendDataToFile(const char* data,
                                                   size_t data_len) {
  DCHECK(!is_sparse_file_);
  return WriteDataToFile(bytes_so_far_, data, data_len);
}

DownloadInterruptReason BaseFile::WriteDataToFile(int64_t offset,
                                                  const char* data,
                                                  size_t data_len) {
  // NOTE(benwells): The above DCHECK won't be present in release builds,
  // so we log any occurences to see how common this error is in the wild.
  if (detached_)
    RecordDownloadCount(APPEND_TO_DETACHED_FILE_COUNT);

  if (!file_.IsValid())
    return LogInterruptReason("No file stream on append", 0,
                              DOWNLOAD_INTERRUPT_REASON_FILE_FAILED);

  // TODO(phajdan.jr): get rid of this check.
  if (data_len == 0)
    return DOWNLOAD_INTERRUPT_REASON_NONE;

  net_log_.BeginEvent(net::NetLogEventType::DOWNLOAD_FILE_WRITTEN);
  int write_result = file_.Write(offset, data, data_len);
  DCHECK_NE(0, write_result);

  // Report errors on file writes.
  if (write_result < 0)
    return LogSystemError("Write", logging::GetLastSystemErrorCode());

  DCHECK_EQ(static_cast<size_t>(write_result), data_len);

  if (bytes_so_far_ != offset) {
    // A hole is created in the file.
    is_sparse_file_ = true;
    secure_hash_.reset();
  }

  bytes_so_far_ += data_len;
  net_log_.EndEvent(net::NetLogEventType::DOWNLOAD_FILE_WRITTEN,
                    net::NetLog::Int64Callback("bytes", data_len));

  if (secure_hash_)
    secure_hash_->Update(data, data_len);

  return DOWNLOAD_INTERRUPT_REASON_NONE;
}

DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  DownloadInterruptReason rename_result = DOWNLOAD_INTERRUPT_REASON_NONE;

  // If the new path is same as the old one, there is no need to perform the
  // following renaming logic.
  if (new_path == full_path_)
    return DOWNLOAD_INTERRUPT_REASON_NONE;

  // Save the information whether the download is in progress because
  // it will be overwritten by closing the file.
  bool was_in_progress = in_progress();

  Close();

  net_log_.BeginEvent(
      net::NetLogEventType::DOWNLOAD_FILE_RENAMED,
      base::Bind(&FileRenamedNetLogCallback, &full_path_, &new_path));

  base::CreateDirectory(new_path.DirName());

  // A simple rename wouldn't work here since we want the file to have
  // permissions / security descriptors that makes sense in the new directory.
  rename_result = MoveFileAndAdjustPermissions(new_path);

  net_log_.EndEvent(net::NetLogEventType::DOWNLOAD_FILE_RENAMED);

  if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE)
    full_path_ = new_path;

  // Re-open the file if we were still using it regardless of the interrupt
  // reason.
  DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE;
  if (was_in_progress)
    open_result = Open(std::string());

  return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result
                                                         : rename_result;
}

void BaseFile::Detach() {
  detached_ = true;
  net_log_.AddEvent(net::NetLogEventType::DOWNLOAD_FILE_DETACHED);
}

void BaseFile::Cancel() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  DCHECK(!detached_);

  net_log_.AddEvent(net::NetLogEventType::CANCELLED);

  Close();

  if (!full_path_.empty()) {
    net_log_.AddEvent(net::NetLogEventType::DOWNLOAD_FILE_DELETED);
    base::DeleteFile(full_path_, false);
  }

  Detach();
}

std::unique_ptr<crypto::SecureHash> BaseFile::Finish() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);

  // TODO(qinmin): verify that all the holes have been filled.
  if (is_sparse_file_)
    CalculatePartialHash(std::string());
  Close();
  return std::move(secure_hash_);
}

std::string BaseFile::DebugString() const {
  return base::StringPrintf(
      "{ "
      " full_path_ = \"%" PRFilePath
      "\""
      " bytes_so_far_ = %" PRId64 " detached_ = %c }",
      full_path_.value().c_str(),
      bytes_so_far_,
      detached_ ? 'T' : 'F');
}

DownloadInterruptReason BaseFile::CalculatePartialHash(
    const std::string& hash_to_expect) {
  secure_hash_ = crypto::SecureHash::Create(crypto::SecureHash::SHA256);

  if (bytes_so_far_ == 0)
    return DOWNLOAD_INTERRUPT_REASON_NONE;

  if (file_.Seek(base::File::FROM_BEGIN, 0) != 0)
    return LogSystemError("Seek partial file",
                          logging::GetLastSystemErrorCode());

  const size_t kMinBufferSize = secure_hash_->GetHashLength();
  const size_t kMaxBufferSize = 1024 * 512;
  static_assert(kMaxBufferSize <= std::numeric_limits<int>::max(),
                "kMaxBufferSize must fit on an int");

  // The size of the buffer is:
  // - at least kMinBufferSize so that we can use it to hold the hash as well.
  // - at most kMaxBufferSize so that there's a reasonable bound.
  // - not larger than |bytes_so_far_| unless bytes_so_far_ is less than the
  //   hash size.
  std::vector<char> buffer(std::max<int64_t>(
      kMinBufferSize, std::min<int64_t>(kMaxBufferSize, bytes_so_far_)));

  int64_t current_position = 0;
  while (current_position < bytes_so_far_) {
    // While std::min needs to work with int64_t, the result is always at most
    // kMaxBufferSize, which fits on an int.
    int bytes_to_read =
        std::min<int64_t>(buffer.size(), bytes_so_far_ - current_position);
    int length = file_.ReadAtCurrentPos(&buffer.front(), bytes_to_read);
    if (length == -1) {
      return LogInterruptReason("Reading partial file",
                                logging::GetLastSystemErrorCode(),
                                DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
    }

    if (length == 0)
      break;

    secure_hash_->Update(&buffer.front(), length);
    current_position += length;
  }

  if (current_position != bytes_so_far_) {
    return LogInterruptReason(
        "Verifying prefix hash", 0, DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
  }

  if (!hash_to_expect.empty()) {
    DCHECK_EQ(secure_hash_->GetHashLength(), hash_to_expect.size());
    DCHECK(buffer.size() >= secure_hash_->GetHashLength());
    std::unique_ptr<crypto::SecureHash> partial_hash(secure_hash_->Clone());
    partial_hash->Finish(&buffer.front(), buffer.size());

    if (memcmp(&buffer.front(),
               hash_to_expect.c_str(),
               partial_hash->GetHashLength())) {
      return LogInterruptReason("Verifying prefix hash",
                                0,
                                DOWNLOAD_INTERRUPT_REASON_FILE_HASH_MISMATCH);
    }
  }

  return DOWNLOAD_INTERRUPT_REASON_NONE;
}

DownloadInterruptReason BaseFile::Open(const std::string& hash_so_far) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  DCHECK(!detached_);
  DCHECK(!full_path_.empty());

  // Create a new file if it is not provided.
  if (!file_.IsValid()) {
    file_.Initialize(full_path_,
                     base::File::FLAG_OPEN_ALWAYS | base::File::FLAG_WRITE |
                         base::File::FLAG_READ);
    if (!file_.IsValid()) {
      return LogNetError("Open/Initialize File",
                         net::FileErrorToNetError(file_.error_details()));
    }
  }

  net_log_.BeginEvent(
      net::NetLogEventType::DOWNLOAD_FILE_OPENED,
      base::Bind(&FileOpenedNetLogCallback, &full_path_, bytes_so_far_));

  // For sparse file, skip hash validation.
  if (is_sparse_file_) {
    if (file_.GetLength() < bytes_so_far_) {
      ClearFile();
      return LogInterruptReason("File has fewer written bytes than expected", 0,
                                DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
    }
    return DOWNLOAD_INTERRUPT_REASON_NONE;
  }

  if (!secure_hash_) {
    DownloadInterruptReason reason = CalculatePartialHash(hash_so_far);
    if (reason != DOWNLOAD_INTERRUPT_REASON_NONE) {
      ClearFile();
      return reason;
    }
  }

  int64_t file_size = file_.Seek(base::File::FROM_END, 0);
  if (file_size < 0) {
    logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
    ClearFile();
    return LogSystemError("Seeking to end", error);
  } else if (file_size > bytes_so_far_) {
    // The file is larger than we expected.
    // This is OK, as long as we don't use the extra.
    // Truncate the file.
    if (!file_.SetLength(bytes_so_far_) ||
        file_.Seek(base::File::FROM_BEGIN, bytes_so_far_) != bytes_so_far_) {
      logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
      ClearFile();
      return LogSystemError("Truncating to last known offset", error);
    }
  } else if (file_size < bytes_so_far_) {
    // The file is shorter than we expected.  Our hashes won't be valid.
    ClearFile();
    return LogInterruptReason("Unable to seek to last written point", 0,
                              DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
  }

  return DOWNLOAD_INTERRUPT_REASON_NONE;
}

void BaseFile::Close() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);

  if (file_.IsValid()) {
    // Currently we don't really care about the return value, since if it fails
    // theres not much we can do.  But we might in the future.
    file_.Flush();
    ClearFile();
  }
}

void BaseFile::ClearFile() {
  // This should only be called when we have a stream.
  DCHECK(file_.IsValid());
  file_.Close();
  net_log_.EndEvent(net::NetLogEventType::DOWNLOAD_FILE_OPENED);
}

DownloadInterruptReason BaseFile::LogNetError(
    const char* operation,
    net::Error error) {
  net_log_.AddEvent(net::NetLogEventType::DOWNLOAD_FILE_ERROR,
                    base::Bind(&FileErrorNetLogCallback, operation, error));
  return ConvertNetErrorToInterruptReason(error, DOWNLOAD_INTERRUPT_FROM_DISK);
}

DownloadInterruptReason BaseFile::LogSystemError(
    const char* operation,
    logging::SystemErrorCode os_error) {
  // There's no direct conversion from a system error to an interrupt reason.
  base::File::Error file_error = base::File::OSErrorToFileError(os_error);
  return LogInterruptReason(
      operation, os_error,
      ConvertFileErrorToInterruptReason(file_error));
}

DownloadInterruptReason BaseFile::LogInterruptReason(
    const char* operation,
    int os_error,
    DownloadInterruptReason reason) {
  DVLOG(1) << __func__ << "() operation:" << operation
           << " os_error:" << os_error
           << " reason:" << DownloadInterruptReasonToString(reason);
  net_log_.AddEvent(
      net::NetLogEventType::DOWNLOAD_FILE_ERROR,
      base::Bind(&FileInterruptedNetLogCallback, operation, os_error, reason));
  return reason;
}

#if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_LINUX)

namespace {

// Given a source and a referrer, determines the "safest" URL that can be used
// to determine the authority of the download source. Returns an empty URL if no
// HTTP/S URL can be determined for the <|source_url|, |referrer_url|> pair.
GURL GetEffectiveAuthorityURL(const GURL& source_url,
                              const GURL& referrer_url) {
  if (source_url.is_valid()) {
    // http{,s} has an authority and are supported.
    if (source_url.SchemeIsHTTPOrHTTPS())
      return source_url;

    // If the download source is file:// ideally we should copy the MOTW from
    // the original file, but given that Chrome/Chromium places strict
    // restrictions on which schemes can reference file:// URLs, this code is
    // going to assume that at this point it's okay to treat this download as
    // being from the local system.
    if (source_url.SchemeIsFile())
      return source_url;

    // ftp:// has an authority.
    if (source_url.SchemeIs(url::kFtpScheme))
      return source_url;
  }

  if (referrer_url.is_valid() && referrer_url.SchemeIsHTTPOrHTTPS())
    return referrer_url;

  return GURL();
}

}  // namespace

DownloadInterruptReason BaseFile::AnnotateWithSourceInformation(
    const std::string& client_guid,
    const GURL& source_url,
    const GURL& referrer_url) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  DCHECK(!detached_);
  DCHECK(!full_path_.empty());

  net_log_.BeginEvent(net::NetLogEventType::DOWNLOAD_FILE_ANNOTATED);
  QuarantineFileResult result = QuarantineFile(
      full_path_, GetEffectiveAuthorityURL(source_url, referrer_url),
      referrer_url, client_guid);
  net_log_.EndEvent(net::NetLogEventType::DOWNLOAD_FILE_ANNOTATED);
  switch (result) {
    case QuarantineFileResult::OK:
      return DOWNLOAD_INTERRUPT_REASON_NONE;
    case QuarantineFileResult::VIRUS_INFECTED:
      return DOWNLOAD_INTERRUPT_REASON_FILE_VIRUS_INFECTED;
    case QuarantineFileResult::SECURITY_CHECK_FAILED:
      return DOWNLOAD_INTERRUPT_REASON_FILE_SECURITY_CHECK_FAILED;
    case QuarantineFileResult::BLOCKED_BY_POLICY:
      return DOWNLOAD_INTERRUPT_REASON_FILE_BLOCKED;
    case QuarantineFileResult::ACCESS_DENIED:
      return DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED;

    case QuarantineFileResult::FILE_MISSING:
      // Don't have a good interrupt reason here. This return code means that
      // the file at |full_path_| went missing before QuarantineFile got to look
      // at it. Not expected to happen, but we've seen instances where a file
      // goes missing immediately after BaseFile closes the handle.
      //
      // Intentionally using a different error message than
      // SECURITY_CHECK_FAILED in order to distinguish the two.
      return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED;

    case QuarantineFileResult::ANNOTATION_FAILED:
      // This means that the mark-of-the-web couldn't be applied. The file is
      // already on the file system under its final target name.
      //
      // Causes of failed annotations typically aren't transient. E.g. the
      // target file system may not support extended attributes or alternate
      // streams. We are going to allow these downloads to progress on the
      // assumption that failures to apply MOTW can't reliably be introduced
      // remotely.
      return DOWNLOAD_INTERRUPT_REASON_NONE;
  }
  return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED;
}
#else  // !OS_WIN && !OS_MACOSX && !OS_LINUX
DownloadInterruptReason BaseFile::AnnotateWithSourceInformation(
    const std::string& client_guid,
    const GURL& source_url,
    const GURL& referrer_url) {
  return DOWNLOAD_INTERRUPT_REASON_NONE;
}
#endif

}  // namespace content