summaryrefslogtreecommitdiff
path: root/chromium/components/download/internal/common/base_file.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/download/internal/common/base_file.cc')
-rw-r--r--chromium/components/download/internal/common/base_file.cc528
1 files changed, 528 insertions, 0 deletions
diff --git a/chromium/components/download/internal/common/base_file.cc b/chromium/components/download/internal/common/base_file.cc
new file mode 100644
index 00000000000..fce4f343809
--- /dev/null
+++ b/chromium/components/download/internal/common/base_file.cc
@@ -0,0 +1,528 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/download/public/common/base_file.h"
+
+#include <memory>
+#include <utility>
+
+#include "base/bind.h"
+#include "base/files/file.h"
+#include "base/files/file_util.h"
+#include "base/format_macros.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/pickle.h"
+#include "base/strings/stringprintf.h"
+#include "base/threading/thread_restrictions.h"
+#include "base/trace_event/trace_event.h"
+#include "build/build_config.h"
+#include "components/download/public/common/download_interrupt_reasons_utils.h"
+#include "components/download/public/common/download_item.h"
+#include "components/download/public/common/download_stats.h"
+#include "components/download/quarantine/quarantine.h"
+#include "crypto/secure_hash.h"
+
+#define CONDITIONAL_TRACE(trace) \
+ do { \
+ if (download_id_ != DownloadItem::kInvalidId) \
+ TRACE_EVENT_##trace; \
+ } while (0)
+
+namespace download {
+
+namespace {
+class FileErrorData : public base::trace_event::ConvertableToTraceFormat {
+ public:
+ FileErrorData(const char* operation,
+ int os_error,
+ DownloadInterruptReason interrupt_reason)
+ : operation_(operation),
+ os_error_(os_error),
+ interrupt_reason_(interrupt_reason) {}
+
+ ~FileErrorData() override = default;
+
+ void AppendAsTraceFormat(std::string* out) const override {
+ out->append("{");
+ out->append(
+ base::StringPrintf("\"operation\":\"%s\",", operation_.c_str()));
+ out->append(base::StringPrintf("\"os_error\":\"%d\",", os_error_));
+ out->append(base::StringPrintf(
+ "\"interrupt_reason\":\"%s\",",
+ DownloadInterruptReasonToString(interrupt_reason_).c_str()));
+ out->append("}");
+ }
+
+ private:
+ std::string operation_;
+ int os_error_;
+ DownloadInterruptReason interrupt_reason_;
+ DISALLOW_COPY_AND_ASSIGN(FileErrorData);
+};
+} // namespace
+
+BaseFile::BaseFile(uint32_t download_id) : download_id_(download_id) {
+ DETACH_FROM_SEQUENCE(sequence_checker_);
+}
+
+BaseFile::~BaseFile() {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ if (detached_)
+ Close();
+ else
+ Cancel(); // Will delete the file.
+}
+
+DownloadInterruptReason BaseFile::Initialize(
+ const base::FilePath& full_path,
+ const base::FilePath& default_directory,
+ base::File file,
+ int64_t bytes_so_far,
+ const std::string& hash_so_far,
+ std::unique_ptr<crypto::SecureHash> hash_state,
+ bool is_sparse_file,
+ int64_t* const bytes_wasted) {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ DCHECK(!detached_);
+
+ if (full_path.empty()) {
+ base::FilePath temp_file;
+ if ((default_directory.empty() ||
+ !base::CreateTemporaryFileInDir(default_directory, &temp_file)) &&
+ !base::CreateTemporaryFile(&temp_file)) {
+ return LogInterruptReason("Unable to create", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_FAILED);
+ }
+ full_path_ = temp_file;
+ } else {
+ full_path_ = full_path;
+ }
+
+ bytes_so_far_ = bytes_so_far;
+ secure_hash_ = std::move(hash_state);
+ is_sparse_file_ = is_sparse_file;
+ // Sparse file doesn't validate hash.
+ if (is_sparse_file_)
+ secure_hash_.reset();
+ file_ = std::move(file);
+
+ return Open(hash_so_far, bytes_wasted);
+}
+
+DownloadInterruptReason BaseFile::AppendDataToFile(const char* data,
+ size_t data_len) {
+ DCHECK(!is_sparse_file_);
+ return WriteDataToFile(bytes_so_far_, data, data_len);
+}
+
+DownloadInterruptReason BaseFile::WriteDataToFile(int64_t offset,
+ const char* data,
+ size_t data_len) {
+ // NOTE(benwells): The above DCHECK won't be present in release builds,
+ // so we log any occurences to see how common this error is in the wild.
+ if (detached_)
+ RecordDownloadCount(APPEND_TO_DETACHED_FILE_COUNT);
+
+ if (!file_.IsValid())
+ return LogInterruptReason("No file stream on append", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_FAILED);
+
+ // TODO(phajdan.jr): get rid of this check.
+ if (data_len == 0)
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+
+ // Use nestable async event instead of sync event so that all the writes
+ // belong to the same download will be grouped together.
+ CONDITIONAL_TRACE(
+ NESTABLE_ASYNC_BEGIN0("download", "DownloadFileWrite", download_id_));
+ int write_result = file_.Write(offset, data, data_len);
+ DCHECK_NE(0, write_result);
+
+ // Report errors on file writes.
+ if (write_result < 0)
+ return LogSystemError("Write", logging::GetLastSystemErrorCode());
+
+ DCHECK_EQ(static_cast<size_t>(write_result), data_len);
+
+ if (bytes_so_far_ != offset) {
+ // A hole is created in the file.
+ is_sparse_file_ = true;
+ secure_hash_.reset();
+ }
+
+ bytes_so_far_ += data_len;
+ CONDITIONAL_TRACE(NESTABLE_ASYNC_END1("download", "DownloadFileWrite",
+ download_id_, "bytes", data_len));
+
+ if (secure_hash_)
+ secure_hash_->Update(data, data_len);
+
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+}
+
+DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ DownloadInterruptReason rename_result = DOWNLOAD_INTERRUPT_REASON_NONE;
+
+ // If the new path is same as the old one, there is no need to perform the
+ // following renaming logic.
+ if (new_path == full_path_)
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+
+ // Save the information whether the download is in progress because
+ // it will be overwritten by closing the file.
+ bool was_in_progress = in_progress();
+
+ Close();
+
+ CONDITIONAL_TRACE(BEGIN2("download", "DownloadFileRename", "old_filename",
+ full_path_.AsUTF8Unsafe(), "new_filename",
+ new_path.AsUTF8Unsafe()));
+
+ base::CreateDirectory(new_path.DirName());
+
+ // A simple rename wouldn't work here since we want the file to have
+ // permissions / security descriptors that makes sense in the new directory.
+ rename_result = MoveFileAndAdjustPermissions(new_path);
+
+ CONDITIONAL_TRACE(END0("download", "DownloadFileRename"));
+
+ if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE)
+ full_path_ = new_path;
+
+ // Re-open the file if we were still using it regardless of the interrupt
+ // reason.
+ DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE;
+ if (was_in_progress) {
+ int64_t bytes_wasted; // Do not need to use bytes_wasted.
+ open_result = Open(std::string(), &bytes_wasted);
+ }
+
+ return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result
+ : rename_result;
+}
+
+void BaseFile::Detach() {
+ detached_ = true;
+ CONDITIONAL_TRACE(
+ INSTANT0("download", "DownloadFileDetached", TRACE_EVENT_SCOPE_THREAD));
+}
+
+void BaseFile::Cancel() {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ DCHECK(!detached_);
+
+ CONDITIONAL_TRACE(
+ INSTANT0("download", "DownloadCancelled", TRACE_EVENT_SCOPE_THREAD));
+
+ Close();
+
+ if (!full_path_.empty()) {
+ CONDITIONAL_TRACE(
+ INSTANT0("download", "DownloadFileDeleted", TRACE_EVENT_SCOPE_THREAD));
+ base::DeleteFile(full_path_, false);
+ }
+
+ Detach();
+}
+
+std::unique_ptr<crypto::SecureHash> BaseFile::Finish() {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+
+ // TODO(qinmin): verify that all the holes have been filled.
+ if (is_sparse_file_)
+ CalculatePartialHash(std::string());
+ Close();
+ return std::move(secure_hash_);
+}
+
+std::string BaseFile::DebugString() const {
+ return base::StringPrintf(
+ "{ "
+ " full_path_ = \"%" PRFilePath
+ "\""
+ " bytes_so_far_ = %" PRId64 " detached_ = %c }",
+ full_path_.value().c_str(), bytes_so_far_, detached_ ? 'T' : 'F');
+}
+
+DownloadInterruptReason BaseFile::CalculatePartialHash(
+ const std::string& hash_to_expect) {
+ secure_hash_ = crypto::SecureHash::Create(crypto::SecureHash::SHA256);
+
+ if (bytes_so_far_ == 0)
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+
+ if (file_.Seek(base::File::FROM_BEGIN, 0) != 0)
+ return LogSystemError("Seek partial file",
+ logging::GetLastSystemErrorCode());
+
+ const size_t kMinBufferSize = secure_hash_->GetHashLength();
+ const size_t kMaxBufferSize = 1024 * 512;
+ static_assert(kMaxBufferSize <= std::numeric_limits<int>::max(),
+ "kMaxBufferSize must fit on an int");
+
+ // The size of the buffer is:
+ // - at least kMinBufferSize so that we can use it to hold the hash as well.
+ // - at most kMaxBufferSize so that there's a reasonable bound.
+ // - not larger than |bytes_so_far_| unless bytes_so_far_ is less than the
+ // hash size.
+ std::vector<char> buffer(std::max<int64_t>(
+ kMinBufferSize, std::min<int64_t>(kMaxBufferSize, bytes_so_far_)));
+
+ int64_t current_position = 0;
+ while (current_position < bytes_so_far_) {
+ // While std::min needs to work with int64_t, the result is always at most
+ // kMaxBufferSize, which fits on an int.
+ int bytes_to_read =
+ std::min<int64_t>(buffer.size(), bytes_so_far_ - current_position);
+ int length = file_.ReadAtCurrentPos(&buffer.front(), bytes_to_read);
+ if (length == -1) {
+ return LogInterruptReason("Reading partial file",
+ logging::GetLastSystemErrorCode(),
+ DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
+ }
+
+ if (length == 0)
+ break;
+
+ secure_hash_->Update(&buffer.front(), length);
+ current_position += length;
+ }
+
+ if (current_position != bytes_so_far_) {
+ return LogInterruptReason("Verifying prefix hash", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
+ }
+
+ if (!hash_to_expect.empty()) {
+ DCHECK_EQ(secure_hash_->GetHashLength(), hash_to_expect.size());
+ DCHECK(buffer.size() >= secure_hash_->GetHashLength());
+ std::unique_ptr<crypto::SecureHash> partial_hash(secure_hash_->Clone());
+ partial_hash->Finish(&buffer.front(), buffer.size());
+
+ if (memcmp(&buffer.front(), hash_to_expect.c_str(),
+ partial_hash->GetHashLength())) {
+ return LogInterruptReason("Verifying prefix hash", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_HASH_MISMATCH);
+ }
+ }
+
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+}
+
+DownloadInterruptReason BaseFile::Open(const std::string& hash_so_far,
+ int64_t* const bytes_wasted) {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ DCHECK(!detached_);
+ DCHECK(!full_path_.empty());
+
+ // Create a new file if it is not provided.
+ if (!file_.IsValid()) {
+ file_.Initialize(full_path_, base::File::FLAG_OPEN_ALWAYS |
+ base::File::FLAG_WRITE |
+ base::File::FLAG_READ);
+ if (!file_.IsValid()) {
+ return LogNetError("Open/Initialize File",
+ net::FileErrorToNetError(file_.error_details()));
+ }
+ }
+
+ CONDITIONAL_TRACE(NESTABLE_ASYNC_BEGIN2(
+ "download", "DownloadFileOpen", download_id_, "file_name",
+ full_path_.AsUTF8Unsafe(), "bytes_so_far", bytes_so_far_));
+
+ // For sparse file, skip hash validation.
+ if (is_sparse_file_) {
+ if (file_.GetLength() < bytes_so_far_) {
+ *bytes_wasted = bytes_so_far_;
+ ClearFile();
+ return LogInterruptReason("File has fewer written bytes than expected", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
+ }
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+ }
+
+ if (!secure_hash_) {
+ DownloadInterruptReason reason = CalculatePartialHash(hash_so_far);
+ if (reason != DOWNLOAD_INTERRUPT_REASON_NONE) {
+ *bytes_wasted = file_.GetLength();
+ ClearFile();
+ return reason;
+ }
+ }
+
+ int64_t file_size = file_.Seek(base::File::FROM_END, 0);
+ if (file_size < 0) {
+ logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
+ ClearFile();
+ return LogSystemError("Seeking to end", error);
+ } else if (file_size > bytes_so_far_) {
+ // The file is larger than we expected.
+ // This is OK, as long as we don't use the extra.
+ // Truncate the file.
+ *bytes_wasted = file_size - bytes_so_far_;
+ if (!file_.SetLength(bytes_so_far_) ||
+ file_.Seek(base::File::FROM_BEGIN, bytes_so_far_) != bytes_so_far_) {
+ logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
+ *bytes_wasted = file_size;
+ ClearFile();
+ return LogSystemError("Truncating to last known offset", error);
+ }
+ } else if (file_size < bytes_so_far_) {
+ // The file is shorter than we expected. Our hashes won't be valid.
+ *bytes_wasted = bytes_so_far_;
+ ClearFile();
+ return LogInterruptReason("Unable to seek to last written point", 0,
+ DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
+ }
+
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+}
+
+void BaseFile::Close() {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+
+ if (file_.IsValid()) {
+ // Currently we don't really care about the return value, since if it fails
+ // theres not much we can do. But we might in the future.
+ file_.Flush();
+ ClearFile();
+ }
+}
+
+void BaseFile::ClearFile() {
+ // This should only be called when we have a stream.
+ DCHECK(file_.IsValid());
+ file_.Close();
+ CONDITIONAL_TRACE(
+ NESTABLE_ASYNC_END0("download", "DownloadFileOpen", download_id_));
+}
+
+DownloadInterruptReason BaseFile::LogNetError(const char* operation,
+ net::Error error) {
+ CONDITIONAL_TRACE(INSTANT2("download", "DownloadFileError",
+ TRACE_EVENT_SCOPE_THREAD, "operation", operation,
+ "net_error", error));
+ return ConvertNetErrorToInterruptReason(error, DOWNLOAD_INTERRUPT_FROM_DISK);
+}
+
+DownloadInterruptReason BaseFile::LogSystemError(
+ const char* operation,
+ logging::SystemErrorCode os_error) {
+ // There's no direct conversion from a system error to an interrupt reason.
+ base::File::Error file_error = base::File::OSErrorToFileError(os_error);
+ return LogInterruptReason(operation, os_error,
+ ConvertFileErrorToInterruptReason(file_error));
+}
+
+DownloadInterruptReason BaseFile::LogInterruptReason(
+ const char* operation,
+ int os_error,
+ DownloadInterruptReason reason) {
+ DVLOG(1) << __func__ << "() operation:" << operation
+ << " os_error:" << os_error
+ << " reason:" << DownloadInterruptReasonToString(reason);
+ auto error_data =
+ std::make_unique<FileErrorData>(operation, os_error, reason);
+ CONDITIONAL_TRACE(INSTANT1("download", "DownloadFileError",
+ TRACE_EVENT_SCOPE_THREAD, "file_error",
+ std::move(error_data)));
+ return reason;
+}
+
+#if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_LINUX)
+
+namespace {
+
+// Given a source and a referrer, determines the "safest" URL that can be used
+// to determine the authority of the download source. Returns an empty URL if no
+// HTTP/S URL can be determined for the <|source_url|, |referrer_url|> pair.
+GURL GetEffectiveAuthorityURL(const GURL& source_url,
+ const GURL& referrer_url) {
+ if (source_url.is_valid()) {
+ // http{,s} has an authority and are supported.
+ if (source_url.SchemeIsHTTPOrHTTPS())
+ return source_url;
+
+ // If the download source is file:// ideally we should copy the MOTW from
+ // the original file, but given that Chrome/Chromium places strict
+ // restrictions on which schemes can reference file:// URLs, this code is
+ // going to assume that at this point it's okay to treat this download as
+ // being from the local system.
+ if (source_url.SchemeIsFile())
+ return source_url;
+
+ // ftp:// has an authority.
+ if (source_url.SchemeIs(url::kFtpScheme))
+ return source_url;
+ }
+
+ if (referrer_url.is_valid() && referrer_url.SchemeIsHTTPOrHTTPS())
+ return referrer_url;
+
+ return GURL();
+}
+
+} // namespace
+
+DownloadInterruptReason BaseFile::AnnotateWithSourceInformation(
+ const std::string& client_guid,
+ const GURL& source_url,
+ const GURL& referrer_url) {
+ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+ DCHECK(!detached_);
+ DCHECK(!full_path_.empty());
+
+ CONDITIONAL_TRACE(BEGIN0("download", "DownloadFileAnnotate"));
+ QuarantineFileResult result = QuarantineFile(
+ full_path_, GetEffectiveAuthorityURL(source_url, referrer_url),
+ referrer_url, client_guid);
+ CONDITIONAL_TRACE(END0("download", "DownloadFileAnnotate"));
+
+ switch (result) {
+ case QuarantineFileResult::OK:
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+ case QuarantineFileResult::VIRUS_INFECTED:
+ return DOWNLOAD_INTERRUPT_REASON_FILE_VIRUS_INFECTED;
+ case QuarantineFileResult::SECURITY_CHECK_FAILED:
+ return DOWNLOAD_INTERRUPT_REASON_FILE_SECURITY_CHECK_FAILED;
+ case QuarantineFileResult::BLOCKED_BY_POLICY:
+ return DOWNLOAD_INTERRUPT_REASON_FILE_BLOCKED;
+ case QuarantineFileResult::ACCESS_DENIED:
+ return DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED;
+
+ case QuarantineFileResult::FILE_MISSING:
+ // Don't have a good interrupt reason here. This return code means that
+ // the file at |full_path_| went missing before QuarantineFile got to look
+ // at it. Not expected to happen, but we've seen instances where a file
+ // goes missing immediately after BaseFile closes the handle.
+ //
+ // Intentionally using a different error message than
+ // SECURITY_CHECK_FAILED in order to distinguish the two.
+ return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED;
+
+ case QuarantineFileResult::ANNOTATION_FAILED:
+ // This means that the mark-of-the-web couldn't be applied. The file is
+ // already on the file system under its final target name.
+ //
+ // Causes of failed annotations typically aren't transient. E.g. the
+ // target file system may not support extended attributes or alternate
+ // streams. We are going to allow these downloads to progress on the
+ // assumption that failures to apply MOTW can't reliably be introduced
+ // remotely.
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+ }
+ return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED;
+}
+#else // !OS_WIN && !OS_MACOSX && !OS_LINUX
+DownloadInterruptReason BaseFile::AnnotateWithSourceInformation(
+ const std::string& client_guid,
+ const GURL& source_url,
+ const GURL& referrer_url) {
+ return DOWNLOAD_INTERRUPT_REASON_NONE;
+}
+#endif
+
+} // namespace download