diff options
Diffstat (limited to 'chromium/components/download/internal/common/base_file.cc')
-rw-r--r-- | chromium/components/download/internal/common/base_file.cc | 528 |
1 files changed, 528 insertions, 0 deletions
diff --git a/chromium/components/download/internal/common/base_file.cc b/chromium/components/download/internal/common/base_file.cc new file mode 100644 index 00000000000..fce4f343809 --- /dev/null +++ b/chromium/components/download/internal/common/base_file.cc @@ -0,0 +1,528 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/download/public/common/base_file.h" + +#include <memory> +#include <utility> + +#include "base/bind.h" +#include "base/files/file.h" +#include "base/files/file_util.h" +#include "base/format_macros.h" +#include "base/logging.h" +#include "base/macros.h" +#include "base/pickle.h" +#include "base/strings/stringprintf.h" +#include "base/threading/thread_restrictions.h" +#include "base/trace_event/trace_event.h" +#include "build/build_config.h" +#include "components/download/public/common/download_interrupt_reasons_utils.h" +#include "components/download/public/common/download_item.h" +#include "components/download/public/common/download_stats.h" +#include "components/download/quarantine/quarantine.h" +#include "crypto/secure_hash.h" + +#define CONDITIONAL_TRACE(trace) \ + do { \ + if (download_id_ != DownloadItem::kInvalidId) \ + TRACE_EVENT_##trace; \ + } while (0) + +namespace download { + +namespace { +class FileErrorData : public base::trace_event::ConvertableToTraceFormat { + public: + FileErrorData(const char* operation, + int os_error, + DownloadInterruptReason interrupt_reason) + : operation_(operation), + os_error_(os_error), + interrupt_reason_(interrupt_reason) {} + + ~FileErrorData() override = default; + + void AppendAsTraceFormat(std::string* out) const override { + out->append("{"); + out->append( + base::StringPrintf("\"operation\":\"%s\",", operation_.c_str())); + out->append(base::StringPrintf("\"os_error\":\"%d\",", os_error_)); + out->append(base::StringPrintf( + "\"interrupt_reason\":\"%s\",", + DownloadInterruptReasonToString(interrupt_reason_).c_str())); + out->append("}"); + } + + private: + std::string operation_; + int os_error_; + DownloadInterruptReason interrupt_reason_; + DISALLOW_COPY_AND_ASSIGN(FileErrorData); +}; +} // namespace + +BaseFile::BaseFile(uint32_t download_id) : download_id_(download_id) { + DETACH_FROM_SEQUENCE(sequence_checker_); +} + +BaseFile::~BaseFile() { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + if (detached_) + Close(); + else + Cancel(); // Will delete the file. +} + +DownloadInterruptReason BaseFile::Initialize( + const base::FilePath& full_path, + const base::FilePath& default_directory, + base::File file, + int64_t bytes_so_far, + const std::string& hash_so_far, + std::unique_ptr<crypto::SecureHash> hash_state, + bool is_sparse_file, + int64_t* const bytes_wasted) { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + DCHECK(!detached_); + + if (full_path.empty()) { + base::FilePath temp_file; + if ((default_directory.empty() || + !base::CreateTemporaryFileInDir(default_directory, &temp_file)) && + !base::CreateTemporaryFile(&temp_file)) { + return LogInterruptReason("Unable to create", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_FAILED); + } + full_path_ = temp_file; + } else { + full_path_ = full_path; + } + + bytes_so_far_ = bytes_so_far; + secure_hash_ = std::move(hash_state); + is_sparse_file_ = is_sparse_file; + // Sparse file doesn't validate hash. + if (is_sparse_file_) + secure_hash_.reset(); + file_ = std::move(file); + + return Open(hash_so_far, bytes_wasted); +} + +DownloadInterruptReason BaseFile::AppendDataToFile(const char* data, + size_t data_len) { + DCHECK(!is_sparse_file_); + return WriteDataToFile(bytes_so_far_, data, data_len); +} + +DownloadInterruptReason BaseFile::WriteDataToFile(int64_t offset, + const char* data, + size_t data_len) { + // NOTE(benwells): The above DCHECK won't be present in release builds, + // so we log any occurences to see how common this error is in the wild. + if (detached_) + RecordDownloadCount(APPEND_TO_DETACHED_FILE_COUNT); + + if (!file_.IsValid()) + return LogInterruptReason("No file stream on append", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_FAILED); + + // TODO(phajdan.jr): get rid of this check. + if (data_len == 0) + return DOWNLOAD_INTERRUPT_REASON_NONE; + + // Use nestable async event instead of sync event so that all the writes + // belong to the same download will be grouped together. + CONDITIONAL_TRACE( + NESTABLE_ASYNC_BEGIN0("download", "DownloadFileWrite", download_id_)); + int write_result = file_.Write(offset, data, data_len); + DCHECK_NE(0, write_result); + + // Report errors on file writes. + if (write_result < 0) + return LogSystemError("Write", logging::GetLastSystemErrorCode()); + + DCHECK_EQ(static_cast<size_t>(write_result), data_len); + + if (bytes_so_far_ != offset) { + // A hole is created in the file. + is_sparse_file_ = true; + secure_hash_.reset(); + } + + bytes_so_far_ += data_len; + CONDITIONAL_TRACE(NESTABLE_ASYNC_END1("download", "DownloadFileWrite", + download_id_, "bytes", data_len)); + + if (secure_hash_) + secure_hash_->Update(data, data_len); + + return DOWNLOAD_INTERRUPT_REASON_NONE; +} + +DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + DownloadInterruptReason rename_result = DOWNLOAD_INTERRUPT_REASON_NONE; + + // If the new path is same as the old one, there is no need to perform the + // following renaming logic. + if (new_path == full_path_) + return DOWNLOAD_INTERRUPT_REASON_NONE; + + // Save the information whether the download is in progress because + // it will be overwritten by closing the file. + bool was_in_progress = in_progress(); + + Close(); + + CONDITIONAL_TRACE(BEGIN2("download", "DownloadFileRename", "old_filename", + full_path_.AsUTF8Unsafe(), "new_filename", + new_path.AsUTF8Unsafe())); + + base::CreateDirectory(new_path.DirName()); + + // A simple rename wouldn't work here since we want the file to have + // permissions / security descriptors that makes sense in the new directory. + rename_result = MoveFileAndAdjustPermissions(new_path); + + CONDITIONAL_TRACE(END0("download", "DownloadFileRename")); + + if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE) + full_path_ = new_path; + + // Re-open the file if we were still using it regardless of the interrupt + // reason. + DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE; + if (was_in_progress) { + int64_t bytes_wasted; // Do not need to use bytes_wasted. + open_result = Open(std::string(), &bytes_wasted); + } + + return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result + : rename_result; +} + +void BaseFile::Detach() { + detached_ = true; + CONDITIONAL_TRACE( + INSTANT0("download", "DownloadFileDetached", TRACE_EVENT_SCOPE_THREAD)); +} + +void BaseFile::Cancel() { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + DCHECK(!detached_); + + CONDITIONAL_TRACE( + INSTANT0("download", "DownloadCancelled", TRACE_EVENT_SCOPE_THREAD)); + + Close(); + + if (!full_path_.empty()) { + CONDITIONAL_TRACE( + INSTANT0("download", "DownloadFileDeleted", TRACE_EVENT_SCOPE_THREAD)); + base::DeleteFile(full_path_, false); + } + + Detach(); +} + +std::unique_ptr<crypto::SecureHash> BaseFile::Finish() { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + + // TODO(qinmin): verify that all the holes have been filled. + if (is_sparse_file_) + CalculatePartialHash(std::string()); + Close(); + return std::move(secure_hash_); +} + +std::string BaseFile::DebugString() const { + return base::StringPrintf( + "{ " + " full_path_ = \"%" PRFilePath + "\"" + " bytes_so_far_ = %" PRId64 " detached_ = %c }", + full_path_.value().c_str(), bytes_so_far_, detached_ ? 'T' : 'F'); +} + +DownloadInterruptReason BaseFile::CalculatePartialHash( + const std::string& hash_to_expect) { + secure_hash_ = crypto::SecureHash::Create(crypto::SecureHash::SHA256); + + if (bytes_so_far_ == 0) + return DOWNLOAD_INTERRUPT_REASON_NONE; + + if (file_.Seek(base::File::FROM_BEGIN, 0) != 0) + return LogSystemError("Seek partial file", + logging::GetLastSystemErrorCode()); + + const size_t kMinBufferSize = secure_hash_->GetHashLength(); + const size_t kMaxBufferSize = 1024 * 512; + static_assert(kMaxBufferSize <= std::numeric_limits<int>::max(), + "kMaxBufferSize must fit on an int"); + + // The size of the buffer is: + // - at least kMinBufferSize so that we can use it to hold the hash as well. + // - at most kMaxBufferSize so that there's a reasonable bound. + // - not larger than |bytes_so_far_| unless bytes_so_far_ is less than the + // hash size. + std::vector<char> buffer(std::max<int64_t>( + kMinBufferSize, std::min<int64_t>(kMaxBufferSize, bytes_so_far_))); + + int64_t current_position = 0; + while (current_position < bytes_so_far_) { + // While std::min needs to work with int64_t, the result is always at most + // kMaxBufferSize, which fits on an int. + int bytes_to_read = + std::min<int64_t>(buffer.size(), bytes_so_far_ - current_position); + int length = file_.ReadAtCurrentPos(&buffer.front(), bytes_to_read); + if (length == -1) { + return LogInterruptReason("Reading partial file", + logging::GetLastSystemErrorCode(), + DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT); + } + + if (length == 0) + break; + + secure_hash_->Update(&buffer.front(), length); + current_position += length; + } + + if (current_position != bytes_so_far_) { + return LogInterruptReason("Verifying prefix hash", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT); + } + + if (!hash_to_expect.empty()) { + DCHECK_EQ(secure_hash_->GetHashLength(), hash_to_expect.size()); + DCHECK(buffer.size() >= secure_hash_->GetHashLength()); + std::unique_ptr<crypto::SecureHash> partial_hash(secure_hash_->Clone()); + partial_hash->Finish(&buffer.front(), buffer.size()); + + if (memcmp(&buffer.front(), hash_to_expect.c_str(), + partial_hash->GetHashLength())) { + return LogInterruptReason("Verifying prefix hash", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_HASH_MISMATCH); + } + } + + return DOWNLOAD_INTERRUPT_REASON_NONE; +} + +DownloadInterruptReason BaseFile::Open(const std::string& hash_so_far, + int64_t* const bytes_wasted) { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + DCHECK(!detached_); + DCHECK(!full_path_.empty()); + + // Create a new file if it is not provided. + if (!file_.IsValid()) { + file_.Initialize(full_path_, base::File::FLAG_OPEN_ALWAYS | + base::File::FLAG_WRITE | + base::File::FLAG_READ); + if (!file_.IsValid()) { + return LogNetError("Open/Initialize File", + net::FileErrorToNetError(file_.error_details())); + } + } + + CONDITIONAL_TRACE(NESTABLE_ASYNC_BEGIN2( + "download", "DownloadFileOpen", download_id_, "file_name", + full_path_.AsUTF8Unsafe(), "bytes_so_far", bytes_so_far_)); + + // For sparse file, skip hash validation. + if (is_sparse_file_) { + if (file_.GetLength() < bytes_so_far_) { + *bytes_wasted = bytes_so_far_; + ClearFile(); + return LogInterruptReason("File has fewer written bytes than expected", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT); + } + return DOWNLOAD_INTERRUPT_REASON_NONE; + } + + if (!secure_hash_) { + DownloadInterruptReason reason = CalculatePartialHash(hash_so_far); + if (reason != DOWNLOAD_INTERRUPT_REASON_NONE) { + *bytes_wasted = file_.GetLength(); + ClearFile(); + return reason; + } + } + + int64_t file_size = file_.Seek(base::File::FROM_END, 0); + if (file_size < 0) { + logging::SystemErrorCode error = logging::GetLastSystemErrorCode(); + ClearFile(); + return LogSystemError("Seeking to end", error); + } else if (file_size > bytes_so_far_) { + // The file is larger than we expected. + // This is OK, as long as we don't use the extra. + // Truncate the file. + *bytes_wasted = file_size - bytes_so_far_; + if (!file_.SetLength(bytes_so_far_) || + file_.Seek(base::File::FROM_BEGIN, bytes_so_far_) != bytes_so_far_) { + logging::SystemErrorCode error = logging::GetLastSystemErrorCode(); + *bytes_wasted = file_size; + ClearFile(); + return LogSystemError("Truncating to last known offset", error); + } + } else if (file_size < bytes_so_far_) { + // The file is shorter than we expected. Our hashes won't be valid. + *bytes_wasted = bytes_so_far_; + ClearFile(); + return LogInterruptReason("Unable to seek to last written point", 0, + DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT); + } + + return DOWNLOAD_INTERRUPT_REASON_NONE; +} + +void BaseFile::Close() { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + + if (file_.IsValid()) { + // Currently we don't really care about the return value, since if it fails + // theres not much we can do. But we might in the future. + file_.Flush(); + ClearFile(); + } +} + +void BaseFile::ClearFile() { + // This should only be called when we have a stream. + DCHECK(file_.IsValid()); + file_.Close(); + CONDITIONAL_TRACE( + NESTABLE_ASYNC_END0("download", "DownloadFileOpen", download_id_)); +} + +DownloadInterruptReason BaseFile::LogNetError(const char* operation, + net::Error error) { + CONDITIONAL_TRACE(INSTANT2("download", "DownloadFileError", + TRACE_EVENT_SCOPE_THREAD, "operation", operation, + "net_error", error)); + return ConvertNetErrorToInterruptReason(error, DOWNLOAD_INTERRUPT_FROM_DISK); +} + +DownloadInterruptReason BaseFile::LogSystemError( + const char* operation, + logging::SystemErrorCode os_error) { + // There's no direct conversion from a system error to an interrupt reason. + base::File::Error file_error = base::File::OSErrorToFileError(os_error); + return LogInterruptReason(operation, os_error, + ConvertFileErrorToInterruptReason(file_error)); +} + +DownloadInterruptReason BaseFile::LogInterruptReason( + const char* operation, + int os_error, + DownloadInterruptReason reason) { + DVLOG(1) << __func__ << "() operation:" << operation + << " os_error:" << os_error + << " reason:" << DownloadInterruptReasonToString(reason); + auto error_data = + std::make_unique<FileErrorData>(operation, os_error, reason); + CONDITIONAL_TRACE(INSTANT1("download", "DownloadFileError", + TRACE_EVENT_SCOPE_THREAD, "file_error", + std::move(error_data))); + return reason; +} + +#if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_LINUX) + +namespace { + +// Given a source and a referrer, determines the "safest" URL that can be used +// to determine the authority of the download source. Returns an empty URL if no +// HTTP/S URL can be determined for the <|source_url|, |referrer_url|> pair. +GURL GetEffectiveAuthorityURL(const GURL& source_url, + const GURL& referrer_url) { + if (source_url.is_valid()) { + // http{,s} has an authority and are supported. + if (source_url.SchemeIsHTTPOrHTTPS()) + return source_url; + + // If the download source is file:// ideally we should copy the MOTW from + // the original file, but given that Chrome/Chromium places strict + // restrictions on which schemes can reference file:// URLs, this code is + // going to assume that at this point it's okay to treat this download as + // being from the local system. + if (source_url.SchemeIsFile()) + return source_url; + + // ftp:// has an authority. + if (source_url.SchemeIs(url::kFtpScheme)) + return source_url; + } + + if (referrer_url.is_valid() && referrer_url.SchemeIsHTTPOrHTTPS()) + return referrer_url; + + return GURL(); +} + +} // namespace + +DownloadInterruptReason BaseFile::AnnotateWithSourceInformation( + const std::string& client_guid, + const GURL& source_url, + const GURL& referrer_url) { + DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); + DCHECK(!detached_); + DCHECK(!full_path_.empty()); + + CONDITIONAL_TRACE(BEGIN0("download", "DownloadFileAnnotate")); + QuarantineFileResult result = QuarantineFile( + full_path_, GetEffectiveAuthorityURL(source_url, referrer_url), + referrer_url, client_guid); + CONDITIONAL_TRACE(END0("download", "DownloadFileAnnotate")); + + switch (result) { + case QuarantineFileResult::OK: + return DOWNLOAD_INTERRUPT_REASON_NONE; + case QuarantineFileResult::VIRUS_INFECTED: + return DOWNLOAD_INTERRUPT_REASON_FILE_VIRUS_INFECTED; + case QuarantineFileResult::SECURITY_CHECK_FAILED: + return DOWNLOAD_INTERRUPT_REASON_FILE_SECURITY_CHECK_FAILED; + case QuarantineFileResult::BLOCKED_BY_POLICY: + return DOWNLOAD_INTERRUPT_REASON_FILE_BLOCKED; + case QuarantineFileResult::ACCESS_DENIED: + return DOWNLOAD_INTERRUPT_REASON_FILE_ACCESS_DENIED; + + case QuarantineFileResult::FILE_MISSING: + // Don't have a good interrupt reason here. This return code means that + // the file at |full_path_| went missing before QuarantineFile got to look + // at it. Not expected to happen, but we've seen instances where a file + // goes missing immediately after BaseFile closes the handle. + // + // Intentionally using a different error message than + // SECURITY_CHECK_FAILED in order to distinguish the two. + return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED; + + case QuarantineFileResult::ANNOTATION_FAILED: + // This means that the mark-of-the-web couldn't be applied. The file is + // already on the file system under its final target name. + // + // Causes of failed annotations typically aren't transient. E.g. the + // target file system may not support extended attributes or alternate + // streams. We are going to allow these downloads to progress on the + // assumption that failures to apply MOTW can't reliably be introduced + // remotely. + return DOWNLOAD_INTERRUPT_REASON_NONE; + } + return DOWNLOAD_INTERRUPT_REASON_FILE_FAILED; +} +#else // !OS_WIN && !OS_MACOSX && !OS_LINUX +DownloadInterruptReason BaseFile::AnnotateWithSourceInformation( + const std::string& client_guid, + const GURL& source_url, + const GURL& referrer_url) { + return DOWNLOAD_INTERRUPT_REASON_NONE; +} +#endif + +} // namespace download |