// Copyright 2017 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/services/unzip/unzipper_impl.h" #include #include #include "base/bind.h" #include "base/compiler_specific.h" #include "base/files/file.h" #include "base/logging.h" #include "base/memory/raw_ptr.h" #include "base/strings/utf_string_conversions.h" #include "build/build_config.h" #include "components/services/filesystem/public/mojom/directory.mojom.h" #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" #include "third_party/zlib/google/redact.h" #include "third_party/zlib/google/zip.h" #include "third_party/zlib/google/zip_reader.h" namespace unzip { namespace { // Modifies output_dir to point to the final directory. bool CreateDirectory(filesystem::mojom::Directory* output_dir, const base::FilePath& path) { base::File::Error err = base::File::Error::FILE_OK; return output_dir->OpenDirectory(path.AsUTF8Unsafe(), mojo::NullReceiver(), filesystem::mojom::kFlagOpenAlways, &err) && err == base::File::Error::FILE_OK; } // A file writer that uses a mojom::Directory. class Writer : public zip::FileWriterDelegate { public: Writer(mojo::Remote output_dir, base::FilePath path) : FileWriterDelegate(base::File()), owned_output_dir_(std::move(output_dir)), output_dir_(owned_output_dir_.get()), path_(std::move(path)) { DCHECK(output_dir_); } Writer(filesystem::mojom::Directory* output_dir, base::FilePath path) : FileWriterDelegate(base::File()), output_dir_(output_dir), path_(std::move(path)) { DCHECK(output_dir_); } // Creates the output file. bool PrepareOutput() override { if (base::File::Error err; !output_dir_->OpenFileHandle( path_.AsUTF8Unsafe(), filesystem::mojom::kFlagCreate | filesystem::mojom::kFlagWrite | filesystem::mojom::kFlagWriteAttributes, &err, &owned_file_) || err != base::File::Error::FILE_OK) { LOG(ERROR) << "Cannot create extracted file " << zip::Redact(path_); return false; } return FileWriterDelegate::PrepareOutput(); } // Deletes the output file. void OnError() override { FileWriterDelegate::OnError(); owned_file_.Close(); if (base::File::Error err; !output_dir_->Delete(path_.AsUTF8Unsafe(), 0, &err) || err != base::File::Error::FILE_OK) { LOG(ERROR) << "Cannot delete extracted file " << zip::Redact(path_); } } private: const mojo::Remote owned_output_dir_; const raw_ptr output_dir_; const base::FilePath path_; }; std::unique_ptr MakeFileWriterDelegate( filesystem::mojom::Directory* output_dir, const base::FilePath& path) { if (path == path.BaseName()) return std::make_unique(output_dir, path); mojo::Remote parent; if (base::File::Error err; !output_dir->OpenDirectory(path.DirName().AsUTF8Unsafe(), parent.BindNewPipeAndPassReceiver(), filesystem::mojom::kFlagOpenAlways, &err) || err != base::File::Error::FILE_OK) { return nullptr; } return std::make_unique(std::move(parent), path.BaseName()); } bool Filter(const mojo::Remote& filter, const base::FilePath& path) { bool result = false; filter->ShouldUnzipFile(path, &result); return result; } // Reads the given ZIP archive, and returns all the filenames concatenated // together in one long string capped at ~100KB, without any separator, and in // the encoding used by the ZIP archive itself. Returns an empty string if the // ZIP cannot be read. std::string GetRawFileNamesFromZip(const base::File& zip_file) { std::string result; // Open ZIP archive for reading. zip::ZipReader reader; if (!reader.OpenFromPlatformFile(zip_file.GetPlatformFile())) { LOG(ERROR) << "Cannot decode ZIP archive from file handle " << zip_file.GetPlatformFile(); return result; } // Reserve a ~100KB buffer. result.reserve(100000); // Iterate over file entries of the ZIP archive. while (const zip::ZipReader::Entry* const entry = reader.Next()) { const std::string& path = entry->path_in_original_encoding; // Stop if we have enough data in |result|. if (path.size() > (result.capacity() - result.size())) break; // Accumulate data in |result|. result += path; } LOG_IF(ERROR, result.empty()) << "Cannot extract filenames from ZIP archive"; return result; } } // namespace UnzipperImpl::UnzipperImpl() = default; UnzipperImpl::UnzipperImpl(mojo::PendingReceiver receiver) : receiver_(this, std::move(receiver)) { receiver_.set_disconnect_handler(base::BindOnce( &UnzipperImpl::OnReceiverDisconnect, weak_ptr_factory_.GetWeakPtr())); } UnzipperImpl::~UnzipperImpl() = default; Encoding GetEncoding(const base::File& zip_file) { // Accumulate raw filenames. const std::string all_names = GetRawFileNamesFromZip(zip_file); if (all_names.empty()) { return UNKNOWN_ENCODING; } // Detect encoding. int consumed_bytes = 0; bool is_reliable = false; const Encoding encoding = CompactEncDet::DetectEncoding( all_names.data(), all_names.size(), nullptr, nullptr, nullptr, UNKNOWN_ENCODING, UNKNOWN_LANGUAGE, CompactEncDet::QUERY_CORPUS, // Plain text true, // Exclude 7-bit encodings &consumed_bytes, &is_reliable); VLOG(1) << "Detected encoding: " << MimeEncodingName(encoding) << " (" << encoding << "), reliable: " << is_reliable << ", consumed bytes: " << consumed_bytes; LOG_IF(ERROR, encoding == UNKNOWN_ENCODING) << "Cannot detect encoding of filenames in ZIP archive"; return encoding; } void UnzipperImpl::Listener(const mojo::Remote& listener, uint64_t bytes) { listener->OnProgress(bytes); } bool DoUnzip(base::File zip_file, mojo::Remote output_dir, std::string encoding_name, std::string password, zip::FilterCallback filter_cb, zip::UnzipProgressCallback progress_cb) { return zip::Unzip( zip_file.GetPlatformFile(), base::BindRepeating(&MakeFileWriterDelegate, output_dir.get()), base::BindRepeating(&CreateDirectory, output_dir.get()), {.encoding = std::move(encoding_name), .filter = std::move(filter_cb), .progress = std::move(progress_cb), .password = std::move(password)}); } bool RunUnzip( base::File zip_file, mojo::PendingRemote output_dir_remote, std::string encoding_name, std::string password, mojo::PendingRemote filter_remote, mojo::PendingRemote listener_remote) { mojo::Remote output_dir( std::move(output_dir_remote)); zip::FilterCallback filter_cb; if (filter_remote) { filter_cb = base::BindRepeating( &Filter, mojo::Remote(std::move(filter_remote))); } zip::UnzipProgressCallback progress_cb; if (listener_remote) { mojo::Remote listener(std::move(listener_remote)); progress_cb = base::BindRepeating(&UnzipperImpl::Listener, std::move(listener)); } return zip::Unzip( zip_file.GetPlatformFile(), base::BindRepeating(&MakeFileWriterDelegate, output_dir.get()), base::BindRepeating(&CreateDirectory, output_dir.get()), {.encoding = std::move(encoding_name), .filter = std::move(filter_cb), .progress = std::move(progress_cb), .password = std::move(password)}); } void UnzipperImpl::Unzip( base::File zip_file, mojo::PendingRemote output_dir_remote, mojom::UnzipOptionsPtr set_options, mojo::PendingRemote filter_remote, mojo::PendingRemote listener_remote, UnzipCallback callback) { DCHECK(zip_file.IsValid()); std::string encoding_name; if (set_options->encoding == "auto") { Encoding encoding = GetEncoding(zip_file); if (IsShiftJisOrVariant(encoding) || encoding == RUSSIAN_CP866) { encoding_name = MimeEncodingName(encoding); } } else { encoding_name = set_options->encoding; } runner_->PostTaskAndReplyWithResult( FROM_HERE, base::BindOnce(&RunUnzip, std::move(zip_file), std::move(output_dir_remote), std::move(encoding_name), std::move(set_options->password), std::move(filter_remote), std::move(listener_remote)), base::BindOnce(std::move(callback))); } void UnzipperImpl::DetectEncoding(base::File zip_file, DetectEncodingCallback callback) { DCHECK(zip_file.IsValid()); const Encoding encoding = GetEncoding(zip_file); std::move(callback).Run(encoding); } void UnzipperImpl::GetExtractedInfo(base::File zip_file, GetExtractedInfoCallback callback) { DCHECK(zip_file.IsValid()); // Open ZIP archive for reading. zip::ZipReader reader; if (!reader.OpenFromPlatformFile(zip_file.GetPlatformFile())) { LOG(ERROR) << "Cannot decode ZIP archive from file handle " << zip_file.GetPlatformFile(); unzip::mojom::InfoPtr info = unzip::mojom::Info::New(false, 0, false, false); std::move(callback).Run(std::move(info)); return; } int64_t size = 0; bool valid = true; bool has_encrypted_content = false; bool uses_aes_encryption = false; // Iterate over file entries of the ZIP archive. while (const zip::ZipReader::Entry* const entry = reader.Next()) { // Check for (invalid) size stored. if (entry->original_size < 0 || entry->original_size > std::numeric_limits::max() - size) { LOG(ERROR) << "ZIP bad size info from file handle " << zip_file.GetPlatformFile(); valid = false; break; } // Accumulate size (since original_size is signed, ignore invalid sizes). if (entry->original_size > 0) { size += entry->original_size; } if (entry->is_encrypted) { has_encrypted_content = true; if (entry->uses_aes_encryption) { uses_aes_encryption = true; } } } unzip::mojom::InfoPtr info = unzip::mojom::Info::New( valid, size, has_encrypted_content, uses_aes_encryption); std::move(callback).Run(std::move(info)); } void UnzipperImpl::OnReceiverDisconnect() { DCHECK(receiver_.is_bound()); receiver_.reset(); } } // namespace unzip