diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-08-24 12:15:48 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-08-28 13:30:04 +0000 |
commit | b014812705fc80bff0a5c120dfcef88f349816dc (patch) | |
tree | 25a2e2d9fa285f1add86aa333389a839f81a39ae /chromium/components/zucchini | |
parent | 9f4560b1027ae06fdb497023cdcaf91b8511fa74 (diff) | |
download | qtwebengine-chromium-b014812705fc80bff0a5c120dfcef88f349816dc.tar.gz |
BASELINE: Update Chromium to 68.0.3440.125
Change-Id: I23f19369e01f688e496f5bf179abb521ad73874f
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/components/zucchini')
40 files changed, 2978 insertions, 251 deletions
diff --git a/chromium/components/zucchini/BUILD.gn b/chromium/components/zucchini/BUILD.gn index 6adbc9e7b47..70831275505 100644 --- a/chromium/components/zucchini/BUILD.gn +++ b/chromium/components/zucchini/BUILD.gn @@ -2,10 +2,26 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. +import("//build/buildflag_header.gni") import("//chrome/process_version_rc_template.gni") -import("//testing/libfuzzer/fuzzer_test.gni") import("//testing/test.gni") +buildflag_header("buildflags") { + header = "buildflags.h" + + # Disable DEX on Windows Official Builds. + _enable_dex = !(is_win && is_official_build) + _enable_win = true + + # Disable ZTF (Zucchini Text Format) on official builds it is for testing only. + _enable_ztf = !is_official_build + flags = [ + "ENABLE_DEX=$_enable_dex", + "ENABLE_WIN=$_enable_win", + "ENABLE_ZTF=$_enable_ztf", + ] +} + static_library("zucchini_lib") { sources = [ "abs32_utils.cc", @@ -30,6 +46,8 @@ static_library("zucchini_lib") { "disassembler_no_op.h", "disassembler_win32.cc", "disassembler_win32.h", + "disassembler_ztf.cc", + "disassembler_ztf.h", "element_detection.cc", "element_detection.h", "encoded_view.cc", @@ -43,6 +61,8 @@ static_library("zucchini_lib") { "image_index.cc", "image_index.h", "image_utils.h", + "imposed_ensemble_matcher.cc", + "imposed_ensemble_matcher.h", "io_utils.cc", "io_utils.h", "patch_reader.cc", @@ -50,6 +70,8 @@ static_library("zucchini_lib") { "patch_utils.h", "patch_writer.cc", "patch_writer.h", + "reference_bytes_mixer.cc", + "reference_bytes_mixer.h", "reference_set.cc", "reference_set.h", "rel32_finder.cc", @@ -76,6 +98,7 @@ static_library("zucchini_lib") { ] deps = [ + ":buildflags", "//base", ] } @@ -122,31 +145,6 @@ if (is_win) { } } -# To download the corpus for local fuzzing use: -# gsutil -m rsync \ -# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_win32_fuzzer \ -# components/zucchini/testdata/disassembler_win32_fuzzer 
-fuzzer_test("zucchini_disassembler_win32_fuzzer") { - sources = [ - "disassembler_win32_fuzzer.cc", - ] - deps = [ - ":zucchini_lib", - "//base", - ] -} - -fuzzer_test("zucchini_patch_fuzzer") { - sources = [ - "patch_fuzzer.cc", - ] - deps = [ - ":zucchini_lib", - "//base", - ] - seed_corpus = "testdata/patch_fuzzer" -} - test("zucchini_unittests") { sources = [ "abs32_utils_unittest.cc", @@ -158,11 +156,13 @@ test("zucchini_unittests") { "buffer_view_unittest.cc", "crc32_unittest.cc", "disassembler_dex_unittest.cc", + "disassembler_ztf_unittest.cc", "element_detection_unittest.cc", "encoded_view_unittest.cc", "equivalence_map_unittest.cc", "image_index_unittest.cc", "image_utils_unittest.cc", + "imposed_ensemble_matcher_unittest.cc", "io_utils_unittest.cc", "mapped_file_unittest.cc", "patch_read_write_unittest.cc", @@ -208,3 +208,21 @@ test("zucchini_integration_test") { "//testing/gtest", ] } + +# Group to build and depend on all the Zucchini related fuzzers. +group("zucchini_fuzzers") { + testonly = true + deps = [ + "//components/zucchini/fuzzers:zucchini_disassembler_win32_fuzzer", + "//components/zucchini/fuzzers:zucchini_patch_fuzzer", + ] + + # Ensure protoc is available. + # Disabled on Windows due to crbug/844826. + if (current_toolchain == host_toolchain && !is_win) { + deps += [ + "//components/zucchini/fuzzers:zucchini_raw_apply_fuzzer", + "//components/zucchini/fuzzers:zucchini_raw_gen_fuzzer", + ] + } +} diff --git a/chromium/components/zucchini/README.md b/chromium/components/zucchini/README.md index 19f8cd69861..42f3b3e9a52 100644 --- a/chromium/components/zucchini/README.md +++ b/chromium/components/zucchini/README.md @@ -209,8 +209,8 @@ PatchElement self contained. Name | Format | Description --- | --- | --- old_offset | uint32 | Starting offset of the element in old file. -new_offset | uint32 | Starting offset of the element in new file. old_length | uint32 | Length of the element in old file. 
+new_offset | uint32 | Starting offset of the element in new file. new_length | uint32 | Length of the element in new file. exe_type | uint32 | Executable type for this unit, see `enum ExecutableType`. diff --git a/chromium/components/zucchini/disassembler_ztf.cc b/chromium/components/zucchini/disassembler_ztf.cc new file mode 100644 index 00000000000..9f3c318fa8b --- /dev/null +++ b/chromium/components/zucchini/disassembler_ztf.cc @@ -0,0 +1,647 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_ztf.h" + +#include <algorithm> +#include <cmath> +#include <iterator> +#include <numeric> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/numerics/checked_math.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +constexpr uint8_t kDelimiter = ','; + +constexpr int kHeaderMagicSize = 4; +constexpr int kFooterMagicSize = 5; +constexpr int kTotalMagicSize = kHeaderMagicSize + kFooterMagicSize; + +// Number of characters that aren't digits in each type of reference. +constexpr int kNumConstCharInAbs = 3; +constexpr int kNumConstCharInRel = 5; + +/******** ZtfConfig ********/ + +// For passing around metadata about the type of reference to match. +// - |digits_per_dim| is the length of the offset in lines/cols of a +// reference. +// - |open_char| is an ASCII character representing the opening char. +// - |close_char| is an ASCII character representing the closing char. 
+struct ZtfConfig { + uint8_t digits_per_dim; + uint8_t open_char; + uint8_t close_char; + + constexpr uint8_t abs_width() const { + return digits_per_dim * 2 + kNumConstCharInAbs; + } + + constexpr uint8_t rel_width() const { + return digits_per_dim * 2 + kNumConstCharInRel; + } + + uint8_t Width(ztf::LineCol /* lc */) const { return abs_width(); } + + uint8_t Width(ztf::DeltaLineCol /* dlc */) const { return rel_width(); } +}; + +// Creates a ZtfConfig for parsing or writing based on the desired |digits| and +// |pool|. +template <DisassemblerZtf::ReferencePool pool> +constexpr ZtfConfig MakeZtfConfig(uint8_t digits) { + switch (pool) { + case DisassemblerZtf::kAngles: + return ZtfConfig{digits, '<', '>'}; + case DisassemblerZtf::kBraces: + return ZtfConfig{digits, '{', '}'}; + case DisassemblerZtf::kBrackets: + return ZtfConfig{digits, '[', ']'}; + case DisassemblerZtf::kParentheses: + break; // Handled below. + } + return ZtfConfig{digits, '(', ')'}; +} + +/******** ZtfParser ********/ + +// ZtfParser is used to extract (absolute) LineCol and (relative) DeltaLineCol +// from a ZTF file, and contains various helpers for character, digits, and sign +// matching. +class ZtfParser { + public: + ZtfParser(offset_t hi, ConstBufferView image, ZtfConfig config) + : image_(image), hi_(hi), config_(config) { + DCHECK_LE(static_cast<size_t>(std::pow(10U, config_.digits_per_dim)), + ztf::kMaxDimValue); + } + + // Attempts to match an absolute reference at |offset|. If successful then + // assigns the result to |abs_lc| and returns true. Otherwise returns false. 
+ // An absolute reference takes the form: + // <open><digits><delimiter><digits><close> + bool MatchAtOffset(offset_t offset, ztf::LineCol* abs_lc) { + if (hi_ < config_.abs_width() || offset > hi_ - config_.abs_width()) + return false; + offset_ = offset; + return MatchChar(config_.open_char) && MatchDigits(+1, &abs_lc->line) && + MatchChar(kDelimiter) && MatchDigits(+1, &abs_lc->col) && + MatchChar(config_.close_char); + } + + // Attempts to match a relative reference at |offset|. If successful then + // assigns the result to |rel_dlc| and returns true. Otherwise returns false. A + // relative reference takes the form: + // <open><sign><digits><delimiter><sign><digits><close> + bool MatchAtOffset(offset_t offset, ztf::DeltaLineCol* rel_dlc) { + if (hi_ < config_.rel_width() || offset > hi_ - config_.rel_width()) + return false; + offset_ = offset; + ztf::dim_t line_sign; + ztf::dim_t col_sign; + return MatchChar(config_.open_char) && MatchSign(&line_sign) && + MatchDigits(line_sign, &rel_dlc->line) && MatchChar(kDelimiter) && + MatchSign(&col_sign) && MatchDigits(col_sign, &rel_dlc->col) && + MatchChar(config_.close_char); + } + + private: + // The Match*() functions below can advance |offset_|, and return a bool to + // indicate success to allow chaining using &&. + + // Returns true if |character| is at location |offset_| in |image_| and + // increments |offset_|. + bool MatchChar(uint8_t character) { + return character == image_.read<uint8_t>(offset_++); + } + + // Looks for '+' or '-' at |offset_|. If found, stores +1 or -1 in |sign| and + // returns true. Otherwise returns false. 
+ bool MatchSign(ztf::dim_t* sign) { + uint8_t val = image_.read<uint8_t>(offset_++); + if (val == static_cast<uint8_t>(ztf::SignChar::kMinus)) { + *sign = -1; + return true; + } + if (val == static_cast<uint8_t>(ztf::SignChar::kPlus)) { + *sign = 1; + return true; + } + return false; + } + + // Attempts to extract a number with the number of base 10 digits equal to + // |config_.digits_per_dim| from |image_| starting from |offset_|. Returns + // true and assigns the integer value to |value| if successful. + bool MatchDigits(ztf::dim_t sign, ztf::dim_t* value) { + ztf::dim_t output = 0; + for (int i = 0; i < config_.digits_per_dim; ++i) { + auto digit = image_.read<uint8_t>(offset_++); + if (digit >= '0' && digit < '0' + 10) + output = output * 10 + digit - '0'; + else + return false; + } + if (!output && sign < 0) // Disallow "-0", "-00", etc. + return false; + *value = sign * output; + return true; + } + + ConstBufferView image_; + const offset_t hi_; + const ZtfConfig config_; + offset_t offset_ = 0; + + DISALLOW_COPY_AND_ASSIGN(ZtfParser); +}; + +/******** ZtfWriter ********/ + +// ZtfWriter is used to write references to an image. This includes writing +// the enclosing characters around the reference. +class ZtfWriter { + public: + ZtfWriter(MutableBufferView image, ZtfConfig config) + : image_(image), + config_(config), + val_bound_( + static_cast<ztf::dim_t>(std::pow(10, config_.digits_per_dim))) {} + + // Write an absolute reference |abs_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. 
+ void Write(offset_t offset, ztf::LineCol abs_ref) { + offset_ = offset; + if (!SafeToWriteNumber(abs_ref.line) || !SafeToWriteNumber(abs_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.abs_width())) { + return; + } + WriteChar(config_.open_char); + WriteNumber(abs_ref.line); + WriteChar(kDelimiter); + WriteNumber(abs_ref.col); + WriteChar(config_.close_char); + } + + // Write a relative reference |rel_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. + void Write(offset_t offset, ztf::DeltaLineCol rel_ref) { + offset_ = offset; + if (!SafeToWriteNumber(rel_ref.line) || !SafeToWriteNumber(rel_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.rel_width())) { + return; + } + WriteChar(config_.open_char); + WriteSign(rel_ref.line); + WriteNumber(rel_ref.line); + WriteChar(kDelimiter); + WriteSign(rel_ref.col); + WriteNumber(rel_ref.col); + WriteChar(config_.close_char); + } + + private: + // Returns whether it is safe to modify bytes in |[lo, hi)| in |image_| for + // Reference correction. Failure cases are: + // - Out-of-bound writes. + // - Overwriting '\n'. This is a ZTF special case since '\n' dictates file + // structure, and Reference correction should never mess with this. + bool SafeToWriteData(offset_t lo, offset_t hi) const { + DCHECK_LE(lo, hi); + // Out of bounds. + if (hi > image_.size()) + return false; + for (offset_t i = lo; i < hi; ++i) { + if (image_.read<uint8_t>(i) == '\n') + return false; + } + return true; + } + + // Checks whether it is safe to write a |val| based on + // |config_.digits_per_dim|. + bool SafeToWriteNumber(ztf::dim_t val) const { + return std::abs(val) < val_bound_; + } + + // The Write*() functions each advance |offset_| by a fixed distance. The + // caller should ensure there's enough space to write data. + + // Write |character| at |offset_| and increment |offset_|. 
+ void WriteChar(uint8_t character) { image_.write(offset_++, character); } + + // Write the sign of |value| at |offset_| and increment |offset_|. + void WriteSign(ztf::dim_t value) { + image_.write(offset_++, + value >= 0 ? ztf::SignChar::kPlus : ztf::SignChar::kMinus); + } + + // Writes the absolute value of the number represented by |value| at |offset_| + // using zero padding to fill |config_.digits_per_dim|. + void WriteNumber(ztf::dim_t value) { + size_t size = config_.digits_per_dim + 1; + DCHECK_LE(size, kMaxDigitCount + 1); + char digits[kMaxDigitCount + 1]; // + 1 for terminator. + int len = + snprintf(digits, size, "%0*u", config_.digits_per_dim, std::abs(value)); + DCHECK_EQ(len, config_.digits_per_dim); + for (int i = 0; i < len; ++i) + image_.write(offset_++, digits[i]); + } + + MutableBufferView image_; + const ZtfConfig config_; + // Bound on numeric values, as limited by |config_.digits_per_dim|. + const ztf::dim_t val_bound_; + offset_t offset_ = 0; + DISALLOW_COPY_AND_ASSIGN(ZtfWriter); +}; + +// Specialization of ReferenceReader for reading text references. +template <typename T> +class ZtfReferenceReader : public ReferenceReader { + public: + ZtfReferenceReader(offset_t lo, + offset_t hi, + ConstBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : offset_(lo), + hi_(hi), + translator_(translator), + config_(config), + parser_(hi_, image, config_) { + DCHECK_LE(hi_, image.size()); + } + + // Walks |offset_| from |lo| to |hi_| running |parser_|. If any matches are + // found they are returned. + base::Optional<Reference> GetNext() override { + T line_col; + for (; offset_ < hi_; ++offset_) { + if (!parser_.MatchAtOffset(offset_, &line_col)) + continue; + + auto target = ConvertToTargetOffset(offset_, line_col); + // Ignore targets that point outside the file. 
+ if (target == kInvalidOffset) + continue; + offset_t location = offset_; + offset_ += config_.Width(line_col); + return Reference{location, target}; + } + return base::nullopt; + } + + private: + // Converts |lc| (an absolute reference) to an offset using |translator_|. + offset_t ConvertToTargetOffset(offset_t /* location */, + ztf::LineCol lc) const { + return translator_.LineColToOffset(lc); + } + + // Converts |dlc| (a relative reference) to an offset using |translator_|. + // This requires converting the |dlc| to a ztf::LineCol to find the offset. + offset_t ConvertToTargetOffset(offset_t location, + ztf::DeltaLineCol dlc) const { + auto lc = translator_.OffsetToLineCol(location); + if (!lc.has_value()) + return kInvalidOffset; + return translator_.LineColToOffset(lc.value() + dlc); + } + + offset_t offset_; + const offset_t hi_; + const ZtfTranslator& translator_; + const ZtfConfig config_; + ZtfParser parser_; +}; + +// Specialization of ReferenceWriter for writing text references. +template <typename T> +class ZtfReferenceWriter : public ReferenceWriter { + public: + ZtfReferenceWriter(MutableBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : translator_(translator), writer_(image, config) {} + + void PutNext(Reference reference) override { + T line_col; + if (!ConvertToTargetLineCol(reference, &line_col)) + return; + + writer_.Write(reference.location, line_col); + } + + private: + // Converts |reference| to an absolute reference to be stored in |out_lc|. + // Returns true on success. + bool ConvertToTargetLineCol(Reference reference, ztf::LineCol* out_lc) { + auto temp_lc = translator_.OffsetToLineCol(reference.target); + if (!temp_lc.has_value() || !translator_.IsValid(temp_lc.value())) + return false; + + *out_lc = temp_lc.value(); + return true; + } + + // Converts |reference| to a relative reference to be stored in |out_dlc|. + // Will return true on success. 
+ bool ConvertToTargetLineCol(Reference reference, ztf::DeltaLineCol* out_dlc) { + auto location_lc = translator_.OffsetToLineCol(reference.location); + if (!location_lc.has_value()) + return false; + + auto target_lc = translator_.OffsetToLineCol(reference.target); + if (!target_lc.has_value()) + return false; + + *out_dlc = target_lc.value() - location_lc.value(); + return translator_.IsValid(reference.location, *out_dlc); + } + + const ZtfTranslator& translator_; + ZtfWriter writer_; +}; + +// Reads a text header to check for the magic string "ZTxt" at the start +// indicating the file should be treated as a Zucchini text file. +bool ReadZtfHeader(ConstBufferView image) { + BufferSource source(image); + // Reject empty images and "ZTxtxTZ\n" (missing 't'). + if (source.size() < kTotalMagicSize) + return false; + return source.CheckNextBytes({'Z', 'T', 'x', 't'}); +} + +} // namespace + +/******** ZtfTranslator ********/ + +ZtfTranslator::ZtfTranslator() {} + +ZtfTranslator::~ZtfTranslator() = default; + +bool ZtfTranslator::Init(ConstBufferView image) { + line_starts_.clear(); + // Record the starting offset of every line in |image| into |line_starts_|. + line_starts_.push_back(0); + for (size_t i = 0; i < image.size(); ++i) { + if (image.read<uint8_t>(i) == '\n') { + // Maximum number of entries is |ztf::kMaxDimValue|, including the end + // sentinel. + if (line_starts_.size() >= ztf::kMaxDimValue) + return false; + line_starts_.push_back(i + 1); + // Check that the line length is reachable from an absolute reference. + if (line_starts_.back() - *std::next(line_starts_.rbegin()) >= + ztf::kMaxDimValue) { + return false; + } + } + } + // Since the last character of a ZTF file is always '\n', |line_starts_| will + // always contain the file length as the last element, which serves as a + // sentinel. 
+ CHECK_EQ(image.size(), static_cast<size_t>(line_starts_.back())); + return true; +} + +bool ZtfTranslator::IsValid(ztf::LineCol lc) const { + DCHECK(!line_starts_.empty()); + return lc.line >= 1 && lc.col >= 1 && + static_cast<offset_t>(lc.line) <= NumLines() && + static_cast<offset_t>(lc.col) <= LineLength(lc.line); +} + +bool ZtfTranslator::IsValid(offset_t offset, ztf::DeltaLineCol dlc) const { + DCHECK(!line_starts_.empty()); + auto abs_lc = OffsetToLineCol(offset); + if (!abs_lc.has_value()) + return false; + + if (!base::CheckAdd(abs_lc->line, dlc.line).IsValid() || + !base::CheckAdd(abs_lc->col, dlc.col).IsValid()) { + return false; + } + return IsValid(abs_lc.value() + dlc); +} + +offset_t ZtfTranslator::LineColToOffset(ztf::LineCol lc) const { + // Guard against out of bounds access to |line_starts_| and ensure the + // |lc| falls within the file. + DCHECK(!line_starts_.empty()); + if (!IsValid(lc)) + return kInvalidOffset; + + offset_t target = line_starts_[lc.line - 1] + lc.col - 1; + DCHECK_LT(target, line_starts_.back()); + return target; +} + +base::Optional<ztf::LineCol> ZtfTranslator::OffsetToLineCol( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + // Don't place a target outside the image. 
+ if (offset >= line_starts_.back()) + return base::nullopt; + auto it = SearchForRange(offset); + ztf::LineCol lc; + lc.line = std::distance(line_starts_.cbegin(), it) + 1; + lc.col = offset - line_starts_[lc.line - 1] + 1; + DCHECK_LE(static_cast<offset_t>(lc.col), LineLength(lc.line)); + return lc; +} + +std::vector<offset_t>::const_iterator ZtfTranslator::SearchForRange( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + auto it = + std::upper_bound(line_starts_.cbegin(), line_starts_.cend(), offset); + DCHECK(it != line_starts_.cbegin()); + return --it; +} + +offset_t ZtfTranslator::LineLength(uint16_t line) const { + DCHECK_GE(line, 1); + DCHECK_LE(line, NumLines()); + return line_starts_[line] - line_starts_[line - 1]; +} + +/******** DisassemblerZtf ********/ + +// Use 2 even though reference "chaining" isn't present in ZTF as it is the +// usual case for other Disassemblers and this is meant to mimic that as closely +// as possible. +DisassemblerZtf::DisassemblerZtf() : Disassembler(2) {} + +DisassemblerZtf::~DisassemblerZtf() = default; + +// static. 
+bool DisassemblerZtf::QuickDetect(ConstBufferView image) { + return ReadZtfHeader(image); +} + +ExecutableType DisassemblerZtf::GetExeType() const { + return kExeTypeZtf; +} + +std::string DisassemblerZtf::GetExeTypeString() const { + return "Zucchini Text Format"; +} + +std::vector<ReferenceGroup> DisassemblerZtf::MakeReferenceGroups() const { + return { + {{5, TypeTag(kAnglesAbs1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<1, kAngles>, + &DisassemblerZtf::MakeWriteAbs<1, kAngles>}, + {{7, TypeTag(kAnglesAbs2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<2, kAngles>, + &DisassemblerZtf::MakeWriteAbs<2, kAngles>}, + {{9, TypeTag(kAnglesAbs3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<3, kAngles>, + &DisassemblerZtf::MakeWriteAbs<3, kAngles>}, + {{7, TypeTag(kAnglesRel1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<1, kAngles>, + &DisassemblerZtf::MakeWriteRel<1, kAngles>}, + {{9, TypeTag(kAnglesRel2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<2, kAngles>, + &DisassemblerZtf::MakeWriteRel<2, kAngles>}, + {{11, TypeTag(kAnglesRel3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<3, kAngles>, + &DisassemblerZtf::MakeWriteRel<3, kAngles>}, + {{5, TypeTag(kBracesAbs1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<1, kBraces>, + &DisassemblerZtf::MakeWriteAbs<1, kBraces>}, + {{7, TypeTag(kBracesAbs2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<2, kBraces>, + &DisassemblerZtf::MakeWriteAbs<2, kBraces>}, + {{9, TypeTag(kBracesAbs3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<3, kBraces>, + &DisassemblerZtf::MakeWriteAbs<3, kBraces>}, + {{7, TypeTag(kBracesRel1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<1, kBraces>, + &DisassemblerZtf::MakeWriteRel<1, kBraces>}, + {{9, TypeTag(kBracesRel2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<2, kBraces>, + &DisassemblerZtf::MakeWriteRel<2, kBraces>}, + {{11, TypeTag(kBracesRel3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<3, kBraces>, + 
&DisassemblerZtf::MakeWriteRel<3, kBraces>}, + {{5, TypeTag(kBracketsAbs1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<1, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<1, kBrackets>}, + {{7, TypeTag(kBracketsAbs2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<2, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<2, kBrackets>}, + {{9, TypeTag(kBracketsAbs3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<3, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<3, kBrackets>}, + {{7, TypeTag(kBracketsRel1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<1, kBrackets>, + &DisassemblerZtf::MakeWriteRel<1, kBrackets>}, + {{9, TypeTag(kBracketsRel2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<2, kBrackets>, + &DisassemblerZtf::MakeWriteRel<2, kBrackets>}, + {{11, TypeTag(kBracketsRel3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<3, kBrackets>, + &DisassemblerZtf::MakeWriteRel<3, kBrackets>}, + {{5, TypeTag(kParenthesesAbs1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<1, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<1, kParentheses>}, + {{7, TypeTag(kParenthesesAbs2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<2, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<2, kParentheses>}, + {{9, TypeTag(kParenthesesAbs3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<3, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<3, kParentheses>}, + {{7, TypeTag(kParenthesesRel1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<1, kParentheses>, + &DisassemblerZtf::MakeWriteRel<1, kParentheses>}, + {{9, TypeTag(kParenthesesRel2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<2, kParentheses>, + &DisassemblerZtf::MakeWriteRel<2, kParentheses>}, + {{11, TypeTag(kParenthesesRel3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<3, kParentheses>, + &DisassemblerZtf::MakeWriteRel<3, kParentheses>}, + }; +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> 
+std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadAbs(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::LineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadRel(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::DeltaLineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteAbs( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::LineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteRel( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::DeltaLineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +bool DisassemblerZtf::Parse(ConstBufferView image) { + image_ = image; + if (!ReadZtfHeader(image_)) + return false; + + CHECK_GE(image_.size(), + static_cast<size_t>(kTotalMagicSize)); // Needs header and footer. + + // Find the terminating footer "txTZ\n" that indicates the end of the image. 
+ offset_t offset = 0; + for (; offset <= image_.size() - kFooterMagicSize; offset++) { + if (image_.read<uint8_t>(offset) == 't' && + image_.read<uint8_t>(offset + 1) == 'x' && + image_.read<uint8_t>(offset + 2) == 'T' && + image_.read<uint8_t>(offset + 3) == 'Z' && + image_.read<uint8_t>(offset + 4) == '\n') { + break; + } + } + + // If no footer is found before the end of the image then the parsing failed. + if (offset > image_.size() - kFooterMagicSize) + return false; + image_.shrink(offset + kFooterMagicSize); + + return translator_.Init(image_); +} + +} // namespace zucchini diff --git a/chromium/components/zucchini/disassembler_ztf.h b/chromium/components/zucchini/disassembler_ztf.h new file mode 100644 index 00000000000..0719093792d --- /dev/null +++ b/chromium/components/zucchini/disassembler_ztf.h @@ -0,0 +1,201 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include <memory> +#include <string> +#include <vector> + +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_ztf.h" + +namespace zucchini { + +// Disassembler for text based files. This file format is supported for +// debugging Zucchini and is not intended for production usage. +// +// A valid Zucchini Text Format (ZTF) file is specified as follows: +// +// Header: +// The first four bytes must be - 'Z' 'T' 'x' 't' +// Footer: +// The last five bytes must be - 't' 'x' 'T' 'Z' '\n' +// (note that terminating new line is required). +// Content: +// The content can be any sequence of printable ASCII characters and new line +// (but not carriage return). 
This excludes the sequence that comprises the +// Footer. +// References: +// A reference is either Absolute or Relative. All references must begin and +// end with a pair of enclosing characters <open>, <close>. The options are: +// - Angles: '<' and '>' +// - Braces: '{' and '}' +// - Brackets: '[' and ']' +// - Parentheses: '(' and ')' +// +// A reference contains three items: +// - A line number <line> +// - A delimiter ',' <delimiter> +// - A column number <col> +// <line> and <col> may contain 1-3 digits and both must contain the same +// number of digits. If a number is too short then it can be left-padded +// with '0'. +// +// For Absolute references, <line> and <col> are the 1-based (i.e. positive) +// line and column numbers of a character in the ZTF. This follows +// standard convention for text editors. Note that "\n" is considered to be +// part of a preceding line. +// +// <open><line><delimiter><col><close> +// +// For Relative references, <line> and <col> are integer offset deltas of the +// target's (absolute) line and column relative to the line and column of the +// reference's first byte (i.e. <open>). Relative references have <sign> ('+' +// or '-') before <line> and <col>. For the special case of "0", "00", etc., +// <sign> must be "+". +// +// <open><sign><line><delimiter><sign><col><close> +// +// If a reference points outside the target either in writing or reading it is +// considered invalid and ignored. The same applies if it overflows a line, +// i.e., if a line is 10 characters long and a reference targets character 11 +// of that line, it is rejected. Lines are delimited with '\n' which is counted +// toward the line length. +// +// If a reference is to be written that would overwrite a '\n' character it is +// ignored as this would break all other line values. + +enum : size_t { kMaxDigitCount = 3 }; + +// Helper class for translating among offset_t, ztf::LineCol and +// ztf::DeltaLineCol. 
+class ZtfTranslator { + public: + ZtfTranslator(); + ~ZtfTranslator(); + + // Initializes |line_starts_| with the contents of |image|. Returns false if + // the number of |line_starts_| entries or the length of any line would reach + // |ztf::kMaxDimValue|; returns true otherwise. + bool Init(ConstBufferView image); + + // Checks if |lc| is a valid location in the file. + bool IsValid(ztf::LineCol lc) const; + + // Checks if |dlc| relative to |offset| is a valid location in the file. + bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const; + + // Returns the offset corresponding to |line_col| if it is valid. Otherwise + // returns |kInvalidOffset|. + offset_t LineColToOffset(ztf::LineCol line_col) const; + + // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns + // base::nullopt. + base::Optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const; + + private: + // Returns an iterator to the range containing |offset|, which is represented + // by its starting offset. The next element will contain the upper bound of + // the range. + std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const; + + // Returns the length of a 1-indexed line. The caller is expected to check + // that the requested line exists. + offset_t LineLength(uint16_t line) const; + + offset_t NumLines() const { + return static_cast<offset_t>(line_starts_.size() - 1); + } + + // |line_starts_| is a sorted list of each line's starting offset, along with + // the image size as the sentinel; it looks like {0, ..., image.size}. + std::vector<offset_t> line_starts_; + DISALLOW_COPY_AND_ASSIGN(ZtfTranslator); +}; + +// Disassembler for Zucchini Text Format (ZTF). +class DisassemblerZtf : public Disassembler { + public: + // Target Pools + enum ReferencePool : uint8_t { + kAngles, // <> + kBraces, // {} + kBrackets, // [] + kParentheses // () + }; + + // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel + // and the possible number of digits (1-3). 
+ enum ReferenceType : uint8_t { + kAnglesAbs1, + kAnglesAbs2, + kAnglesAbs3, + kAnglesRel1, + kAnglesRel2, + kAnglesRel3, + kBracesAbs1, + kBracesAbs2, + kBracesAbs3, + kBracesRel1, + kBracesRel2, + kBracesRel3, + kBracketsAbs1, + kBracketsAbs2, + kBracketsAbs3, + kBracketsRel1, + kBracketsRel2, + kBracketsRel3, + kParenthesesAbs1, + kParenthesesAbs2, + kParenthesesAbs3, + kParenthesesRel1, + kParenthesesRel2, + kParenthesesRel3, + kNumTypes + }; + + DisassemblerZtf(); + ~DisassemblerZtf() override; + + // Applies quick checks to determine if |image| *may* point to the start of a + // ZTF file. Returns true on success. + static bool QuickDetect(ConstBufferView image); + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Reference Readers, templated to allow configurable digit count and pool. + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi); + + // Reference Writers, templated to allow configurable digit count and pool. 
+ template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image); + + private: + friend Disassembler; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + ZtfTranslator translator_; + + DISALLOW_COPY_AND_ASSIGN(DisassemblerZtf); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ diff --git a/chromium/components/zucchini/disassembler_ztf_unittest.cc b/chromium/components/zucchini/disassembler_ztf_unittest.cc new file mode 100644 index 00000000000..1e713592777 --- /dev/null +++ b/chromium/components/zucchini/disassembler_ztf_unittest.cc @@ -0,0 +1,402 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_ztf.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <map> +#include <set> +#include <utility> +#include <vector> + +#include "base/logging.h" +#include "base/stl_util.h" +#include "base/strings/string_piece.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr char kNormalText[] = R"(ZTxt +Hello World! +This is an example of an absolute reference <<1,1>> +And {-01,+05} is an example of a relative ref +txTZ +TRAILING DATA)"; +// -1 to exclude null byte. +constexpr size_t kNormalTextExtraBytes = base::size("TRAILING DATA") - 1; + +constexpr char kOutOfBoundsText[] = R"(ZTxt<1,1> +Hello World! +This is an example of an OOB absolute reference <890,605> +And {-050,+100} is an example of an OOB relative ref. +but [+00,+10] is valid at least. As is (1,5). 
+<1, 6> and { ,1} aren't nor is {4,5] +{7,6}<1,1><2,3>{+00,+00}{004,100}[+00,+60][+000,-100]<-000,-035>(-00,-00)txTZ +)"; + +// Converts a raw string into data. +std::vector<uint8_t> StrToData(base::StringPiece s) { + return std::vector<uint8_t>(s.begin(), s.end()); +} + +// Compare if |a.location < b.location| as references have unique locations. +struct ReferenceCompare { + bool operator()(const Reference& a, const Reference& b) const { + return a.location < b.location; + } +}; + +using ReferenceKey = + std::pair<DisassemblerZtf::ReferencePool, DisassemblerZtf::ReferenceType>; +using ReferenceSets = + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>; + +// Write references in |refs_to_write| to |image|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. +void WriteReferences(MutableBufferView image, + size_t expected_size, + const ReferenceSets& refs_to_write) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + image.shrink(dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto writer = group.GetWriter(image, dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_write.count(key)) + continue; + for (const auto& ref : refs_to_write.at(key)) + writer->PutNext(ref); + } +} + +// Read references in |refs_to_read| from |image|. Once found +// the elements are removed from |refs_to_read|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. 
+void ReadReferences(ConstBufferView image, + size_t expected_size, + ReferenceSets* refs_to_read) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto reader = group.GetReader(dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_read->count(key)) { + // No elements of this pool/type pair are expected so assert that none are + // found. + auto ref = reader->GetNext(); + EXPECT_FALSE(ref.has_value()); + continue; + } + // For each reference remove it from the set if it exists, error if + // unexpected references are found. + for (auto ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + EXPECT_EQ(1UL, refs_to_read->at(key).erase(ref.value())); + } + EXPECT_EQ(0U, refs_to_read->at(key).size()); + } +} + +void TestTranslation(const ZtfTranslator& translator, + offset_t expected_location, + ztf::LineCol lc) { + // Check the lc is translated to the expected location. + EXPECT_EQ(expected_location, translator.LineColToOffset(lc)); + auto new_lc = translator.OffsetToLineCol(expected_location); + if (expected_location == kInvalidOffset) { + EXPECT_FALSE(translator.IsValid(lc)); + EXPECT_FALSE(new_lc.has_value()); + } else { + EXPECT_TRUE(translator.IsValid(lc)); + // Check that the reverse is true. |ztf::LineCol{0, 0}| is a sentinel and + // should never be valid. 
+ EXPECT_EQ(lc.line, new_lc->line); + EXPECT_EQ(lc.col, new_lc->col); + } +} + +template <typename T> +size_t CountDistinct(const std::vector<T>& v) { + return std::set<T>(v.begin(), v.end()).size(); +} + +} // namespace + +TEST(ZtfTranslatorTest, Translate) { + ztf::dim_t kMaxVal = INT16_MAX; + ztf::dim_t kMinVal = INT16_MIN; + + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ZtfTranslator translator; + EXPECT_TRUE(translator.Init(image)); + + // Absolute Translations: + + // Check a bunch of invalid locations. + TestTranslation(translator, kInvalidOffset, ztf::LineCol{50, 60}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMaxVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMaxVal, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMinVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMinVal, 1}); + + // Check the start of the file. + TestTranslation(translator, 0, ztf::LineCol{1, 1}); + TestTranslation(translator, 1, ztf::LineCol{1, 2}); + + // Check the boundary around a newline. + TestTranslation(translator, 9, ztf::LineCol{1, 10}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 11}); + TestTranslation(translator, 10, ztf::LineCol{2, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{2, 0}); + + // Check the end of the file. 
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{8, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{7, 79}); + // Need to subtract to account for the newline. + TestTranslation(translator, text.size() - 1, ztf::LineCol{7, 78}); + TestTranslation(translator, text.size() - 2, ztf::LineCol{7, 77}); + + // Delta Validity + // - Reminder! 0 -> 1:1 + + // Common possible edge cases. + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 0})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 1})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMinVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMinVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, kMaxVal})); + + // Newline area. + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 9})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, 10})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{1, -10})); + EXPECT_TRUE(translator.IsValid(9, ztf::DeltaLineCol{1, -9})); + + // End of file. 
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 78})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 77})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{6, 78})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{6, 77})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{1, 0})); + EXPECT_TRUE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{1, 0})); +} + +// Ensures that ReferenceGroups from DisassemblerZtf::MakeReferenceGroups() +// cover each non-sentinel element in ReferenceType in order, exactly once. Also +// ensures that the ReferenceType elements are grouped by ReferencePool, and +// listed in increasing order. +TEST(DisassemblerZtfTest, ReferenceGroups) { + std::vector<uint32_t> pool_list; + std::vector<uint32_t> type_list; + DisassemblerZtf dis; + for (ReferenceGroup group : dis.MakeReferenceGroups()) { + pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value())); + type_list.push_back(static_cast<uint32_t>(group.type_tag().value())); + } + + // Check ReferenceType coverage. + constexpr size_t kNumTypes = DisassemblerZtf::kNumTypes; + EXPECT_EQ(kNumTypes, type_list.size()); + EXPECT_EQ(kNumTypes, CountDistinct(type_list)); + EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end())); + + // Check that ReferenceType elements are grouped by ReferencePool. Note that + // repeats can occur, and pools can be skipped. + EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end())); +} + +TEST(DisassemblerZtfTest, BadMagic) { + // Test a case where there is no header so a disassembler cannot be created. 
+ { + const std::vector<uint8_t> text(StrToData("foobarbaz bazbarfoo")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test a case where there is no footer so a disassembler cannot be created. + { + const std::vector<uint8_t> text(StrToData("ZTxtfoobarbaz bazbarfootxTZ")); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test when the header is too short + { + const std::vector<uint8_t> text(StrToData("ZTxtxTZ\n")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +TEST(DisassemblerZtfTest, ZtfSizeBound) { + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 2, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_TRUE(Disassembler::Make<DisassemblerZtf>(image)); + } + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 1, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +// Try reading from a well formed source. 
+TEST(DisassemblerZtfTest, NormalRead) { + const std::vector<uint8_t> text(StrToData(kNormalText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({63, 0})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({74, 27})}}, + }; + ReadReferences(image, text.size() - kNormalTextExtraBytes, &expected_map); +} + +// Try writing to a well formed source and ensure that what is read back +// reflects what was written. +TEST(DisassemblerZtfTest, NormalWrite) { + std::vector<uint8_t> mutable_text(StrToData(kNormalText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({63, 71})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel3}, + {Reference({74, 4})}}, + }; + WriteReferences(image, mutable_text.size() - kNormalTextExtraBytes, + change_map); + + // As a sanity check see if a disassembler can identify the same references. + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size() - kNormalTextExtraBytes, + &change_map); +} + +// Try reading from a source rife with errors. 
+TEST(DisassemblerZtfTest, ReadOutOfBoundsRefs) { + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({4, 0}), Reference({223, 0}), Reference({228, 12})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, 149})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 223})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 233})}}, + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 4})}}, + }; + ReadReferences(image, text.size(), &expected_map); +} + +// Try writing to a source rife with errors (malformed references or ones that +// reference non-existent locations). Some of the values written are also bad. To +// validate, check if the expected set of references are read back. +TEST(DisassemblerZtfTest, WriteOutOfBoundsRefs) { + // Replace |old_ref| (provided for checking) with |new_ref| in |set|. + auto update_set = [](Reference old_ref, Reference new_ref, + std::set<Reference, ReferenceCompare>* set) { + auto it = set->find(old_ref); + EXPECT_NE(it, set->cend()); + EXPECT_EQ(*it, old_ref); + set->erase(it); + set->insert(new_ref); + }; + + // Replace |old_ref| (provided for checking) with |new_ref| in the set which + // is the value corresponding to |key| in |map|. 
+ auto update_map = + [update_set]( + ReferenceKey key, Reference old_ref, Reference new_ref, + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>* map) { + auto it = map->find(key); + EXPECT_NE(it, map->cend()); + update_set(old_ref, new_ref, &(it->second)); + }; + + std::vector<uint8_t> mutable_text(StrToData(kOutOfBoundsText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({223, 15}), Reference({228, 13})}}, + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3}, + {Reference({4, 50})}}, // This should fail to write. + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, mutable_text.size()})}}, // This should fail. + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 21})}}, // This should fail. + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 219})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 174})}}, + }; + WriteReferences(image, mutable_text.size(), change_map); + + // As a sanity check see if a disassembler can identify the same references + // (excluding the invalid ones). 
+ change_map.erase(change_map.find( + {DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3})); + change_map.at({DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}) + .emplace(Reference{4, 0}); + update_map({DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + Reference({139, mutable_text.size()}), Reference({139, 149}), + &change_map); + update_map({DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + Reference({174, 21}), Reference({174, 4}), &change_map); + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size(), &change_map); +} + +} // namespace zucchini diff --git a/chromium/components/zucchini/element_detection.cc b/chromium/components/zucchini/element_detection.cc index 2fa3604ae1f..6b31f612f7a 100644 --- a/chromium/components/zucchini/element_detection.cc +++ b/chromium/components/zucchini/element_detection.cc @@ -7,17 +7,28 @@ #include <utility> #include "base/logging.h" +#include "components/zucchini/buildflags.h" #include "components/zucchini/disassembler.h" -#include "components/zucchini/disassembler_dex.h" #include "components/zucchini/disassembler_no_op.h" + +#if BUILDFLAG(ENABLE_DEX) +#include "components/zucchini/disassembler_dex.h" +#endif // BUILDFLAG(ENABLE_DEX) + +#if BUILDFLAG(ENABLE_WIN) #include "components/zucchini/disassembler_win32.h" +#endif // BUILDFLAG(ENABLE_WIN) + +#if BUILDFLAG(ENABLE_ZTF) +#include "components/zucchini/disassembler_ztf.h" +#endif // BUILDFLAG(ENABLE_ZTF) namespace zucchini { namespace { // Impose a minimal program size to eliminate pathological cases. 
-constexpr size_t kMinProgramSize = 16; +enum : size_t { kMinProgramSize = 16 }; } // namespace @@ -25,6 +36,7 @@ constexpr size_t kMinProgramSize = 16; std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( ConstBufferView image) { +#if BUILDFLAG(ENABLE_WIN) if (DisassemblerWin32X86::QuickDetect(image)) { auto disasm = Disassembler::Make<DisassemblerWin32X86>(image); if (disasm && disasm->size() >= kMinProgramSize) @@ -36,12 +48,24 @@ std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( if (disasm && disasm->size() >= kMinProgramSize) return disasm; } +#endif // BUILDFLAG(ENABLE_WIN) +#if BUILDFLAG(ENABLE_DEX) if (DisassemblerDex::QuickDetect(image)) { auto disasm = Disassembler::Make<DisassemblerDex>(image); if (disasm && disasm->size() >= kMinProgramSize) return disasm; } +#endif // BUILDFLAG(ENABLE_DEX) + +#if BUILDFLAG(ENABLE_ZTF) + if (DisassemblerZtf::QuickDetect(image)) { + // This disallows very short examples like "ZTxtxtZ\n" in ensemble patching. + auto disasm = Disassembler::Make<DisassemblerZtf>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_ZTF) return nullptr; } @@ -49,15 +73,24 @@ std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image, ExecutableType exe_type) { switch (exe_type) { +#if BUILDFLAG(ENABLE_WIN) case kExeTypeWin32X86: return Disassembler::Make<DisassemblerWin32X86>(image); case kExeTypeWin32X64: return Disassembler::Make<DisassemblerWin32X64>(image); +#endif // BUILDFLAG(ENABLE_WIN) +#if BUILDFLAG(ENABLE_DEX) case kExeTypeDex: return Disassembler::Make<DisassemblerDex>(image); +#endif // BUILDFLAG(ENABLE_DEX) +#if BUILDFLAG(ENABLE_ZTF) + case kExeTypeZtf: + return Disassembler::Make<DisassemblerZtf>(image); +#endif // BUILDFLAG(ENABLE_ZTF) case kExeTypeNoOp: return Disassembler::Make<DisassemblerNoOp>(image); default: + // If an architecture is disabled then null is handled 
gracefully. return nullptr; } } diff --git a/chromium/components/zucchini/element_detection_unittest.cc b/chromium/components/zucchini/element_detection_unittest.cc index 2200c0bf1c2..6dbfa3f8b7f 100644 --- a/chromium/components/zucchini/element_detection_unittest.cc +++ b/chromium/components/zucchini/element_detection_unittest.cc @@ -11,14 +11,65 @@ #include "testing/gtest/include/gtest/gtest.h" namespace zucchini { - namespace { +// This test uses a mock archive format where regions are determined by their +// consecutive byte values rather than parsing real executables. +// +// 0 - Padding or raw data (not mapped to an executable). +// 1 - A Win32x86 executable. +// 2 - A Win32x64 executable. +// +// So an example archive file of; +// 0 1 1 1 0 1 1 0 0 2 2 2 2 +// contains (in order left to right): +// - One padding byte +// - Three byte Win32x86 executable +// - One padding byte +// - Two byte Win32x86 executable +// - Two padding bytes +// - Four byte Win32x64 executable -using ElementVector = std::vector<Element>; +class ElementDetectionTest : public ::testing::Test { + protected: + using ElementVector = std::vector<Element>; + using ExeTypeMap = std::map<uint8_t, ExecutableType>; -} // namespace + ElementDetectionTest() + : exe_map_({{1, kExeTypeWin32X86}, {2, kExeTypeWin32X64}}) {} + + ElementVector TestElementFinder(std::vector<uint8_t> buffer) { + ConstBufferView image(buffer.data(), buffer.size()); + + ElementFinder finder( + image, + base::BindRepeating( + [](ExeTypeMap exe_map, ConstBufferView image, + ConstBufferView region) -> base::Optional<Element> { + EXPECT_GE(region.begin(), image.begin()); + EXPECT_LE(region.end(), image.end()); + EXPECT_GE(region.size(), 0U); + + if (region[0] != 0) { + offset_t length = 1; + while (length < region.size() && region[length] == region[0]) + ++length; + return Element{{0, length}, exe_map[region[0]]}; + } + return base::nullopt; + }, + exe_map_, image)); + std::vector<Element> elements; + for (auto element = 
finder.GetNext(); element; element = finder.GetNext()) { + elements.push_back(*element); + } + return elements; + } + + // Translation map from mock archive bytes to actual types used in Zucchini. + ExeTypeMap exe_map_; +}; -TEST(ElementDetectionTest, ElementFinderEmpty) { +TEST_F(ElementDetectionTest, ElementFinderEmpty) { std::vector<uint8_t> buffer(10, 0); ElementFinder finder( ConstBufferView(buffer.data(), buffer.size()), @@ -28,36 +79,7 @@ TEST(ElementDetectionTest, ElementFinderEmpty) { EXPECT_EQ(base::nullopt, finder.GetNext()); } -ElementVector TestElementFinder(std::vector<uint8_t> buffer) { - ConstBufferView image(buffer.data(), buffer.size()); - - ElementFinder finder( - image, - base::BindRepeating( - [](ConstBufferView image, - ConstBufferView region) -> base::Optional<Element> { - EXPECT_GE(region.begin(), image.begin()); - EXPECT_LE(region.end(), image.end()); - EXPECT_GE(region.size(), 0U); - - if (region[0] != 0) { - offset_t length = 1; - while (length < region.size() && region[length] == region[0]) - ++length; - return Element{{0, length}, - static_cast<ExecutableType>(region[0])}; - } - return base::nullopt; - }, - image)); - std::vector<Element> elements; - for (auto element = finder.GetNext(); element; element = finder.GetNext()) { - elements.push_back(*element); - } - return elements; -} - -TEST(ElementDetectionTest, ElementFinder) { +TEST_F(ElementDetectionTest, ElementFinder) { EXPECT_EQ(ElementVector(), TestElementFinder({})); EXPECT_EQ(ElementVector(), TestElementFinder({0, 0})); EXPECT_EQ(ElementVector({{{0, 2}, kExeTypeWin32X86}}), @@ -75,4 +97,5 @@ TEST(ElementDetectionTest, ElementFinder) { TestElementFinder({0, 1, 1, 0, 2, 2, 2})); } +} // namespace } // namespace zucchini diff --git a/chromium/components/zucchini/equivalence_map_unittest.cc b/chromium/components/zucchini/equivalence_map_unittest.cc index ce8ffe177fb..9c4166fdff7 100644 --- a/chromium/components/zucchini/equivalence_map_unittest.cc +++ 
b/chromium/components/zucchini/equivalence_map_unittest.cc @@ -251,7 +251,7 @@ TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) { auto PruneEquivalencesAndSortBySourceTest = [](std::vector<Equivalence>&& equivalences) { OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences); - return equivalences; + return std::move(equivalences); }; EXPECT_EQ(std::vector<Equivalence>(), diff --git a/chromium/components/zucchini/fuzzers/BUILD.gn b/chromium/components/zucchini/fuzzers/BUILD.gn new file mode 100644 index 00000000000..7afe6db662b --- /dev/null +++ b/chromium/components/zucchini/fuzzers/BUILD.gn @@ -0,0 +1,100 @@ +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//testing/libfuzzer/fuzzer_test.gni") +import("//third_party/protobuf/proto_library.gni") + +# To download the corpus for local fuzzing use: +# gsutil -m rsync \ +# gs://clusterfuzz-corpus/libfuzzer/zucchini_disassembler_win32_fuzzer \ +# components/zucchini/fuzzers/testdata/disassembler_win32_fuzzer +fuzzer_test("zucchini_disassembler_win32_fuzzer") { + sources = [ + "disassembler_win32_fuzzer.cc", + ] + deps = [ + "//base", + "//components/zucchini:zucchini_lib", + ] +} + +fuzzer_test("zucchini_patch_fuzzer") { + sources = [ + "patch_fuzzer.cc", + ] + deps = [ + "//base", + "//components/zucchini:zucchini_lib", + ] + seed_corpus = "testdata/patch_fuzzer" +} + +proto_library("zucchini_file_pair_proto") { + sources = [ + "file_pair.proto", + ] +} + +# Ensure protoc is available. +# Disabled on Windows due to crbug/844826. 
+if (current_toolchain == host_toolchain && !is_win) { + # Raw Apply Fuzzer: + action("zucchini_raw_apply_seed") { + script = "generate_fuzzer_data.py" + + args = [ + "--raw", + "old_eventlog_provider.dll", # <old_file> + "new_eventlog_provider.dll", # <new_file> + "eventlog_provider.patch", # <patch_file> (temporary) + + # <output_dir> + rebase_path("$target_gen_dir/testdata/raw_apply_fuzzer", root_build_dir), + ] + + # Files depended upon. + sources = [ + "create_seed_file_pair.py", + "testdata/new_eventlog_provider.dll", + "testdata/old_eventlog_provider.dll", + ] + + # Outputs: necessary for validation. + outputs = [ + "$target_gen_dir/testdata/raw_apply_fuzzer/seed_proto.bin", + ] + deps = [ + "//components/zucchini:zucchini", + "//third_party/protobuf:protoc", + ] + } + + fuzzer_test("zucchini_raw_apply_fuzzer") { + sources = [ + "raw_apply_fuzzer.cc", + ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "$target_gen_dir/testdata/raw_apply_fuzzer" + seed_corpus_deps = [ ":zucchini_raw_apply_seed" ] + } + + # Raw Gen Fuzzer: + fuzzer_test("zucchini_raw_gen_fuzzer") { + sources = [ + "raw_gen_fuzzer.cc", + ] + deps = [ + ":zucchini_file_pair_proto", + "//base", + "//components/zucchini:zucchini_lib", + "//third_party/libprotobuf-mutator", + ] + seed_corpus = "testdata/raw_gen_fuzzer" + } +} diff --git a/chromium/components/zucchini/fuzzers/create_seed_file_pair.py b/chromium/components/zucchini/fuzzers/create_seed_file_pair.py new file mode 100755 index 00000000000..a44db7b6a6f --- /dev/null +++ b/chromium/components/zucchini/fuzzers/create_seed_file_pair.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Create binary protobuf encoding for fuzzer seeds. 
+ +This script is used to generate binary encoded protobuf seeds for fuzzers +related to Zucchini-gen and -apply, which take pairs of files as arguments. The +binary protobuf format is faster to parse so it is the preferred method for +encoding the seeds. For gen related fuzzers this should only need to be run +once. For any apply related fuzzers this should be rerun whenever the patch +format is changed. +""" + +import argparse +import logging +import os +import subprocess +import sys + +ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__))) +PROTO_DEFINITION_FILE = 'file_pair.proto' +OUTPUT_FORMAT = b'old_file: "{}"\nnew_or_patch_file: "{}"' + +def parse_args(): + """Parse commandline args.""" + parser = argparse.ArgumentParser() + parser.add_argument('protoc_path', help='Path to protoc.') + parser.add_argument('old_file', help='Old file to generate/apply patch.') + parser.add_argument('new_or_patch_file', + help='New file to generate or patch to apply.') + parser.add_argument('output_file', help='File to write binary protobuf to.') + return parser.parse_args() + + +def read_to_proto_escaped_string(filename): + """Reads a file and converts it to hex escape sequences.""" + with open(filename, 'rb') as f: + # Note that string_escape escapes all non-ASCII printable characters + # excluding ", which needs to be manually escaped. + return f.read().encode('string_escape').replace('"', '\\"') + + +def main(): + args = parse_args() + # Create an ASCII string representing a protobuf. + content = OUTPUT_FORMAT.format(read_to_proto_escaped_string(args.old_file), + read_to_proto_escaped_string( + args.new_or_patch_file)) + + # Encode the ASCII protobuf as a binary protobuf. + ps = subprocess.Popen([args.protoc_path, '--proto_path=%s' % ABS_PATH, + '--encode=zucchini.fuzzers.FilePair', + os.path.join(ABS_PATH, PROTO_DEFINITION_FILE)], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + # Write the string to the subprocess. 
Single line IO is fine as protoc returns + # a string. + output = ps.communicate(input=content) + ps.wait() + if ps.returncode: + logging.error('Binary protobuf encoding failed.') + return ps.returncode + + # Write stdout of the subprocess for protoc to the |output_file|. + with open(args.output_file, 'wb') as f: + f.write(output[0]) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chromium/components/zucchini/disassembler_win32_fuzzer.cc b/chromium/components/zucchini/fuzzers/disassembler_win32_fuzzer.cc index f432dddc960..f432dddc960 100644 --- a/chromium/components/zucchini/disassembler_win32_fuzzer.cc +++ b/chromium/components/zucchini/fuzzers/disassembler_win32_fuzzer.cc diff --git a/chromium/components/zucchini/fuzzers/file_pair.proto b/chromium/components/zucchini/fuzzers/file_pair.proto new file mode 100644 index 00000000000..22163819933 --- /dev/null +++ b/chromium/components/zucchini/fuzzers/file_pair.proto @@ -0,0 +1,15 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +syntax = "proto2"; + +package zucchini.fuzzers; + +// NEXT_TAG = 3 +message FilePair { + // File to generate patch from or apply patch to. + required bytes old_file = 1; + // New file to generate patch or the patch to apply. + required bytes new_or_patch_file = 2; +} diff --git a/chromium/components/zucchini/fuzzers/generate_fuzzer_data.py b/chromium/components/zucchini/fuzzers/generate_fuzzer_data.py new file mode 100755 index 00000000000..c182baf2578 --- /dev/null +++ b/chromium/components/zucchini/fuzzers/generate_fuzzer_data.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Script for generating new binary protobuf seeds for fuzzers. 
+ +Currently supports creating a single seed binary protobuf of the form +zucchini.fuzzer.FilePair. +""" + +import argparse +import hashlib +import logging +import os +import platform +import subprocess +import sys + +ABS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__))) +ABS_TESTDATA_PATH = os.path.join(ABS_PATH, 'testdata') + +def parse_args(): + """Parses arguments from command-line.""" + parser = argparse.ArgumentParser() + parser.add_argument('--raw', help='Whether to use Raw Zucchini.', + action='store_true') + parser.add_argument('old_file', help='Old file to generate/apply patch.') + parser.add_argument('new_file', help='New file to generate patch from.') + parser.add_argument('patch_file', help='Patch filename to use.') + parser.add_argument('output_dir', + help='Directory to write binary protobuf to.') + return parser.parse_args() + + +def gen(old_file, new_file, patch_file, output_dir, is_raw, is_win): + """Generates a new patch and binary encodes a protobuf pair.""" + # Create output directory if missing. + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Handle Windows executable names. + zucchini = 'zucchini' + protoc = 'protoc' + if is_win: + zucchini += '.exe' + protoc += '.exe' + + zuc_cmd = [os.path.abspath(zucchini), '-gen'] + if is_raw: + zuc_cmd.append('-raw') + # Generate a new patch. + ret = subprocess.call(zuc_cmd + [old_file, new_file, patch_file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if ret: + logging.error('Patch generation failed for ({}, {})'.format(old_file, + new_file)) + return ret + # Binary encode the protobuf pair. 
+ ret = subprocess.call([sys.executable, + os.path.join(ABS_PATH, 'create_seed_file_pair.py'), + os.path.abspath(protoc), old_file, patch_file, + os.path.join(output_dir, 'seed_proto.bin')], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + os.remove(patch_file) + return ret + + +def main(): + args = parse_args() + return gen(os.path.join(ABS_TESTDATA_PATH, args.old_file), + os.path.join(ABS_TESTDATA_PATH, args.new_file), + os.path.join(ABS_TESTDATA_PATH, args.patch_file), + os.path.abspath(args.output_dir), + args.raw, + platform.system() == 'Windows') + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/chromium/components/zucchini/patch_fuzzer.cc b/chromium/components/zucchini/fuzzers/patch_fuzzer.cc index 2d1c9b7fe45..2d1c9b7fe45 100644 --- a/chromium/components/zucchini/patch_fuzzer.cc +++ b/chromium/components/zucchini/fuzzers/patch_fuzzer.cc diff --git a/chromium/components/zucchini/fuzzers/raw_apply_fuzzer.cc b/chromium/components/zucchini/fuzzers/raw_apply_fuzzer.cc new file mode 100644 index 00000000000..da3230a81f7 --- /dev/null +++ b/chromium/components/zucchini/fuzzers/raw_apply_fuzzer.cc @@ -0,0 +1,59 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stdint.h> +#include <stdlib.h> + +#include <iostream> +#include <vector> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_reader.h" +#include "components/zucchini/zucchini.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +struct Environment { + Environment() { + logging::SetMinLogLevel(3); // Disable console spamming. + } +}; + +Environment* env = new Environment(); + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + // Dump code for debugging. 
+ if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Old File: " << file_pair.old_file() << std::endl + << "Patch File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. + zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView patch_file( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Generate a patch reader. + auto patch_reader = zucchini::EnsemblePatchReader::Create(patch_file); + // Abort if the patch can't be read. + if (!patch_reader.has_value()) + return; + + // Create the underlying new file. + size_t new_size = patch_reader->header().new_size; + // Reject unreasonably large "new" files that a fuzzed patch may specify. + if (new_size > 64 * 1024) + return; + std::vector<uint8_t> new_data(new_size); + zucchini::MutableBufferView new_image(new_data.data(), new_size); + + // Fuzz target. + zucchini::Apply(old_image, *patch_reader, new_image); + // No need to check whether output exists, or if so, whether it's valid. +} diff --git a/chromium/components/zucchini/fuzzers/raw_gen_fuzzer.cc b/chromium/components/zucchini/fuzzers/raw_gen_fuzzer.cc new file mode 100644 index 00000000000..176412daf14 --- /dev/null +++ b/chromium/components/zucchini/fuzzers/raw_gen_fuzzer.cc @@ -0,0 +1,58 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#include <stdint.h> + +#include <iostream> + +#include "base/environment.h" +#include "base/logging.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/fuzzers/file_pair.pb.h" +#include "components/zucchini/patch_writer.h" +#include "components/zucchini/zucchini_gen.h" +#include "testing/libfuzzer/proto/lpm_interface.h" + +namespace { + +constexpr int kMinImageSize = 16; +constexpr int kMaxImageSize = 1024; + +} // namespace + +struct Environment { + Environment() { + logging::SetMinLogLevel(3); // Disable console spamming. + } +}; + +Environment* env = new Environment(); + +DEFINE_BINARY_PROTO_FUZZER(const zucchini::fuzzers::FilePair& file_pair) { + // Dump code for debugging. + if (base::Environment::Create()->HasVar("LPM_DUMP_NATIVE_INPUT")) { + std::cout << "Old File: " << file_pair.old_file() << std::endl + << "New File: " << file_pair.new_or_patch_file() << std::endl; + } + + // Prepare data. + zucchini::ConstBufferView old_image( + reinterpret_cast<const uint8_t*>(file_pair.old_file().data()), + file_pair.old_file().size()); + zucchini::ConstBufferView new_image( + reinterpret_cast<const uint8_t*>(file_pair.new_or_patch_file().data()), + file_pair.new_or_patch_file().size()); + + // Restrict image sizes to speed up fuzzing. + if (old_image.size() < kMinImageSize || old_image.size() > kMaxImageSize || + new_image.size() < kMinImageSize || new_image.size() > kMaxImageSize) { + return; + } + + // Generate a patch writer. + zucchini::EnsemblePatchWriter patch_writer(old_image, new_image); + + // Fuzz Target. 
+ zucchini::GenerateRaw(old_image, new_image, &patch_writer); +} diff --git a/chromium/components/zucchini/heuristic_ensemble_matcher.cc b/chromium/components/zucchini/heuristic_ensemble_matcher.cc index aead5dce7b8..1e99d5bf921 100644 --- a/chromium/components/zucchini/heuristic_ensemble_matcher.cc +++ b/chromium/components/zucchini/heuristic_ensemble_matcher.cc @@ -68,8 +68,9 @@ bool UnsafeDifference(const Element& old_element, const Element& new_element) { } std::ostream& operator<<(std::ostream& stream, const Element& elt) { - stream << "(" << elt.exe_type << ", " << AsHex<8, size_t>(elt.offset) << " +" - << AsHex<8, size_t>(elt.size) << ")"; + stream << "(" << CastExecutableTypeToString(elt.exe_type) << ", " + << AsHex<8, size_t>(elt.offset) << " +" << AsHex<8, size_t>(elt.size) + << ")"; return stream; } diff --git a/chromium/components/zucchini/image_utils.h b/chromium/components/zucchini/image_utils.h index 3765763f9f9..9aba0a69e99 100644 --- a/chromium/components/zucchini/image_utils.h +++ b/chromium/components/zucchini/image_utils.h @@ -8,8 +8,13 @@ #include <stddef.h> #include <stdint.h> +#include <string> + +#include "base/format_macros.h" +#include "base/macros.h" #include "base/numerics/safe_conversions.h" #include "base/optional.h" +#include "base/strings/stringprintf.h" #include "components/zucchini/buffer_view.h" #include "components/zucchini/typed_value.h" @@ -137,20 +142,54 @@ struct EquivalenceCandidate { double similarity; }; -// Enumerations for supported executables. +template <size_t N> +inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) { + static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte."); + return (exe_type[3] << 24) | (exe_type[2] << 16) | (exe_type[1] << 8) | + exe_type[0]; +} + +// Enumerations for supported executables. Values in this enum must be distinct. +// Once present, values should never be altered or removed to ensure backwards +// compatibility and patch type collision avoidance. 
enum ExecutableType : uint32_t { kExeTypeUnknown = UINT32_MAX, - kExeTypeNoOp = 0, - kExeTypeWin32X86 = 1, - kExeTypeWin32X64 = 2, - kExeTypeElfX86 = 3, - kExeTypeElfX64 = 4, - kExeTypeElfArm32 = 5, - kExeTypeElfAArch64 = 6, - kExeTypeDex = 7, - kNumExeType + kExeTypeNoOp = ExeTypeToUint32("NoOp"), + kExeTypeWin32X86 = ExeTypeToUint32("Px86"), + kExeTypeWin32X64 = ExeTypeToUint32("Px64"), + kExeTypeElfX86 = ExeTypeToUint32("Ex86"), + kExeTypeElfX64 = ExeTypeToUint32("Ex64"), + kExeTypeElfArm32 = ExeTypeToUint32("EA32"), + kExeTypeElfAArch64 = ExeTypeToUint32("EA64"), + kExeTypeDex = ExeTypeToUint32("DEX "), + kExeTypeZtf = ExeTypeToUint32("ZTF "), }; +constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { + switch (static_cast<ExecutableType>(possible_exe_type)) { + case kExeTypeNoOp: // Falls through. + case kExeTypeWin32X86: // Falls through. + case kExeTypeWin32X64: // Falls through. + case kExeTypeElfX86: // Falls through. + case kExeTypeElfX64: // Falls through. + case kExeTypeElfArm32: // Falls through. + case kExeTypeElfAArch64: // Falls through. + case kExeTypeDex: // Falls through. + case kExeTypeZtf: // Falls through. + case kExeTypeUnknown: + return static_cast<ExecutableType>(possible_exe_type); + default: + return kExeTypeUnknown; + } +} + +inline std::string CastExecutableTypeToString(ExecutableType exe_type) { + uint32_t v = static_cast<uint32_t>(exe_type); + char result[] = {v & 0xFF, (v >> 8) & 0xFF, (v >> 16) & 0xFF, + (v >> 24) & 0xFF, 0}; + return result; +} + // A region in an image with associated executable type |exe_type|. If // |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data. 
struct Element : public BufferRegion { @@ -178,6 +217,15 @@ struct ElementMatch { bool IsValid() const { return old_element.exe_type == new_element.exe_type; } ExecutableType exe_type() const { return old_element.exe_type; } + // Represents match as "#+#=#+#", where "#" denotes the integers: + // [offset in "old", size in "old", offset in "new", size in "new"]. + // Note that element type is omitted. + std::string ToString() const { + return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "", + old_element.offset, old_element.size, + new_element.offset, new_element.size); + } + Element old_element; Element new_element; }; diff --git a/chromium/components/zucchini/image_utils_unittest.cc b/chromium/components/zucchini/image_utils_unittest.cc index cd71a2f251e..81695e920c8 100644 --- a/chromium/components/zucchini/image_utils_unittest.cc +++ b/chromium/components/zucchini/image_utils_unittest.cc @@ -14,4 +14,21 @@ TEST(ImageUtilsTest, Bitness) { EXPECT_EQ(8U, WidthOf(kBit64)); } +TEST(ImageUtilsTest, CastExecutableTypeToString) { + EXPECT_EQ("NoOp", CastExecutableTypeToString(kExeTypeNoOp)); + EXPECT_EQ("Px86", CastExecutableTypeToString(kExeTypeWin32X86)); + EXPECT_EQ("EA64", CastExecutableTypeToString(kExeTypeElfAArch64)); + EXPECT_EQ("DEX ", CastExecutableTypeToString(kExeTypeDex)); +} + +TEST(ImageUtilsTest, ElementMatchToString) { + constexpr ExecutableType kAnyType = kExeTypeWin32X86; + EXPECT_EQ("1+2=3+4", + (ElementMatch{{{1, 2}, kAnyType}, {{3, 4}, kAnyType}}).ToString()); + EXPECT_EQ( + "1000000000+1=0+1000000000", + (ElementMatch{{{1000000000, 1}, kAnyType}, {{0, 1000000000}, kAnyType}}) + .ToString()); +} + } // namespace zucchini diff --git a/chromium/components/zucchini/imposed_ensemble_matcher.cc b/chromium/components/zucchini/imposed_ensemble_matcher.cc new file mode 100644 index 00000000000..e735bc4e79e --- /dev/null +++ b/chromium/components/zucchini/imposed_ensemble_matcher.cc @@ -0,0 +1,143 @@ +// Copyright 2018 The Chromium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/imposed_ensemble_matcher.h" + +#include <algorithm> +#include <sstream> +#include <utility> + +#include "base/bind.h" +#include "base/logging.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +/******** ImposedMatchParser ********/ + +ImposedMatchParser::ImposedMatchParser() = default; + +ImposedMatchParser::~ImposedMatchParser() = default; + +ImposedMatchParser::Status ImposedMatchParser::Parse( + std::string imposed_matches, + ConstBufferView old_image, + ConstBufferView new_image, + ElementDetector&& detector) { + CHECK(matches_.empty()); + CHECK(bad_matches_.empty()); + + // Parse |imposed_matches| and check bounds. + std::istringstream iss(std::move(imposed_matches)); + bool first = true; + iss.peek(); // Makes empty |iss| realize EOF is reached. + while (iss && !iss.eof()) { + // Eat delimiter. + if (first) { + first = false; + } else if (!(iss >> EatChar(','))) { + return kInvalidDelimiter; + } + // Extract parameters for one imposed match. + offset_t old_offset = 0U; + size_t old_size = 0U; + offset_t new_offset = 0U; + size_t new_size = 0U; + if (!(iss >> StrictUInt<offset_t>(old_offset) >> EatChar('+') >> + StrictUInt<size_t>(old_size) >> EatChar('=') >> + StrictUInt<offset_t>(new_offset) >> EatChar('+') >> + StrictUInt<size_t>(new_size))) { + return kParseError; + } + // Check bounds. + if (old_size == 0 || new_size == 0 || + !old_image.covers({old_offset, old_size}) || + !new_image.covers({new_offset, new_size})) { + return kOutOfBound; + } + matches_.push_back( + {{{old_offset, old_size}, kExeTypeUnknown}, // Assign type later. + {{new_offset, new_size}, kExeTypeUnknown}}); // Assign type later. + } + // Sort matches by "new" file offsets. This helps with overlap checks. 
+ std::sort(matches_.begin(), matches_.end(), + [](const ElementMatch& match_a, const ElementMatch& match_b) { + return match_a.new_element.offset < match_b.new_element.offset; + }); + + // Check for overlaps in "new" file. + if (std::adjacent_find( + matches_.begin(), matches_.end(), + [](const ElementMatch& match1, const ElementMatch& match2) { + return match1.new_element.hi() > match2.new_element.lo(); + }) != matches_.end()) { + return kOverlapInNew; + } + + // Compute types and verify consistency. Remove identical matches and matches + // where any sub-image has an unknown type. + size_t write_idx = 0; + for (size_t read_idx = 0; read_idx < matches_.size(); ++read_idx) { + ConstBufferView old_sub_image( + old_image[matches_[read_idx].old_element.region()]); + ConstBufferView new_sub_image( + new_image[matches_[read_idx].new_element.region()]); + // Remove identical match. + if (old_sub_image.equals(new_sub_image)) { + ++num_identical_; + continue; + } + // Check executable types of sub-images. + base::Optional<Element> old_element = detector.Run(old_sub_image); + base::Optional<Element> new_element = detector.Run(new_sub_image); + if (!old_element || !new_element) { + // Skip unknown types, including those mixed with known types. + bad_matches_.push_back(matches_[read_idx]); + continue; + } else if (old_element->exe_type != new_element->exe_type) { + // Error if types are known, but inconsistent. + return kTypeMismatch; + } + + // Keep match and remove gaps. 
+ matches_[read_idx].old_element.exe_type = old_element->exe_type; + matches_[read_idx].new_element.exe_type = new_element->exe_type; + if (write_idx < read_idx) + matches_[write_idx] = matches_[read_idx]; + ++write_idx; + } + matches_.resize(write_idx); + return kSuccess; +} + +/******** ImposedEnsembleMatcher ********/ + +ImposedEnsembleMatcher::ImposedEnsembleMatcher( + const std::string& imposed_matches) + : imposed_matches_(imposed_matches) {} + +ImposedEnsembleMatcher::~ImposedEnsembleMatcher() = default; + +bool ImposedEnsembleMatcher::RunMatch(ConstBufferView old_image, + ConstBufferView new_image) { + DCHECK(matches_.empty()); + LOG(INFO) << "Start matching."; + ImposedMatchParser parser; + ImposedMatchParser::Status status = + parser.Parse(std::move(imposed_matches_), old_image, new_image, + base::BindRepeating(DetectElementFromDisassembler)); + // Print all warnings first. + for (const ElementMatch& bad_match : *parser.mutable_bad_matches()) + LOG(WARNING) << "Skipped match with unknown type: " << bad_match.ToString(); + if (status != ImposedMatchParser::kSuccess) { + LOG(ERROR) << "Imposed match failed with error code " << status << "."; + return false; + } + num_identical_ = parser.num_identical(); + matches_ = std::move(*parser.mutable_matches()); + Trim(); + return true; +} + +} // namespace zucchini diff --git a/chromium/components/zucchini/imposed_ensemble_matcher.h b/chromium/components/zucchini/imposed_ensemble_matcher.h new file mode 100644 index 00000000000..4dfc38e1e4e --- /dev/null +++ b/chromium/components/zucchini/imposed_ensemble_matcher.h @@ -0,0 +1,83 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ +#define COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ + +#include <stddef.h> + +#include <string> +#include <vector> + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/ensemble_matcher.h" + +namespace zucchini { + +// A class to parse imposed match format, which is either an empty string (no +// imposed patch), or a string formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. +class ImposedMatchParser { + public: + enum Status { + kSuccess, + kInvalidDelimiter, + kParseError, + kOutOfBound, + kOverlapInNew, + kTypeMismatch, + }; + + ImposedMatchParser(); + ~ImposedMatchParser(); + + // Parses |imposed_matches| and writes the results to member variables. + // |old_image| and |new_image| are used for validation. Returns a Status value + // to signal success or various error modes. |detector| is used to validate + // Element types for matched pairs. This should only be called once for each + // instance. + Status Parse(std::string imposed_matches, + ConstBufferView old_image, + ConstBufferView new_image, + ElementDetector&& detector); + + size_t num_identical() const { return num_identical_; } + std::vector<ElementMatch>* mutable_matches() { return &matches_; } + std::vector<ElementMatch>* mutable_bad_matches() { return &bad_matches_; } + + private: + size_t num_identical_ = 0; + std::vector<ElementMatch> matches_; + // Stores "forgiven" bad matches, so the caller can impose matches for + // unsupported image types (which will simply be ignored). Note that imposing + // matches for known but incompatible image types would result in error. 
+ std::vector<ElementMatch> bad_matches_; + + DISALLOW_COPY_AND_ASSIGN(ImposedMatchParser); +}; + +// An ensemble matcher that parses a format string that describes matches. +class ImposedEnsembleMatcher : public EnsembleMatcher { + public: + // |imposed_matches| specifies imposed matches, using the format documented on + // ImposedMatchParser. + // Validation is performed in RunMatch(). + explicit ImposedEnsembleMatcher(const std::string& imposed_matches); + ~ImposedEnsembleMatcher() override; + + // EnsembleMatcher: + bool RunMatch(ConstBufferView old_image, ConstBufferView new_image) override; + + private: + const std::string imposed_matches_; + + DISALLOW_COPY_AND_ASSIGN(ImposedEnsembleMatcher); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_IMPOSED_ENSEMBLE_MATCHER_H_ diff --git a/chromium/components/zucchini/imposed_ensemble_matcher_unittest.cc b/chromium/components/zucchini/imposed_ensemble_matcher_unittest.cc new file mode 100644 index 00000000000..97a8898c39a --- /dev/null +++ b/chromium/components/zucchini/imposed_ensemble_matcher_unittest.cc @@ -0,0 +1,214 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <stddef.h> +#include <stdint.h> + +#include <string> +#include <utility> +#include <vector> + +#include "components/zucchini/imposed_ensemble_matcher.h" + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/logging.h" +#include "base/optional.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/element_detection.h" +#include "components/zucchini/image_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +// This test uses a mock archive format where regions are determined by their +// consecutive byte values rather than parsing real executables.
In fact, since +// elements are imposed, only the first byte of the element is used to specify +// executable type of the mock data: +// - 'W' and 'w' specify kExeTypeWin32X86. +// - 'E' and 'e' specify kExeTypeElfX86. +// - Everything else specifies kExeTypeUnknown. +class TestElementDetector { + public: + TestElementDetector() {} + + base::Optional<Element> Run(ConstBufferView image) const { + DCHECK_GT(image.size(), 0U); + char first_char = *image.begin(); + if (first_char == 'W' || first_char == 'w') + return Element(image.local_region(), kExeTypeWin32X86); + if (first_char == 'E' || first_char == 'e') + return Element(image.local_region(), kExeTypeElfX86); + return base::nullopt; + } +}; + +} // namespace + +TEST(ImposedMatchParserTest, ImposedMatchParser) { + std::vector<uint8_t> old_data; + std::vector<uint8_t> new_data; + auto populate = [](const std::string& s, std::vector<uint8_t>* data) { + for (char ch : s) + data->push_back(static_cast<uint8_t>(ch)); + }; + // Pos: 11111111 + // 012345678901234567 + populate("1WW222EEEE", &old_data); + populate("33eee2222222wwww44", &new_data); + + ConstBufferView old_image(&old_data[0], old_data.size()); + ConstBufferView new_image(&new_data[0], new_data.size()); + + TestElementDetector detector; + + // Reusable output values.
+ std::string prev_imposed_matches; + ImposedMatchParser::Status status; + size_t num_identical; + std::vector<ElementMatch> matches; + std::vector<ElementMatch> bad_matches; + + auto run_test = [&](const std::string& imposed_matches) -> bool { + prev_imposed_matches = imposed_matches; + status = ImposedMatchParser::kSuccess; + num_identical = 0; + matches.clear(); + bad_matches.clear(); + ImposedMatchParser parser; + status = parser.Parse(imposed_matches, old_image, new_image, + base::BindRepeating(&TestElementDetector::Run, + base::Unretained(&detector))); + num_identical = parser.num_identical(); + matches = std::move(*parser.mutable_matches()); + bad_matches = std::move(*parser.mutable_bad_matches()); + return status == ImposedMatchParser::kSuccess; + }; + + auto run_check = [&](const ElementMatch& match, ExecutableType exe_type, + offset_t old_offset, size_t old_size, + offset_t new_offset, size_t new_size) { + EXPECT_EQ(exe_type, match.exe_type()) << prev_imposed_matches; + EXPECT_EQ(exe_type, match.old_element.exe_type) << prev_imposed_matches; + EXPECT_EQ(old_offset, match.old_element.offset) << prev_imposed_matches; + EXPECT_EQ(old_size, match.old_element.size) << prev_imposed_matches; + EXPECT_EQ(exe_type, match.new_element.exe_type) << prev_imposed_matches; + EXPECT_EQ(new_offset, match.new_element.offset) << prev_imposed_matches; + EXPECT_EQ(new_size, match.new_element.size) << prev_imposed_matches; + }; + + // Empty string: Vacuous but valid. + EXPECT_TRUE(run_test("")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(0U, bad_matches.size()); + + // Full matches. Different permutations give same result. 
+ for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2,6+4=2+3", "1+2=12+4,6+4=2+3,4+2=5+2", + "4+2=5+2,1+2=12+4,6+4=2+3", "4+2=5+2,6+4=2+3,1+2=12+4", + "6+4=2+3,1+2=12+4,4+2=5+2", "6+4=2+3,1+2=12+4,4+2=5+2"}) { + EXPECT_TRUE(run_test(imposed_matches)); + EXPECT_EQ(1U, num_identical); // "4+2=5+2" + EXPECT_EQ(2U, matches.size()); + // Results are sorted by "new" offsets. + run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 3); + run_check(matches[1], kExeTypeWin32X86, 1, 2, 12, 4); + EXPECT_EQ(0U, bad_matches.size()); + } + + // Single subregion match. + EXPECT_TRUE(run_test("1+2=12+4")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(1U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 4); + EXPECT_EQ(0U, bad_matches.size()); + + // Single subregion match. We're lax with redundant 0. + EXPECT_TRUE(run_test("6+04=02+10")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(1U, matches.size()); + run_check(matches[0], kExeTypeElfX86, 6, 4, 2, 10); + EXPECT_EQ(0U, bad_matches.size()); + + // Successive elements, no overlap. + EXPECT_TRUE(run_test("1+1=12+1,2+1=13+1")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(2U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 1, 12, 1); + run_check(matches[1], kExeTypeWin32X86, 2, 1, 13, 1); + EXPECT_EQ(0U, bad_matches.size()); + + // Overlap in "old" file is okay. + EXPECT_TRUE(run_test("1+2=12+2,1+2=14+2")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(2U, matches.size()); + run_check(matches[0], kExeTypeWin32X86, 1, 2, 12, 2); + run_check(matches[1], kExeTypeWin32X86, 1, 2, 14, 2); + EXPECT_EQ(0U, bad_matches.size()); + + // Entire files: Have unknown type, so are recognized as such, and ignored. + EXPECT_TRUE(run_test("0+10=0+18")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 0, 18); + + // Forgive matches that mix known type with unknown type. 
+ EXPECT_TRUE(run_test("1+2=0+18")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 1, 2, 0, 18); + + EXPECT_TRUE(run_test("0+10=12+4")); + EXPECT_EQ(0U, num_identical); + EXPECT_EQ(0U, matches.size()); + EXPECT_EQ(1U, bad_matches.size()); + run_check(bad_matches[0], kExeTypeUnknown, 0, 10, 12, 4); + + // Test invalid delimiter. + for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2x", "1+2=12+4 4+2=5+2", "1+2=12+4,4+2=5+2 ", + "1+2=12+4 "}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kInvalidDelimiter, status); + } + + // Test parse errors, including uint32_t overflow. + for (const std::string& imposed_matches : + {"x1+2=12+4,4+2=5+2,6+4=2+3", "x1+2=12+4,4+2=5+2,6+4=2+3x", ",", " ", + "+2=12+4", "1+2+12+4", "1=2+12+4", " 1+2=12+4", "1+2= 12+4", "1", "1+2", + "1+2=", "1+2=12", "1+2=12+", "4294967296+2=12+4"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kParseError, status); + } + + // Test bound errors, include 0-size. + for (const std::string& imposed_matches : + {"1+10=12+4", "1+2=12+7", "0+11=0+18", "0+12=0+17", "10+1=0+18", + "0+10=18+1", "0+0=0+18", "0+10=0+0", "1000000000+1=0+1000000000"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kOutOfBound, status); + } + + // Test overlap errors. Matches that get ignored are still tested. + for (const std::string& imposed_matches : + {"1+2=12+4,4+2=5+2,6+4=2+4", "0+10=0+18,1+2=12+4", "6+4=2+10,3+2=5+2"}) { + EXPECT_FALSE(run_test(imposed_matches)); + EXPECT_EQ(ImposedMatchParser::kOverlapInNew, status); + } + + // Test type mismatch errors. 
+ EXPECT_FALSE(run_test("1+2=2+3")); + EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status); + + EXPECT_FALSE(run_test("6+4=12+4")); + EXPECT_EQ(ImposedMatchParser::kTypeMismatch, status); +} + +} // namespace zucchini diff --git a/chromium/components/zucchini/integration_test.cc b/chromium/components/zucchini/integration_test.cc index b0ec8641eb9..c4c7004d349 100644 --- a/chromium/components/zucchini/integration_test.cc +++ b/chromium/components/zucchini/integration_test.cc @@ -22,7 +22,7 @@ namespace zucchini { base::FilePath MakeTestPath(const std::string& filename) { base::FilePath path; - DCHECK(PathService::Get(base::DIR_SOURCE_ROOT, &path)); + DCHECK(base::PathService::Get(base::DIR_SOURCE_ROOT, &path)); return path.AppendASCII("chrome") .AppendASCII("installer") .AppendASCII("zucchini") diff --git a/chromium/components/zucchini/main_utils.cc b/chromium/components/zucchini/main_utils.cc index 7a283881f99..b6b564280ef 100644 --- a/chromium/components/zucchini/main_utils.cc +++ b/chromium/components/zucchini/main_utils.cc @@ -63,13 +63,16 @@ struct Command { /******** List of Zucchini commands ********/ constexpr Command kCommands[] = { - {"gen", "-gen <old_file> <new_file> <patch_file> [-raw] [-keep]", 3, - &MainGen}, + {"gen", + "-gen <old_file> <new_file> <patch_file> [-raw] [-keep]" + " [-impose=#+#=#+#,#+#=#+#,...]", + 3, &MainGen}, {"apply", "-apply <old_file> <patch_file> <new_file> [-keep]", 3, &MainApply}, {"read", "-read <exe> [-dump]", 1, &MainRead}, {"detect", "-detect <archive_file> [-dd=format#]", 1, &MainDetect}, - {"match", "-match <old_file> <new_file>", 2, &MainMatch}, + {"match", "-match <old_file> <new_file> [-impose=#+#=#+#,#+#=#+#,...]", 2, + &MainMatch}, {"crc32", "-crc32 <file>", 1, &MainCrc32}, }; diff --git a/chromium/components/zucchini/patch_read_write_unittest.cc b/chromium/components/zucchini/patch_read_write_unittest.cc index b701df8fbe9..627513c642a 100644 --- a/chromium/components/zucchini/patch_read_write_unittest.cc +++ 
b/chromium/components/zucchini/patch_read_write_unittest.cc @@ -55,6 +55,64 @@ void TestSerialize(const ByteVector& expected, const T& value) { EXPECT_EQ(expected, buffer); } +ByteVector CreatePatchElement() { + return { + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x51, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_offset + 0x13, 0, 0, 0, // new_length + 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86 + // EquivalenceSource + 1, 0, 0, 0, // src_skip size + 0x10, // src_skip content + 1, 0, 0, 0, // dst_skip size + 0x00, // dst_skip content + 1, 0, 0, 0, // copy_count size + 0x12, // copy_count content + // ExtraDataSource + 1, 0, 0, 0, // extra_data size + 0x13, // extra_data content + // RawDeltaSource + 1, 0, 0, 0, // raw_delta_skip size + 0x14, // raw_delta_skip content + 1, 0, 0, 0, // raw_delta_diff size + 0x15, // raw_delta_diff content + // ReferenceDeltaSource + 1, 0, 0, 0, // reference_delta size + 0x16, // reference_delta content + // PatchElementReader + 2, 0, 0, 0, // pool count + 0, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x17, // extra_targets content + 2, // pool_tag + 1, 0, 0, 0, // extra_targets size + 0x18, // extra_targets content + }; +} + +ByteVector CreateElementMatch() { + return { + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x02, 0, 0, 0, // old_length + 0x03, 0, 0, 0, // new_offset + 0x04, 0, 0, 0, // new_length + 'D', 'E', 'X', ' ', // exe_type = kExeTypeDex + }; +} + +// Helper to mutate test |data| (e.g., from CreatePatchElement()) at |idx| from +// |from_val| (as sanity check) to |to_val|. 
+void ModifyByte(size_t idx, + uint8_t from_val, + uint8_t to_val, + std::vector<uint8_t>* data) { + ASSERT_EQ(from_val, (*data)[idx]); + (*data)[idx] = to_val; +} + } // namespace bool operator==(const ByteVector& a, ConstBufferView b) { @@ -62,13 +120,7 @@ bool operator==(const ByteVector& a, ConstBufferView b) { } TEST(PatchTest, ParseSerializeElementMatch) { - ByteVector data = { - 0x01, 0, 0, 0, // old_offset - 0x03, 0, 0, 0, // new_offset - 0x02, 0, 0, 0, // old_length - 0x04, 0, 0, 0, // new_length - 7, 0, 0, 0, // kExeTypeDex - }; + ByteVector data = CreateElementMatch(); BufferSource buffer_source(data.data(), data.size()); ElementMatch element_match = {}; EXPECT_TRUE(patch::ParseElementMatch(&buffer_source, &element_match)); @@ -95,6 +147,38 @@ TEST(PatchTest, ParseElementMatchTooSmall) { EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); } +TEST(PatchTest, ParseElementMatchNoLength) { + // Set old_length to 0 to trigger an error. + { + ByteVector data = CreateElementMatch(); + // old_length := 0. + ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data); + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); + } + // Set new_length to 0 to trigger an error. + { + ByteVector data = CreateElementMatch(); + // new_length := 0. + ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data); + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); + } + // Set both new_length and old_length to 0 to trigger an error. + { + ByteVector data = CreateElementMatch(); + // old_length := 0. + ModifyByte(offsetof(PatchElementHeader, old_length), 0x02, 0x00, &data); + // new_length := 0. 
+ ModifyByte(offsetof(PatchElementHeader, new_length), 0x04, 0x00, &data); + BufferSource buffer_source(data.data(), data.size()); + ElementMatch element_match = {}; + EXPECT_FALSE(patch::ParseElementMatch(&buffer_source, &element_match)); + } +} + TEST(PatchTest, ParseSerializeElementMatchExeMismatch) { ByteVector buffer(28); BufferSink buffer_sink(buffer.data(), buffer.size()); @@ -167,6 +251,7 @@ TEST(PatchTest, SerializeBufferTooSmall) { TEST(EquivalenceSinkSourceTest, Empty) { ByteVector data = { + // EquivalenceSource 0, 0, 0, 0, // src_skip size 0, 0, 0, 0, // dst_skip size 0, 0, 0, 0, // copy_count size @@ -182,6 +267,7 @@ TEST(EquivalenceSinkSourceTest, Empty) { TEST(EquivalenceSourceSinkTest, Normal) { ByteVector data = { + // EquivalenceSource 2, 0, 0, 0, // src_skip size 6, 7, // src_skip content 2, 0, 0, 0, // dst_skip size @@ -216,6 +302,7 @@ TEST(EquivalenceSourceSinkTest, Normal) { TEST(ExtraDataSourceSinkTest, Empty) { ByteVector data = { + // ExtraDataSource 0, 0, 0, 0, // extra_data size }; ExtraDataSource extra_data_source = TestInitialize<ExtraDataSource>(&data); @@ -228,6 +315,7 @@ TEST(ExtraDataSourceSinkTest, Empty) { TEST(ExtraDataSourceSinkTest, Normal) { ByteVector data = { + // ExtraDataSource 5, 0, 0, 0, // extra_data size 1, 2, 3, 4, 5, // extra_data content }; @@ -261,6 +349,7 @@ TEST(ExtraDataSourceSinkTest, Normal) { TEST(RawDeltaSourceSinkTest, Empty) { ByteVector data = { + // RawDeltaSource 0, 0, 0, 0, // raw_delta_skip size 0, 0, 0, 0, // raw_delta_diff size }; @@ -274,6 +363,7 @@ TEST(RawDeltaSourceSinkTest, Empty) { TEST(RawDeltaSinkSourceSinkTest, Normal) { ByteVector data = { + // RawDeltaSource 3, 0, 0, 0, // raw_delta_skip size 1, 3, 0, // raw_delta_skip content 3, 0, 0, 0, // raw_delta_diff size @@ -310,8 +400,22 @@ TEST(RawDeltaSinkSourceSinkTest, Normal) { TestSerialize(data, raw_delta_sink); } +TEST(RawDeltaSourceSinkTest, InvalidContent) { + ByteVector data = { + // RawDeltaSource + 2, 0, 0, 0, // raw_delta_skip size 
+ 1, 3, // raw_delta_skip content + 2, 0, 0, 0, // raw_delta_diff size + 0, 4, // raw_delta_diff content + }; + RawDeltaSource raw_delta_source = TestInitialize<RawDeltaSource>(&data); + EXPECT_FALSE(raw_delta_source.GetNext()); + EXPECT_FALSE(raw_delta_source.Done()); +} + TEST(ReferenceDeltaSourceSinkTest, Empty) { ByteVector data = { + // ReferenceDeltaSource 0, 0, 0, 0, // reference_delta size }; ReferenceDeltaSource reference_delta_source = @@ -325,6 +429,7 @@ TEST(ReferenceDeltaSourceSinkTest, Empty) { TEST(ReferenceDeltaSourceSinkTest, Normal) { ByteVector data = { + // ReferenceDeltaSource 2, 0, 0, 0, // reference_delta size 84, 47, // reference_delta content }; @@ -353,6 +458,7 @@ TEST(ReferenceDeltaSourceSinkTest, Normal) { TEST(TargetSourceSinkTest, Empty) { ByteVector data = { + // TargetSource 0, 0, 0, 0, // extra_targets size }; TargetSource target_source = TestInitialize<TargetSource>(&data); @@ -365,6 +471,7 @@ TEST(TargetSourceSinkTest, Empty) { TEST(TargetSourceSinkTest, Normal) { ByteVector data = { + // TargetSource 2, 0, 0, 0, // extra_targets size 3, 1, // extra_targets content }; @@ -391,39 +498,7 @@ TEST(TargetSourceSinkTest, Normal) { } TEST(PatchElementTest, Normal) { - ByteVector data = { - 0x01, 0, 0, 0, // old_offset - 0x03, 0, 0, 0, // new_offset - 0x02, 0, 0, 0, // old_length - 0x04, 0, 0, 0, // new_length - 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 - - 1, 0, 0, 0, // src_skip size - 0x10, // src_skip content - 1, 0, 0, 0, // dst_skip size - 0x11, // dst_skip content - 1, 0, 0, 0, // copy_count size - 0x12, // copy_count content - - 1, 0, 0, 0, // extra_data size - 0x13, // extra_data content - - 1, 0, 0, 0, // raw_delta_skip size - 0x14, // raw_delta_skip content - 1, 0, 0, 0, // raw_delta_diff size - 0x15, // raw_delta_diff content - - 1, 0, 0, 0, // reference_delta size - 0x16, // reference_delta content - - 2, 0, 0, 0, // pool count - 0, // pool_tag - 1, 0, 0, 0, // extra_targets size - 0x17, // extra_targets content - 2, // pool_tag - 1, 
0, 0, 0, // extra_targets size - 0x18, // extra_targets content - }; + ByteVector data = CreatePatchElement(); PatchElementReader patch_element_reader = TestInitialize<PatchElementReader>(&data); @@ -433,14 +508,14 @@ TEST(PatchElementTest, Normal) { EXPECT_EQ(kExeTypeWin32X86, element_match.old_element.exe_type); EXPECT_EQ(kExeTypeWin32X86, element_match.new_element.exe_type); EXPECT_EQ(0x1U, element_match.old_element.offset); - EXPECT_EQ(0x2U, element_match.old_element.size); + EXPECT_EQ(0x51U, element_match.old_element.size); EXPECT_EQ(0x3U, element_match.new_element.offset); - EXPECT_EQ(0x4U, element_match.new_element.size); + EXPECT_EQ(0x13U, element_match.new_element.size); EquivalenceSource equivalence_source = patch_element_reader.GetEquivalenceSource(); EXPECT_EQ(ByteVector({0x10}), equivalence_source.src_skip()); - EXPECT_EQ(ByteVector({0x11}), equivalence_source.dst_skip()); + EXPECT_EQ(ByteVector({0x00}), equivalence_source.dst_skip()); EXPECT_EQ(ByteVector({0x12}), equivalence_source.copy_count()); ExtraDataSource extra_data_source = patch_element_reader.GetExtraDataSource(); @@ -467,7 +542,7 @@ TEST(PatchElementTest, Normal) { PatchElementWriter patch_element_writer(element_match); patch_element_writer.SetEquivalenceSink( - EquivalenceSink({0x10}, {0x11}, {0x12})); + EquivalenceSink({0x10}, {0x00}, {0x12})); patch_element_writer.SetExtraDataSink(ExtraDataSink({0x13})); patch_element_writer.SetRawDeltaSink(RawDeltaSink({0x14}, {0x15})); patch_element_writer.SetReferenceDeltaSink(ReferenceDeltaSink({0x16})); @@ -476,29 +551,72 @@ TEST(PatchElementTest, Normal) { TestSerialize(data, patch_element_writer); } +TEST(PatchElementTest, BadEquivalence) { + // If the "old" element is too small then the test should fail. + { + ByteVector data = CreatePatchElement(); + // old_length := 0x4 (too small). 
+ ModifyByte(offsetof(PatchElementHeader, old_length), 0x51, 0x04, &data); + TestInvalidInitialize<PatchElementReader>(&data); + } + + // If the "new" element is too small then the test should fail. + { + ByteVector data = CreatePatchElement(); + // new_length := 0x5 (too small). + ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x05, &data); + TestInvalidInitialize<PatchElementReader>(&data); + } +} + +TEST(PatchElementTest, WrongExtraData) { + // Make "new" too large so insufficient extra data exists to cover the image. + { + ByteVector data = CreatePatchElement(); + // new_length := 0x14 (too large). + ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x14, &data); + TestInvalidInitialize<PatchElementReader>(&data); + } + // Make "new" too small so there is too much extra data. + { + ByteVector data = CreatePatchElement(); + // new_length := 0x12 (too small). + ModifyByte(offsetof(PatchElementHeader, new_length), 0x13, 0x12, &data); + TestInvalidInitialize<PatchElementReader>(&data); + } +} + TEST(EnsemblePatchTest, RawPatch) { ByteVector data = { + // PatchHeader 0x5A, 0x75, 0x63, 0x00, // magic 0x10, 0x32, 0x54, 0x76, // old_size 0x00, 0x11, 0x22, 0x33, // old_crc - 0x98, 0xBA, 0xDC, 0xFE, // new_size + 0x01, 0, 0, 0, // new_size 0x44, 0x55, 0x66, 0x77, // new_crc - 1, 0, 0, 0, // number of element - - 0x01, 0, 0, 0, // old_offset - 0x00, 0, 0, 0, // new_offset - 0x02, 0, 0, 0, // old_length - 0x98, 0xBA, 0xDC, 0xFE, // new_length - 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 - 0, 0, 0, 0, // src_skip size - 0, 0, 0, 0, // dst_skip size - 0, 0, 0, 0, // copy_count size - 0, 0, 0, 0, // extra_data size - 0, 0, 0, 0, // raw_delta_skip size - 0, 0, 0, 0, // raw_delta_diff size - 0, 0, 0, 0, // reference_delta size - 0, 0, 0, 0, // pool count + 1, 0, 0, 0, // number of element + + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x02, 0, 0, 0, // old_length + 0x00, 0, 0, 0, // new_offset + 0x01, 0, 0, 0, // new_length + 'P', 'x', '8', '6', // 
exe_type = EXE_TYPE_WIN32_X86 + // EquivalenceSource + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + // ExtraDataSource + 0x01, 0, 0, 0, // extra_data size + 0x04, // extra_data content + // RawDeltaSource + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + // ReferenceDeltaSource + 0, 0, 0, 0, // reference_delta size + // PatchElementReader + 0, 0, 0, 0, // pool count }; EnsemblePatchReader ensemble_patch_reader = @@ -508,7 +626,7 @@ TEST(EnsemblePatchTest, RawPatch) { EXPECT_EQ(PatchHeader::kMagic, header.magic); EXPECT_EQ(0x76543210U, header.old_size); EXPECT_EQ(0x33221100U, header.old_crc); - EXPECT_EQ(0xFEDCBA98U, header.new_size); + EXPECT_EQ(0x01U, header.new_size); EXPECT_EQ(0x77665544U, header.new_crc); const std::vector<PatchElementReader>& elements = @@ -518,7 +636,7 @@ TEST(EnsemblePatchTest, RawPatch) { EnsemblePatchWriter ensemble_patch_writer(header); PatchElementWriter patch_element_writer(elements[0].element_match()); patch_element_writer.SetEquivalenceSink({}); - patch_element_writer.SetExtraDataSink({}); + patch_element_writer.SetExtraDataSink(ExtraDataSink({0x04})); patch_element_writer.SetRawDeltaSink({}); patch_element_writer.SetReferenceDeltaSink({}); ensemble_patch_writer.AddElement(std::move(patch_element_writer)); @@ -528,27 +646,35 @@ TEST(EnsemblePatchTest, RawPatch) { TEST(EnsemblePatchTest, CheckFile) { ByteVector data = { + // PatchHeader 0x5A, 0x75, 0x63, 0x00, // magic 0x05, 0x00, 0x00, 0x00, // old_size 0xDF, 0x13, 0xE4, 0x10, // old_crc 0x03, 0x00, 0x00, 0x00, // new_size 0xDC, 0xF7, 0x00, 0x40, // new_crc - 1, 0, 0, 0, // number of element - - 0x01, 0, 0, 0, // old_offset - 0x00, 0, 0, 0, // new_offset - 0x02, 0, 0, 0, // old_length - 0x03, 0, 0, 0, // new_length - 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 - 0, 0, 0, 0, // src_skip size - 0, 0, 0, 0, // dst_skip size - 0, 0, 0, 0, // copy_count size - 0, 0, 0, 0, // extra_data size - 0, 0, 0, 0, // raw_delta_skip 
size - 0, 0, 0, 0, // raw_delta_diff size - 0, 0, 0, 0, // reference_delta size - 0, 0, 0, 0, // pool count + 1, 0, 0, 0, // number of element + + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x02, 0, 0, 0, // old_length + 0x00, 0, 0, 0, // new_offset + 0x03, 0, 0, 0, // new_length + 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86 + // EquivalenceSource + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + // ExtraDataSource + 0x03, 0, 0, 0, // extra_data size + 'A', 'B', 'C', // extra_data content + // RawDeltaSource + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + // ReferenceDeltaSource + 0, 0, 0, 0, // reference_delta size + // PatchElementReader + 0, 0, 0, 0, // pool count }; EnsemblePatchReader ensemble_patch_reader = @@ -568,27 +694,34 @@ TEST(EnsemblePatchTest, CheckFile) { TEST(EnsemblePatchTest, InvalidMagic) { ByteVector data = { + // PatchHeader 0x42, 0x42, 0x42, 0x00, // magic 0x10, 0x32, 0x54, 0x76, // old_size 0x00, 0x11, 0x22, 0x33, // old_crc 0x03, 0x00, 0x00, 0x00, // new_size 0x44, 0x55, 0x66, 0x77, // new_crc - 1, 0, 0, 0, // number of element - - 0x01, 0, 0, 0, // old_offset - 0x00, 0, 0, 0, // new_offset - 0x02, 0, 0, 0, // old_length - 0x03, 0, 0, 0, // new_length - 1, 0, 0, 0, // EXE_TYPE_WIN32_X86 - 0, 0, 0, 0, // src_skip size - 0, 0, 0, 0, // dst_skip size - 0, 0, 0, 0, // copy_count size - 0, 0, 0, 0, // extra_data size - 0, 0, 0, 0, // raw_delta_skip size - 0, 0, 0, 0, // raw_delta_diff size - 0, 0, 0, 0, // reference_delta size - 0, 0, 0, 0, // pool count + 1, 0, 0, 0, // number of element + + // PatchElementHeader + 0x01, 0, 0, 0, // old_offset + 0x02, 0, 0, 0, // old_length + 0x00, 0, 0, 0, // new_offset + 0x03, 0, 0, 0, // new_length + 'P', 'x', '8', '6', // exe_type = EXE_TYPE_WIN32_X86 + // EquivalenceSource + 0, 0, 0, 0, // src_skip size + 0, 0, 0, 0, // dst_skip size + 0, 0, 0, 0, // copy_count size + // ExtraDataSource + 0, 0, 0, 0, // 
extra_data size + // RawDeltaSource + 0, 0, 0, 0, // raw_delta_skip size + 0, 0, 0, 0, // raw_delta_diff size + // ReferenceDeltaSource + 0, 0, 0, 0, // reference_delta size + // PatchElementReader + 0, 0, 0, 0, // pool count }; TestInvalidInitialize<EnsemblePatchReader>(&data); diff --git a/chromium/components/zucchini/patch_reader.cc b/chromium/components/zucchini/patch_reader.cc index 970b90c2831..3ec17e45487 100644 --- a/chromium/components/zucchini/patch_reader.cc +++ b/chromium/components/zucchini/patch_reader.cc @@ -8,6 +8,7 @@ #include <utility> #include "base/numerics/safe_conversions.h" +#include "components/zucchini/algorithm.h" #include "components/zucchini/crc32.h" namespace zucchini { @@ -15,22 +16,30 @@ namespace zucchini { namespace patch { bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) { - PatchElementHeader element_header; - if (!source->GetValue(&element_header)) { + PatchElementHeader unsafe_element_header; + if (!source->GetValue(&unsafe_element_header)) { LOG(ERROR) << "Impossible to read ElementMatch from source."; - LOG(ERROR) << base::debug::StackTrace().ToString(); return false; } ExecutableType exe_type = - static_cast<ExecutableType>(element_header.exe_type); - if (exe_type >= kNumExeType) { - LOG(ERROR) << "Invalid ExecutableType encountered."; - LOG(ERROR) << base::debug::StackTrace().ToString(); + CastToExecutableType(unsafe_element_header.exe_type); + if (exe_type == kExeTypeUnknown) { + LOG(ERROR) << "Invalid ExecutableType found."; return false; } + if (!unsafe_element_header.old_length || !unsafe_element_header.new_length) { + LOG(ERROR) << "Empty patch element found."; + return false; + } + // |unsafe_element_header| is now considered to be safe as it has a valid + // |exe_type| and the length fields are of sufficient size. 
+ const auto& element_header = unsafe_element_header; + + // Caveat: Element offsets and lengths can still be invalid (e.g., exceeding + // archive bounds), but this will be checked later. element_match->old_element.offset = element_header.old_offset; - element_match->new_element.offset = element_header.new_offset; element_match->old_element.size = element_header.old_length; + element_match->new_element.offset = element_header.new_offset; element_match->new_element.size = element_header.new_length; element_match->old_element.exe_type = exe_type; element_match->new_element.exe_type = exe_type; @@ -38,17 +47,20 @@ bool ParseElementMatch(BufferSource* source, ElementMatch* element_match) { } bool ParseBuffer(BufferSource* source, BufferSource* buffer) { - uint32_t size = 0; - if (!source->GetValue(&size)) { + uint32_t unsafe_size = 0; // Bytes. + static_assert(sizeof(size_t) >= sizeof(unsafe_size), + "size_t is expected to be larger than uint32_t."); + if (!source->GetValue(&unsafe_size)) { LOG(ERROR) << "Impossible to read buffer size from source."; - LOG(ERROR) << base::debug::StackTrace().ToString(); return false; } - if (!source->GetRegion(base::checked_cast<size_t>(size), buffer)) { + if (!source->GetRegion(static_cast<size_t>(unsafe_size), buffer)) { LOG(ERROR) << "Impossible to read buffer content from source."; - LOG(ERROR) << base::debug::StackTrace().ToString(); return false; } + // Caveat: |buffer| is considered to be safe as it was possible to extract it + // from the patch. However, this does not mean its contents are safe and when + // parsed must be validated if possible. return true; } @@ -103,6 +115,9 @@ base::Optional<Equivalence> EquivalenceSource::GetNext() { if (!previous_dst_offset_.IsValid()) return base::nullopt; + // Caveat: |equivalence| is assumed to be safe only once the + // ValidateEquivalencesAndExtraData() method has returned true. Prior to this + // any equivalence returned is assumed to be unsafe. 
return equivalence; } @@ -120,6 +135,7 @@ base::Optional<ConstBufferView> ExtraDataSource::GetNext(offset_t size) { ConstBufferView buffer; if (!extra_data_.GetRegion(size, &buffer)) return base::nullopt; + // |buffer| is assumed to always be safe/valid. return buffer; } @@ -138,7 +154,7 @@ base::Optional<RawDeltaUnit> RawDeltaSource::GetNext() { if (raw_delta_skip_.empty() || raw_delta_diff_.empty()) return base::nullopt; - RawDeltaUnit delta = {}; + RawDeltaUnit raw_delta = {}; uint32_t copy_offset_diff = 0; if (!patch::ParseVarUInt<uint32_t>(&raw_delta_skip_, &copy_offset_diff)) return base::nullopt; @@ -146,17 +162,22 @@ base::Optional<RawDeltaUnit> RawDeltaSource::GetNext() { copy_offset_diff + copy_offset_compensation_; if (!copy_offset.IsValid()) return base::nullopt; - delta.copy_offset = copy_offset.ValueOrDie(); + raw_delta.copy_offset = copy_offset.ValueOrDie(); - if (!raw_delta_diff_.GetValue<int8_t>(&delta.diff)) + if (!raw_delta_diff_.GetValue<int8_t>(&raw_delta.diff)) + return base::nullopt; + + // A 0 value for a delta.diff is considered invalid since it has no meaning. + if (!raw_delta.diff) return base::nullopt; // We keep track of the compensation needed for next offset, taking into - // accound delta encoding and bias of -1. + // account delta encoding and bias of -1. copy_offset_compensation_ = copy_offset + 1; if (!copy_offset_compensation_.IsValid()) return base::nullopt; - return delta; + // |raw_delta| is assumed to always be safe/valid. 
+ return raw_delta; } /******** ReferenceDeltaSource ********/ @@ -167,16 +188,17 @@ ReferenceDeltaSource::ReferenceDeltaSource(const ReferenceDeltaSource&) = ReferenceDeltaSource::~ReferenceDeltaSource() = default; bool ReferenceDeltaSource::Initialize(BufferSource* source) { - return patch::ParseBuffer(source, &reference_delta_); + return patch::ParseBuffer(source, &source_); } base::Optional<int32_t> ReferenceDeltaSource::GetNext() { - if (reference_delta_.empty()) + if (source_.empty()) return base::nullopt; - int32_t delta = 0; - if (!patch::ParseVarInt<int32_t>(&reference_delta_, &delta)) + int32_t ref_delta = 0; + if (!patch::ParseVarInt<int32_t>(&source_, &ref_delta)) return base::nullopt; - return delta; + // |ref_delta| is assumed to always be safe/valid. + return ref_delta; } /******** TargetSource ********/ @@ -201,10 +223,12 @@ base::Optional<offset_t> TargetSource::GetNext() { return base::nullopt; // We keep track of the compensation needed for next target, taking into - // accound delta encoding and bias of -1. + // account delta encoding and bias of -1. target_compensation_ = target + 1; if (!target_compensation_.IsValid()) return base::nullopt; + // Caveat: |target| will be a valid offset_t, but it's up to the caller to + // check whether it's a valid offset for an image. 
return offset_t(target.ValueOrDie()); } @@ -215,10 +239,11 @@ PatchElementReader::PatchElementReader(PatchElementReader&&) = default; PatchElementReader::~PatchElementReader() = default; bool PatchElementReader::Initialize(BufferSource* source) { - bool ok = patch::ParseElementMatch(source, &element_match_) && - equivalences_.Initialize(source) && - extra_data_.Initialize(source) && raw_delta_.Initialize(source) && - reference_delta_.Initialize(source); + bool ok = + patch::ParseElementMatch(source, &element_match_) && + equivalences_.Initialize(source) && extra_data_.Initialize(source) && + ValidateEquivalencesAndExtraData() && raw_delta_.Initialize(source) && + reference_delta_.Initialize(source); if (!ok) return false; uint32_t pool_count = 0; @@ -239,7 +264,7 @@ bool PatchElementReader::Initialize(BufferSource* source) { } auto insert_result = extra_targets_.insert({pool_tag, {}}); if (!insert_result.second) { // Element already present. - LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag"; + LOG(ERROR) << "Multiple ExtraTargetList found for the same pool_tag."; return false; } if (!insert_result.first->second.Initialize(source)) @@ -248,6 +273,43 @@ bool PatchElementReader::Initialize(BufferSource* source) { return true; } +bool PatchElementReader::ValidateEquivalencesAndExtraData() { + EquivalenceSource equivalences_copy = equivalences_; + + const size_t old_region_size = element_match_.old_element.size; + const size_t new_region_size = element_match_.new_element.size; + + base::CheckedNumeric<uint32_t> total_length = 0; + // Validate that each |equivalence| falls within the bounds of the + // |element_match_| and are in order. 
+ offset_t prev_dst_end = 0; + for (auto equivalence = equivalences_copy.GetNext(); equivalence.has_value(); + equivalence = equivalences_copy.GetNext()) { + if (!RangeIsBounded(equivalence->src_offset, equivalence->length, + old_region_size) || + !RangeIsBounded(equivalence->dst_offset, equivalence->length, + new_region_size)) { + LOG(ERROR) << "Out of bounds equivalence detected."; + return false; + } + if (prev_dst_end > equivalence->dst_end()) { + LOG(ERROR) << "Out of order equivalence detected."; + return false; + } + prev_dst_end = equivalence->dst_end(); + total_length += equivalence->length; + } + if (!total_length.IsValid() || + element_match_.new_element.region().size < total_length.ValueOrDie() || + extra_data_.extra_data().size() != + element_match_.new_element.region().size - + static_cast<size_t>(total_length.ValueOrDie())) { + LOG(ERROR) << "Incorrect amount of extra_data."; + return false; + } + return true; +} + /******** EnsemblePatchReader ********/ base::Optional<EnsemblePatchReader> EnsemblePatchReader::Create( @@ -272,6 +334,7 @@ bool EnsemblePatchReader::Initialize(BufferSource* source) { LOG(ERROR) << "Patch contains invalid magic."; return false; } + // |header_| is assumed to be safe from this point forward. uint32_t element_count = 0; if (!source->GetValue(&element_count)) { diff --git a/chromium/components/zucchini/patch_reader.h b/chromium/components/zucchini/patch_reader.h index ad517f5956d..515da500144 100644 --- a/chromium/components/zucchini/patch_reader.h +++ b/chromium/components/zucchini/patch_reader.h @@ -168,13 +168,13 @@ class ReferenceDeltaSource { // Core functions. bool Initialize(BufferSource* source); base::Optional<int32_t> GetNext(); - bool Done() const { return reference_delta_.empty(); } + bool Done() const { return source_.empty(); } // Accessors for unittest. 
- BufferSource reference_delta() const { return reference_delta_; } + BufferSource reference_delta() const { return source_; } private: - BufferSource reference_delta_; + BufferSource source_; }; // Source for additional targets. @@ -218,7 +218,11 @@ class PatchElementReader { const Element& old_element() const { return element_match_.old_element; } const Element& new_element() const { return element_match_.new_element; } - // The Get*() functions below return copies of cached sources. + // The Get*() functions below return copies of cached sources. Callers may + // assume the following: + // - Equivalences satisfy basic boundary constraints + // - "Old" / "new" blocks lie entirely in "old" / "new" images. + // - "New" blocks are sorted. EquivalenceSource GetEquivalenceSource() const { return equivalences_; } ExtraDataSource GetExtraDataSource() const { return extra_data_; } RawDeltaSource GetRawDeltaSource() const { return raw_delta_; } @@ -231,6 +235,12 @@ class PatchElementReader { } private: + // Checks that "old" and "new" blocks of each item in |equivalences_| satisfy + // basic order and image bound constraints (using |element_match_| data). Also + // validates that the amount of extra data is correct. Returns true if + // successful. + bool ValidateEquivalencesAndExtraData(); + ElementMatch element_match_; // Cached sources. diff --git a/chromium/components/zucchini/patch_utils.h b/chromium/components/zucchini/patch_utils.h index b90438076f7..5f491950ab7 100644 --- a/chromium/components/zucchini/patch_utils.h +++ b/chromium/components/zucchini/patch_utils.h @@ -34,20 +34,20 @@ struct PatchHeader { }; // Sanity check. -static_assert(sizeof(PatchHeader) == 20, "PatchHeader is 20 bytes"); +static_assert(sizeof(PatchHeader) == 20, "PatchHeader must be 20 bytes"); // Header for a patch element, found at the beginning of every patch element. 
struct PatchElementHeader { uint32_t old_offset; - uint32_t new_offset; uint32_t old_length; + uint32_t new_offset; uint32_t new_length; - uint32_t exe_type; + uint32_t exe_type; // ExecutableType. }; // Sanity check. static_assert(sizeof(PatchElementHeader) == 20, - "PatchElementHeader is 28 bytes"); + "PatchElementHeader must be 20 bytes"); #pragma pack(pop) diff --git a/chromium/components/zucchini/patch_writer.cc b/chromium/components/zucchini/patch_writer.cc index 114bcd44b99..1206208e0b4 100644 --- a/chromium/components/zucchini/patch_writer.cc +++ b/chromium/components/zucchini/patch_writer.cc @@ -23,10 +23,10 @@ bool SerializeElementMatch(const ElementMatch& element_match, PatchElementHeader element_header; element_header.old_offset = base::checked_cast<uint32_t>(element_match.old_element.offset); - element_header.new_offset = - base::checked_cast<uint32_t>(element_match.new_element.offset); element_header.old_length = base::checked_cast<uint32_t>(element_match.old_element.size); + element_header.new_offset = + base::checked_cast<uint32_t>(element_match.new_element.offset); element_header.new_length = base::checked_cast<uint32_t>(element_match.new_element.size); element_header.exe_type = element_match.exe_type(); diff --git a/chromium/components/zucchini/reference_bytes_mixer.cc b/chromium/components/zucchini/reference_bytes_mixer.cc new file mode 100644 index 00000000000..c0d5ca35ca2 --- /dev/null +++ b/chromium/components/zucchini/reference_bytes_mixer.cc @@ -0,0 +1,48 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/zucchini/reference_bytes_mixer.h" + +#include "base/logging.h" +#include "components/zucchini/disassembler.h" + +namespace zucchini { + +/******** ReferenceBytesMixer ********/ + +// Default implementation is a stub, i.e., for architectures whose references +// have operation bits and payload bits stored in separate bytes. So during +// patch application, payload bits are copied for matched blocks, ignored by +// bytewise corrections, and fixed by reference target corrections. +ReferenceBytesMixer::ReferenceBytesMixer() {} + +ReferenceBytesMixer::~ReferenceBytesMixer() = default; + +// static. +std::unique_ptr<ReferenceBytesMixer> ReferenceBytesMixer::Create( + const Disassembler& src_dis, + const Disassembler& dst_dis) { + ExecutableType exe_type = src_dis.GetExeType(); + DCHECK_EQ(exe_type, dst_dis.GetExeType()); + // TODO(huangs): Add ARM handling code when ARM is ready. + return std::make_unique<ReferenceBytesMixer>(); +} + +// Stub implementation. +int ReferenceBytesMixer::NumBytes(uint8_t type) const { + return 0; +} + +// Base class implementation is a stub that should not be called. +ConstBufferView ReferenceBytesMixer::Mix( + uint8_t type, + ConstBufferView::const_iterator old_base, + offset_t old_offset, + ConstBufferView::const_iterator new_base, + offset_t new_offset) { + NOTREACHED() << "Stub."; + return ConstBufferView(); +} + +} // namespace zucchini diff --git a/chromium/components/zucchini/reference_bytes_mixer.h b/chromium/components/zucchini/reference_bytes_mixer.h new file mode 100644 index 00000000000..9bc8f2dbcb3 --- /dev/null +++ b/chromium/components/zucchini/reference_bytes_mixer.h @@ -0,0 +1,91 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ +#define COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ + +#include <stdint.h> + +#include <memory> +#include <vector> + +#include "base/macros.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/image_utils.h" + +namespace zucchini { + +class Disassembler; + +// Reference encoding may be quite complex in some architectures (e.g., ARM), +// requiring bit-level manipulation. In general, bits in a reference body fall +// under 2 categories: +// - Operation bits: Instruction op code, conditionals, or structural data. +// - Payload bits: Actual target data of the reference. These may be absolute, +// or be displacements relative to instruction pointer / program counter. +// During patch application, +// Old reference bytes = {old operation, old payload}, +// is transformed to +// New reference bytes = {new operation, new payload}. +// New image bytes are written by three sources: +// (1) Direct copy from old image to new image for matched blocks. +// (2) Bytewise diff correction. +// (3) Dedicated reference target correction. +// +// For references whose operation and payload bits are stored in easily +// separable bytes (e.g., rel32 reference in X86), (2) can exclude payload bits. +// So during patch application, (1) naively copies everything, (2) fixes +// operation bytes only, and (3) fixes payload bytes only. +// +// For architectures with references whose operation and payload bits may mix +// within shared bytes (e.g., ARM rel32), a dilemma arises: +// - (2) cannot ignore shared bytes, since otherwise new operation bits would +// not be properly transferred. +// - Having (2) always overwrite these bytes would reduce the benefits of +// reference correction, since references are likely to change. 
+// +// Our solution applies a hybrid approach: For each matching old / new reference +// pair, define: +// Mixed reference bytes = {new operation, old payload}, +// +// During patch generation, we compute bytewise correction from old reference +// bytes to the mixed reference bytes. So during patch application, (2) only +// corrects operation bit changes (and skips if they don't change), and (3) +// overwrites old payload bits to new payload bits. + +// A base class for (stateful) mixed reference byte generation. This base class +// serves as a stub. Architectures in which a reference's operation bits and +// payload bits can share common bytes (e.g., ARM rel32) should override this. +class ReferenceBytesMixer { + public: + ReferenceBytesMixer(); + virtual ~ReferenceBytesMixer(); + + // Returns a new ReferenceBytesMixer instance that's owned by the caller. + static std::unique_ptr<ReferenceBytesMixer> Create( + const Disassembler& src_dis, + const Disassembler& dst_dis); + + // Returns the number of bytes that need to be mixed for references with given + // |type|. Returns 0 if no mixing is required. + virtual int NumBytes(uint8_t type) const; + + // Computes mixed reference bytes by combining (a) "payload bits" from an + // "old" reference of |type| at |old_base[old_offset]| with (b) "operation + // bits" from a "new" reference of |type| at |new_base[new_offset]|. Returns + // the result as ConstBufferView, which is valid only until the next call to + // Mix(). 
+ virtual ConstBufferView Mix(uint8_t type, + ConstBufferView::const_iterator old_base, + offset_t old_offset, + ConstBufferView::const_iterator new_base, + offset_t new_offset); + + private: + DISALLOW_COPY_AND_ASSIGN(ReferenceBytesMixer); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_REFERENCE_BYTES_MIXER_H_ diff --git a/chromium/components/zucchini/target_pool.h b/chromium/components/zucchini/target_pool.h index b881b1e45bf..27884d628dc 100644 --- a/chromium/components/zucchini/target_pool.h +++ b/chromium/components/zucchini/target_pool.h @@ -54,6 +54,9 @@ class TargetPool { // this class. offset_t OffsetForKey(key_t key) const { return targets_[key]; } + // Returns whether a particular key is valid. + bool KeyIsValid(key_t key) const { return key < targets_.size(); } + // Uses |offset_mapper| to transform "old" |targets_| to "new" |targets_|, // resulting in sorted and unique targets. void FilterAndProject(const OffsetMapper& offset_mapper); diff --git a/chromium/components/zucchini/type_ztf.h b/chromium/components/zucchini/type_ztf.h new file mode 100644 index 00000000000..42798b22628 --- /dev/null +++ b/chromium/components/zucchini/type_ztf.h @@ -0,0 +1,52 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ +#define COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace zucchini { + +namespace ztf { + +typedef int16_t dim_t; + +// An exclusive upper bound on number of lines and/or columns. Throughout the ZTF +// code a dimension (dim) refers to a block of 1-3 digits which contains a line +// or column number. +enum : size_t { kMaxDimValue = 1000 }; + +enum SignChar : uint8_t { + kMinus = '-', + kPlus = '+', +}; + +// Lines and columns are 1-based to follow the convention of most modern text +// editing software. 
|line| and |col| should be positive, but int16_t is used to +// limit ranges such that it matches DeltaLineCol. +struct LineCol { + dim_t line; + dim_t col; +}; + +struct DeltaLineCol { + dim_t line; + dim_t col; +}; + +constexpr DeltaLineCol operator-(const LineCol& lhs, const LineCol& rhs) { + return DeltaLineCol{lhs.line - rhs.line, lhs.col - rhs.col}; +} + +constexpr LineCol operator+(const LineCol& lhs, const DeltaLineCol& rhs) { + return LineCol{lhs.line + rhs.line, lhs.col + rhs.col}; +} + +} // namespace ztf + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ diff --git a/chromium/components/zucchini/zucchini.h b/chromium/components/zucchini/zucchini.h index 9100709dd3c..e9093eb6aca 100644 --- a/chromium/components/zucchini/zucchini.h +++ b/chromium/components/zucchini/zucchini.h @@ -5,6 +5,8 @@ #ifndef COMPONENTS_ZUCCHINI_ZUCCHINI_H_ #define COMPONENTS_ZUCCHINI_ZUCCHINI_H_ +#include <string> + #include "components/zucchini/buffer_view.h" #include "components/zucchini/patch_reader.h" #include "components/zucchini/patch_writer.h" @@ -31,12 +33,25 @@ enum Code { } // namespace status -// Generates ensemble patch from |old_image| to |new_image|, and writes it to -// |patch_writer|. +// Generates ensemble patch from |old_image| to |new_image| using the default +// element detection and matching heuristics, writes the results to +// |patch_writer|, and returns a status::Code. status::Code GenerateEnsemble(ConstBufferView old_image, ConstBufferView new_image, EnsemblePatchWriter* patch_writer); +// Same as GenerateEnsemble(), but if |imposed_matches| is non-empty, then +// overrides default element detection and matching heuristics with custom +// element matching encoded in |imposed_matches|, which should be formatted as: +// "#+#=#+#,#+#=#+#,..." (e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. 
+status::Code GenerateEnsembleWithImposedMatches( + ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + EnsemblePatchWriter* patch_writer); + // Generates raw patch from |old_image| to |new_image|, and writes it to // |patch_writer|. status::Code GenerateRaw(ConstBufferView old_image, diff --git a/chromium/components/zucchini/zucchini_apply.cc b/chromium/components/zucchini/zucchini_apply.cc index 15328742b21..8969e3bbe56 100644 --- a/chromium/components/zucchini/zucchini_apply.cc +++ b/chromium/components/zucchini/zucchini_apply.cc @@ -26,17 +26,18 @@ bool ApplyEquivalenceAndExtraData(ConstBufferView old_image, for (auto equivalence = equiv_source.GetNext(); equivalence.has_value(); equivalence = equiv_source.GetNext()) { - // TODO(etiennep): Guard against out of range errors and return false - // instead. MutableBufferView::iterator next_dst_it = new_image.begin() + equivalence->dst_offset; CHECK(next_dst_it >= dst_it); + offset_t gap = static_cast<offset_t>(next_dst_it - dst_it); base::Optional<ConstBufferView> extra_data = extra_data_source.GetNext(gap); if (!extra_data) { LOG(ERROR) << "Error reading extra_data"; return false; } + // |extra_data| length is based on what was parsed from the patch so this + // copy should be valid. 
dst_it = std::copy(extra_data->begin(), extra_data->end(), dst_it); CHECK_EQ(dst_it, next_dst_it); dst_it = std::copy_n(old_image.begin() + equivalence->src_offset, @@ -150,6 +151,11 @@ bool ApplyReferencesCorrection(ExecutableType exe_type, LOG(ERROR) << "Error reading reference_delta"; return false; } + const key_t key = expected_key + delta.value(); + if (!targets.KeyIsValid(key)) { + LOG(ERROR) << "Invalid reference_delta"; + return false; + } ref->target = targets.OffsetForKey(expected_key + delta.value()); ref->location = ref->location - equivalence->src_offset + equivalence->dst_offset; diff --git a/chromium/components/zucchini/zucchini_commands.cc b/chromium/components/zucchini/zucchini_commands.cc index 2d4b1564746..62dd20d215d 100644 --- a/chromium/components/zucchini/zucchini_commands.cc +++ b/chromium/components/zucchini/zucchini_commands.cc @@ -8,6 +8,7 @@ #include <stdint.h> #include <ostream> +#include <string> #include <utility> #include "base/command_line.h" @@ -29,6 +30,7 @@ namespace { /******** Command-line Switches ********/ constexpr char kSwitchDump[] = "dump"; +constexpr char kSwitchImpose[] = "impose"; constexpr char kSwitchKeep[] = "keep"; constexpr char kSwitchRaw[] = "raw"; @@ -56,11 +58,18 @@ zucchini::status::Code MainGen(MainParams params) { zucchini::EnsemblePatchWriter patch_writer(old_image.region(), new_image.region()); - auto generate = params.command_line.HasSwitch(kSwitchRaw) - ? zucchini::GenerateRaw - : zucchini::GenerateEnsemble; - zucchini::status::Code result = - generate(old_image.region(), new_image.region(), &patch_writer); + zucchini::status::Code result = zucchini::status::kStatusSuccess; + if (params.command_line.HasSwitch(kSwitchRaw)) { + result = GenerateRaw(old_image.region(), new_image.region(), &patch_writer); + } else { + // May be empty. 
+ std::string imposed_matches = + params.command_line.GetSwitchValueASCII(kSwitchImpose); + result = GenerateEnsembleWithImposedMatches( + old_image.region(), new_image.region(), std::move(imposed_matches), + &patch_writer); + } + if (result != zucchini::status::kStatusSuccess) { params.out << "Fatal error encountered when generating patch." << std::endl; return result; @@ -154,9 +163,13 @@ zucchini::status::Code MainMatch(MainParams params) { << new_image.error(); return zucchini::status::kStatusFileReadError; } + + std::string imposed_matches = + params.command_line.GetSwitchValueASCII(kSwitchImpose); zucchini::status::Code status = zucchini::MatchAll({old_image.data(), old_image.length()}, - {new_image.data(), new_image.length()}, params.out); + {new_image.data(), new_image.length()}, + std::move(imposed_matches), params.out); if (status != zucchini::status::kStatusSuccess) params.err << "Fatal error found when matching executables." << std::endl; return status; diff --git a/chromium/components/zucchini/zucchini_gen.cc b/chromium/components/zucchini/zucchini_gen.cc index af7c7ddc4a9..29be8147841 100644 --- a/chromium/components/zucchini/zucchini_gen.cc +++ b/chromium/components/zucchini/zucchini_gen.cc @@ -10,6 +10,7 @@ #include <algorithm> #include <map> #include <memory> +#include <string> #include <utility> #include "base/logging.h" @@ -21,7 +22,9 @@ #include "components/zucchini/equivalence_map.h" #include "components/zucchini/heuristic_ensemble_matcher.h" #include "components/zucchini/image_index.h" +#include "components/zucchini/imposed_ensemble_matcher.h" #include "components/zucchini/patch_writer.h" +#include "components/zucchini/reference_bytes_mixer.h" #include "components/zucchini/suffix_array.h" #include "components/zucchini/targets_affinity.h" @@ -120,6 +123,7 @@ bool GenerateRawDelta(ConstBufferView old_image, ConstBufferView new_image, const EquivalenceMap& equivalence_map, const ImageIndex& new_image_index, + ReferenceBytesMixer* 
reference_bytes_mixer, PatchElementWriter* patch_writer) { RawDeltaSink raw_delta_sink; @@ -130,14 +134,37 @@ bool GenerateRawDelta(ConstBufferView old_image, Equivalence equivalence = candidate.eq; // For each bytewise delta from |old_image| to |new_image|, compute "copy // offset" and pass it along with delta to the sink. - for (offset_t i = 0; i < equivalence.length; ++i) { - if (new_image_index.IsReference(equivalence.dst_offset + i)) - continue; // Skip references since they're handled elsewhere. - - int8_t diff = new_image[equivalence.dst_offset + i] - - old_image[equivalence.src_offset + i]; - if (diff) - raw_delta_sink.PutNext({base_copy_offset + i, diff}); + for (offset_t i = 0; i < equivalence.length;) { + if (new_image_index.IsReference(equivalence.dst_offset + i)) { + DCHECK(new_image_index.IsToken(equivalence.dst_offset + i)); + TypeTag type_tag = + new_image_index.LookupType(equivalence.dst_offset + i); + + // Reference delta has its own flow. On some architectures (e.g., x86) + // this does not involve raw delta, so we skip. On other architectures + // (e.g., ARM) references are mixed with other bits that may change, so + // we need to "mix" data and store some changed bits into raw delta. 
+ int num_bytes = reference_bytes_mixer->NumBytes(type_tag.value()); + if (num_bytes) { + ConstBufferView mixed_ref_bytes = reference_bytes_mixer->Mix( + type_tag.value(), old_image.begin(), equivalence.src_offset + i, + new_image.begin(), equivalence.dst_offset + i); + for (int j = 0; j < num_bytes; ++j) { + int8_t diff = + mixed_ref_bytes[j] - old_image[equivalence.src_offset + i + j]; + if (diff) + raw_delta_sink.PutNext({base_copy_offset + i + j, diff}); + } + } + i += new_image_index.refs(type_tag).width(); + DCHECK_LE(i, equivalence.length); + } else { + int8_t diff = new_image[equivalence.dst_offset + i] - + old_image[equivalence.src_offset + i]; + if (diff) + raw_delta_sink.PutNext({base_copy_offset + i, diff}); + ++i; + } } base_copy_offset += equivalence.length; } @@ -225,10 +252,12 @@ bool GenerateRawElement(const std::vector<offset_t>& old_sa, kMinEquivalenceSimilarity); patch_writer->SetReferenceDeltaSink({}); + + ReferenceBytesMixer no_op_bytes_mixer; return GenerateEquivalencesAndExtraData(new_image, equivalences, patch_writer) && GenerateRawDelta(old_image, new_image, equivalences, new_image_index, - patch_writer); + &no_op_bytes_mixer, patch_writer); } bool GenerateExecutableElement(ExecutableType exe_type, @@ -282,20 +311,18 @@ bool GenerateExecutableElement(ExecutableType exe_type, } } patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink)); - + std::unique_ptr<ReferenceBytesMixer> reference_bytes_mixer = + ReferenceBytesMixer::Create(*old_disasm, *new_disasm); return GenerateEquivalencesAndExtraData(new_image, equivalences, patch_writer) && GenerateRawDelta(old_image, new_image, equivalences, new_image_index, - patch_writer); + reference_bytes_mixer.get(), patch_writer); } -/******** Exported Functions ********/ - -status::Code GenerateEnsemble(ConstBufferView old_image, - ConstBufferView new_image, - EnsemblePatchWriter* patch_writer) { - std::unique_ptr<EnsembleMatcher> matcher = - 
std::make_unique<HeuristicEnsembleMatcher>(nullptr); +status::Code GenerateEnsembleCommon(ConstBufferView old_image, + ConstBufferView new_image, + std::unique_ptr<EnsembleMatcher> matcher, + EnsemblePatchWriter* patch_writer) { if (!matcher->RunMatch(old_image, new_image)) { LOG(INFO) << "RunMatch() failed, generating raw patch."; return GenerateRaw(old_image, new_image, patch_writer); @@ -392,6 +419,29 @@ status::Code GenerateEnsemble(ConstBufferView old_image, return status::kStatusSuccess; } +/******** Exported Functions ********/ + +status::Code GenerateEnsemble(ConstBufferView old_image, + ConstBufferView new_image, + EnsemblePatchWriter* patch_writer) { + return GenerateEnsembleCommon( + old_image, new_image, std::make_unique<HeuristicEnsembleMatcher>(nullptr), + patch_writer); +} + +status::Code GenerateEnsembleWithImposedMatches( + ConstBufferView old_image, + ConstBufferView new_image, + std::string imposed_matches, + EnsemblePatchWriter* patch_writer) { + if (imposed_matches.empty()) + return GenerateEnsemble(old_image, new_image, patch_writer); + + return GenerateEnsembleCommon( + old_image, new_image, + std::make_unique<ImposedEnsembleMatcher>(imposed_matches), patch_writer); +} + status::Code GenerateRaw(ConstBufferView old_image, ConstBufferView new_image, EnsemblePatchWriter* patch_writer) { diff --git a/chromium/components/zucchini/zucchini_gen.h b/chromium/components/zucchini/zucchini_gen.h index a0f36303ef9..17f1fd4b2a4 100644 --- a/chromium/components/zucchini/zucchini_gen.h +++ b/chromium/components/zucchini/zucchini_gen.h @@ -18,6 +18,7 @@ class EquivalenceMap; class OffsetMapper; class ImageIndex; class PatchElementWriter; +class ReferenceBytesMixer; class ReferenceDeltaSink; class ReferenceSet; class TargetPool; @@ -48,6 +49,7 @@ bool GenerateRawDelta(ConstBufferView old_image, ConstBufferView new_image, const EquivalenceMap& equivalence_map, const ImageIndex& new_image_index, + ReferenceBytesMixer* reference_bytes_mixer, PatchElementWriter* 
patch_writer); // Writes reference delta between references from |old_refs| and from diff --git a/chromium/components/zucchini/zucchini_tools.cc b/chromium/components/zucchini/zucchini_tools.cc index 57ff0b2cd9b..5fcf0667f4a 100644 --- a/chromium/components/zucchini/zucchini_tools.cc +++ b/chromium/components/zucchini/zucchini_tools.cc @@ -10,7 +10,7 @@ #include <algorithm> #include <memory> #include <ostream> -#include <string> +#include <utility> #include "base/bind.h" #include "base/logging.h" @@ -19,6 +19,7 @@ #include "components/zucchini/element_detection.h" #include "components/zucchini/ensemble_matcher.h" #include "components/zucchini/heuristic_ensemble_matcher.h" +#include "components/zucchini/imposed_ensemble_matcher.h" #include "components/zucchini/io_utils.h" namespace zucchini { @@ -44,11 +45,10 @@ status::Code ReadReferences(ConstBufferView image, targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); size_t num_targets = targets.size(); - out << "Type " << int(group.type_tag().value()); - out << ": Pool=" << static_cast<uint32_t>(group.pool_tag().value()); - out << ", width=" << group.width(); - out << ", #locations=" << num_locations; - out << ", #targets=" << num_targets; + out << "Type " << int(group.type_tag().value()) + << ": Pool=" << static_cast<uint32_t>(group.pool_tag().value()) + << ", width=" << group.width() << ", #locations=" << num_locations + << ", #targets=" << num_targets; if (num_targets > 0) { double ratio = static_cast<double>(num_locations) / num_targets; out << " (ratio=" << base::StringPrintf("%.4f", ratio) << ")"; @@ -59,8 +59,8 @@ status::Code ReadReferences(ConstBufferView image, refs = group.GetReader(disasm.get()); for (auto ref = refs->GetNext(); ref; ref = refs->GetNext()) { - out << " " << AsHex<8>(ref->location); - out << " " << AsHex<8>(ref->target) << std::endl; + out << " " << AsHex<8>(ref->location) << " " << AsHex<8>(ref->target) + << std::endl; } } } @@ -112,14 +112,27 @@ status::Code 
DetectAll(ConstBufferView image, status::Code MatchAll(ConstBufferView old_image, ConstBufferView new_image, + std::string imposed_matches, std::ostream& out) { - HeuristicEnsembleMatcher matcher(&out); - if (!matcher.RunMatch(old_image, new_image)) { + std::unique_ptr<EnsembleMatcher> matcher; + if (imposed_matches.empty()) { + matcher = std::make_unique<HeuristicEnsembleMatcher>(&out); + } else { + matcher = + std::make_unique<ImposedEnsembleMatcher>(std::move(imposed_matches)); + } + if (!matcher->RunMatch(old_image, new_image)) { out << "RunMatch() failed."; return status::kStatusFatal; } - out << "Found " << matcher.matches().size() << " nontrivial matches and " - << matcher.num_identical() << " identical matches." << std::endl; + out << "Found " << matcher->matches().size() << " nontrivial matches and " + << matcher->num_identical() << " identical matches." << std::endl + << "To impose the same matches by command line, use: " << std::endl + << " -impose="; + PrefixSep sep(","); + for (const ElementMatch& match : matcher->matches()) + out << sep << match.ToString(); + out << std::endl; return status::kStatusSuccess; } diff --git a/chromium/components/zucchini/zucchini_tools.h b/chromium/components/zucchini/zucchini_tools.h index 626874552d8..bf9a95c3861 100644 --- a/chromium/components/zucchini/zucchini_tools.h +++ b/chromium/components/zucchini/zucchini_tools.h @@ -6,6 +6,7 @@ #define COMPONENTS_ZUCCHINI_ZUCCHINI_TOOLS_H_ #include <iosfwd> +#include <string> #include <vector> #include "components/zucchini/buffer_view.h" @@ -29,8 +30,14 @@ status::Code DetectAll(ConstBufferView image, std::vector<ConstBufferView>* sub_image_list); // Prints all matched regions from |old_image| to |new_image|. +// |imposed_matches|, if non-empty, encodes custom element matching to override +// the default element detection and matching heuristics, and is formatted as: +// "#+#=#+#,#+#=#+#,..." 
(e.g., "1+2=3+4", "1+2=3+4,5+6=7+8"), +// where "#+#=#+#" encodes a match as 4 unsigned integers: +// [offset in "old", size in "old", offset in "new", size in "new"]. status::Code MatchAll(ConstBufferView old_image, ConstBufferView new_image, + std::string imposed_matches, std::ostream& out); } // namespace zucchini |