// Copyright 2020 Google Inc. All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include #include #include #include #include #include "snappy-test.h" #include "snappy-internal.h" #include "snappy-sinksource.h" #include "snappy.h" #include "snappy_test_data.h" SNAPPY_FLAG(int32_t, start_len, -1, "Starting prefix size for testing (-1: just full file contents)"); SNAPPY_FLAG(int32_t, end_len, -1, "Starting prefix size for testing (-1: just full file contents)"); SNAPPY_FLAG(int32_t, bytes, 10485760, "How many bytes to compress/uncompress per file for timing"); SNAPPY_FLAG(bool, zlib, true, "Run zlib compression (http://www.zlib.net)"); SNAPPY_FLAG(bool, lzo, true, "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); SNAPPY_FLAG(bool, lz4, true, "Run LZ4 compression (https://github.com/lz4/lz4)"); SNAPPY_FLAG(bool, snappy, true, "Run snappy compression"); SNAPPY_FLAG(bool, write_compressed, false, "Write compressed versions of each file to .comp"); SNAPPY_FLAG(bool, write_uncompressed, false, "Write uncompressed versions of each file to .uncomp"); namespace snappy { namespace { #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // To test against code that reads beyond its input, this class copies a // string to a newly allocated group of pages, the last of which // is made unreadable via mprotect. Note that we need to allocate the // memory with mmap(), as POSIX allows mprotect() only on memory allocated // with mmap(), and some malloc/posix_memalign implementations expect to // be able to read previously allocated memory while doing heap allocations. class DataEndingAtUnreadablePage { public: explicit DataEndingAtUnreadablePage(const std::string& s) { const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = s.size(); // Round up space for string to a multiple of page_size. size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); alloc_size_ = space_for_string + page_size; mem_ = mmap(NULL, alloc_size_, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); CHECK_NE(MAP_FAILED, mem_); protected_page_ = reinterpret_cast(mem_) + space_for_string; char* dst = protected_page_ - size; std::memcpy(dst, s.data(), size); data_ = dst; size_ = size; // Make guard page unreadable. CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); } ~DataEndingAtUnreadablePage() { const size_t page_size = sysconf(_SC_PAGESIZE); // Undo the mprotect. CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); CHECK_EQ(0, munmap(mem_, alloc_size_)); } const char* data() const { return data_; } size_t size() const { return size_; } private: size_t alloc_size_; void* mem_; char* protected_page_; const char* data_; size_t size_; }; #else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // Fallback for systems without mmap. using DataEndingAtUnreadablePage = std::string; #endif enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { switch (comp) { #ifdef ZLIB_VERSION case ZLIB: return ZLib::MinCompressbufSize(input_size); #endif // ZLIB_VERSION #ifdef LZO_VERSION case LZO: return input_size + input_size/64 + 16 + 3; #endif // LZO_VERSION #ifdef LZ4_VERSION_NUMBER case LZ4: return LZ4_compressBound(input_size); #endif // LZ4_VERSION_NUMBER case SNAPPY: return snappy::MaxCompressedLength(input_size); default: LOG(FATAL) << "Unknown compression type number " << comp; return 0; } } // Returns true if we successfully compressed, false otherwise. // // If compressed_is_preallocated is set, do not resize the compressed buffer. // This is typically what you want for a benchmark, in order to not spend // time in the memory allocator. If you do set this flag, however, // "compressed" must be preinitialized to at least MinCompressbufSize(comp) // number of bytes, and may contain junk bytes at the end after return. bool Compress(const char* input, size_t input_size, CompressorType comp, std::string* compressed, bool compressed_is_preallocated) { if (!compressed_is_preallocated) { compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); } switch (comp) { #ifdef ZLIB_VERSION case ZLIB: { ZLib zlib; uLongf destlen = compressed->size(); int ret = zlib.Compress( reinterpret_cast(string_as_array(compressed)), &destlen, reinterpret_cast(input), input_size); CHECK_EQ(Z_OK, ret); if (!compressed_is_preallocated) { compressed->resize(destlen); } return true; } #endif // ZLIB_VERSION #ifdef LZO_VERSION case LZO: { unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; lzo_uint destlen; int ret = lzo1x_1_15_compress( reinterpret_cast(input), input_size, reinterpret_cast(string_as_array(compressed)), &destlen, mem); CHECK_EQ(LZO_E_OK, ret); delete[] mem; if (!compressed_is_preallocated) { compressed->resize(destlen); } break; } #endif // LZO_VERSION #ifdef LZ4_VERSION_NUMBER case LZ4: { int destlen = compressed->size(); destlen = LZ4_compress_default(input, string_as_array(compressed), input_size, destlen); CHECK_NE(destlen, 0); if (!compressed_is_preallocated) { compressed->resize(destlen); } break; } #endif // LZ4_VERSION_NUMBER case SNAPPY: { size_t destlen; snappy::RawCompress(input, input_size, string_as_array(compressed), &destlen); CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); if (!compressed_is_preallocated) { compressed->resize(destlen); } break; } default: { return false; // the asked-for library wasn't compiled in } } return true; } bool Uncompress(const std::string& compressed, CompressorType comp, int size, std::string* output) { // TODO: Switch to [[maybe_unused]] when we can assume C++17. (void)size; switch (comp) { #ifdef ZLIB_VERSION case ZLIB: { output->resize(size); ZLib zlib; uLongf destlen = output->size(); int ret = zlib.Uncompress( reinterpret_cast(string_as_array(output)), &destlen, reinterpret_cast(compressed.data()), compressed.size()); CHECK_EQ(Z_OK, ret); CHECK_EQ(static_cast(size), destlen); break; } #endif // ZLIB_VERSION #ifdef LZO_VERSION case LZO: { output->resize(size); lzo_uint destlen; int ret = lzo1x_decompress( reinterpret_cast(compressed.data()), compressed.size(), reinterpret_cast(string_as_array(output)), &destlen, NULL); CHECK_EQ(LZO_E_OK, ret); CHECK_EQ(static_cast(size), destlen); break; } #endif // LZO_VERSION #ifdef LZ4_VERSION_NUMBER case LZ4: { output->resize(size); int destlen = output->size(); destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), compressed.size(), destlen); CHECK_NE(destlen, 0); CHECK_EQ(size, destlen); break; } #endif // LZ4_VERSION_NUMBER case SNAPPY: { snappy::RawUncompress(compressed.data(), compressed.size(), string_as_array(output)); break; } default: { return false; // the asked-for library wasn't compiled in } } return true; } void Measure(const char* data, size_t length, CompressorType comp, int repeats, int block_size) { // Run tests a few time and pick median running times static const int kRuns = 5; double ctime[kRuns]; double utime[kRuns]; int compressed_size = 0; { // Chop the input into blocks int num_blocks = (length + block_size - 1) / block_size; std::vector input(num_blocks); std::vector input_length(num_blocks); std::vector compressed(num_blocks); std::vector output(num_blocks); for (int b = 0; b < num_blocks; ++b) { int input_start = b * block_size; int input_limit = std::min((b+1)*block_size, length); input[b] = data+input_start; input_length[b] = input_limit-input_start; } // Pre-grow the output buffers so we don't measure string append time. for (std::string& compressed_block : compressed) { compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); } // First, try one trial compression to make sure the code is compiled in if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { LOG(WARNING) << "Skipping " << names[comp] << ": " << "library not compiled in"; return; } for (int run = 0; run < kRuns; ++run) { CycleTimer ctimer, utimer; // Pre-grow the output buffers so we don't measure string append time. for (std::string& compressed_block : compressed) { compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); } ctimer.Start(); for (int b = 0; b < num_blocks; ++b) { for (int i = 0; i < repeats; ++i) Compress(input[b], input_length[b], comp, &compressed[b], true); } ctimer.Stop(); // Compress once more, with resizing, so we don't leave junk // at the end that will confuse the decompressor. for (int b = 0; b < num_blocks; ++b) { Compress(input[b], input_length[b], comp, &compressed[b], false); } for (int b = 0; b < num_blocks; ++b) { output[b].resize(input_length[b]); } utimer.Start(); for (int i = 0; i < repeats; ++i) { for (int b = 0; b < num_blocks; ++b) Uncompress(compressed[b], comp, input_length[b], &output[b]); } utimer.Stop(); ctime[run] = ctimer.Get(); utime[run] = utimer.Get(); } compressed_size = 0; for (const std::string& compressed_item : compressed) { compressed_size += compressed_item.size(); } } std::sort(ctime, ctime + kRuns); std::sort(utime, utime + kRuns); const int med = kRuns/2; float comp_rate = (length / ctime[med]) * repeats / 1048576.0; float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; std::string x = names[comp]; x += ":"; std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate) : std::string("?"); std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " "comp %5.1f MB/s uncomp %5s MB/s\n", x.c_str(), block_size/(1<<20), static_cast(length), static_cast(compressed_size), (compressed_size * 100.0) / std::max(1, length), comp_rate, urate.c_str()); } void CompressFile(const char* fname) { std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); std::string compressed; Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, file::Defaults())); } void UncompressFile(const char* fname) { std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); size_t uncompLength; CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), &uncompLength)); std::string uncompressed; uncompressed.resize(uncompLength); CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, file::Defaults())); } void MeasureFile(const char* fname) { std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); std::printf("%-40s :\n", fname); int start_len = (snappy::GetFlag(FLAGS_start_len) < 0) ? fullinput.size() : snappy::GetFlag(FLAGS_start_len); int end_len = fullinput.size(); if (snappy::GetFlag(FLAGS_end_len) >= 0) { end_len = std::min(fullinput.size(), snappy::GetFlag(FLAGS_end_len)); } for (int len = start_len; len <= end_len; ++len) { const char* const input = fullinput.data(); int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1); if (snappy::GetFlag(FLAGS_zlib)) Measure(input, len, ZLIB, repeats, 1024 << 10); if (snappy::GetFlag(FLAGS_lzo)) Measure(input, len, LZO, repeats, 1024 << 10); if (snappy::GetFlag(FLAGS_lz4)) Measure(input, len, LZ4, repeats, 1024 << 10); if (snappy::GetFlag(FLAGS_snappy)) Measure(input, len, SNAPPY, repeats, 4096 << 10); // For block-size based measurements if (0 && snappy::GetFlag(FLAGS_snappy)) { Measure(input, len, SNAPPY, repeats, 8<<10); Measure(input, len, SNAPPY, repeats, 16<<10); Measure(input, len, SNAPPY, repeats, 32<<10); Measure(input, len, SNAPPY, repeats, 64<<10); Measure(input, len, SNAPPY, repeats, 256<<10); Measure(input, len, SNAPPY, repeats, 1024<<10); } } } } // namespace } // namespace snappy int main(int argc, char** argv) { InitGoogle(argv[0], &argc, &argv, true); for (int arg = 1; arg < argc; ++arg) { if (snappy::GetFlag(FLAGS_write_compressed)) { snappy::CompressFile(argv[arg]); } else if (snappy::GetFlag(FLAGS_write_uncompressed)) { snappy::UncompressFile(argv[arg]); } else { snappy::MeasureFile(argv[arg]); } } return 0; }