From da51f56cc21233c2d30f0fe0d171727c3102b2e0 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 31 Jan 2018 16:33:43 +0100 Subject: BASELINE: Update Chromium to 65.0.3525.40 Also imports missing submodules Change-Id: I36901b7c6a325cda3d2c10cedb2186c25af3b79b Reviewed-by: Alexandru Croitor --- chromium/third_party/zlib/BUILD.gn | 146 ++++---- chromium/third_party/zlib/adler32_simd.c | 4 +- .../zlib/contrib/optimizations/arm/chunkcopy_arm.h | 122 ------- .../zlib/contrib/optimizations/chunkcopy.h | 376 ++++++++++++++++----- .../zlib/contrib/optimizations/inffast_chunk.c | 311 +++++++++++++++++ .../zlib/contrib/optimizations/inffast_chunk.h | 15 + .../zlib/contrib/optimizations/inffast_chunky.c | 311 ----------------- .../zlib/contrib/optimizations/inffast_chunky.h | 12 - .../zlib/contrib/optimizations/inflate.c | 9 +- .../zlib/contrib/tests/fuzzers/BUILD.gn | 45 +++ chromium/third_party/zlib/crc32.c | 27 ++ chromium/third_party/zlib/crc32_simd.c | 157 +++++++++ chromium/third_party/zlib/crc32_simd.h | 27 ++ chromium/third_party/zlib/deflate.c | 14 +- chromium/third_party/zlib/fill_window_sse.c | 6 +- chromium/third_party/zlib/names.h | 12 +- chromium/third_party/zlib/patches/0001-simd.patch | 14 +- .../zlib/patches/0005-adler32-simd.patch | 2 +- 18 files changed, 1002 insertions(+), 608 deletions(-) delete mode 100644 chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h create mode 100644 chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c create mode 100644 chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h delete mode 100644 chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c delete mode 100644 chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h create mode 100644 chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn create mode 100644 chromium/third_party/zlib/crc32_simd.c create mode 100644 chromium/third_party/zlib/crc32_simd.h (limited to 'chromium/third_party/zlib') diff --git a/chromium/third_party/zlib/BUILD.gn b/chromium/third_party/zlib/BUILD.gn index 2f19a8fe5f6..e87d1293139 100644 --- a/chromium/third_party/zlib/BUILD.gn +++ b/chromium/third_party/zlib/BUILD.gn @@ -2,19 +2,12 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -import("//testing/libfuzzer/fuzzer_test.gni") - if (current_cpu == "arm" || current_cpu == "arm64") { import("//build/config/arm.gni") } config("zlib_config") { include_dirs = [ "." ] - if (current_cpu == "arm" || current_cpu == "arm64") { - if (arm_use_neon) { - include_dirs += [ "contrib/optimizations/arm" ] - } - } } config("zlib_adler32_simd_config") { @@ -58,18 +51,97 @@ source_set("zlib_adler32_simd") { } } + public_configs = [ ":zlib_adler32_simd_config" ] +} + +config("zlib_inflate_chunk_simd_config") { + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + defines = [ "INFLATE_CHUNK_SIMD_SSE2" ] + } + + if (current_cpu == "arm" || current_cpu == "arm64") { + if (arm_use_neon) { + defines = [ "INFLATE_CHUNK_SIMD_NEON" ] + } + } +} + +source_set("zlib_inflate_chunk_simd") { + visibility = [ ":*" ] + + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + include_dirs = [ "." ] + + sources = [ + "contrib/optimizations/chunkcopy.h", + "contrib/optimizations/inffast_chunk.c", + "contrib/optimizations/inffast_chunk.h", + "contrib/optimizations/inflate.c", + ] + } + + if (current_cpu == "arm" || current_cpu == "arm64") { + if (arm_use_neon) { + include_dirs = [ "." ] + + sources = [ + "contrib/optimizations/chunkcopy.h", + "contrib/optimizations/inffast_chunk.c", + "contrib/optimizations/inffast_chunk.h", + "contrib/optimizations/inflate.c", + ] + + # TODO(772870) back off from -O3 while investigating Android + # One perf bot PNG decode regression. + # if (!is_debug) { + # # Use optimize_speed (-O3) to output the _smallest_ code. + # configs -= [ "//build/config/compiler:default_optimization" ] + # configs += [ "//build/config/compiler:optimize_speed" ] + # } + } + } + configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] - public_configs = [ ":zlib_adler32_simd_config" ] + public_configs = [ ":zlib_inflate_chunk_simd_config" ] +} + +config("zlib_crc32_simd_config") { + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + defines = [ "CRC32_SIMD_SSE42_PCLMUL" ] + } +} + +source_set("zlib_crc32_simd") { + visibility = [ ":*" ] + + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + sources = [ + "crc32_simd.c", + "crc32_simd.h", + ] + + if (!is_win || is_clang) { + cflags = [ + "-msse4.2", + "-mpclmul", + ] + } + } + + public_configs = [ ":zlib_crc32_simd_config" ] } static_library("zlib_x86_simd") { + visibility = [ ":*" ] + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { sources = [ "crc_folding.c", "fill_window_sse.c", ] + if (!is_win || is_clang) { cflags = [ "-msse4.2", @@ -129,31 +201,25 @@ static_library("zlib") { "zutil.h", ] - if (current_cpu == "arm" || current_cpu == "arm64") { - if (arm_use_neon) { - sources -= [ "inflate.c" ] - sources += [ - "contrib/optimizations/arm/chunkcopy_arm.h", - "contrib/optimizations/chunkcopy.h", - "contrib/optimizations/inffast_chunky.c", - "contrib/optimizations/inffast_chunky.h", - "contrib/optimizations/inflate.c", - ] - } - } - defines = [] deps = [] if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { - sources += [ "x86.c" ] + deps += [ ":zlib_crc32_simd" ] deps += [ ":zlib_adler32_simd" ] + sources += [ "x86.c" ] + + deps += [ ":zlib_inflate_chunk_simd" ] + sources -= [ "inflate.c" ] } if (current_cpu == "arm" || current_cpu == "arm64") { if (arm_use_neon) { deps += [ ":zlib_adler32_simd" ] + + deps += [ ":zlib_inflate_chunk_simd" ] + sources -= [ "inflate.c" ] } } @@ -170,42 +236,6 @@ static_library("zlib") { deps += [ ":zlib_x86_simd" ] } -fuzzer_test("zlib_uncompress_fuzzer") { - sources = [ - "contrib/tests/fuzzers/uncompress_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_inflate_fuzzer") { - sources = [ - "contrib/tests/fuzzers/inflate_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_deflate_set_dictionary_fuzzer") { - sources = [ - "contrib/tests/fuzzers/deflate_set_dictionary_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_deflate_fuzzer") { - sources = [ - "contrib/tests/fuzzers/deflate_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - config("minizip_warnings") { visibility = [ ":*" ] if (is_clang) { diff --git a/chromium/third_party/zlib/adler32_simd.c b/chromium/third_party/zlib/adler32_simd.c index d73f97e52cf..1354915cc09 100644 --- a/chromium/third_party/zlib/adler32_simd.c +++ b/chromium/third_party/zlib/adler32_simd.c @@ -76,7 +76,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */ { unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */ if (n > blocks) - n = blocks; + n = (unsigned) blocks; blocks -= n; const __m128i tap1 = @@ -237,7 +237,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* NEON */ { unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */ if (n > blocks) - n = blocks; + n = (unsigned) blocks; blocks -= n; /* diff --git a/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h b/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h deleted file mode 100644 index 41474c8aa87..00000000000 --- a/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h +++ /dev/null @@ -1,122 +0,0 @@ -/* chunkcopy_arm.h -- fast copies and sets - * Copyright (C) 2017 ARM, Inc. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifndef CHUNKCOPY_ARM_H -#define CHUNKCOPY_ARM_H - -#include -#include "zutil.h" - -#if __STDC_VERSION__ >= 199901L -#define Z_RESTRICT restrict -#else -#define Z_RESTRICT -#endif - -/* A port to a new arch only requires to implement 2 functions - (vld_dup and chunkset_core) and the chunk type. -*/ - -typedef uint8x16_t chunkcopy_chunk_t; -#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t) - -/* Forward declarations. */ -static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, - unsigned FAR* dist, - unsigned FAR* len); - -static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, - const unsigned char FAR* from, - unsigned len); - -/* Architecture specific code starts here. */ -static inline uint8x16_t chunkset_vld1q_dup_u8x8( - const unsigned char FAR* Z_RESTRICT from) { -#if defined(__clang__) || defined(__aarch64__) - return vreinterpretq_u8_u64(vld1q_dup_u64((void*)from)); -#else - /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a - * void pointer, so here's an alternate implementation. - */ - uint8x8_t h = vld1_u8(from); - return vcombine_u8(h, h); -#endif -} - -/* - Perform an overlapping copy which behaves as a memset() operation, but - supporting periods other than one, and assume that length is non-zero and - that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output - even if the length is shorter than this. - TODO(cavalcantii): maybe rename vreinterpretq and chunkset_vld to make it - generic and move this code to chunkcopy.h (plus we - won't need the forward declarations). - */ -static inline unsigned char FAR* chunkset_core(unsigned char FAR* out, - unsigned period, - unsigned len) { - uint8x16_t f; - int bump = ((len - 1) % sizeof(f)) + 1; - - switch (period) { - case 1: - f = vld1q_dup_u8(out - 1); - vst1q_u8(out, f); - out += bump; - len -= bump; - while (len > 0) { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } - return out; - case 2: - f = vreinterpretq_u8_u16(vld1q_dup_u16((void*)(out - 2))); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = vreinterpretq_u8_u16(vld1q_dup_u16((void*)(out - 2))); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - case 4: - f = vreinterpretq_u8_u32(vld1q_dup_u32((void*)(out - 4))); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = vreinterpretq_u8_u32(vld1q_dup_u32((void*)(out - 4))); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - case 8: - f = chunkset_vld1q_dup_u8x8(out - 8); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = chunkset_vld1q_dup_u8x8(out - 8); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - } - out = chunkunroll_relaxed(out, &period, &len); - return chunkcopy_core(out, out - period, len); -} - -#endif /* CHUNKCOPY_ARM_H */ diff --git a/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h b/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h index 20806434976..fe38be67742 100644 --- a/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h +++ b/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h @@ -1,50 +1,90 @@ -/* chunkcopy.h -- fast copies and sets +/* chunkcopy.h -- fast chunk copy and set operations * Copyright (C) 2017 ARM, Inc. + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. * For conditions of distribution and use, see copyright notice in zlib.h */ #ifndef CHUNKCOPY_H #define CHUNKCOPY_H -// TODO(cavalcantii): add the Intel code next. -#include "chunkcopy_arm.h" +#include +#include "zutil.h" + +#define Z_STATIC_ASSERT(name, assert) typedef char name[(assert) ? 1 : -1] + +#if __STDC_VERSION__ >= 199901L +#define Z_RESTRICT restrict +#else +#define Z_RESTRICT +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__llvm__) +#define Z_BUILTIN_MEMCPY __builtin_memcpy +#else +#define Z_BUILTIN_MEMCPY zmemcpy +#endif + +#if defined(INFLATE_CHUNK_SIMD_NEON) +#include +typedef uint8x16_t z_vec128i_t; +#elif defined(INFLATE_CHUNK_SIMD_SSE2) +#include +typedef __m128i z_vec128i_t; +#else +#error chunkcopy.h inflate chunk SIMD is not defined for your build target +#endif /* - Ask the compiler to perform a wide, unaligned load with an machine - instruction appropriate for the chunkcopy_chunk_t type. + * chunk copy type: the z_vec128i_t type size should be exactly 128-bits + * and equal to CHUNKCOPY_CHUNK_SIZE. */ -static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR* s) { - chunkcopy_chunk_t c; - __builtin_memcpy(&c, s, sizeof(c)); - return c; -} +#define CHUNKCOPY_CHUNK_SIZE sizeof(z_vec128i_t) + +Z_STATIC_ASSERT(vector_128_bits_wide, + CHUNKCOPY_CHUNK_SIZE == sizeof(int8_t) * 16); /* - Ask the compiler to perform a wide, unaligned store with an machine - instruction appropriate for the chunkcopy_chunk_t type. + * Ask the compiler to perform a wide, unaligned load with a machine + * instruction appropriate for the z_vec128i_t type. */ -static inline void storechunk(unsigned char FAR* d, chunkcopy_chunk_t c) { - __builtin_memcpy(d, &c, sizeof(c)); +static inline z_vec128i_t loadchunk( + const unsigned char FAR* s) { + z_vec128i_t v; + Z_BUILTIN_MEMCPY(&v, s, sizeof(v)); + return v; } /* - Perform a memcpy-like operation, but assume that length is non-zero and that - it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if - the length is shorter than this. - - It also guarantees that it will properly unroll the data if the distance - between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on - in chunkcopy_relaxed(). + * Ask the compiler to perform a wide, unaligned store with a machine + * instruction appropriate for the z_vec128i_t type. + */ +static inline void storechunk( + unsigned char FAR* d, + const z_vec128i_t v) { + Z_BUILTIN_MEMCPY(d, &v, sizeof(v)); +} - Aside from better memory bus utilisation, this means that short copies - (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop - without iteration, which will hopefully make the branch prediction more - reliable. +/* + * Perform a memcpy-like operation, assuming that length is non-zero and that + * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if + * the length is shorter than this. + * + * It also guarantees that it will properly unroll the data if the distance + * between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on + * in chunkcopy_relaxed(). + * + * Aside from better memory bus utilisation, this means that short copies + * (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop + * without iteration, which will hopefully make the branch prediction more + * reliable. */ -static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, - const unsigned char FAR* from, - unsigned len) { - int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; +static inline unsigned char FAR* chunkcopy_core( + unsigned char FAR* out, + const unsigned char FAR* from, + unsigned len) { + const int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; storechunk(out, loadchunk(from)); out += bump; from += bump; @@ -58,12 +98,12 @@ static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, } /* - Like chunkcopy_core, but avoid writing beyond of legal output. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Like chunkcopy_core(), but avoid writing beyond of legal output. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_core_safe( unsigned char FAR* out, @@ -71,20 +111,20 @@ static inline unsigned char FAR* chunkcopy_core_safe( unsigned len, unsigned char FAR* limit) { Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if (limit - out < CHUNKCOPY_CHUNK_SIZE) { + if ((limit - out) < (ptrdiff_t)CHUNKCOPY_CHUNK_SIZE) { const unsigned char FAR* Z_RESTRICT rfrom = from; if (len & 8) { - __builtin_memcpy(out, rfrom, 8); + Z_BUILTIN_MEMCPY(out, rfrom, 8); out += 8; rfrom += 8; } if (len & 4) { - __builtin_memcpy(out, rfrom, 4); + Z_BUILTIN_MEMCPY(out, rfrom, 4); out += 4; rfrom += 4; } if (len & 2) { - __builtin_memcpy(out, rfrom, 2); + Z_BUILTIN_MEMCPY(out, rfrom, 2); out += 2; rfrom += 2; } @@ -97,18 +137,19 @@ static inline unsigned char FAR* chunkcopy_core_safe( } /* - Perform short copies until distance can be rewritten as being at least - CHUNKCOPY_CHUNK_SIZE. - - This assumes that it's OK to overwrite at least the first - 2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than - this. This assumption holds within inflate_fast() which starts every - iteration with at least 258 bytes of output space available (258 being the - maximum length output from a single token; see inffast.c). + * Perform short copies until distance can be rewritten as being at least + * CHUNKCOPY_CHUNK_SIZE. + * + * Assumes it's OK to overwrite at least the first 2*CHUNKCOPY_CHUNK_SIZE + * bytes of output even if the copy is shorter than this. This assumption + * holds within zlib inflate_fast(), which starts every iteration with at + * least 258 bytes of output space available (258 being the maximum length + * output from a single token; see inffast.c). */ -static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, - unsigned FAR* dist, - unsigned FAR* len) { +static inline unsigned char FAR* chunkunroll_relaxed( + unsigned char FAR* out, + unsigned FAR* dist, + unsigned FAR* len) { const unsigned char FAR* from = out - *dist; while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { storechunk(out, loadchunk(from)); @@ -119,15 +160,180 @@ static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, return out; } +#if defined(INFLATE_CHUNK_SIMD_NEON) +/* + * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it in + * every 64-bit component of the 128-bit result (64-bit int splat). + */ +static inline z_vec128i_t v_load64_dup(const void* src) { + return vcombine_u8(vld1_u8(src), vld1_u8(src)); +} + +/* + * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it in + * every 32-bit component of the 128-bit result (32-bit int splat). + */ +static inline z_vec128i_t v_load32_dup(const void* src) { + int32_t i32; + Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); + return vreinterpretq_u8_s32(vdupq_n_s32(i32)); +} + +/* + * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it in + * every 16-bit component of the 128-bit result (16-bit int splat). + */ +static inline z_vec128i_t v_load16_dup(const void* src) { + int16_t i16; + Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); + return vreinterpretq_u8_s16(vdupq_n_s16(i16)); +} + +/* + * v_load8_dup(): load the 8-bit int *src and duplicate it in every 8-bit + * component of the 128-bit result (8-bit int splat). + */ +static inline z_vec128i_t v_load8_dup(const void* src) { + return vld1q_dup_u8((const uint8_t*)src); +} + +/* + * v_store_128(): store the 128-bit vec in a memory destination (that might + * not be 16-byte aligned) void* out. + */ +static inline void v_store_128(void* out, const z_vec128i_t vec) { + vst1q_u8(out, vec); +} + +#elif defined(INFLATE_CHUNK_SIMD_SSE2) +/* + * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it in + * every 64-bit component of the 128-bit result (64-bit int splat). + */ +static inline z_vec128i_t v_load64_dup(const void* src) { + int64_t i64; + Z_BUILTIN_MEMCPY(&i64, src, sizeof(i64)); + return _mm_set1_epi64x(i64); +} + +/* + * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it in + * every 32-bit component of the 128-bit result (32-bit int splat). + */ +static inline z_vec128i_t v_load32_dup(const void* src) { + int32_t i32; + Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); + return _mm_set1_epi32(i32); +} + /* - Perform a memcpy-like operation, but assume that length is non-zero and that - it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if - the length is shorter than this. + * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it in + * every 16-bit component of the 128-bit result (16-bit int splat). + */ +static inline z_vec128i_t v_load16_dup(const void* src) { + int16_t i16; + Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); + return _mm_set1_epi16(i16); +} - Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour - of overlapping buffers, regardless of the distance between the pointers. - This is reflected in the `restrict`-qualified pointers, allowing the - compiler to reorder loads and stores. +/* + * v_load8_dup(): load the 8-bit int *src and duplicate it in every 8-bit + * component of the 128-bit result (8-bit int splat). + */ +static inline z_vec128i_t v_load8_dup(const void* src) { + return _mm_set1_epi8(*(const char*)src); +} + +/* + * v_store_128(): store the 128-bit vec in a memory destination (that might + * not be 16-byte aligned) void* out. + */ +static inline void v_store_128(void* out, const z_vec128i_t vec) { + _mm_storeu_si128((__m128i*)out, vec); +} +#endif + +/* + * Perform an overlapping copy which behaves as a memset() operation, but + * supporting periods other than one, and assume that length is non-zero and + * that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output + * even if the length is shorter than this. + */ +static inline unsigned char FAR* chunkset_core( + unsigned char FAR* out, + unsigned period, + unsigned len) { + z_vec128i_t v; + const int bump = ((len - 1) % sizeof(v)) + 1; + + switch (period) { + case 1: + v = v_load8_dup(out - 1); + v_store_128(out, v); + out += bump; + len -= bump; + while (len > 0) { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } + return out; + case 2: + v = v_load16_dup(out - 2); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load16_dup(out - 2); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + case 4: + v = v_load32_dup(out - 4); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load32_dup(out - 4); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + case 8: + v = v_load64_dup(out - 8); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load64_dup(out - 8); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + } + out = chunkunroll_relaxed(out, &period, &len); + return chunkcopy_core(out, out - period, len); +} + +/* + * Perform a memcpy-like operation, but assume that length is non-zero and that + * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if + * the length is shorter than this. + * + * Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour + * of overlapping buffers, regardless of the distance between the pointers. + * This is reflected in the `restrict`-qualified pointers, allowing the + * compiler to re-order loads and stores. */ static inline unsigned char FAR* chunkcopy_relaxed( unsigned char FAR* Z_RESTRICT out, @@ -137,17 +343,17 @@ static inline unsigned char FAR* chunkcopy_relaxed( } /* - Like chunkcopy_relaxed, but avoid writing beyond of legal output. - - Unlike chunkcopy_core_safe() above, no guarantee is made regarding the - behaviour of overlapping buffers, regardless of the distance between the - pointers. This is reflected in the `restrict`-qualified pointers, allowing - the compiler to reorder loads and stores. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Like chunkcopy_relaxed(), but avoid writing beyond of legal output. + * + * Unlike chunkcopy_core_safe() above, no guarantee is made regarding the + * behaviour of overlapping buffers, regardless of the distance between the + * pointers. This is reflected in the `restrict`-qualified pointers, allowing + * the compiler to re-order loads and stores. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_safe( unsigned char FAR* out, @@ -159,14 +365,16 @@ static inline unsigned char FAR* chunkcopy_safe( } /* - Perform chunky copy within the same buffer, where the source and destination - may potentially overlap. - - Assumes that len > 0 on entry, and that it's safe to write at least - CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. + * Perform chunky copy within the same buffer, where the source and destination + * may potentially overlap. + * + * Assumes that len > 0 on entry, and that it's safe to write at least + * CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. */ -static inline unsigned char FAR* -chunkcopy_lapped_relaxed(unsigned char FAR* out, unsigned dist, unsigned len) { +static inline unsigned char FAR* chunkcopy_lapped_relaxed( + unsigned char FAR* out, + unsigned dist, + unsigned len) { if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) { return chunkset_core(out, dist, len); } @@ -174,13 +382,13 @@ chunkcopy_lapped_relaxed(unsigned char FAR* out, unsigned dist, unsigned len) { } /* - Behave like chunkcopy_lapped_relaxed, but avoid writing beyond of legal - output. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Behave like chunkcopy_lapped_relaxed(), but avoid writing beyond of legal + * output. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_lapped_safe( unsigned char FAR* out, @@ -188,7 +396,7 @@ static inline unsigned char FAR* chunkcopy_lapped_safe( unsigned len, unsigned char FAR* limit) { Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) { + if ((limit - out) < (ptrdiff_t)(3 * CHUNKCOPY_CHUNK_SIZE)) { /* TODO(cavalcantii): try harder to optimise this */ while (len-- > 0) { *out = *(out - dist); @@ -199,6 +407,8 @@ static inline unsigned char FAR* chunkcopy_lapped_safe( return chunkcopy_lapped_relaxed(out, dist, len); } +#undef Z_STATIC_ASSERT #undef Z_RESTRICT +#undef Z_BUILTIN_MEMCPY #endif /* CHUNKCOPY_H */ diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c new file mode 100644 index 00000000000..4829d0da4dc --- /dev/null +++ b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c @@ -0,0 +1,311 @@ +/* inffast_chunky.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "contrib/optimizations/inffast_chunk.h" +#include "contrib/optimizations/chunkcopy.h" + +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + strm->next_out[0..strm->avail_out] does not overlap with + strm->next_in[0..strm->avail_in] + strm->state->window is allocated with an additional + CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast_chunk_(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ + unsigned char FAR *limit; /* safety limit for chunky copies */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); + limit = out + strm->avail_out; +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + *out++ = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext >= op) { /* contiguous in window */ + from += wnext - op; + } + else { /* wrap around window */ + op -= wnext; + from += wsize - op; + if (op < len) { /* some from end of window */ + len -= op; + out = chunkcopy_safe(out, from, op, limit); + from = window; /* more from start of window */ + op = wnext; + /* This (rare) case can create a situation where + the first chunkcopy below must be checked. + */ + } + } + if (op < len) { /* still need some from output */ + out = chunkcopy_safe(out, from, op, limit); + len -= op; + /* When dist is small the amount of data that can be + copied from the window is also small, and progress + towards the dangerous end of the output buffer is + also small. This means that for trivial memsets and + for chunkunroll_relaxed() a safety check is + unnecessary. However, these conditions may not be + entered at all, and in that case it's possible that + the main copy is near the end. + */ + out = chunkunroll_relaxed(out, &dist, &len); + out = chunkcopy_safe(out, out - dist, len, limit); + } else { + /* from points to window, so there is no risk of + overlapping pointers requiring memset-like behaviour + */ + out = chunkcopy_safe(out, from, len, limit); + } + } + else { + /* Whole reference is in range of current output. No + range checks are necessary because we start with room + for at least 258 bytes of output, so unroll and roundoff + operations can write beyond `out+len` so long as they + stay within 258 bytes of `out`. + */ + out = chunkcopy_lapped_relaxed(out, dist, len); + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h new file mode 100644 index 00000000000..80636e75879 --- /dev/null +++ b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h @@ -0,0 +1,15 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * Copyright (C) 2017 ARM, Inc. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +// TODO(cblume): incorporate the patch done on crbug.com/764431 here and +// in related files to define and use INFLATE_FAST_MIN_HAVE/_LEFT etc. + +void ZLIB_INTERNAL inflate_fast_chunk_ OF((z_streamp strm, unsigned start)); diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c b/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c deleted file mode 100644 index e2bc735451f..00000000000 --- a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c +++ /dev/null @@ -1,311 +0,0 @@ -/* inffast_chunky.c -- fast decoding - * Copyright (C) 1995-2017 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "contrib/optimizations/inffast_chunky.h" -#include "contrib/optimizations/chunkcopy.h" - -#ifdef ASMINF -# pragma message("Assembler code may have bugs -- use at your own risk") -#else - -/* - Decode literal, length, and distance codes and write out the resulting - literal and match bytes until either not enough input or output is - available, an end-of-block is encountered, or a data error is encountered. - When large enough input and output buffers are supplied to inflate(), for - example, a 16K input buffer and a 64K output buffer, more than 95% of the - inflate execution time is spent in this routine. - - Entry assumptions: - - state->mode == LEN - strm->avail_in >= 6 - strm->avail_out >= 258 - start >= strm->avail_out - state->bits < 8 - strm->next_out[0..strm->avail_out] does not overlap with - strm->next_in[0..strm->avail_in] - strm->state->window is allocated with an additional - CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize - - On return, state->mode is one of: - - LEN -- ran out of enough output space or enough available input - TYPE -- reached end of block code, inflate() to interpret next block - BAD -- error in block data - - Notes: - - - The maximum input bits used by a length/distance pair is 15 bits for the - length code, 5 bits for the length extra, 15 bits for the distance code, - and 13 bits for the distance extra. This totals 48 bits, or six bytes. - Therefore if strm->avail_in >= 6, then there is enough input to avoid - checking for available input while decoding. - - - The maximum bytes that a single length/distance pair can output is 258 - bytes, which is the maximum length that can be coded. inflate_fast() - requires strm->avail_out >= 258 for each loop to avoid checking for - output space. - */ -void ZLIB_INTERNAL inflate_fast_chunky(strm, start) -z_streamp strm; -unsigned start; /* inflate()'s starting value for strm->avail_out */ -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *in; /* local strm->next_in */ - z_const unsigned char FAR *last; /* have enough input while in < last */ - unsigned char FAR *out; /* local strm->next_out */ - unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ - unsigned char FAR *end; /* while out < end, enough space available */ - unsigned char FAR *limit; /* safety limit for chunky copies */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - code const FAR *lcode; /* local strm->lencode */ - code const FAR *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char FAR *from; /* where to copy match from */ - - /* copy state to local variables */ - state = (struct inflate_state FAR *)strm->state; - in = strm->next_in; - last = in + (strm->avail_in - 5); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - 257); - limit = out + strm->avail_out; -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; - - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = lcode[hold & lmask]; - dolen: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? - "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); - } - else if (op & 16) { /* length base */ - len = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - Tracevv((stderr, "inflate: length %u\n", len)); - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = dcode[hold & dmask]; - dodist: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - hold >>= op; - bits -= op; - Tracevv((stderr, "inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - strm->msg = - (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext >= op) { /* contiguous in window */ - from += wnext - op; - } - else { /* wrap around window */ - op -= wnext; - from += wsize - op; - if (op < len) { /* some from end of window */ - len -= op; - out = chunkcopy_safe(out, from, op, limit); - from = window; /* more from start of window */ - op = wnext; - /* This (rare) case can create a situation where - the first chunkcopy below must be checked. - */ - } - } - if (op < len) { /* still need some from output */ - out = chunkcopy_safe(out, from, op, limit); - len -= op; - /* When dist is small the amount of data that can be - copied from the window is also small, and progress - towards the dangerous end of the output buffer is - also small. This means that for trivial memsets and - for chunkunroll_relaxed() a safety check is - unnecessary. However, these conditions may not be - entered at all, and in that case it's possible that - the main copy is near the end. - */ - out = chunkunroll_relaxed(out, &dist, &len); - out = chunkcopy_safe(out, out - dist, len, limit); - } else { - /* from points to window, so there is no risk of - overlapping pointers requiring memset-like behaviour - */ - out = chunkcopy_safe(out, from, len, limit); - } - } - else { - /* Whole reference is in range of current output. No - range checks are necessary because we start with room - for at least 258 bytes of output, so unroll and roundoff - operations can write beyond `out+len` so long as they - stay within 258 bytes of `out`. - */ - out = chunkcopy_lapped_relaxed(out, dist, len); - } - } - else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; - goto dodist; - } - else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } - else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; - goto dolen; - } - else if (op & 32) { /* end-of-block */ - Tracevv((stderr, "inflate: end of block\n")); - state->mode = TYPE; - break; - } - else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; - - /* update state and return */ - strm->next_in = in; - strm->next_out = out; - strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); - strm->avail_out = (unsigned)(out < end ? - 257 + (end - out) : 257 - (out - end)); - state->hold = hold; - state->bits = bits; - return; -} - -/* - inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - - Using bit fields for code structure - - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and wnext == 0 - - Special case for distance > 1 copies to do overlapped load and store copy - - Explicit branch predictions (based on measured branch probabilities) - - Deferring match copy and interspersed it with decoding subsequent codes - - Swapping literal/length else - - Swapping window/direct else - - Larger unrolled copy loops (three is about right) - - Moving len -= 3 statement into middle of loop - */ - -#endif /* !ASMINF */ diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h b/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h deleted file mode 100644 index 7f033f2c4ae..00000000000 --- a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h +++ /dev/null @@ -1,12 +0,0 @@ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003, 2010 Mark Adler - * Copyright (C) 2017 ARM, Inc. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -void ZLIB_INTERNAL inflate_fast_chunky OF((z_streamp strm, unsigned start)); diff --git a/chromium/third_party/zlib/contrib/optimizations/inflate.c b/chromium/third_party/zlib/contrib/optimizations/inflate.c index 152f1742f3e..d6c5614c879 100644 --- a/chromium/third_party/zlib/contrib/optimizations/inflate.c +++ b/chromium/third_party/zlib/contrib/optimizations/inflate.c @@ -83,8 +83,9 @@ #include "zutil.h" #include "inftrees.h" #include "inflate.h" -#include "contrib/optimizations/inffast_chunky.h" +#include "contrib/optimizations/inffast_chunk.h" #include "contrib/optimizations/chunkcopy.h" +#include "x86.h" #ifdef MAKEFIXED # ifndef BUILDFIXED @@ -202,6 +203,8 @@ int stream_size; int ret; struct inflate_state FAR *state; + x86_check_features(); + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || stream_size != (int)(sizeof(z_stream))) return Z_VERSION_ERROR; @@ -419,7 +422,7 @@ unsigned copy; and is subsequently either overwritten or left deliberately undefined at the end of decode; so there's really no point. */ - memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE); + zmemzero(state->window + wsize, CHUNKCOPY_CHUNK_SIZE); #endif } @@ -1056,7 +1059,7 @@ int flush; case LEN: if (have >= 6 && left >= 258) { RESTORE(); - inflate_fast_chunky(strm, out); + inflate_fast_chunk_(strm, out); LOAD(); if (state->mode == TYPE) state->back = -1; diff --git a/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn b/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn new file mode 100644 index 00000000000..c46b6644007 --- /dev/null +++ b/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn @@ -0,0 +1,45 @@ +# Copyright 2017 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//testing/libfuzzer/fuzzer_test.gni") + +# root BUILD depends on this target. Needed for package discovery +group("fuzzers") { +} + +fuzzer_test("zlib_uncompress_fuzzer") { + sources = [ + "uncompress_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_inflate_fuzzer") { + sources = [ + "inflate_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_deflate_set_dictionary_fuzzer") { + sources = [ + "deflate_set_dictionary_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_deflate_fuzzer") { + sources = [ + "deflate_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} diff --git a/chromium/third_party/zlib/crc32.c b/chromium/third_party/zlib/crc32.c index 9162429cc7b..b4ad1e105d6 100644 --- a/chromium/third_party/zlib/crc32.c +++ b/chromium/third_party/zlib/crc32.c @@ -30,6 +30,7 @@ #include "deflate.h" #include "x86.h" +#include "crc32_simd.h" #include "zutil.h" /* for STDC and FAR definitions */ /* Definitions for doing the crc four data bytes at a time. */ @@ -241,6 +242,32 @@ unsigned long ZEXPORT crc32(crc, buf, len) const unsigned char FAR *buf; uInt len; { +#if defined(CRC32_SIMD_SSE42_PCLMUL) + /* + * Use x86 sse4.2+pclmul SIMD to compute the crc32. Since this + * routine can be freely used, check the CPU features here, to + * stop TSAN complaining about thread data races accessing the + * x86_cpu_enable_simd feature variable below. + */ + if (buf == Z_NULL) { + if (!len) /* Assume user is calling crc32(0, NULL, 0); */ + x86_check_features(); + return 0UL; + } + + if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) { + /* crc32 16-byte chunks */ + uInt chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK; + crc = ~crc32_sse42_simd_(buf, chunk_size, ~(uint32_t)crc); + /* check remaining data */ + len -= chunk_size; + if (!len) + return crc; + /* Fall into the default crc32 for the remaining data. */ + buf += chunk_size; + } +#endif /* CRC32_SIMD_SSE42_PCLMUL */ + return crc32_z(crc, buf, len); } diff --git a/chromium/third_party/zlib/crc32_simd.c b/chromium/third_party/zlib/crc32_simd.c new file mode 100644 index 00000000000..c2d42556e1a --- /dev/null +++ b/chromium/third_party/zlib/crc32_simd.c @@ -0,0 +1,157 @@ +/* crc32_simd.c + * + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include "crc32_simd.h" + +#if defined(CRC32_SIMD_SSE42_PCLMUL) + +/* + * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer + * length must be at least 64, and a multiple of 16. Based on: + * + * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" + * V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 + */ + +#include +#include +#include + +uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */ + const unsigned char *buf, + z_size_t len, + uint32_t crc) +{ + /* + * Definitions of the bit-reflected domain constants k1,k2,k3, etc and + * the CRC32+Barrett polynomials given at the end of the paper. + */ + static const uint64_t zalign(16) k1k2[] = { 0x0154442bd4, 0x01c6e41596 }; + static const uint64_t zalign(16) k3k4[] = { 0x01751997d0, 0x00ccaa009e }; + static const uint64_t zalign(16) k5k0[] = { 0x0163cd6124, 0x0000000000 }; + static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 }; + + __m128i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8; + + /* + * There's at least one block of 64. + */ + x1 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + x2 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + x3 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + x4 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, _mm_cvtsi32_si128(crc)); + + x0 = _mm_load_si128((__m128i *)k1k2); + + buf += 64; + len -= 64; + + /* + * Parallel fold blocks of 64, if any. + */ + while (len >= 64) + { + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x6 = _mm_clmulepi64_si128(x2, x0, 0x00); + x7 = _mm_clmulepi64_si128(x3, x0, 0x00); + x8 = _mm_clmulepi64_si128(x4, x0, 0x00); + + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x2 = _mm_clmulepi64_si128(x2, x0, 0x11); + x3 = _mm_clmulepi64_si128(x3, x0, 0x11); + x4 = _mm_clmulepi64_si128(x4, x0, 0x11); + + y5 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + y6 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + y7 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + y8 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_xor_si128(x2, x6); + x3 = _mm_xor_si128(x3, x7); + x4 = _mm_xor_si128(x4, x8); + + x1 = _mm_xor_si128(x1, y5); + x2 = _mm_xor_si128(x2, y6); + x3 = _mm_xor_si128(x3, y7); + x4 = _mm_xor_si128(x4, y8); + + buf += 64; + len -= 64; + } + + /* + * Fold into 128-bits. + */ + x0 = _mm_load_si128((__m128i *)k3k4); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x3); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x4); + x1 = _mm_xor_si128(x1, x5); + + /* + * Single fold blocks of 16, if any. + */ + while (len >= 16) + { + x2 = _mm_loadu_si128((__m128i *)buf); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + buf += 16; + len -= 16; + } + + /* + * Fold 128-bits to 64-bits. + */ + x2 = _mm_clmulepi64_si128(x1, x0, 0x10); + x3 = _mm_set_epi32(0, ~0, 0, ~0); + x1 = _mm_srli_si128(x1, 8); + x1 = _mm_xor_si128(x1, x2); + + x0 = _mm_loadl_epi64((__m128i*)k5k0); + + x2 = _mm_srli_si128(x1, 4); + x1 = _mm_and_si128(x1, x3); + x1 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Barret reduce to 32-bits. + */ + x0 = _mm_load_si128((__m128i*)poly); + + x2 = _mm_and_si128(x1, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x10); + x2 = _mm_and_si128(x2, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Return the crc32. + */ + return _mm_extract_epi32(x1, 1); +} + +#endif /* CRC32_SIMD_SSE42_PCLMUL */ diff --git a/chromium/third_party/zlib/crc32_simd.h b/chromium/third_party/zlib/crc32_simd.h new file mode 100644 index 00000000000..4e6f3268e03 --- /dev/null +++ b/chromium/third_party/zlib/crc32_simd.h @@ -0,0 +1,27 @@ +/* crc32_simd.h + * + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include + +#include "zconf.h" +#include "zutil.h" + +/* + * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer + * length must be at least 64, and a multiple of 16. + */ +uint32_t ZLIB_INTERNAL crc32_sse42_simd_( + const unsigned char *buf, + z_size_t len, + uint32_t crc); + +/* + * crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c + * for computing the crc32 of an arbitrary length buffer. + */ +#define Z_CRC32_SSE42_MINIMUM_LENGTH 64 +#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15 diff --git a/chromium/third_party/zlib/deflate.c b/chromium/third_party/zlib/deflate.c index aa0c9c67a6d..6fe9c7e09de 100644 --- a/chromium/third_party/zlib/deflate.c +++ b/chromium/third_party/zlib/deflate.c @@ -87,7 +87,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); -unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +unsigned ZLIB_INTERNAL deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); #ifdef ASMV # pragma message("Assembler code may have bugs -- use at your own risk") void match_init OF((void)); /* asm code initialization */ @@ -429,7 +429,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) strm->adler = adler32(strm->adler, dictionary, dictLength); - s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + s->wrap = 0; /* avoid computing Adler-32 in deflate_read_buf */ /* if dictionary would fill window, just replace the history */ if (dictLength >= s->w_size) { @@ -756,7 +756,7 @@ local void putShortMSB (s, b) * Flush as much pending output as possible. All deflate() output, except for * some deflate_stored() output, goes through this function so some * applications may wish to modify it to avoid allocating a large - * strm->next_out buffer and copying into it. (See also read_buf()). + * strm->next_out buffer and copying into it. (See also deflate_read_buf()). */ local void flush_pending(strm) z_streamp strm; @@ -1193,7 +1193,7 @@ int ZEXPORT deflateCopy (dest, source) * allocating a large strm->next_in buffer and copying from it. * (See also flush_pending()). */ -ZLIB_INTERNAL unsigned read_buf(strm, buf, size) +ZLIB_INTERNAL unsigned deflate_read_buf(strm, buf, size) z_streamp strm; Bytef *buf; unsigned size; @@ -1576,7 +1576,7 @@ local void fill_window_c(s) */ Assert(more >= 2, "more < 2"); - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + n = deflate_read_buf(s->strm, s->window + s->strstart + s->lookahead, more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ @@ -1765,7 +1765,7 @@ local block_state deflate_stored(s, flush) * the check value. */ if (len) { - read_buf(s->strm, s->strm->next_out, len); + deflate_read_buf(s->strm, s->strm->next_out, len); s->strm->next_out += len; s->strm->avail_out -= len; s->strm->total_out += len; @@ -1828,7 +1828,7 @@ local block_state deflate_stored(s, flush) if (have > s->strm->avail_in) have = s->strm->avail_in; if (have) { - read_buf(s->strm, s->window + s->strstart, have); + deflate_read_buf(s->strm, s->window + s->strstart, have); s->strstart += have; } if (s->high_water < s->strstart) diff --git a/chromium/third_party/zlib/fill_window_sse.c b/chromium/third_party/zlib/fill_window_sse.c index 949ccce1ba9..ed1e5d1d673 100644 --- a/chromium/third_party/zlib/fill_window_sse.c +++ b/chromium/third_party/zlib/fill_window_sse.c @@ -26,7 +26,7 @@ }\ }\ -extern int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +extern int deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); void fill_window_sse(deflate_state *s) { @@ -117,7 +117,9 @@ void fill_window_sse(deflate_state *s) */ Assert(more >= 2, "more < 2"); - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + n = deflate_read_buf(s->strm, + s->window + s->strstart + s->lookahead, + more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ diff --git a/chromium/third_party/zlib/names.h b/chromium/third_party/zlib/names.h index cd98ec9940b..6252b02c302 100644 --- a/chromium/third_party/zlib/names.h +++ b/chromium/third_party/zlib/names.h @@ -160,7 +160,7 @@ #define crc_fold_init Cr_z_crc_fold_init #define crc_reset Cr_z_crc_reset #define fill_window_sse Cr_z_fill_window_sse -#define read_buf Cr_z_read_buf +#define deflate_read_buf Cr_z_deflate_read_buf #define x86_check_features Cr_z_x86_check_features /* FIXME: x86_cpu_enable_ssse3 wasn't part of the simd.patch */ #define x86_cpu_enable_ssse3 Cr_z_x86_cpu_enable_ssse3 @@ -171,4 +171,14 @@ #define adler32_simd_ Cr_z_adler32_simd_ #endif +#if defined(INFLATE_CHUNK_SIMD_SSE2) || defined(INFLATE_CHUNK_SIMD_NEON) +/* Symbols added by contrib/optimizations/inffast_chunk */ +#define inflate_fast_chunk_ Cr_z_inflate_fast_chunk_ +#endif + +#if defined(CRC32_SIMD_SSE42_PCLMUL) +/* Symbols added by crc32_simd.c */ +#define crc32_sse42_simd_ Cr_z_crc32_sse42_simd_ +#endif + #endif /* THIRD_PARTY_ZLIB_NAMES_H_ */ diff --git a/chromium/third_party/zlib/patches/0001-simd.patch b/chromium/third_party/zlib/patches/0001-simd.patch index 75828d26ea5..1fbf1956f66 100644 --- a/chromium/third_party/zlib/patches/0001-simd.patch +++ b/chromium/third_party/zlib/patches/0001-simd.patch @@ -559,7 +559,7 @@ index 1ec761448de9..aa0c9c67a6dc 100644 local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -+unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); ++unsigned ZLIB_INTERNAL deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); #ifdef ASMV # pragma message("Assembler code may have bugs -- use at your own risk") void match_init OF((void)); /* asm code initialization */ @@ -703,7 +703,7 @@ index 1ec761448de9..aa0c9c67a6dc 100644 * (See also flush_pending()). */ -local unsigned read_buf(strm, buf, size) -+ZLIB_INTERNAL unsigned read_buf(strm, buf, size) ++ZLIB_INTERNAL unsigned deflate_read_buf(strm, buf, size) z_streamp strm; Bytef *buf; unsigned size; @@ -859,7 +859,7 @@ new file mode 100644 index 000000000000..949ccce1ba9c --- /dev/null +++ b/fill_window_sse.c -@@ -0,0 +1,175 @@ +@@ -0,0 +1,177 @@ +/* + * Fill Window with SSE2-optimized hash shifting + * @@ -888,7 +888,7 @@ index 000000000000..949ccce1ba9c + }\ + }\ + -+extern int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); ++extern int deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); + +void fill_window_sse(deflate_state *s) +{ @@ -979,7 +979,9 @@ index 000000000000..949ccce1ba9c + */ + Assert(more >= 2, "more < 2"); + -+ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); ++ n = deflate_read_buf(s->strm, ++ s->window + s->strstart + s->lookahead, ++ more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ @@ -1051,7 +1053,7 @@ index f18df5684dc5..3436baa4eb57 100644 +#define crc_fold_init Cr_z_crc_fold_init +#define crc_reset Cr_z_crc_reset +#define fill_window_sse Cr_z_fill_window_sse -+#define read_buf Cr_z_read_buf ++#define deflate_read_buf Cr_z_deflate_read_buf +#define x86_check_features Cr_z_x86_check_features +#define x86_cpu_enable_simd Cr_z_x86_cpu_enable_simd + diff --git a/chromium/third_party/zlib/patches/0005-adler32-simd.patch b/chromium/third_party/zlib/patches/0005-adler32-simd.patch index 7034389eb38..9242b1d76de 100644 --- a/chromium/third_party/zlib/patches/0005-adler32-simd.patch +++ b/chromium/third_party/zlib/patches/0005-adler32-simd.patch @@ -603,7 +603,7 @@ index 3436baa4eb57..cd98ec9940b6 100644 +++ b/third_party/zlib/names.h @@ -162,6 +162,13 @@ #define fill_window_sse Cr_z_fill_window_sse - #define read_buf Cr_z_read_buf + #define deflate_read_buf Cr_z_deflate_read_buf #define x86_check_features Cr_z_x86_check_features +/* FIXME: x86_cpu_enable_ssse3 wasn't part of the simd.patch */ +#define x86_cpu_enable_ssse3 Cr_z_x86_cpu_enable_ssse3 -- cgit v1.2.1