diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-01-31 16:33:43 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-02-06 16:33:22 +0000 |
commit | da51f56cc21233c2d30f0fe0d171727c3102b2e0 (patch) | |
tree | 4e579ab70ce4b19bee7984237f3ce05a96d59d83 /chromium/third_party/zlib | |
parent | c8c2d1901aec01e934adf561a9fdf0cc776cdef8 (diff) | |
download | qtwebengine-chromium-da51f56cc21233c2d30f0fe0d171727c3102b2e0.tar.gz |
BASELINE: Update Chromium to 65.0.3525.40
Also imports missing submodules
Change-Id: I36901b7c6a325cda3d2c10cedb2186c25af3b79b
Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
Diffstat (limited to 'chromium/third_party/zlib')
16 files changed, 682 insertions, 288 deletions
diff --git a/chromium/third_party/zlib/BUILD.gn b/chromium/third_party/zlib/BUILD.gn index 2f19a8fe5f6..e87d1293139 100644 --- a/chromium/third_party/zlib/BUILD.gn +++ b/chromium/third_party/zlib/BUILD.gn @@ -2,19 +2,12 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -import("//testing/libfuzzer/fuzzer_test.gni") - if (current_cpu == "arm" || current_cpu == "arm64") { import("//build/config/arm.gni") } config("zlib_config") { include_dirs = [ "." ] - if (current_cpu == "arm" || current_cpu == "arm64") { - if (arm_use_neon) { - include_dirs += [ "contrib/optimizations/arm" ] - } - } } config("zlib_adler32_simd_config") { @@ -58,18 +51,97 @@ source_set("zlib_adler32_simd") { } } + public_configs = [ ":zlib_adler32_simd_config" ] +} + +config("zlib_inflate_chunk_simd_config") { + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + defines = [ "INFLATE_CHUNK_SIMD_SSE2" ] + } + + if (current_cpu == "arm" || current_cpu == "arm64") { + if (arm_use_neon) { + defines = [ "INFLATE_CHUNK_SIMD_NEON" ] + } + } +} + +source_set("zlib_inflate_chunk_simd") { + visibility = [ ":*" ] + + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + include_dirs = [ "." ] + + sources = [ + "contrib/optimizations/chunkcopy.h", + "contrib/optimizations/inffast_chunk.c", + "contrib/optimizations/inffast_chunk.h", + "contrib/optimizations/inflate.c", + ] + } + + if (current_cpu == "arm" || current_cpu == "arm64") { + if (arm_use_neon) { + include_dirs = [ "." ] + + sources = [ + "contrib/optimizations/chunkcopy.h", + "contrib/optimizations/inffast_chunk.c", + "contrib/optimizations/inffast_chunk.h", + "contrib/optimizations/inflate.c", + ] + + # TODO(772870) back off from -O3 while investigating Android + # One perf bot PNG decode regression. + # if (!is_debug) { + # # Use optimize_speed (-O3) to output the _smallest_ code. + # configs -= [ "//build/config/compiler:default_optimization" ] + # configs += [ "//build/config/compiler:optimize_speed" ] + # } + } + } + configs -= [ "//build/config/compiler:chromium_code" ] configs += [ "//build/config/compiler:no_chromium_code" ] - public_configs = [ ":zlib_adler32_simd_config" ] + public_configs = [ ":zlib_inflate_chunk_simd_config" ] +} + +config("zlib_crc32_simd_config") { + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + defines = [ "CRC32_SIMD_SSE42_PCLMUL" ] + } +} + +source_set("zlib_crc32_simd") { + visibility = [ ":*" ] + + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { + sources = [ + "crc32_simd.c", + "crc32_simd.h", + ] + + if (!is_win || is_clang) { + cflags = [ + "-msse4.2", + "-mpclmul", + ] + } + } + + public_configs = [ ":zlib_crc32_simd_config" ] } static_library("zlib_x86_simd") { + visibility = [ ":*" ] + if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { sources = [ "crc_folding.c", "fill_window_sse.c", ] + if (!is_win || is_clang) { cflags = [ "-msse4.2", @@ -129,31 +201,25 @@ static_library("zlib") { "zutil.h", ] - if (current_cpu == "arm" || current_cpu == "arm64") { - if (arm_use_neon) { - sources -= [ "inflate.c" ] - sources += [ - "contrib/optimizations/arm/chunkcopy_arm.h", - "contrib/optimizations/chunkcopy.h", - "contrib/optimizations/inffast_chunky.c", - "contrib/optimizations/inffast_chunky.h", - "contrib/optimizations/inflate.c", - ] - } - } - defines = [] deps = [] if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) { - sources += [ "x86.c" ] + deps += [ ":zlib_crc32_simd" ] deps += [ ":zlib_adler32_simd" ] + sources += [ "x86.c" ] + + deps += [ ":zlib_inflate_chunk_simd" ] + sources -= [ "inflate.c" ] } if (current_cpu == "arm" || current_cpu == "arm64") { if (arm_use_neon) { deps += [ ":zlib_adler32_simd" ] + + deps += [ ":zlib_inflate_chunk_simd" ] + sources -= [ "inflate.c" ] } } @@ -170,42 +236,6 @@ static_library("zlib") { deps += [ ":zlib_x86_simd" ] } -fuzzer_test("zlib_uncompress_fuzzer") { - sources = [ - "contrib/tests/fuzzers/uncompress_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_inflate_fuzzer") { - sources = [ - "contrib/tests/fuzzers/inflate_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_deflate_set_dictionary_fuzzer") { - sources = [ - "contrib/tests/fuzzers/deflate_set_dictionary_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - -fuzzer_test("zlib_deflate_fuzzer") { - sources = [ - "contrib/tests/fuzzers/deflate_fuzzer.cc", - ] - deps = [ - ":zlib", - ] -} - config("minizip_warnings") { visibility = [ ":*" ] if (is_clang) { diff --git a/chromium/third_party/zlib/adler32_simd.c b/chromium/third_party/zlib/adler32_simd.c index d73f97e52cf..1354915cc09 100644 --- a/chromium/third_party/zlib/adler32_simd.c +++ b/chromium/third_party/zlib/adler32_simd.c @@ -76,7 +76,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */ { unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */ if (n > blocks) - n = blocks; + n = (unsigned) blocks; blocks -= n; const __m128i tap1 = @@ -237,7 +237,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* NEON */ { unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */ if (n > blocks) - n = blocks; + n = (unsigned) blocks; blocks -= n; /* diff --git a/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h b/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h deleted file mode 100644 index 41474c8aa87..00000000000 --- a/chromium/third_party/zlib/contrib/optimizations/arm/chunkcopy_arm.h +++ /dev/null @@ -1,122 +0,0 @@ -/* chunkcopy_arm.h -- fast copies and sets - * Copyright (C) 2017 ARM, Inc. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifndef CHUNKCOPY_ARM_H -#define CHUNKCOPY_ARM_H - -#include <arm_neon.h> -#include "zutil.h" - -#if __STDC_VERSION__ >= 199901L -#define Z_RESTRICT restrict -#else -#define Z_RESTRICT -#endif - -/* A port to a new arch only requires to implement 2 functions - (vld_dup and chunkset_core) and the chunk type. -*/ - -typedef uint8x16_t chunkcopy_chunk_t; -#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t) - -/* Forward declarations. */ -static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, - unsigned FAR* dist, - unsigned FAR* len); - -static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, - const unsigned char FAR* from, - unsigned len); - -/* Architecture specific code starts here. */ -static inline uint8x16_t chunkset_vld1q_dup_u8x8( - const unsigned char FAR* Z_RESTRICT from) { -#if defined(__clang__) || defined(__aarch64__) - return vreinterpretq_u8_u64(vld1q_dup_u64((void*)from)); -#else - /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a - * void pointer, so here's an alternate implementation. - */ - uint8x8_t h = vld1_u8(from); - return vcombine_u8(h, h); -#endif -} - -/* - Perform an overlapping copy which behaves as a memset() operation, but - supporting periods other than one, and assume that length is non-zero and - that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output - even if the length is shorter than this. - TODO(cavalcantii): maybe rename vreinterpretq and chunkset_vld to make it - generic and move this code to chunkcopy.h (plus we - won't need the forward declarations). - */ -static inline unsigned char FAR* chunkset_core(unsigned char FAR* out, - unsigned period, - unsigned len) { - uint8x16_t f; - int bump = ((len - 1) % sizeof(f)) + 1; - - switch (period) { - case 1: - f = vld1q_dup_u8(out - 1); - vst1q_u8(out, f); - out += bump; - len -= bump; - while (len > 0) { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } - return out; - case 2: - f = vreinterpretq_u8_u16(vld1q_dup_u16((void*)(out - 2))); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = vreinterpretq_u8_u16(vld1q_dup_u16((void*)(out - 2))); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - case 4: - f = vreinterpretq_u8_u32(vld1q_dup_u32((void*)(out - 4))); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = vreinterpretq_u8_u32(vld1q_dup_u32((void*)(out - 4))); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - case 8: - f = chunkset_vld1q_dup_u8x8(out - 8); - vst1q_u8(out, f); - out += bump; - len -= bump; - if (len > 0) { - f = chunkset_vld1q_dup_u8x8(out - 8); - do { - vst1q_u8(out, f); - out += sizeof(f); - len -= sizeof(f); - } while (len > 0); - } - return out; - } - out = chunkunroll_relaxed(out, &period, &len); - return chunkcopy_core(out, out - period, len); -} - -#endif /* CHUNKCOPY_ARM_H */ diff --git a/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h b/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h index 20806434976..fe38be67742 100644 --- a/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h +++ b/chromium/third_party/zlib/contrib/optimizations/chunkcopy.h @@ -1,50 +1,90 @@ -/* chunkcopy.h -- fast copies and sets +/* chunkcopy.h -- fast chunk copy and set operations * Copyright (C) 2017 ARM, Inc. + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. * For conditions of distribution and use, see copyright notice in zlib.h */ #ifndef CHUNKCOPY_H #define CHUNKCOPY_H -// TODO(cavalcantii): add the Intel code next. -#include "chunkcopy_arm.h" +#include <stdint.h> +#include "zutil.h" + +#define Z_STATIC_ASSERT(name, assert) typedef char name[(assert) ? 1 : -1] + +#if __STDC_VERSION__ >= 199901L +#define Z_RESTRICT restrict +#else +#define Z_RESTRICT +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__llvm__) +#define Z_BUILTIN_MEMCPY __builtin_memcpy +#else +#define Z_BUILTIN_MEMCPY zmemcpy +#endif + +#if defined(INFLATE_CHUNK_SIMD_NEON) +#include <arm_neon.h> +typedef uint8x16_t z_vec128i_t; +#elif defined(INFLATE_CHUNK_SIMD_SSE2) +#include <emmintrin.h> +typedef __m128i z_vec128i_t; +#else +#error chunkcopy.h inflate chunk SIMD is not defined for your build target +#endif /* - Ask the compiler to perform a wide, unaligned load with an machine - instruction appropriate for the chunkcopy_chunk_t type. + * chunk copy type: the z_vec128i_t type size should be exactly 128-bits + * and equal to CHUNKCOPY_CHUNK_SIZE. */ -static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR* s) { - chunkcopy_chunk_t c; - __builtin_memcpy(&c, s, sizeof(c)); - return c; -} +#define CHUNKCOPY_CHUNK_SIZE sizeof(z_vec128i_t) + +Z_STATIC_ASSERT(vector_128_bits_wide, + CHUNKCOPY_CHUNK_SIZE == sizeof(int8_t) * 16); /* - Ask the compiler to perform a wide, unaligned store with an machine - instruction appropriate for the chunkcopy_chunk_t type. + * Ask the compiler to perform a wide, unaligned load with a machine + * instruction appropriate for the z_vec128i_t type. */ -static inline void storechunk(unsigned char FAR* d, chunkcopy_chunk_t c) { - __builtin_memcpy(d, &c, sizeof(c)); +static inline z_vec128i_t loadchunk( + const unsigned char FAR* s) { + z_vec128i_t v; + Z_BUILTIN_MEMCPY(&v, s, sizeof(v)); + return v; } /* - Perform a memcpy-like operation, but assume that length is non-zero and that - it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if - the length is shorter than this. - - It also guarantees that it will properly unroll the data if the distance - between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on - in chunkcopy_relaxed(). + * Ask the compiler to perform a wide, unaligned store with a machine + * instruction appropriate for the z_vec128i_t type. + */ +static inline void storechunk( + unsigned char FAR* d, + const z_vec128i_t v) { + Z_BUILTIN_MEMCPY(d, &v, sizeof(v)); +} - Aside from better memory bus utilisation, this means that short copies - (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop - without iteration, which will hopefully make the branch prediction more - reliable. +/* + * Perform a memcpy-like operation, assuming that length is non-zero and that + * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if + * the length is shorter than this. + * + * It also guarantees that it will properly unroll the data if the distance + * between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on + * in chunkcopy_relaxed(). + * + * Aside from better memory bus utilisation, this means that short copies + * (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop + * without iteration, which will hopefully make the branch prediction more + * reliable. */ -static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, - const unsigned char FAR* from, - unsigned len) { - int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; +static inline unsigned char FAR* chunkcopy_core( + unsigned char FAR* out, + const unsigned char FAR* from, + unsigned len) { + const int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; storechunk(out, loadchunk(from)); out += bump; from += bump; @@ -58,12 +98,12 @@ static inline unsigned char FAR* chunkcopy_core(unsigned char FAR* out, } /* - Like chunkcopy_core, but avoid writing beyond of legal output. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Like chunkcopy_core(), but avoid writing beyond of legal output. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_core_safe( unsigned char FAR* out, @@ -71,20 +111,20 @@ static inline unsigned char FAR* chunkcopy_core_safe( unsigned len, unsigned char FAR* limit) { Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if (limit - out < CHUNKCOPY_CHUNK_SIZE) { + if ((limit - out) < (ptrdiff_t)CHUNKCOPY_CHUNK_SIZE) { const unsigned char FAR* Z_RESTRICT rfrom = from; if (len & 8) { - __builtin_memcpy(out, rfrom, 8); + Z_BUILTIN_MEMCPY(out, rfrom, 8); out += 8; rfrom += 8; } if (len & 4) { - __builtin_memcpy(out, rfrom, 4); + Z_BUILTIN_MEMCPY(out, rfrom, 4); out += 4; rfrom += 4; } if (len & 2) { - __builtin_memcpy(out, rfrom, 2); + Z_BUILTIN_MEMCPY(out, rfrom, 2); out += 2; rfrom += 2; } @@ -97,18 +137,19 @@ static inline unsigned char FAR* chunkcopy_core_safe( } /* - Perform short copies until distance can be rewritten as being at least - CHUNKCOPY_CHUNK_SIZE. - - This assumes that it's OK to overwrite at least the first - 2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than - this. This assumption holds within inflate_fast() which starts every - iteration with at least 258 bytes of output space available (258 being the - maximum length output from a single token; see inffast.c). + * Perform short copies until distance can be rewritten as being at least + * CHUNKCOPY_CHUNK_SIZE. + * + * Assumes it's OK to overwrite at least the first 2*CHUNKCOPY_CHUNK_SIZE + * bytes of output even if the copy is shorter than this. This assumption + * holds within zlib inflate_fast(), which starts every iteration with at + * least 258 bytes of output space available (258 being the maximum length + * output from a single token; see inffast.c). */ -static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, - unsigned FAR* dist, - unsigned FAR* len) { +static inline unsigned char FAR* chunkunroll_relaxed( + unsigned char FAR* out, + unsigned FAR* dist, + unsigned FAR* len) { const unsigned char FAR* from = out - *dist; while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { storechunk(out, loadchunk(from)); @@ -119,15 +160,180 @@ static inline unsigned char FAR* chunkunroll_relaxed(unsigned char FAR* out, return out; } +#if defined(INFLATE_CHUNK_SIMD_NEON) +/* + * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it in + * every 64-bit component of the 128-bit result (64-bit int splat). + */ +static inline z_vec128i_t v_load64_dup(const void* src) { + return vcombine_u8(vld1_u8(src), vld1_u8(src)); +} + +/* + * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it in + * every 32-bit component of the 128-bit result (32-bit int splat). + */ +static inline z_vec128i_t v_load32_dup(const void* src) { + int32_t i32; + Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); + return vreinterpretq_u8_s32(vdupq_n_s32(i32)); +} + +/* + * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it in + * every 16-bit component of the 128-bit result (16-bit int splat). + */ +static inline z_vec128i_t v_load16_dup(const void* src) { + int16_t i16; + Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); + return vreinterpretq_u8_s16(vdupq_n_s16(i16)); +} + +/* + * v_load8_dup(): load the 8-bit int *src and duplicate it in every 8-bit + * component of the 128-bit result (8-bit int splat). + */ +static inline z_vec128i_t v_load8_dup(const void* src) { + return vld1q_dup_u8((const uint8_t*)src); +} + +/* + * v_store_128(): store the 128-bit vec in a memory destination (that might + * not be 16-byte aligned) void* out. + */ +static inline void v_store_128(void* out, const z_vec128i_t vec) { + vst1q_u8(out, vec); +} + +#elif defined(INFLATE_CHUNK_SIMD_SSE2) +/* + * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it in + * every 64-bit component of the 128-bit result (64-bit int splat). + */ +static inline z_vec128i_t v_load64_dup(const void* src) { + int64_t i64; + Z_BUILTIN_MEMCPY(&i64, src, sizeof(i64)); + return _mm_set1_epi64x(i64); +} + +/* + * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it in + * every 32-bit component of the 128-bit result (32-bit int splat). + */ +static inline z_vec128i_t v_load32_dup(const void* src) { + int32_t i32; + Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); + return _mm_set1_epi32(i32); +} + /* - Perform a memcpy-like operation, but assume that length is non-zero and that - it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if - the length is shorter than this. + * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it in + * every 16-bit component of the 128-bit result (16-bit int splat). + */ +static inline z_vec128i_t v_load16_dup(const void* src) { + int16_t i16; + Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); + return _mm_set1_epi16(i16); +} - Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour - of overlapping buffers, regardless of the distance between the pointers. - This is reflected in the `restrict`-qualified pointers, allowing the - compiler to reorder loads and stores. +/* + * v_load8_dup(): load the 8-bit int *src and duplicate it in every 8-bit + * component of the 128-bit result (8-bit int splat). + */ +static inline z_vec128i_t v_load8_dup(const void* src) { + return _mm_set1_epi8(*(const char*)src); +} + +/* + * v_store_128(): store the 128-bit vec in a memory destination (that might + * not be 16-byte aligned) void* out. + */ +static inline void v_store_128(void* out, const z_vec128i_t vec) { + _mm_storeu_si128((__m128i*)out, vec); +} +#endif + +/* + * Perform an overlapping copy which behaves as a memset() operation, but + * supporting periods other than one, and assume that length is non-zero and + * that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output + * even if the length is shorter than this. + */ +static inline unsigned char FAR* chunkset_core( + unsigned char FAR* out, + unsigned period, + unsigned len) { + z_vec128i_t v; + const int bump = ((len - 1) % sizeof(v)) + 1; + + switch (period) { + case 1: + v = v_load8_dup(out - 1); + v_store_128(out, v); + out += bump; + len -= bump; + while (len > 0) { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } + return out; + case 2: + v = v_load16_dup(out - 2); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load16_dup(out - 2); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + case 4: + v = v_load32_dup(out - 4); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load32_dup(out - 4); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + case 8: + v = v_load64_dup(out - 8); + v_store_128(out, v); + out += bump; + len -= bump; + if (len > 0) { + v = v_load64_dup(out - 8); + do { + v_store_128(out, v); + out += sizeof(v); + len -= sizeof(v); + } while (len > 0); + } + return out; + } + out = chunkunroll_relaxed(out, &period, &len); + return chunkcopy_core(out, out - period, len); +} + +/* + * Perform a memcpy-like operation, but assume that length is non-zero and that + * it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if + * the length is shorter than this. + * + * Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour + * of overlapping buffers, regardless of the distance between the pointers. + * This is reflected in the `restrict`-qualified pointers, allowing the + * compiler to re-order loads and stores. */ static inline unsigned char FAR* chunkcopy_relaxed( unsigned char FAR* Z_RESTRICT out, @@ -137,17 +343,17 @@ static inline unsigned char FAR* chunkcopy_relaxed( } /* - Like chunkcopy_relaxed, but avoid writing beyond of legal output. - - Unlike chunkcopy_core_safe() above, no guarantee is made regarding the - behaviour of overlapping buffers, regardless of the distance between the - pointers. This is reflected in the `restrict`-qualified pointers, allowing - the compiler to reorder loads and stores. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Like chunkcopy_relaxed(), but avoid writing beyond of legal output. + * + * Unlike chunkcopy_core_safe() above, no guarantee is made regarding the + * behaviour of overlapping buffers, regardless of the distance between the + * pointers. This is reflected in the `restrict`-qualified pointers, allowing + * the compiler to re-order loads and stores. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_safe( unsigned char FAR* out, @@ -159,14 +365,16 @@ static inline unsigned char FAR* chunkcopy_safe( } /* - Perform chunky copy within the same buffer, where the source and destination - may potentially overlap. - - Assumes that len > 0 on entry, and that it's safe to write at least - CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. + * Perform chunky copy within the same buffer, where the source and destination + * may potentially overlap. + * + * Assumes that len > 0 on entry, and that it's safe to write at least + * CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. */ -static inline unsigned char FAR* -chunkcopy_lapped_relaxed(unsigned char FAR* out, unsigned dist, unsigned len) { +static inline unsigned char FAR* chunkcopy_lapped_relaxed( + unsigned char FAR* out, + unsigned dist, + unsigned len) { if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) { return chunkset_core(out, dist, len); } @@ -174,13 +382,13 @@ chunkcopy_lapped_relaxed(unsigned char FAR* out, unsigned dist, unsigned len) { } /* - Behave like chunkcopy_lapped_relaxed, but avoid writing beyond of legal - output. - - Accepts an additional pointer to the end of safe output. A generic safe - copy would use (out + len), but it's normally the case that the end of the - output buffer is beyond the end of the current copy, and this can still be - exploited. + * Behave like chunkcopy_lapped_relaxed(), but avoid writing beyond of legal + * output. + * + * Accepts an additional pointer to the end of safe output. A generic safe + * copy would use (out + len), but it's normally the case that the end of the + * output buffer is beyond the end of the current copy, and this can still be + * exploited. */ static inline unsigned char FAR* chunkcopy_lapped_safe( unsigned char FAR* out, @@ -188,7 +396,7 @@ static inline unsigned char FAR* chunkcopy_lapped_safe( unsigned len, unsigned char FAR* limit) { Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) { + if ((limit - out) < (ptrdiff_t)(3 * CHUNKCOPY_CHUNK_SIZE)) { /* TODO(cavalcantii): try harder to optimise this */ while (len-- > 0) { *out = *(out - dist); @@ -199,6 +407,8 @@ static inline unsigned char FAR* chunkcopy_lapped_safe( return chunkcopy_lapped_relaxed(out, dist, len); } +#undef Z_STATIC_ASSERT #undef Z_RESTRICT +#undef Z_BUILTIN_MEMCPY #endif /* CHUNKCOPY_H */ diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c index e2bc735451f..4829d0da4dc 100644 --- a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.c +++ b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c @@ -6,7 +6,7 @@ #include "zutil.h" #include "inftrees.h" #include "inflate.h" -#include "contrib/optimizations/inffast_chunky.h" +#include "contrib/optimizations/inffast_chunk.h" #include "contrib/optimizations/chunkcopy.h" #ifdef ASMINF @@ -52,7 +52,7 @@ requires strm->avail_out >= 258 for each loop to avoid checking for output space. */ -void ZLIB_INTERNAL inflate_fast_chunky(strm, start) +void ZLIB_INTERNAL inflate_fast_chunk_(strm, start) z_streamp strm; unsigned start; /* inflate()'s starting value for strm->avail_out */ { diff --git a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h index 7f033f2c4ae..80636e75879 100644 --- a/chromium/third_party/zlib/contrib/optimizations/inffast_chunky.h +++ b/chromium/third_party/zlib/contrib/optimizations/inffast_chunk.h @@ -9,4 +9,7 @@ subject to change. Applications should only use zlib.h. */ -void ZLIB_INTERNAL inflate_fast_chunky OF((z_streamp strm, unsigned start)); +// TODO(cblume): incorporate the patch done on crbug.com/764431 here and +// in related files to define and use INFLATE_FAST_MIN_HAVE/_LEFT etc. + +void ZLIB_INTERNAL inflate_fast_chunk_ OF((z_streamp strm, unsigned start)); diff --git a/chromium/third_party/zlib/contrib/optimizations/inflate.c b/chromium/third_party/zlib/contrib/optimizations/inflate.c index 152f1742f3e..d6c5614c879 100644 --- a/chromium/third_party/zlib/contrib/optimizations/inflate.c +++ b/chromium/third_party/zlib/contrib/optimizations/inflate.c @@ -83,8 +83,9 @@ #include "zutil.h" #include "inftrees.h" #include "inflate.h" -#include "contrib/optimizations/inffast_chunky.h" +#include "contrib/optimizations/inffast_chunk.h" #include "contrib/optimizations/chunkcopy.h" +#include "x86.h" #ifdef MAKEFIXED # ifndef BUILDFIXED @@ -202,6 +203,8 @@ int stream_size; int ret; struct inflate_state FAR *state; + x86_check_features(); + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || stream_size != (int)(sizeof(z_stream))) return Z_VERSION_ERROR; @@ -419,7 +422,7 @@ unsigned copy; and is subsequently either overwritten or left deliberately undefined at the end of decode; so there's really no point. */ - memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE); + zmemzero(state->window + wsize, CHUNKCOPY_CHUNK_SIZE); #endif } @@ -1056,7 +1059,7 @@ int flush; case LEN: if (have >= 6 && left >= 258) { RESTORE(); - inflate_fast_chunky(strm, out); + inflate_fast_chunk_(strm, out); LOAD(); if (state->mode == TYPE) state->back = -1; diff --git a/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn b/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn new file mode 100644 index 00000000000..c46b6644007 --- /dev/null +++ b/chromium/third_party/zlib/contrib/tests/fuzzers/BUILD.gn @@ -0,0 +1,45 @@ +# Copyright 2017 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//testing/libfuzzer/fuzzer_test.gni") + +# root BUILD depends on this target. Needed for package discovery +group("fuzzers") { +} + +fuzzer_test("zlib_uncompress_fuzzer") { + sources = [ + "uncompress_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_inflate_fuzzer") { + sources = [ + "inflate_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_deflate_set_dictionary_fuzzer") { + sources = [ + "deflate_set_dictionary_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} + +fuzzer_test("zlib_deflate_fuzzer") { + sources = [ + "deflate_fuzzer.cc", + ] + deps = [ + "../../../:zlib", + ] +} diff --git a/chromium/third_party/zlib/crc32.c b/chromium/third_party/zlib/crc32.c index 9162429cc7b..b4ad1e105d6 100644 --- a/chromium/third_party/zlib/crc32.c +++ b/chromium/third_party/zlib/crc32.c @@ -30,6 +30,7 @@ #include "deflate.h" #include "x86.h" +#include "crc32_simd.h" #include "zutil.h" /* for STDC and FAR definitions */ /* Definitions for doing the crc four data bytes at a time. */ @@ -241,6 +242,32 @@ unsigned long ZEXPORT crc32(crc, buf, len) const unsigned char FAR *buf; uInt len; { +#if defined(CRC32_SIMD_SSE42_PCLMUL) + /* + * Use x86 sse4.2+pclmul SIMD to compute the crc32. Since this + * routine can be freely used, check the CPU features here, to + * stop TSAN complaining about thread data races accessing the + * x86_cpu_enable_simd feature variable below. + */ + if (buf == Z_NULL) { + if (!len) /* Assume user is calling crc32(0, NULL, 0); */ + x86_check_features(); + return 0UL; + } + + if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) { + /* crc32 16-byte chunks */ + uInt chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK; + crc = ~crc32_sse42_simd_(buf, chunk_size, ~(uint32_t)crc); + /* check remaining data */ + len -= chunk_size; + if (!len) + return crc; + /* Fall into the default crc32 for the remaining data. */ + buf += chunk_size; + } +#endif /* CRC32_SIMD_SSE42_PCLMUL */ + return crc32_z(crc, buf, len); } diff --git a/chromium/third_party/zlib/crc32_simd.c b/chromium/third_party/zlib/crc32_simd.c new file mode 100644 index 00000000000..c2d42556e1a --- /dev/null +++ b/chromium/third_party/zlib/crc32_simd.c @@ -0,0 +1,157 @@ +/* crc32_simd.c + * + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include "crc32_simd.h" + +#if defined(CRC32_SIMD_SSE42_PCLMUL) + +/* + * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer + * length must be at least 64, and a multiple of 16. Based on: + * + * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" + * V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 + */ + +#include <emmintrin.h> +#include <smmintrin.h> +#include <wmmintrin.h> + +uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */ + const unsigned char *buf, + z_size_t len, + uint32_t crc) +{ + /* + * Definitions of the bit-reflected domain constants k1,k2,k3, etc and + * the CRC32+Barrett polynomials given at the end of the paper. + */ + static const uint64_t zalign(16) k1k2[] = { 0x0154442bd4, 0x01c6e41596 }; + static const uint64_t zalign(16) k3k4[] = { 0x01751997d0, 0x00ccaa009e }; + static const uint64_t zalign(16) k5k0[] = { 0x0163cd6124, 0x0000000000 }; + static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 }; + + __m128i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8; + + /* + * There's at least one block of 64. + */ + x1 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + x2 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + x3 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + x4 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, _mm_cvtsi32_si128(crc)); + + x0 = _mm_load_si128((__m128i *)k1k2); + + buf += 64; + len -= 64; + + /* + * Parallel fold blocks of 64, if any. + */ + while (len >= 64) + { + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x6 = _mm_clmulepi64_si128(x2, x0, 0x00); + x7 = _mm_clmulepi64_si128(x3, x0, 0x00); + x8 = _mm_clmulepi64_si128(x4, x0, 0x00); + + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x2 = _mm_clmulepi64_si128(x2, x0, 0x11); + x3 = _mm_clmulepi64_si128(x3, x0, 0x11); + x4 = _mm_clmulepi64_si128(x4, x0, 0x11); + + y5 = _mm_loadu_si128((__m128i *)(buf + 0x00)); + y6 = _mm_loadu_si128((__m128i *)(buf + 0x10)); + y7 = _mm_loadu_si128((__m128i *)(buf + 0x20)); + y8 = _mm_loadu_si128((__m128i *)(buf + 0x30)); + + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_xor_si128(x2, x6); + x3 = _mm_xor_si128(x3, x7); + x4 = _mm_xor_si128(x4, x8); + + x1 = _mm_xor_si128(x1, y5); + x2 = _mm_xor_si128(x2, y6); + x3 = _mm_xor_si128(x3, y7); + x4 = _mm_xor_si128(x4, y8); + + buf += 64; + len -= 64; + } + + /* + * Fold into 128-bits. + */ + x0 = _mm_load_si128((__m128i *)k3k4); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x3); + x1 = _mm_xor_si128(x1, x5); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x4); + x1 = _mm_xor_si128(x1, x5); + + /* + * Single fold blocks of 16, if any. + */ + while (len >= 16) + { + x2 = _mm_loadu_si128((__m128i *)buf); + + x5 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_clmulepi64_si128(x1, x0, 0x11); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_xor_si128(x1, x5); + + buf += 16; + len -= 16; + } + + /* + * Fold 128-bits to 64-bits. + */ + x2 = _mm_clmulepi64_si128(x1, x0, 0x10); + x3 = _mm_set_epi32(0, ~0, 0, ~0); + x1 = _mm_srli_si128(x1, 8); + x1 = _mm_xor_si128(x1, x2); + + x0 = _mm_loadl_epi64((__m128i*)k5k0); + + x2 = _mm_srli_si128(x1, 4); + x1 = _mm_and_si128(x1, x3); + x1 = _mm_clmulepi64_si128(x1, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Barret reduce to 32-bits. + */ + x0 = _mm_load_si128((__m128i*)poly); + + x2 = _mm_and_si128(x1, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x10); + x2 = _mm_and_si128(x2, x3); + x2 = _mm_clmulepi64_si128(x2, x0, 0x00); + x1 = _mm_xor_si128(x1, x2); + + /* + * Return the crc32. + */ + return _mm_extract_epi32(x1, 1); +} + +#endif /* CRC32_SIMD_SSE42_PCLMUL */ diff --git a/chromium/third_party/zlib/crc32_simd.h b/chromium/third_party/zlib/crc32_simd.h new file mode 100644 index 00000000000..4e6f3268e03 --- /dev/null +++ b/chromium/third_party/zlib/crc32_simd.h @@ -0,0 +1,27 @@ +/* crc32_simd.h + * + * Copyright 2017 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include <stdint.h> + +#include "zconf.h" +#include "zutil.h" + +/* + * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer + * length must be at least 64, and a multiple of 16. + */ +uint32_t ZLIB_INTERNAL crc32_sse42_simd_( + const unsigned char *buf, + z_size_t len, + uint32_t crc); + +/* + * crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c + * for computing the crc32 of an arbitrary length buffer. + */ +#define Z_CRC32_SSE42_MINIMUM_LENGTH 64 +#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15 diff --git a/chromium/third_party/zlib/deflate.c b/chromium/third_party/zlib/deflate.c index aa0c9c67a6d..6fe9c7e09de 100644 --- a/chromium/third_party/zlib/deflate.c +++ b/chromium/third_party/zlib/deflate.c @@ -87,7 +87,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); -unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +unsigned ZLIB_INTERNAL deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); #ifdef ASMV # pragma message("Assembler code may have bugs -- use at your own risk") void match_init OF((void)); /* asm code initialization */ @@ -429,7 +429,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) strm->adler = adler32(strm->adler, dictionary, dictLength); - s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + s->wrap = 0; /* avoid computing Adler-32 in deflate_read_buf */ /* if dictionary would fill window, just replace the history */ if (dictLength >= s->w_size) { @@ -756,7 +756,7 @@ local void putShortMSB (s, b) * Flush as much pending output as possible. All deflate() output, except for * some deflate_stored() output, goes through this function so some * applications may wish to modify it to avoid allocating a large - * strm->next_out buffer and copying into it. (See also read_buf()). + * strm->next_out buffer and copying into it. (See also deflate_read_buf()). */ local void flush_pending(strm) z_streamp strm; @@ -1193,7 +1193,7 @@ int ZEXPORT deflateCopy (dest, source) * allocating a large strm->next_in buffer and copying from it. * (See also flush_pending()). */ -ZLIB_INTERNAL unsigned read_buf(strm, buf, size) +ZLIB_INTERNAL unsigned deflate_read_buf(strm, buf, size) z_streamp strm; Bytef *buf; unsigned size; @@ -1576,7 +1576,7 @@ local void fill_window_c(s) */ Assert(more >= 2, "more < 2"); - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + n = deflate_read_buf(s->strm, s->window + s->strstart + s->lookahead, more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ @@ -1765,7 +1765,7 @@ local block_state deflate_stored(s, flush) * the check value. */ if (len) { - read_buf(s->strm, s->strm->next_out, len); + deflate_read_buf(s->strm, s->strm->next_out, len); s->strm->next_out += len; s->strm->avail_out -= len; s->strm->total_out += len; @@ -1828,7 +1828,7 @@ local block_state deflate_stored(s, flush) if (have > s->strm->avail_in) have = s->strm->avail_in; if (have) { - read_buf(s->strm, s->window + s->strstart, have); + deflate_read_buf(s->strm, s->window + s->strstart, have); s->strstart += have; } if (s->high_water < s->strstart) diff --git a/chromium/third_party/zlib/fill_window_sse.c b/chromium/third_party/zlib/fill_window_sse.c index 949ccce1ba9..ed1e5d1d673 100644 --- a/chromium/third_party/zlib/fill_window_sse.c +++ b/chromium/third_party/zlib/fill_window_sse.c @@ -26,7 +26,7 @@ }\ }\ -extern int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +extern int deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); void fill_window_sse(deflate_state *s) { @@ -117,7 +117,9 @@ void fill_window_sse(deflate_state *s) */ Assert(more >= 2, "more < 2"); - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + n = deflate_read_buf(s->strm, + s->window + s->strstart + s->lookahead, + more); s->lookahead += n; /* Initialize the hash value now that we have some input: */ diff --git a/chromium/third_party/zlib/names.h b/chromium/third_party/zlib/names.h index cd98ec9940b..6252b02c302 100644 --- a/chromium/third_party/zlib/names.h +++ b/chromium/third_party/zlib/names.h @@ -160,7 +160,7 @@ #define crc_fold_init Cr_z_crc_fold_init #define crc_reset Cr_z_crc_reset #define fill_window_sse Cr_z_fill_window_sse -#define read_buf Cr_z_read_buf +#define deflate_read_buf Cr_z_deflate_read_buf #define x86_check_features Cr_z_x86_check_features /* FIXME: x86_cpu_enable_ssse3 wasn't part of the simd.patch */ #define x86_cpu_enable_ssse3 Cr_z_x86_cpu_enable_ssse3 @@ -171,4 +171,14 @@ #define adler32_simd_ Cr_z_adler32_simd_ #endif +#if defined(INFLATE_CHUNK_SIMD_SSE2) || defined(INFLATE_CHUNK_SIMD_NEON) +/* Symbols added by contrib/optimizations/inffast_chunk */ +#define inflate_fast_chunk_ Cr_z_inflate_fast_chunk_ +#endif + +#if defined(CRC32_SIMD_SSE42_PCLMUL) +/* Symbols added by crc32_simd.c */ +#define crc32_sse42_simd_ Cr_z_crc32_sse42_simd_ +#endif + #endif /* THIRD_PARTY_ZLIB_NAMES_H_ */ diff --git a/chromium/third_party/zlib/patches/0001-simd.patch b/chromium/third_party/zlib/patches/0001-simd.patch index 75828d26ea5..1fbf1956f66 100644 --- a/chromium/third_party/zlib/patches/0001-simd.patch +++ b/chromium/third_party/zlib/patches/0001-simd.patch @@ -559,7 +559,7 @@ index 1ec761448de9..aa0c9c67a6dc 100644 local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -+unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); ++unsigned ZLIB_INTERNAL deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); #ifdef ASMV # pragma message("Assembler code may have bugs -- use at your own risk") void match_init OF((void)); /* asm code initialization */ @@ -703,7 +703,7 @@ index 1ec761448de9..aa0c9c67a6dc 100644 * (See also flush_pending()). */ -local unsigned read_buf(strm, buf, size) -+ZLIB_INTERNAL unsigned read_buf(strm, buf, size) ++ZLIB_INTERNAL unsigned deflate_read_buf(strm, buf, size) z_streamp strm; Bytef *buf; unsigned size; @@ -859,7 +859,7 @@ new file mode 100644 index 000000000000..949ccce1ba9c --- /dev/null +++ b/fill_window_sse.c -@@ -0,0 +1,175 @@ +@@ -0,0 +1,177 @@ +/* + * Fill Window with SSE2-optimized hash shifting + * @@ -888,7 +888,7 @@ index 000000000000..949ccce1ba9c + }\ + }\ + -+extern int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); ++extern int deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); + +void fill_window_sse(deflate_state *s) +{ @@ -979,7 +979,9 @@ index 000000000000..949ccce1ba9c + */ + Assert(more >= 2, "more < 2"); + -+ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); ++ n = deflate_read_buf(s->strm, ++ s->window + s->strstart + s->lookahead, ++ more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ @@ -1051,7 +1053,7 @@ index f18df5684dc5..3436baa4eb57 100644 +#define crc_fold_init Cr_z_crc_fold_init +#define crc_reset Cr_z_crc_reset +#define fill_window_sse Cr_z_fill_window_sse -+#define read_buf Cr_z_read_buf ++#define deflate_read_buf Cr_z_deflate_read_buf +#define x86_check_features Cr_z_x86_check_features +#define x86_cpu_enable_simd Cr_z_x86_cpu_enable_simd + diff --git a/chromium/third_party/zlib/patches/0005-adler32-simd.patch b/chromium/third_party/zlib/patches/0005-adler32-simd.patch index 7034389eb38..9242b1d76de 100644 --- a/chromium/third_party/zlib/patches/0005-adler32-simd.patch +++ b/chromium/third_party/zlib/patches/0005-adler32-simd.patch @@ -603,7 +603,7 @@ index 3436baa4eb57..cd98ec9940b6 100644 +++ b/third_party/zlib/names.h @@ -162,6 +162,13 @@ #define fill_window_sse Cr_z_fill_window_sse - #define read_buf Cr_z_read_buf + #define deflate_read_buf Cr_z_deflate_read_buf #define x86_check_features Cr_z_x86_check_features +/* FIXME: x86_cpu_enable_ssse3 wasn't part of the simd.patch */ +#define x86_cpu_enable_ssse3 Cr_z_x86_cpu_enable_ssse3 |