diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-05-24 11:40:17 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-05-24 12:42:11 +0000 |
commit | 5d87695f37678f96492b258bbab36486c59866b4 (patch) | |
tree | be9783bbaf04fb930c4d74ca9c00b5e7954c8bc6 /chromium/third_party/skia/third_party | |
parent | 6c11fb357ec39bf087b8b632e2b1e375aef1b38b (diff) | |
download | qtwebengine-chromium-5d87695f37678f96492b258bbab36486c59866b4.tar.gz |
BASELINE: Update Chromium to 75.0.3770.56
Change-Id: I86d2007fd27a45d5797eee06f4c9369b8b50ac4f
Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
Diffstat (limited to 'chromium/third_party/skia/third_party')
9 files changed, 250 insertions, 115 deletions
diff --git a/chromium/third_party/skia/third_party/gif/SkGifImageReader.cpp b/chromium/third_party/skia/third_party/gif/SkGifImageReader.cpp index 0cb77bab792..348456104e6 100644 --- a/chromium/third_party/skia/third_party/gif/SkGifImageReader.cpp +++ b/chromium/third_party/skia/third_party/gif/SkGifImageReader.cpp @@ -211,10 +211,9 @@ void SkGIFLZWContext::outputRow(const unsigned char* rowBegin) // Otherwise, decoding failed; returns false in this case, which will always cause the SkGifImageReader to set the "decode failed" flag. bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock) { - const int width = m_frameContext->width(); - if (rowIter == rowBuffer.end()) return true; + const int width = m_frameContext->width(); for (const unsigned char* ch = block; bytesInBlock-- > 0; ch++) { // Feed the next byte into the decoder's 32-bit input buffer. @@ -246,40 +245,71 @@ bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock) } const int tempCode = code; - unsigned short codeLength = 0; - if (code < avail) { - // This is a pre-existing code, so we already know what it - // encodes. - codeLength = suffixLength[code]; - rowIter += codeLength; - } else if (code == avail && oldcode != -1) { - // This is a new code just being added to the dictionary. - // It must encode the contents of the previous code, plus - // the first character of the previous code again. - codeLength = suffixLength[oldcode] + 1; - rowIter += codeLength; - *--rowIter = firstchar; - code = oldcode; - } else { + if (code > avail) { // This is an invalid code. The dictionary is just initialized // and the code is incomplete. We don't know how to handle // this case. return false; } - while (code >= clearCode) { - *--rowIter = suffix[code]; + if (code == avail) { + if (oldcode != -1) { + // This is a new code just being added to the dictionary. + // It must encode the contents of the previous code, plus + // the first character of the previous code again. + // Now we know avail is the new code we can use oldcode + // value to get the code related to that. + code = oldcode; + } else { + // This is an invalid code. The dictionary is just initialized + // and the code is incomplete. We don't know how to handle + // this case. + return false; + } + } + + // code length of the oldcode for new code which is + // avail = oldcode + firstchar of the oldcode + int remaining = suffixLength[code]; + + // Round remaining up to multiple of SK_DICTIONARY_WORD_SIZE, because that's + // the granularity of the chunks we copy. The last chunk may contain + // some garbage but it'll be overwritten by the next code or left unused. + // The working buffer is padded to account for this. + remaining += -remaining & (SK_DICTIONARY_WORD_SIZE - 1) ; + unsigned char* p = rowIter + remaining; + + // Place rowIter so that after writing pixels rowIter can be set to firstchar, thereby + // completing the code. + rowIter += suffixLength[code]; + + while (remaining > 0) { + p -= SK_DICTIONARY_WORD_SIZE; + std::copy_n(suffix[code].begin(), SK_DICTIONARY_WORD_SIZE, p); code = prefix[code]; + remaining -= SK_DICTIONARY_WORD_SIZE; } + const int firstchar = static_cast<unsigned char>(code); // (strictly `suffix[code][0]`) - *--rowIter = firstchar = suffix[code]; + // This completes the new code avail and writing the corresponding + // pixels on target. + if (tempCode == avail) { + *rowIter++ = firstchar; + } // Define a new codeword in the dictionary as long as we've read // more than one value from the stream. if (avail < SK_MAX_DICTIONARY_ENTRIES && oldcode != -1) { - prefix[avail] = oldcode; - suffix[avail] = firstchar; - suffixLength[avail] = suffixLength[oldcode] + 1; + // now add avail to the dictionary for future reference + unsigned short codeLength = suffixLength[oldcode] + 1; + int l = (codeLength - 1) & (SK_DICTIONARY_WORD_SIZE - 1); + // If the suffix buffer is full (l == 0) then oldcode becomes the new + // prefix, otherwise copy and extend oldcode's buffer and use the same + // prefix as oldcode used. + prefix[avail] = (l == 0) ? oldcode : prefix[oldcode]; + suffix[avail] = suffix[oldcode]; + suffix[avail][l] = firstchar; + suffixLength[avail] = codeLength; ++avail; // If we've used up all the codewords of a given length @@ -291,7 +321,6 @@ bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock) } } oldcode = tempCode; - rowIter += codeLength; // Output as many rows as possible. unsigned char* rowBegin = rowBuffer.begin(); @@ -905,20 +934,24 @@ bool SkGIFLZWContext::prepareToDecode() // the longest sequence (SK_MAX_DICTIONARY_ENTIRES + 1) - 2 values long. Since // each value is a byte, this is also the number of bytes in the longest // encodable sequence. - const size_t maxBytes = SK_MAX_DICTIONARY_ENTRIES - 1; + constexpr size_t kMaxSequence = SK_MAX_DICTIONARY_ENTRIES - 1; + constexpr size_t kMaxBytes = (kMaxSequence + SK_DICTIONARY_WORD_SIZE - 1) + & ~(SK_DICTIONARY_WORD_SIZE - 1); // Now allocate the output buffer. We decode directly into this buffer // until we have at least one row worth of data, then call outputRow(). // This means worst case we may have (row width - 1) bytes in the buffer - // and then decode a sequence |maxBytes| long to append. - rowBuffer.reset(m_frameContext->width() - 1 + maxBytes); + // and then decode a sequence |kMaxBytes| long to append. + rowBuffer.reset(m_frameContext->width() - 1 + kMaxBytes); rowIter = rowBuffer.begin(); rowsRemaining = m_frameContext->height(); // Clearing the whole suffix table lets us be more tolerant of bad data. for (int i = 0; i < clearCode; ++i) { - suffix[i] = i; + std::fill_n(suffix[i].begin(), SK_DICTIONARY_WORD_SIZE, 0); + suffix[i][0] = i; suffixLength[i] = 1; + prefix[i] = i; // ensure that we have a place to find firstchar } return true; } diff --git a/chromium/third_party/skia/third_party/gif/SkGifImageReader.h b/chromium/third_party/skia/third_party/gif/SkGifImageReader.h index a5cb26cc1a1..7ff424a1b70 100644 --- a/chromium/third_party/skia/third_party/gif/SkGifImageReader.h +++ b/chromium/third_party/skia/third_party/gif/SkGifImageReader.h @@ -60,6 +60,7 @@ typedef SkTArray<unsigned char, true> SkGIFRow; #define SK_MAX_DICTIONARY_ENTRIES 4096 // 2^SK_MAX_DICTIONARY_ENTRY_BITS #define SK_MAX_COLORS 256 #define SK_BYTES_PER_COLORMAP_ENTRY 3 +#define SK_DICTIONARY_WORD_SIZE 8 // List of possible parsing states. enum SkGIFState { @@ -97,7 +98,6 @@ public: , clearCode(0) , avail(0) , oldcode(0) - , firstchar(0) , bits(0) , datum(0) , ipass(0) @@ -120,7 +120,6 @@ private: int clearCode; // Codeword used to trigger dictionary reset. int avail; // Index of next available slot in dictionary. int oldcode; - unsigned char firstchar; int bits; // Number of unread bits in "datum". int datum; // 32-bit input buffer. int ipass; // Interlace pass; Ranges 1-4 if interlaced. @@ -128,7 +127,8 @@ private: size_t rowsRemaining; // Rows remaining to be output. unsigned short prefix[SK_MAX_DICTIONARY_ENTRIES]; - unsigned char suffix[SK_MAX_DICTIONARY_ENTRIES]; + std::array<std::array<unsigned char, SK_DICTIONARY_WORD_SIZE>, + SK_MAX_DICTIONARY_ENTRIES> suffix; unsigned short suffixLength[SK_MAX_DICTIONARY_ENTRIES]; SkGIFRow rowBuffer; // Single scanline temporary buffer. unsigned char* rowIter; diff --git a/chromium/third_party/skia/third_party/harfbuzz/BUILD.gn b/chromium/third_party/skia/third_party/harfbuzz/BUILD.gn index e657af0f98a..4718c8c639b 100644 --- a/chromium/third_party/skia/third_party/harfbuzz/BUILD.gn +++ b/chromium/third_party/skia/third_party/harfbuzz/BUILD.gn @@ -26,7 +26,6 @@ if (skia_use_system_harfbuzz) { "HAVE_ICU", "HAVE_ICU_BUILTIN", "HAVE_OT", - "HB_NO_MT", ] deps = [ "//third_party/icu", diff --git a/chromium/third_party/skia/third_party/imgui/BUILD.gn b/chromium/third_party/skia/third_party/imgui/BUILD.gn index 8dd3a4391e7..94ce3bf4b85 100644 --- a/chromium/third_party/skia/third_party/imgui/BUILD.gn +++ b/chromium/third_party/skia/third_party/imgui/BUILD.gn @@ -16,5 +16,6 @@ third_party("imgui") { "../externals/imgui/imgui_demo.cpp", "../externals/imgui/imgui_draw.cpp", "../externals/imgui/imgui_widgets.cpp", + "../externals/imgui/misc/cpp/imgui_stdlib.cpp", ] } diff --git a/chromium/third_party/skia/third_party/libpng/BUILD.gn b/chromium/third_party/skia/third_party/libpng/BUILD.gn index 2b2d72a0e64..938e723b373 100644 --- a/chromium/third_party/skia/third_party/libpng/BUILD.gn +++ b/chromium/third_party/skia/third_party/libpng/BUILD.gn @@ -46,7 +46,14 @@ if (skia_use_system_libpng) { sources += [ "../externals/libpng/arm/arm_init.c", "../externals/libpng/arm/filter_neon_intrinsics.c", + "../externals/libpng/arm/palette_neon_intrinsics.c", ] + if (is_clang && is_win) { + # TODO(mtklein): libpng includes arm64_neon.h in clang-cl.exe builds, + # where it should only do that in cl.exe builds, continuing to use + # arm_neon.h with Clang. Until that's fixed, NEON code won't build. + defines += [ "PNG_ARM_NEON_OPT=0" ] + } } if (current_cpu == "x86" || current_cpu == "x64") { diff --git a/chromium/third_party/skia/third_party/skcms/skcms.cc b/chromium/third_party/skia/third_party/skcms/skcms.cc index b965c563148..3d97dd198d1 100644 --- a/chromium/third_party/skia/third_party/skcms/skcms.cc +++ b/chromium/third_party/skia/third_party/skcms/skcms.cc @@ -17,6 +17,19 @@ #include <arm_neon.h> #elif defined(__SSE__) #include <immintrin.h> + + #if defined(__clang__) + // That #include <immintrin.h> is usually enough, but Clang's headers + // "helpfully" skip including the whole kitchen sink when _MSC_VER is + // defined, because lots of programs on Windows would include that and + // it'd be a lot slower. But we want all those headers included so we + // can use their features after runtime checks later. + #include <smmintrin.h> + #include <avxintrin.h> + #include <avx2intrin.h> + #include <avx512fintrin.h> + #include <avx512dqintrin.h> + #endif #endif // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others. @@ -1709,7 +1722,7 @@ bool skcms_ApproximateCurve(const skcms_Curve* curve, int mid = (L + N) / 2; float mid_x = mid / (N - 1.0f); float mid_y = eval_curve(curve, mid_x); - tf.g = log2f_(mid_y) / log2f_(mid_x);; + tf.g = log2f_(mid_y) / log2f_(mid_x); tf.a = 1; tf.b = 0; tf.e = tf.c*tf.d + tf.f @@ -1864,80 +1877,127 @@ namespace baseline { #if !defined(SKCMS_PORTABLE) && \ (( defined(__clang__) && __clang_major__ >= 5) || \ (!defined(__clang__) && defined(__GNUC__))) \ - && defined(__x86_64__) && !defined(__AVX2__) - - #if defined(__clang__) - #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function) - #elif defined(__GNUC__) - #pragma GCC push_options - #pragma GCC target("avx2,f16c") - #endif - - namespace hsw { - #define USING_AVX - #define USING_AVX_F16C - #define USING_AVX2 - #define N 8 - using F = Vec<N,float>; - using I32 = Vec<N,int32_t>; - using U64 = Vec<N,uint64_t>; - using U32 = Vec<N,uint32_t>; - using U16 = Vec<N,uint16_t>; - using U8 = Vec<N,uint8_t>; - - #include "src/Transform_inl.h" + && defined(__x86_64__) + + #if !defined(__AVX2__) + #if defined(__clang__) + #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function) + #elif defined(__GNUC__) + #pragma GCC push_options + #pragma GCC target("avx2,f16c") + #endif + + namespace hsw { + #define USING_AVX + #define USING_AVX_F16C + #define USING_AVX2 + #define N 8 + using F = Vec<N,float>; + using I32 = Vec<N,int32_t>; + using U64 = Vec<N,uint64_t>; + using U32 = Vec<N,uint32_t>; + using U16 = Vec<N,uint16_t>; + using U8 = Vec<N,uint8_t>; + + #include "src/Transform_inl.h" + + // src/Transform_inl.h will undefine USING_* for us. + #undef N + } - // src/Transform_inl.h will undefine USING_* for us. - #undef N - } + #if defined(__clang__) + #pragma clang attribute pop + #elif defined(__GNUC__) + #pragma GCC pop_options + #endif - #if defined(__clang__) - #pragma clang attribute pop - #elif defined(__GNUC__) - #pragma GCC pop_options + #define TEST_FOR_HSW #endif - #define TEST_FOR_HSW - - static bool hsw_ok() { - static const bool ok = []{ - // See http://www.sandpile.org/x86/cpuid.htm - - // First, a basic cpuid(1). - uint32_t eax, ebx, ecx, edx; - __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "0"(1), "2"(0)); - - // Sanity check for prerequisites. - if ((edx & (1<<25)) != (1<<25)) { return false; } // SSE - if ((edx & (1<<26)) != (1<<26)) { return false; } // SSE2 - if ((ecx & (1<< 0)) != (1<< 0)) { return false; } // SSE3 - if ((ecx & (1<< 9)) != (1<< 9)) { return false; } // SSSE3 - if ((ecx & (1<<19)) != (1<<19)) { return false; } // SSE4.1 - if ((ecx & (1<<20)) != (1<<20)) { return false; } // SSE4.2 - - if ((ecx & (3<<26)) != (3<<26)) { return false; } // XSAVE + OSXSAVE - - { - uint32_t eax_xgetbv, edx_xgetbv; - __asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0)); - if ((eax_xgetbv & (3<<1)) != (3<<1)) { return false; } // XMM+YMM state saved? - } - - if ((ecx & (1<<28)) != (1<<28)) { return false; } // AVX - if ((ecx & (1<<29)) != (1<<29)) { return false; } // F16C - if ((ecx & (1<<12)) != (1<<12)) { return false; } // FMA (TODO: not currently used) + #if !defined(__AVX512F__) + #if defined(__clang__) + #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function) + #elif defined(__GNUC__) + #pragma GCC push_options + #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl") + #endif + + namespace skx { + #define USING_AVX512F + #define N 16 + using F = Vec<N,float>; + using I32 = Vec<N,int32_t>; + using U64 = Vec<N,uint64_t>; + using U32 = Vec<N,uint32_t>; + using U16 = Vec<N,uint16_t>; + using U8 = Vec<N,uint8_t>; + + #include "src/Transform_inl.h" + + // src/Transform_inl.h will undefine USING_* for us. + #undef N + } - // Call cpuid(7) to check for our final AVX2 feature bit! - __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "0"(7), "2"(0)); - if ((ebx & (1<< 5)) != (1<< 5)) { return false; } // AVX2 + #if defined(__clang__) + #pragma clang attribute pop + #elif defined(__GNUC__) + #pragma GCC pop_options + #endif - return true; - }(); + #define TEST_FOR_SKX + #endif - return ok; - } + #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX) + enum class CpuType { None, HSW, SKX }; + static CpuType cpu_type() { + static const CpuType type = []{ + // See http://www.sandpile.org/x86/cpuid.htm + + // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX. + uint32_t eax, ebx, ecx, edx; + __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "0"(1), "2"(0)); + if ((edx & (1u<<25)) && // SSE + (edx & (1u<<26)) && // SSE2 + (ecx & (1u<< 0)) && // SSE3 + (ecx & (1u<< 9)) && // SSSE3 + (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even) + (ecx & (1u<<19)) && // SSE4.1 + (ecx & (1u<<20)) && // SSE4.2 + (ecx & (1u<<26)) && // XSAVE + (ecx & (1u<<27)) && // OSXSAVE + (ecx & (1u<<28)) && // AVX + (ecx & (1u<<29))) { // F16C + + // Call cpuid(7) to check for AVX2 and AVX-512 bits. + __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "0"(7), "2"(0)); + // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved. + uint32_t xcr0, dont_need_edx; + __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0)); + + if ((xcr0 & (1u<<1)) && // XMM register state saved? + (xcr0 & (1u<<2)) && // YMM register state saved? + (ebx & (1u<<5))) { // AVX2 + // At this point we're at least HSW. Continue checking for SKX. + if ((xcr0 & (1u<< 5)) && // Opmasks state saved? + (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved? + (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved? + (ebx & (1u<<16)) && // AVX512F + (ebx & (1u<<17)) && // AVX512DQ + (ebx & (1u<<28)) && // AVX512CD + (ebx & (1u<<30)) && // AVX512BW + (ebx & (1u<<31))) { // AVX512VL + return CpuType::SKX; + } + return CpuType::HSW; + } + } + return CpuType::None; + }(); + return type; + } + #endif #endif @@ -1984,6 +2044,8 @@ static size_t bytes_per_pixel(skcms_PixelFormat fmt) { case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8; case skcms_PixelFormat_RGB_161616BE >> 1: return 6; case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8; + case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6; + case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8; case skcms_PixelFormat_RGB_hhh >> 1: return 6; case skcms_PixelFormat_RGBA_hhhh >> 1: return 8; case skcms_PixelFormat_RGB_fff >> 1: return 12; @@ -2083,6 +2145,8 @@ bool skcms_TransformWithPalette(const void* src, case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break; case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break; case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break; + case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break; + case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break; case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break; case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break; case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break; @@ -2092,6 +2156,10 @@ bool skcms_TransformWithPalette(const void* src, *args++ = palette; break; } + if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm || + srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) { + *ops++ = Op_clamp; + } if (srcFmt & 1) { *ops++ = Op_swap_rb; } @@ -2213,8 +2281,8 @@ bool skcms_TransformWithPalette(const void* src, if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; } } - // Clamp here before premul to make sure we're clamping to fixed-point values _and_ gamut, - // not just to values that fit in the fixed point representation. + // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut, + // not just to values that fit in [0,1]. // // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5), // but would be carrying r > 1, which is really unexpected for downstream consumers. @@ -2242,6 +2310,8 @@ bool skcms_TransformWithPalette(const void* src, case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break; case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break; case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break; + case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break; + case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break; case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break; case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break; case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break; @@ -2250,7 +2320,18 @@ bool skcms_TransformWithPalette(const void* src, auto run = baseline::run_program; #if defined(TEST_FOR_HSW) - if (hsw_ok()) { run = hsw::run_program; } + switch (cpu_type()) { + case CpuType::None: break; + case CpuType::HSW: run = hsw::run_program; break; + case CpuType::SKX: run = hsw::run_program; break; + } +#endif +#if defined(TEST_FOR_SKX) + switch (cpu_type()) { + case CpuType::None: break; + case CpuType::HSW: break; + case CpuType::SKX: run = skx::run_program; break; + } #endif run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp); return true; diff --git a/chromium/third_party/skia/third_party/skcms/skcms.h b/chromium/third_party/skia/third_party/skcms/skcms.h index d924f34d169..6ca82360f45 100644 --- a/chromium/third_party/skia/third_party/skcms/skcms.h +++ b/chromium/third_party/skia/third_party/skcms/skcms.h @@ -208,6 +208,11 @@ typedef enum skcms_PixelFormat { skcms_PixelFormat_RGBA_16161616 = skcms_PixelFormat_RGBA_16161616BE, skcms_PixelFormat_BGRA_16161616 = skcms_PixelFormat_BGRA_16161616BE, + skcms_PixelFormat_RGB_hhh_Norm, // 1-5-10 half-precision float in [0,1] + skcms_PixelFormat_BGR_hhh_Norm, // Pointers must be 16-bit aligned. + skcms_PixelFormat_RGBA_hhhh_Norm, + skcms_PixelFormat_BGRA_hhhh_Norm, + skcms_PixelFormat_RGB_hhh, // 1-5-10 half-precision float. skcms_PixelFormat_BGR_hhh, // Pointers must be 16-bit aligned. skcms_PixelFormat_RGBA_hhhh, diff --git a/chromium/third_party/skia/third_party/skcms/src/Transform_inl.h b/chromium/third_party/skia/third_party/skcms/src/Transform_inl.h index 69efc983840..7f76b993aca 100644 --- a/chromium/third_party/skia/third_party/skcms/src/Transform_inl.h +++ b/chromium/third_party/skia/third_party/skcms/src/Transform_inl.h @@ -43,6 +43,9 @@ #if !defined(USING_AVX2) && defined(USING_AVX) && defined(__AVX2__) #define USING_AVX2 #endif +#if !defined(USING_AVX512F) && N == 16 && defined(__AVX512F__) + #define USING_AVX512F +#endif // Similar to the AVX+ features, we define USING_NEON and USING_NEON_F16C. // This is more for organizational clarity... skcms.cc doesn't force these. @@ -104,13 +107,12 @@ SI D cast(const S& v) { return (D)v; #elif defined(__clang__) return __builtin_convertvector(v, D); -#elif N == 4 - return D{v[0],v[1],v[2],v[3]}; -#elif N == 8 - return D{v[0],v[1],v[2],v[3], v[4],v[5],v[6],v[7]}; -#elif N == 16 - return D{v[0],v[1],v[ 2],v[ 3], v[ 4],v[ 5],v[ 6],v[ 7], - v[8],v[9],v[10],v[11], v[12],v[13],v[14],v[15]}; +#else + D d; + for (int i = 0; i < N; i++) { + d[i] = v[i]; + } + return d; #endif } @@ -138,7 +140,7 @@ SI T if_then_else(I32 cond, T t, T e) { SI F F_from_Half(U16 half) { #if defined(USING_NEON_F16C) return vcvt_f32_f16((float16x4_t)half); -#elif defined(__AVX512F__) +#elif defined(USING_AVX512F) return (F)_mm512_cvtph_ps((__m256i)half); #elif defined(USING_AVX_F16C) typedef int16_t __attribute__((vector_size(16))) I16; @@ -165,7 +167,7 @@ SI F F_from_Half(U16 half) { SI U16 Half_from_F(F f) { #if defined(USING_NEON_F16C) return (U16)vcvt_f16_f32(f); -#elif defined(__AVX512F__) +#elif defined(USING_AVX512F) return (U16)_mm512_cvtps_ph((__m512 )f, _MM_FROUND_CUR_DIRECTION ); #elif defined(USING_AVX_F16C) return (U16)__builtin_ia32_vcvtps2ph256(f, 0x04/*_MM_FROUND_CUR_DIRECTION*/); @@ -206,8 +208,12 @@ SI F floor_(F x) { return floorf_(x); #elif defined(__aarch64__) return vrndmq_f32(x); -#elif defined(__AVX512F__) - return _mm512_floor_ps(x); +#elif defined(USING_AVX512F) + // Clang's _mm512_floor_ps() passes its mask as -1, not (__mmask16)-1, + // and integer santizer catches that this implicit cast changes the + // value from -1 to 65535. We'll cast manually to work around it. + // Read this as `return _mm512_floor_ps(x)`. + return _mm512_mask_floor_ps(x, (__mmask16)-1, x); #elif defined(USING_AVX) return __builtin_ia32_roundps256(x, 0x01/*_MM_FROUND_FLOOR*/); #elif defined(__SSE4_1__) @@ -1238,6 +1244,9 @@ static void run_program(const Op* program, const void** arguments, #if defined(USING_AVX2) #undef USING_AVX2 #endif +#if defined(USING_AVX512F) + #undef USING_AVX512F +#endif #if defined(USING_NEON) #undef USING_NEON diff --git a/chromium/third_party/skia/third_party/skcms/version.sha1 b/chromium/third_party/skia/third_party/skcms/version.sha1 index ea0efb2100c..bc80db40563 100755 --- a/chromium/third_party/skia/third_party/skcms/version.sha1 +++ b/chromium/third_party/skia/third_party/skcms/version.sha1 @@ -1 +1 @@ -5e67e5c19fd337d1b7e5601d162d3e21d01e11e5
\ No newline at end of file +668026c511f3d4be9447b0ae28ea7a73b5899262
\ No newline at end of file |