diff options
author | Tim Rowley <timothy.o.rowley@intel.com> | 2017-11-13 15:11:21 -0600 |
---|---|---|
committer | Andres Gomez <agomez@igalia.com> | 2017-11-21 18:16:46 +0200 |
commit | 0f4dfee254e1f054c851a729deb4217cc4e14003 (patch) | |
tree | 4337101485b7df1d90b66667ef7ca621fbf31851 | |
parent | 256733683b70731235eeac94e95d66e9d8a3c56e (diff) | |
download | mesa-0f4dfee254e1f054c851a729deb4217cc4e14003.tar.gz |
swr/rast: Use gather instruction for i32gather_ps on simd16/avx512
Speed up avx512 platforms; fixes performance regression caused
by the switch to simdlib.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 439904847e9c2970494c18e8c47bd6c38c0ed8ab)
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 12 |
1 files changed, 1 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 7447d35ee2f..0d2cd595875 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -524,17 +524,7 @@ SIMD_WRAPPER_2(unpacklo_ps); template<ScaleFactor ScaleT> static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { - uint32_t *pOffsets = (uint32_t*)&idx; - Float vResult; - float* pResult = (float*)&vResult; - for (uint32_t i = 0; i < SIMD_WIDTH; ++i) - { - uint32_t offset = pOffsets[i]; - offset = offset * static_cast<uint32_t>(ScaleT); - pResult[i] = *(float const*)(((uint8_t const*)p + offset)); - } - - return vResult; + return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT)); } static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements) |