summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Rowley <timothy.o.rowley@intel.com>2017-11-13 15:11:21 -0600
committerEmil Velikov <emil.l.velikov@gmail.com>2017-11-17 19:24:29 +0000
commitc79820054391b7770c6f79a8554e21cf34a075a6 (patch)
treee023b71c5d2386e06c51cfc36f5f8f9c5e3dbeb5
parentf3caa303cf9b1efa3627fd3cd6912a0fbc5e71e7 (diff)
downloadmesa-c79820054391b7770c6f79a8554e21cf34a075a6.tar.gz
swr/rast: Use gather instruction for i32gather_ps on simd16/avx512
Speed up avx512 platforms; fixes performance regression caused by switch to simdlib. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 439904847e9c2970494c18e8c47bd6c38c0ed8ab)
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl12
1 files changed, 1 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 95e4c319099..c13b9f616aa 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -484,17 +484,7 @@ SIMD_WRAPPER_2(unpacklo_ps);
template<ScaleFactor ScaleT>
static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
- uint32_t *pOffsets = (uint32_t*)&idx;
- Float vResult;
- float* pResult = (float*)&vResult;
- for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
- {
- uint32_t offset = pOffsets[i];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[i] = *(float const*)(((uint8_t const*)p + offset));
- }
-
- return vResult;
+ return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT));
}
static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)