path: root/lib/Headers/avx512fintrin.h
author     Craig Topper <craig.topper@intel.com>   2018-06-08 21:50:07 +0000
committer  Craig Topper <craig.topper@intel.com>   2018-06-08 21:50:07 +0000
commit     3cef3a6363ad17ab52f3e41d8de9511bbd07a26f (patch)
tree       4bf190ff251ffc45e7376531f10bb7616fe3559b /lib/Headers/avx512fintrin.h
parent     8c873daccce7ee5339b9fd82c81fe02b73543b65 (diff)
[X86] Fold masking into subvector extract builtins.
I'm looking into making the select builtins require avx512f, avx512bw, or avx512vl, since masking operations generally require those features. The extract builtins are funny because the 512-bit versions return a 128- or 256-bit vector with masking even when avx512vl is not supported.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334330 91177308-0d34-0410-b5e6-96231b3b80d8
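As a hedged illustration of the behavior the message describes (this program is not part of the commit; the data and the mask value 0x5 are arbitrary choices for the example), the 512-bit extract intrinsics can be used with zero-masking even when compiling with only -mavx512f:

/* Build sketch: clang -mavx512f example.c */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* _mm512_set_pd lists arguments from the highest element down, so
     element 0 of v is 1.0 and element 7 is 8.0. */
  __m512d v = _mm512_set_pd(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);

  /* Zero-masked extract of the upper 256-bit lane (imm = 1): result
     lanes whose mask bit is clear are zeroed. Mask 0x5 keeps lanes 0
     and 2, so hi should be {5.0, 0.0, 7.0, 0.0}. */
  __m256d hi = _mm512_maskz_extractf64x4_pd((__mmask8)0x5, v, 1);

  double out[4];
  _mm256_storeu_pd(out, hi);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}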
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--  lib/Headers/avx512fintrin.h | 64
1 file changed, 36 insertions(+), 28 deletions(-)
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 381011ed39..dbac414fff 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -3494,30 +3494,34 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) \
- (__m256d)__builtin_ia32_extractf64x4((__v8df)(__m512d)(A), (int)(I))
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_undefined_pd(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
- (__v4df)(__m256d)(W))
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
- (__v4df)_mm256_setzero_pd())
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U))
#define _mm512_extractf32x4_ps(A, I) \
- (__m128)__builtin_ia32_extractf32x4((__v16sf)(__m512)(A), (int)(I))
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
- (__v4sf)(__m128)(W))
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
- (__v4sf)_mm_setzero_ps())
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U))
/* Vector Blend */
@@ -7534,30 +7538,34 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
}
#define _mm512_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_ia32_extracti32x4((__v16si)(__m512i)(A), (int)(imm))
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
- (__v4si)(__m128i)(W))
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
- (__v4si)_mm_setzero_si128())
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm512_extracti64x4_epi64(A, imm) \
- (__m256i)__builtin_ia32_extracti64x4((__v8di)(__m512i)(A), (int)(imm))
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
- (__v4di)(__m256i)(W))
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
- (__v4di)_mm256_setzero_si256())
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm512_insertf64x4(A, B, imm) \
(__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
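For readers comparing the two expansions above: the old form extracted the subvector unmasked and then applied a separate select builtin, while the new form passes the pass-through vector and mask directly to the extract builtin, letting the backend fold everything into a single masked vextractf64x4/vextractf32x4. Both compute the same merge-masked result. A scalar model of that semantics, purely illustrative (extractf64x4_mask_model is a hypothetical helper, not a real API), might look like:

/* Scalar sketch of what the masked 64x4 extract computes: for each
   result lane i, keep the extracted element when mask bit i is set,
   otherwise take lane i of the pass-through vector w. */
static void extractf64x4_mask_model(const double a[8], int imm,
                                    const double w[4], unsigned char u,
                                    double out[4]) {
  const double *src = a + (imm & 1) * 4;  /* imm selects the low or high 256-bit lane */
  for (int i = 0; i < 4; ++i)
    out[i] = ((u >> i) & 1) ? src[i] : w[i];
}

The zero-masking (maskz) variants are the w == {0, 0, 0, 0} case of the same model, and passing u == 0xFF (the (__mmask8)-1 in the unmasked macros) makes the pass-through operand dead, which is why an undefined vector is safe there.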