author     Craig Topper <craig.topper@intel.com>  2018-06-10 06:01:42 +0000
committer  Craig Topper <craig.topper@intel.com>  2018-06-10 06:01:42 +0000
commit     8c949cc5961e861aa79b1d780dc070a459045b19 (patch)
tree       7af5db7db423cbd63be4d69a61e987d7b30bbc9a /lib/Headers/avx512fintrin.h
parent     79d87e56ee55943d1d1b9f50fdf8374bc858e782 (diff)
[X86] Remove masking from the 512-bit packed floating point add/sub/mul/div builtins. Use select in IR instead.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334359 91177308-0d34-0410-b5e6-96231b3b80d8
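
In the header, each masked macro now wraps the unmasked rounding builtin in a
select builtin instead of calling a combined compute-and-mask builtin; the
select builtin lowers to an ordinary IR select. A minimal before/after sketch,
taken from the _mm512_mask_add_round_pd case in the diff below:

/* Before: one builtin carried the operands, the passthru vector, the mask,
   and the rounding mode, so the blend happened inside the builtin. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R))

/* After: the unmasked builtin does the arithmetic and a separate select
   builtin does the blend. The merge form passes W as the false operand;
   the maskz form passes _mm512_setzero_pd() instead. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))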
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--  lib/Headers/avx512fintrin.h | 172
 1 file changed, 70 insertions(+), 102 deletions(-)
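
For reference, a hedged usage sketch (the function demo is hypothetical, not
part of this commit) showing the masking semantics the rewritten macros must
preserve: lanes whose mask bit is set receive the rounded sum, while cleared
lanes keep the passthru lane (merge form) or become zero (maskz form):

#include <immintrin.h>

/* Hypothetical illustration only. */
__m512d demo(__m512d w, __mmask8 u, __m512d a, __m512d b) {
  /* Merge masking: lanes with a clear bit in u keep the lane from w. */
  __m512d merged = _mm512_mask_add_round_pd(w, u, a, b,
                                            _MM_FROUND_CUR_DIRECTION);
  /* Zero masking: lanes with a clear bit in u become +0.0. */
  __m512d zeroed = _mm512_maskz_add_round_pd(u, a, b,
                                             _MM_FROUND_CUR_DIRECTION);
  return _mm512_add_pd(merged, zeroed);
}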
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 9daa559bb1..a4c0101959 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -2060,40 +2060,32 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_add_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_add_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd())
#define _mm512_add_round_ps(A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_add_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2195,40 +2187,32 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_sub_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd())
#define _mm512_sub_round_ps(A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
-#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W))
-#define _mm512_maskz_sub_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+#define _mm512_maskz_sub_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2330,40 +2314,32 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_mul_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd())
#define _mm512_mul_round_ps(A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
-#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W))
-#define _mm512_maskz_mul_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+#define _mm512_maskz_mul_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2478,40 +2454,32 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_div_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_div_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd())
#define _mm512_div_round_ps(A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
-#define _mm512_mask_div_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+#define _mm512_mask_div_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W))
-#define _mm512_maskz_div_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+#define _mm512_maskz_div_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps())
#define _mm512_roundscale_ps(A, B) \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \