diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-11-12 07:16:59 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-11-12 07:16:59 +0000 |
commit | 775c7affdce3b4ab05558f0cfccebf363da437b4 (patch) | |
tree | 44dcde0bc4d2bfcb4ef38d06d35268c16a96c307 /lib/Headers/avx512bwintrin.h | |
parent | 5a77ad29c9e42187c10aeb61cd8d0089d04954c5 (diff) | |
download | clang-775c7affdce3b4ab05558f0cfccebf363da437b4.tar.gz |
[AVX-512] Convert the rest of the masked shift by immediate and by single element builtins over to the newly added unmasked builtins and a select.
This should also fix PR30691 since the new builtins are handled like the legacy builtins in the backend.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@286714 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512bwintrin.h')
-rw-r--r-- | lib/Headers/avx512bwintrin.h | 178 |
1 files changed, 87 insertions, 91 deletions
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index fe1d4122d5..e4dfe212b7 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -1718,49 +1718,48 @@ _mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_sll_epi16 (__m512i __A, __m128i __B)
+_mm512_sll_epi16(__m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) -1);
+  return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-                       __m128i __B)
+_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi) __W,
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_sll_epi16(__A, __B), \
+                                          (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_sll_epi16(__A, __B), \
+                                          (__v32hi)_mm512_setzero_hi());
 }
 
-#define _mm512_slli_epi16(A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)-1); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_slli_epi16(__m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
+}
 
-#define _mm512_mask_slli_epi16(W, U, A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)(__m512i)(W), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                         (__v32hi)_mm512_slli_epi16(__A, __B), \
+                                         (__v32hi)__W);
+}
 
-#define _mm512_maskz_slli_epi16(U, A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                        (__v32hi)_mm512_slli_epi16(__A, __B), \
+                                        (__v32hi)_mm512_setzero_hi());
+}
 
 #define _mm512_bslli_epi128(a, imm) __extension__ ({ \
   (__m512i)__builtin_shufflevector( \
@@ -1892,95 +1891,92 @@ _mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_sra_epi16 (__m512i __A, __m128i __B)
+_mm512_sra_epi16(__m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) -1);
+  return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-                       __m128i __B)
+_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi) __W,
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_sra_epi16(__A, __B), \
+                                          (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_sra_epi16(__A, __B), \
+                                          (__v32hi)_mm512_setzero_hi());
 }
 
-#define _mm512_srai_epi16(A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)-1); })
-
-#define _mm512_mask_srai_epi16(W, U, A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)(__m512i)(W), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srai_epi16(__m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
+}
 
-#define _mm512_maskz_srai_epi16(U, A, B) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                         (__v32hi)_mm512_srai_epi16(__A, __B), \
+                                         (__v32hi)__W);
+}
 
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                         (__v32hi)_mm512_srai_epi16(__A, __B), \
+                                         (__v32hi)_mm512_setzero_hi());
+}
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_srl_epi16 (__m512i __A, __m128i __B)
+_mm512_srl_epi16(__m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) -1);
+  return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-                       __m128i __B)
+_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi) __W,
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_srl_epi16(__A, __B), \
+                                          (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
 {
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-                                                 (__v8hi) __B,
-                                                 (__v32hi)
-                                                 _mm512_setzero_hi (),
-                                                 (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                          (__v32hi)_mm512_srl_epi16(__A, __B), \
+                                          (__v32hi)_mm512_setzero_hi());
 }
 
-#define _mm512_srli_epi16(A, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)-1); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srli_epi16(__m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
+}
 
-#define _mm512_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
-                                         (__v32hi)(__m512i)(W), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                         (__v32hi)_mm512_srli_epi16(__A, __B), \
+                                         (__v32hi)__W);
+}
 
-#define _mm512_maskz_srli_epi16(U, A, imm) __extension__ ({ \
-  (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
-                                         (__v32hi)_mm512_setzero_hi(), \
-                                         (__mmask32)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
+{
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \
+                                         (__v32hi)_mm512_srli_epi16(__A, __B), \
+                                         (__v32hi)_mm512_setzero_hi());
+}
 
 #define _mm512_bsrli_epi128(a, imm) __extension__ ({ \
   (__m512i)__builtin_shufflevector( \