diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-04 13:34:44 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-04 13:34:44 +0000 |
commit | 4fa1f773ded58f1d018a863e213e3de6a8ee08ed (patch) | |
tree | 6f14153f6890287fe3ccf7ec7726bd478677f7ed /lib | |
parent | a2bfa04b9da19800471f2f33382ebb99f93a1e9a (diff) | |
download | clang-4fa1f773ded58f1d018a863e213e3de6a8ee08ed.tar.gz |
[X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274502 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Headers/avx512fintrin.h | 68 | ||||
-rw-r--r-- | lib/Headers/avx512vlintrin.h | 54 |
2 files changed, 69 insertions, 53 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index ab8f3d1b41..c0d44984fd 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -8678,35 +8678,49 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) -(__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)); }) -#define _mm512_permutex_pd(X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1); }) - -#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) - -#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) - -#define _mm512_permutex_epi64(X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1); }) +#define _mm512_permutex_pd(X, C) __extension__ ({ \ + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ + (__v8df)_mm512_undefined_pd(), \ + 0 + (((C) & 0x03) >> 0), \ + 0 + (((C) & 0x0c) >> 2), \ + 0 + (((C) & 0x30) >> 4), \ + 0 + (((C) & 0xc0) >> 6), \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) + +#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)(__m512d)(W)); }) -#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(M)); }) +#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd()); }) -#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(M)); }) +#define _mm512_permutex_epi64(X, C) __extension__ ({ \ + (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ + (__v8di)_mm512_undefined_epi32(), \ + 0 + (((C) & 0x03) >> 0), \ + 0 + (((C) & 0x0c) >> 2), \ + 0 + (((C) & 0x30) >> 4), \ + 0 + (((C) & 0xc0) >> 6), \ + 4 + (((C) & 0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) + +#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)(__m512i)(W)); }) + +#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)_mm512_setzero_si512()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutexvar_pd (__m512i __X, __m512d __Y) diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 5b875d534c..b5b371823a 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -8806,35 +8806,37 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale)); }) -#define _mm256_mask_permutex_pd(W, U, X, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_permutex_pd(U, X, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) - -#define _mm256_permutex_pd(X, M) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(M), \ - (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1); }) +#define _mm256_permutex_pd(X, C) __extension__ ({ \ + (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ + (__v4df)_mm256_undefined_pd(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) -#define _mm256_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)(__m256i)(W), \ - (__mmask8)(M)); }) +#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permutex_pd((X), (C)), \ + (__v4df)(__m256d)(W)); }) -#define _mm256_maskz_permutex_epi64(M, X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(M)); }) +#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permutex_pd((X), (C)), \ + (__v4df)_mm256_setzero_pd()); }) -#define _mm256_permutex_epi64(X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1); }) +#define _mm256_permutex_epi64(X, C) __extension__ ({ \ + (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ + (__v4di)_mm256_undefined_si256(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + +#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_permutex_epi64((X), (C)), \ + (__v4di)(__m256i)(W)); }) + +#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_permutex_epi64((X), (C)), \ + (__v4di)_mm256_setzero_si256()); }) static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd (__m256i __X, __m256d __Y) |