summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-04 13:34:44 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-04 13:34:44 +0000
commit 4fa1f773ded58f1d018a863e213e3de6a8ee08ed (patch)
tree 6f14153f6890287fe3ccf7ec7726bd478677f7ed /lib
parent a2bfa04b9da19800471f2f33382ebb99f93a1e9a (diff)
download clang-4fa1f773ded58f1d018a863e213e3de6a8ee08ed.tar.gz
[X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274502 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Headers/avx512fintrin.h 68
-rw-r--r-- lib/Headers/avx512vlintrin.h 54
2 files changed, 69 insertions, 53 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index ab8f3d1b41..c0d44984fd 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -8678,35 +8678,49 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
-(__v2df)(__m128d)(Y), \
(__mmask8)(U), (int)(R)); })
-#define _mm512_permutex_pd(X, M) __extension__ ({ \
- (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
-
-#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \
- (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
-
-#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \
- (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
-
-#define _mm512_permutex_epi64(X, I) __extension__ ({ \
- (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)_mm512_undefined_epi32(), \
- (__mmask8)-1); })
+#define _mm512_permutex_pd(X, C) __extension__ ({ /* Permute the 8 doubles of X using the four 2-bit fields of C. */ \
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+ (__v8df)_mm512_undefined_pd(), /* second shuffle operand; every index below is in 0..7, i.e. refers to X */ \
+ 0 + (((C) & 0x03) >> 0), /* result[0] = X[C bits 1:0] */ \
+ 0 + (((C) & 0x0c) >> 2), /* result[1] = X[C bits 3:2] */ \
+ 0 + (((C) & 0x30) >> 4), /* result[2] = X[C bits 5:4] */ \
+ 0 + (((C) & 0xc0) >> 6), /* result[3] = X[C bits 7:6] */ \
+ 4 + (((C) & 0x03) >> 0), /* result[4..7] reuse the same four fields, offset by 4, */ \
+ 4 + (((C) & 0x0c) >> 2), /* so the upper four elements of X are permuted */ \
+ 4 + (((C) & 0x30) >> 4), /* independently of the lower four, with the */ \
+ 4 + (((C) & 0xc0) >> 6)); }) /* same pattern. C must be a compile-time constant for the shuffle. */
+
+#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ /* Masked form: permute X by C, then select against W under mask U. */ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), /* U is narrowed to an 8-bit mask */ \
+ (__v8df)_mm512_permutex_pd((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v8df)(__m512d)(W)); }) /* NOTE(review): presumably W supplies elements whose bit in U is clear - confirm against the select builtin */
-#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \
- (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(M)); })
+#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ /* Zero-masked form: permute X by C, then select against an all-zero vector under mask U. */ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), /* U is narrowed to an 8-bit mask */ \
+ (__v8df)_mm512_permutex_pd((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v8df)_mm512_setzero_pd()); }) /* NOTE(review): presumably elements whose bit in U is clear come from this zero operand - confirm against the select builtin */
-#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \
- (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(M)); })
+#define _mm512_permutex_epi64(X, C) __extension__ ({ /* Permute the 8 64-bit integers of X using the four 2-bit fields of C. */ \
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
+ (__v8di)_mm512_undefined_epi32(), /* second shuffle operand; every index below is in 0..7, i.e. refers to X */ \
+ 0 + (((C) & 0x03) >> 0), /* result[0] = X[C bits 1:0] */ \
+ 0 + (((C) & 0x0c) >> 2), /* result[1] = X[C bits 3:2] */ \
+ 0 + (((C) & 0x30) >> 4), /* result[2] = X[C bits 5:4] */ \
+ 0 + (((C) & 0xc0) >> 6), /* result[3] = X[C bits 7:6] */ \
+ 4 + (((C) & 0x03) >> 0), /* result[4..7] reuse the same four fields, offset by 4, */ \
+ 4 + (((C) & 0x0c) >> 2), /* so the upper four elements of X are permuted */ \
+ 4 + (((C) & 0x30) >> 4), /* independently of the lower four, with the */ \
+ 4 + (((C) & 0xc0) >> 6)); }) /* same pattern. C must be a compile-time constant for the shuffle. */
+
+#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ /* Masked form: permute X by C, then select against W under mask U. */ \
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), /* U is narrowed to an 8-bit mask */ \
+ (__v8di)_mm512_permutex_epi64((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v8di)(__m512i)(W)); }) /* NOTE(review): presumably W supplies elements whose bit in U is clear - confirm against the select builtin */
+
+#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ /* Zero-masked form: permute X by C, then select against an all-zero vector under mask U. */ \
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), /* U is narrowed to an 8-bit mask */ \
+ (__v8di)_mm512_permutex_epi64((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v8di)_mm512_setzero_si512()); }) /* NOTE(review): presumably elements whose bit in U is clear come from this zero operand - confirm against the select builtin */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index 5b875d534c..b5b371823a 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -8806,35 +8806,37 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
(__v8si)(__m256i)(index), \
(__mmask8)(mask), (int)(scale)); })
-#define _mm256_mask_permutex_pd(W, U, X, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U)); })
-
-#define _mm256_maskz_permutex_pd(U, X, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U)); })
-
-#define _mm256_permutex_pd(X, M) __extension__ ({ \
- (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(M), \
- (__v4df)_mm256_undefined_pd(), \
- (__mmask8)-1); })
+#define _mm256_permutex_pd(X, C) __extension__ ({ /* Permute the 4 doubles of X using the four 2-bit fields of C. */ \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
+ (__v4df)_mm256_undefined_pd(), /* second shuffle operand; every index below is in 0..3, i.e. refers to X */ \
+ (C) & 0x3, ((C) & 0xc) >> 2, /* result[0] = X[C bits 1:0], result[1] = X[C bits 3:2] */ \
+ ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) /* result[2] = X[C bits 5:4], result[3] = X[C bits 7:6]; C must be a compile-time constant */
-#define _mm256_mask_permutex_epi64(W, M, X, I) __extension__ ({ \
- (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
- (__v4di)(__m256i)(W), \
- (__mmask8)(M)); })
+#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ /* Masked form: permute X by C, then select against W under mask U. */ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), /* U is narrowed to an 8-bit mask; the vector has only 4 elements */ \
+ (__v4df)_mm256_permutex_pd((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v4df)(__m256d)(W)); }) /* NOTE(review): presumably W supplies elements whose bit in U is clear - confirm against the select builtin */
-#define _mm256_maskz_permutex_epi64(M, X, I) __extension__ ({ \
- (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)(M)); })
+#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ /* Zero-masked form: permute X by C, then select against an all-zero vector under mask U. */ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), /* U is narrowed to an 8-bit mask; the vector has only 4 elements */ \
+ (__v4df)_mm256_permutex_pd((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v4df)_mm256_setzero_pd()); }) /* NOTE(review): presumably elements whose bit in U is clear come from this zero operand - confirm against the select builtin */
-#define _mm256_permutex_epi64(X, I) __extension__ ({ \
- (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
- (__v4di)_mm256_undefined_si256(), \
- (__mmask8)-1); })
+#define _mm256_permutex_epi64(X, C) __extension__ ({ /* Permute the 4 64-bit integers of X using the four 2-bit fields of C. */ \
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
+ (__v4di)_mm256_undefined_si256(), /* second shuffle operand; every index below is in 0..3, i.e. refers to X */ \
+ (C) & 0x3, ((C) & 0xc) >> 2, /* result[0] = X[C bits 1:0], result[1] = X[C bits 3:2] */ \
+ ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) /* result[2] = X[C bits 5:4], result[3] = X[C bits 7:6]; C must be a compile-time constant */
+
+#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ /* Masked form: permute X by C, then select against W under mask U. */ \
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), /* U is narrowed to an 8-bit mask; the vector has only 4 elements */ \
+ (__v4di)_mm256_permutex_epi64((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v4di)(__m256i)(W)); }) /* NOTE(review): presumably W supplies elements whose bit in U is clear - confirm against the select builtin */
+
+#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ /* Zero-masked form: permute X by C, then select against an all-zero vector under mask U. */ \
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), /* U is narrowed to an 8-bit mask; the vector has only 4 elements */ \
+ (__v4di)_mm256_permutex_epi64((X), (C)), /* permuted candidate produced by the unmasked macro */ \
+ (__v4di)_mm256_setzero_si256()); }) /* NOTE(review): presumably elements whose bit in U is clear come from this zero operand - confirm against the select builtin */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_permutexvar_pd (__m256i __X, __m256d __Y)