diff options
author | Craig Topper <craig.topper@intel.com> | 2018-06-11 06:18:29 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-06-11 06:18:29 +0000 |
commit | adc6bb61d2581c4a5825aca5941b4e84d5a170d8 (patch) | |
tree | 953fbcee84fef456138afda8688357a491ead14f | |
parent | ceda64df0e97dfc30c4f30f23bc27a786a4bd3d2 (diff) | |
download | clang-adc6bb61d2581c4a5825aca5941b4e84d5a170d8.tar.gz |
[X86] Remove masking from dbpsadbw builtins, use select builtin instead.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334385 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/BuiltinsX86.def | 6 | ||||
-rw-r--r-- | lib/Headers/avx512bwintrin.h | 20 | ||||
-rw-r--r-- | lib/Headers/avx512vlbwintrin.h | 40 | ||||
-rw-r--r-- | lib/Sema/SemaChecking.cpp | 6 | ||||
-rw-r--r-- | test/CodeGen/avx512bw-builtins.c | 8 | ||||
-rw-r--r-- | test/CodeGen/avx512vlbw-builtins.c | 16 |
6 files changed, 45 insertions, 51 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index fc580df083..6a3016bcd3 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1745,9 +1745,9 @@ TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "nc", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "nc", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f") diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index 2590ec7154..471ec38191 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -1819,22 +1819,18 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, (__v64qi)(__m512i)_mm512_setzero_si512()) #define _mm512_dbsad_epu8(A, B, imm) \ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1) + (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm)) #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ + (__v32hi)(__m512i)(W)) #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ - (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)) + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ + (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sad_epu8 (__m512i __A, __m512i __B) diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h index a04ef6f17c..f2dfb567dd 100644 --- a/lib/Headers/avx512vlbwintrin.h +++ b/lib/Headers/avx512vlbwintrin.h @@ -2714,40 +2714,32 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, (__v32qi)_mm256_setzero_si256()) #define _mm_dbsad_epu8(A, B, imm) \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm)) #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)(__m128i)(W), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ + (__v8hi)(__m128i)(W)) #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ - (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ + (__v8hi)_mm_setzero_si128()) #define _mm256_dbsad_epu8(A, B, imm) \ - (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1) + (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm)) #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)(__m256i)(W), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ + (__v16hi)(__m256i)(W)) #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ - (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)) + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ + (__v16hi)_mm256_setzero_si256()) #undef __DEFAULT_FN_ATTRS diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index bf1e3a8b4d..8730408c8d 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -2838,9 +2838,9 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: - case X86::BI__builtin_ia32_dbpsadbw128_mask: - case X86::BI__builtin_ia32_dbpsadbw256_mask: - case X86::BI__builtin_ia32_dbpsadbw512_mask: + case X86::BI__builtin_ia32_dbpsadbw128: + case X86::BI__builtin_ia32_dbpsadbw256: + case X86::BI__builtin_ia32_dbpsadbw512: case X86::BI__builtin_ia32_vpshldd128_mask: case X86::BI__builtin_ia32_vpshldd256_mask: case X86::BI__builtin_ia32_vpshldd512_mask: diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 6803652a79..7d2e1fadf4 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -1934,19 +1934,21 @@ __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){ __m512i test_mm512_mm_dbsad_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mm_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + // CHECK: @llvm.x86.avx512.dbpsadbw.512 return _mm512_dbsad_epu8(__A, __B, 170); } __m512i test_mm512_mm_mask_dbsad_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mm_mask_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + // CHECK: @llvm.x86.avx512.dbpsadbw.512 + //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m512i test_mm512_mm_maskz_dbsad_epu8(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mm_maskz_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + // CHECK: @llvm.x86.avx512.dbpsadbw.512 + //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_dbsad_epu8(__U, __A, __B, 170); } diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c index 9cc58b6505..427037cafb 100644 --- a/test/CodeGen/avx512vlbw-builtins.c +++ b/test/CodeGen/avx512vlbw-builtins.c @@ -2964,37 +2964,41 @@ __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { __m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128 + // CHECK: @llvm.x86.avx512.dbpsadbw.128 return _mm_dbsad_epu8(__A, __B, 170); } __m128i test_mm_mask_dbsad_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128 + // CHECK: @llvm.x86.avx512.dbpsadbw.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m128i test_mm_maskz_dbsad_epu8(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128 + // CHECK: @llvm.x86.avx512.dbpsadbw.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_dbsad_epu8(__U, __A, __B, 170); } __m256i test_mm256_dbsad_epu8(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256 + // CHECK: @llvm.x86.avx512.dbpsadbw.256 return _mm256_dbsad_epu8(__A, __B, 170); } __m256i test_mm256_mask_dbsad_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256 + // CHECK: @llvm.x86.avx512.dbpsadbw.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170); } __m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_dbsad_epu8 - // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256 + // CHECK: @llvm.x86.avx512.dbpsadbw.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170); } __mmask8 test_mm_movepi16_mask(__m128i __A) { |