diff options
author | Craig Topper <craig.topper@intel.com> | 2017-11-10 05:20:32 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-11-10 05:20:32 +0000 |
commit | 41ea3d5b6cc3bdb5a44b1e9bc292f635471d44c3 (patch) | |
tree | fa48f4e8964e8bab034494b9724f055373758fed /test/CodeGen/fma4-builtins.c | |
parent | 23cd700298fc43ba058cc55fcb5a9c7752efa618 (diff) | |
download | clang-41ea3d5b6cc3bdb5a44b1e9bc292f635471d44c3.tar.gz |
[X86] Reduce the number of FMA builtins needed by the frontend by adding negates to operands of the fmadd and fmaddsub builtins.
The backend should be able to combine the negates to create fmsub, fnmadd, and fnmsub. faddsub converting to fsubadd still needs work I think, but should be very doable.
This matches what we already do for the masked builtins.
This only covers the packed builtins. Scalar builtins will be done after FMA4 is fixed.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@317873 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/fma4-builtins.c')
-rw-r--r-- | test/CodeGen/fma4-builtins.c | 52 |
1 files changed, 36 insertions, 16 deletions
diff --git a/test/CodeGen/fma4-builtins.c b/test/CodeGen/fma4-builtins.c index 3ca5fac688..68a2ec6e3a 100644 --- a/test/CodeGen/fma4-builtins.c +++ b/test/CodeGen/fma4-builtins.c @@ -29,13 +29,15 @@ __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msub_ps - // CHECK: @llvm.x86.fma.vfmsub.ps + // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]]) return _mm_msub_ps(a, b, c); } __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_msub_pd - // CHECK: @llvm.x86.fma.vfmsub.pd + // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]]) return _mm_msub_pd(a, b, c); } @@ -53,13 +55,15 @@ __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmacc_ps - // CHECK: @llvm.x86.fma.vfnmadd.ps + // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}}) return _mm_nmacc_ps(a, b, c); } __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmacc_pd - // CHECK: @llvm.x86.fma.vfnmadd.pd + // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}}) return _mm_nmacc_pd(a, b, c); } @@ -77,13 +81,17 @@ __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmsub_ps - // CHECK: @llvm.x86.fma.vfnmsub.ps + // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ps(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]]) return _mm_nmsub_ps(a, b, c); } __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmsub_pd - // CHECK: @llvm.x86.fma.vfnmsub.pd + // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]]) return _mm_nmsub_pd(a, b, c); } @@ -113,13 +121,15 @@ __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msubadd_ps - // CHECK: @llvm.x86.fma.vfmsubadd.ps + // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]]) return _mm_msubadd_ps(a, b, c); } __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_msubadd_pd - // CHECK: @llvm.x86.fma.vfmsubadd.pd + // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]]) return _mm_msubadd_pd(a, b, c); } @@ -137,37 +147,45 @@ __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_msub_ps - // CHECK: @llvm.x86.fma.vfmsub.ps.256 + // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} + // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x float> [[NEG]]) return _mm256_msub_ps(a, b, c); } __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_msub_pd - // CHECK: @llvm.x86.fma.vfmsub.pd.256 + // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]]) return _mm256_msub_pd(a, b, c); } __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_nmacc_ps - // CHECK: @llvm.x86.fma.vfnmadd.ps.256 + // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} + // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> %{{.+}}) return _mm256_nmacc_ps(a, b, c); } __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_nmacc_pd - // CHECK: @llvm.x86.fma.vfnmadd.pd.256 + // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> %{{.+}}) return _mm256_nmacc_pd(a, b, c); } __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_nmsub_ps - // CHECK: @llvm.x86.fma.vfnmsub.ps.256 + // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} + // CHECK: [[NEG2:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} + // CHECK: @llvm.x86.fma.vfmadd.ps.256(<8 x float> [[NEG]], <8 x float> %{{.+}}, <8 x float> [[NEG2]]) return _mm256_nmsub_ps(a, b, c); } __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_nmsub_pd - // CHECK: @llvm.x86.fma.vfnmsub.pd.256 + // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.pd.256(<4 x double> [[NEG]], <4 x double> %{{.+}}, <4 x double> [[NEG2]]) return _mm256_nmsub_pd(a, b, c); } @@ -185,12 +203,14 @@ __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_msubadd_ps - // CHECK: @llvm.x86.fma.vfmsubadd.ps.256 + // CHECK: [[NEG:%.+]] = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}} + // CHECK: @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.+}}, <8 x float> [[NEG]]) return _mm256_msubadd_ps(a, b, c); } __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_msubadd_pd - // CHECK: @llvm.x86.fma.vfmsubadd.pd.256 + // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}} + // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]]) return _mm256_msubadd_pd(a, b, c); } |