summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2017-05-11 18:01:12 +0000
committer: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2017-05-11 18:01:12 +0000
commit: 17e3210c46e4e47882c5bedc550b64d5af80e632 (patch)
tree: 386c70bd14bff7e6ff87e00912026ee01b109c09 /gcc
parent: 23796153cce409ff9e9615d03a82263e4dab88c4 (diff)
download: gcc-17e3210c46e4e47882c5bedc550b64d5af80e632.tar.gz
* config/i386/avx512fintrin.h (_mm_mask_rsqrt14_sd)
(_mm_maskz_rsqrt14_sd, _mm_mask_rsqrt14_ss, _mm_maskz_rsqrt14_ss): New intrinsics.
* config/i386/i386-builtin.def (__builtin_ia32_rsqrt14sd_mask)
(__builtin_ia32_rsqrt14ss_mask): New builtins.
* config/i386/sse.md (rsqrt14_<mode>_mask): New pattern.

testsuite/ChangeLog:
* gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics.
* gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247920 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/avx512fintrin.h  40
-rw-r--r--  gcc/config/i386/i386-builtin.def  2
-rw-r--r--  gcc/config/i386/sse.md  17
-rw-r--r--  gcc/testsuite/ChangeLog  7
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c  17
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c  7
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c  17
8 files changed, 109 insertions, 4 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 7ac7cb6e836..ce139d137f2 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -1843,6 +1843,26 @@ _mm_rsqrt14_sd (__m128d __A, __m128d __B)
(__v2df) __A);
}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
@@ -1851,6 +1871,26 @@ _mm_rsqrt14_ss (__m128 __A, __m128 __B)
(__v4sf) __A);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 80ee7e10d9d..1e29198ad07 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1377,7 +1377,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14s
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a118fcea520..220f69f0980 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1847,6 +1847,23 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
+(define_insn "rsqrt14_<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RSQRT14)
+ (match_operand:VF_128 3 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
+ (match_operand:VF_128 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2bb56df5dbf..eed64824127 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2017-05-11 Julia Koval <julia.koval@intel.com>
+
+ * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics.
+ * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
+
2017-05-11 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/70538
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
index de37e61a102..8f2369263f1 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
@@ -1,14 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
volatile __mmask8 m;
void extern
avx512f_test (void)
{
x1 = _mm_rsqrt14_sd (x1, x2);
+ x1 = _mm_mask_rsqrt14_sd (x3, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_sd (m, x1, x2);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
index dc96158b289..d106682ff0e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
@@ -4,6 +4,7 @@
#include <math.h>
#include "avx512f-check.h"
+#include "avx512f-helper.h"
static void
compute_vrsqrt14sd (double *s1, double *s2, double *r)
@@ -15,7 +16,8 @@ compute_vrsqrt14sd (double *s1, double *s2, double *r)
static void
avx512f_test (void)
{
- union128d s1, s2, res1;
+ union128d s1, s2, res1, res2, res3;
+ __mmask8 m = 0;
double res_ref[2];
s1.x = _mm_set_pd (-3.0, 111.111);
@@ -27,4 +29,17 @@ avx512f_test (void)
if (check_fp_union128d (res1, res_ref))
abort ();
+
+ res2.x = _mm_set_pd (-4.0, DEFAULT_VALUE);
+ res2.x = _mm_mask_rsqrt14_sd(res2.x, m, s1.x, s2.x);
+
+ MASK_MERGE (d) (res_ref, m, 1);
+ if (checkVd (res2.a, res_ref, 2))
+ abort();
+
+ res3.x = _mm_maskz_rsqrt14_sd(m, s1.x, s2.x);
+
+ MASK_ZERO (d) (res_ref, m, 1);
+ if (checkVd (res3.a, res_ref, 2))
+ abort();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
index ba0811441fc..098df0d7370 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
@@ -1,13 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
void extern
avx512f_test (void)
{
x1 = _mm_rsqrt14_ss (x1, x2);
+ x1 = _mm_mask_rsqrt14_ss (x3, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_ss (m, x1, x2);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
index 10d8664cda8..739a852cea6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
@@ -4,6 +4,7 @@
#include <math.h>
#include "avx512f-check.h"
+#include "avx512f-helper.h"
static void
compute_vrsqrt14ss (float *s1, float *s2, float *r)
@@ -17,7 +18,8 @@ compute_vrsqrt14ss (float *s1, float *s2, float *r)
static void
avx512f_test (void)
{
- union128 s1, s2, res1;
+ union128 s1, s2, res1, res2, res3;
+ __mmask8 m = 0;
float res_ref[4];
s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
@@ -29,4 +31,17 @@ avx512f_test (void)
if (check_fp_union128 (res1, res_ref))
abort ();
+
+ res2.x = _mm_set_ps (5.0, 6.0, 7.0, DEFAULT_VALUE);
+ res2.x = _mm_mask_rsqrt14_ss(res2.x, m, s1.x, s2.x);
+
+ MASK_MERGE () (res_ref, m, 1);
+ if (checkVf (res2.a, res_ref, 4))
+ abort();
+
+ res3.x = _mm_maskz_rsqrt14_ss(m, s1.x, s2.x);
+
+ MASK_ZERO () (res_ref, m, 1);
+ if (checkVf (res3.a, res_ref, 4))
+ abort();
}