diff options
author | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-05-11 18:01:12 +0000 |
---|---|---|
committer | uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-05-11 18:01:12 +0000 |
commit | 17e3210c46e4e47882c5bedc550b64d5af80e632 (patch) | |
tree | 386c70bd14bff7e6ff87e00912026ee01b109c09 /gcc | |
parent | 23796153cce409ff9e9615d03a82263e4dab88c4 (diff) | |
download | gcc-17e3210c46e4e47882c5bedc550b64d5af80e632.tar.gz |
* config/i386/avx512fintrin.h (_mm_mask_rsqrt14_sd)
(_mm_maskz_rsqrt14_sd, _mm_mask_rsqrt14_ss, _mm_maskz_rsqrt14_ss):
New intrinsics.
* config/i386/i386-builtin.def (__builtin_ia32_rsqrt14sd_mask)
(__builtin_ia32_rsqrt14ss_mask): New builtins.
* config/i386/sse.md (rsqrt14_<mode>_mask): New pattern.
testsuite/ChangeLog:
* gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics.
* gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247920 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 40 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 2 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 17 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c | 17 |
8 files changed, 109 insertions, 4 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 7ac7cb6e836..ce139d137f2 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1843,6 +1843,26 @@ _mm_rsqrt14_sd (__m128d __A, __m128d __B) (__v2df) __A); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + (__mmask8) __U); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, + (__v2df) __A, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt14_ss (__m128 __A, __m128 __B) @@ -1851,6 +1871,26 @@ _mm_rsqrt14_ss (__m128 __A, __m128 __B) (__v4sf) __A); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + #ifdef __OPTIMIZE__ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 80ee7e10d9d..1e29198ad07 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1377,7 +1377,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14s BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a118fcea520..220f69f0980 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1847,6 +1847,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) +(define_insn "rsqrt14_<mode>_mask" + [(set (match_operand:VF_128 0 "register_operand" "=v") + (vec_merge:VF_128 + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] + UNSPEC_RSQRT14) + (match_operand:VF_128 3 "vector_move_operand" "0C") + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")) + (match_operand:VF_128 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512F" + "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + (define_insn "sse_vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2bb56df5dbf..eed64824127 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2017-05-11 Julia Koval <julia.koval@intel.com> + + * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics. + * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto. + * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto. + * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto. + 2017-05-11 Paolo Carlini <paolo.carlini@oracle.com> PR c++/70538 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c index de37e61a102..8f2369263f1 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c @@ -1,14 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> -volatile __m128d x1, x2; +volatile __m128d x1, x2, x3; volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_rsqrt14_sd (x1, x2); + x1 = _mm_mask_rsqrt14_sd (x3, m, x1, x2); + x1 = _mm_maskz_rsqrt14_sd (m, x1, x2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c index dc96158b289..d106682ff0e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c @@ -4,6 +4,7 @@ #include <math.h> #include "avx512f-check.h" +#include "avx512f-helper.h" static void compute_vrsqrt14sd (double *s1, double *s2, double *r) @@ -15,7 +16,8 @@ compute_vrsqrt14sd (double *s1, double *s2, double *r) static void avx512f_test (void) { - union128d s1, s2, res1; + union128d s1, s2, res1, res2, res3; + __mmask8 m = 0; double res_ref[2]; s1.x = _mm_set_pd (-3.0, 111.111); @@ -27,4 +29,17 @@ avx512f_test (void) if (check_fp_union128d (res1, res_ref)) abort (); + + res2.x = _mm_set_pd (-4.0, DEFAULT_VALUE); + res2.x = _mm_mask_rsqrt14_sd(res2.x, m, s1.x, s2.x); + + MASK_MERGE (d) (res_ref, m, 1); + if (checkVd (res2.a, res_ref, 2)) + abort(); + + res3.x = _mm_maskz_rsqrt14_sd(m, s1.x, s2.x); + + MASK_ZERO (d) (res_ref, m, 1); + if (checkVd (res3.a, res_ref, 2)) + abort(); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c index ba0811441fc..098df0d7370 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c @@ -1,13 +1,18 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> -volatile __m128 x1, x2; +volatile __m128 x1, x2, x3; +volatile __mmask8 m; void extern avx512f_test (void) { x1 = _mm_rsqrt14_ss (x1, x2); + x1 = _mm_mask_rsqrt14_ss (x3, m, x1, x2); + x1 = _mm_maskz_rsqrt14_ss (m, x1, x2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c index 10d8664cda8..739a852cea6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c @@ -4,6 +4,7 @@ #include <math.h> #include "avx512f-check.h" +#include "avx512f-helper.h" static void compute_vrsqrt14ss (float *s1, float *s2, float *r) @@ -17,7 +18,8 @@ compute_vrsqrt14ss (float *s1, float *s2, float *r) static void avx512f_test (void) { - union128 s1, s2, res1; + union128 s1, s2, res1, res2, res3; + __mmask8 m = 0; float res_ref[4]; s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46); @@ -29,4 +31,17 @@ avx512f_test (void) if (check_fp_union128 (res1, res_ref)) abort (); + + res2.x = _mm_set_ps (5.0, 6.0, 7.0, DEFAULT_VALUE); + res2.x = _mm_mask_rsqrt14_ss(res2.x, m, s1.x, s2.x); + + MASK_MERGE () (res_ref, m, 1); + if (checkVf (res2.a, res_ref, 4)) + abort(); + + res3.x = _mm_maskz_rsqrt14_ss(m, s1.x, s2.x); + + MASK_ZERO () (res_ref, m, 1); + if (checkVf (res3.a, res_ref, 4)) + abort(); } |