diff options
author | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-01-14 06:21:06 +0000 |
---|---|---|
committer | kyukhin <kyukhin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-01-14 06:21:06 +0000 |
commit | fbf4df62d6b0b02afe0d73288b1ff6c41f5ba5d1 (patch) | |
tree | 470c6de9ea5210430e48b4be6c4cf9b55623de2f /gcc/config/i386/avx512erintrin.h | |
parent | fb58de34645242336b51f8cc1552f5c9897199c3 (diff) | |
download | gcc-fbf4df62d6b0b02afe0d73288b1ff6c41f5ba5d1.tar.gz |
gcc/
* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): New.
(_mm_rcp28_round_ss): Ditto.
(_mm_rsqrt28_round_sd): Ditto.
(_mm_rsqrt28_round_ss): Ditto.
(_mm_rcp28_sd): Ditto.
(_mm_rcp28_ss): Ditto.
(_mm_rsqrt28_sd): Ditto.
(_mm_rsqrt28_ss): Ditto.
* config/i386/avx512fintrin.h (_mm512_stream_load_si512): Ditto.
* config/i386/i386-builtin-types.def (V8DI_FTYPE_PV8DI): Ditto.
* config/i386/i386.c (IX86_BUILTIN_MOVNTDQA512): Ditto.
(IX86_BUILTIN_RCP28SD): Ditto.
(IX86_BUILTIN_RCP28SS): Ditto.
(IX86_BUILTIN_RSQRT28SD): Ditto.
(IX86_BUILTIN_RSQRT28SS): Ditto.
(bdesc_special_args): Define __builtin_ia32_movntdqa512,
__builtin_ia32_rcp28sd_round, __builtin_ia32_rcp28ss_round,
__builtin_ia32_rsqrt28sd_round, __builtin_ia32_rsqrt28ss_round.
(ix86_expand_special_args_builtin): Expand new FTYPE.
* config/i386/sse.md (define_mode_attr "sse4_1_avx2"): Expand to V8DI.
(srcp14<mode>): Make insn unary.
(avx512f_vmscalef<mode><round_name>): Use substed predicate.
(avx512f_sgetexp<mode><round_saeonly_name>): Ditto.
(avx512f_rndscale<mode><round_saeonly_name>): Ditto.
(<sse4_1_avx2>_movntdqa): Extend to 512 bits.
(avx512er_exp2<mode><mask_name><round_saeonly_name>):
Fix rounding: make it SAE only.
(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>): Ditto.
(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>): Ditto.
(avx512er_vmrcp28<mode><round_saeonly_name>): Ditto.
(avx512er_vmrsqrt28<mode><round_saeonly_name>): Ditto.
(avx512f_getmant<mode><mask_name><round_saeonly_name>): Ditto.
* config/i386/subst.md (round_saeonly_mask_scalar_operand3): Remove.
(round_saeonly_mask_scalar_operand4): Ditto.
(round_saeonly_mask_scalar_op3): Ditto.
(round_saeonly_mask_scalar_op4): Ditto.
gcc/testsuite/
* gcc.target/i386/avx-1.c: Fix __builtin_ia32_exp2ps_mask,
__builtin_ia32_exp2pd_mask, __builtin_ia32_rcp28ps_mask,
__builtin_ia32_rcp28pd_mask, __builtin_ia32_rsqrt28ps_mask,
__builtin_ia32_rsqrt28pd_mask. Add __builtin_ia32_rcp28ss_round,
__builtin_ia32_rcp28sd_round, __builtin_ia32_rsqrt28ss_round,
__builtin_ia32_rsqrt28sd_round.
* gcc.target/i386/avx512er-vexp2pd-1.c: Fix rounding mode.
* gcc.target/i386/avx512er-vexp2ps-1.c: Ditto.
* gcc.target/i386/avx512er-vrcp28pd-1.c: Ditto.
* gcc.target/i386/avx512er-vrcp28ps-1.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28pd-1.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28ps-1.c: Ditto.
* gcc.target/i386/avx512er-vrcp28sd-1.c: New.
* gcc.target/i386/avx512er-vrcp28sd-2.c: Ditto.
* gcc.target/i386/avx512er-vrcp28ss-1.c: Ditto.
* gcc.target/i386/avx512er-vrcp28ss-2.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28sd-1.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28ss-1.c: Ditto.
* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
* gcc.target/i386/avx512f-vmovntdqa-1.c: Ditto.
* gcc.target/i386/avx512f-vmovntdqa-2.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix.
* gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
* gcc.target/i386/sse-22.c: Extend with new built-ins,
fix wrong rounding mode (see above).
* gcc.target/i386/sse-23.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@206596 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386/avx512erintrin.h')
-rw-r--r-- | gcc/config/i386/avx512erintrin.h | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index f442f2bec94..6fe05bc6608 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -159,6 +159,24 @@ _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
 						     (__mmask16) __U, __R);
 }
 
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __A,
+						 (__v2df) __B,
+						 __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
+{
+  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __A,
+						(__v4sf) __B,
+						__R);
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
@@ -214,6 +232,25 @@ _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
 						       (__v16sf) _mm512_setzero_ps (),
 						       (__mmask16) __U, __R);
 }
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __A,
+						   (__v2df) __B,
+						   __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
+{
+  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __A,
+						  (__v4sf) __B,
+						  __R);
+}
+
 #else
 #define _mm512_exp2a23_round_pd(A, C)            \
     __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
@@ -268,6 +305,19 @@ _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
 
 #define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
     __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+
+#define _mm_rcp28_round_sd(A, B, R)	\
+    __builtin_ia32_rcp28sd_round(A, B, R)
+
+#define _mm_rcp28_round_ss(A, B, R)	\
+    __builtin_ia32_rcp28ss_round(A, B, R)
+
+#define _mm_rsqrt28_round_sd(A, B, R)	\
+    __builtin_ia32_rsqrt28sd_round(A, B, R)
+
+#define _mm_rsqrt28_round_ss(A, B, R)	\
+    __builtin_ia32_rsqrt28ss_round(A, B, R)
+
 #endif
 
 #define _mm512_exp2a23_pd(A)                    \
@@ -324,6 +374,18 @@ _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
 #define _mm512_maskz_rsqrt28_ps(U, A)   \
     _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
 
+#define _mm_rcp28_sd(A, B)	\
+    __builtin_ia32_rcp28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_ss(A, B)	\
+    __builtin_ia32_rcp28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_sd(A, B)	\
+    __builtin_ia32_rsqrt28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_ss(A, B)	\
+    __builtin_ia32_rsqrt28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+
 #ifdef __DISABLE_AVX512ER__
 #undef __DISABLE_AVX512ER__
 #pragma GCC pop_options