diff options
author | jiwang <jiwang@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-25 14:49:57 +0000 |
---|---|---|
committer | jiwang <jiwang@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-25 14:49:57 +0000 |
commit | f16ee469a2fdac4b5c9dedfad84b0b835478b637 (patch) | |
tree | edb585c6d9bbaef6323ece41c3b3b8d226bdd120 /gcc | |
parent | ded47ca7d303d81b8fbac9a67c56f616c3145161 (diff) | |
download | gcc-f16ee469a2fdac4b5c9dedfad84b0b835478b637.tar.gz |
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Support half-precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@238719 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 17 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 28 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 154 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 7 |
4 files changed, 190 insertions, 16 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5365986f252..3481f752fc1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,22 @@ 2016-07-25 Jiong Wang <jiong.wang@arm.com> + * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to + "*aarch64_mulx_elt_from_dup<mode>". + (*aarch64_mul3_elt<mode>): Update schedule type. + (*aarch64_mul3_elt_from_dup<mode>): Likewise. + (*aarch64_fma4_elt_from_dup<mode>): Likewise. + (*aarch64_fnma4_elt_from_dup<mode>): Likewise. + * config/aarch64/iterators.md (VMUL): Supprt half precision float modes. + (f, fp): Support HF modes. + * config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16, + vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16, + vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16, + vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16, + vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16, + vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New. + +2016-07-25 Jiong Wang <jiong.wang@arm.com> + * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (fma<mode>4, fnma<mode>4): Extend to HF modes. 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 961c6d8a28b..7d2e97f9f01 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -351,7 +351,7 @@ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; } - [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] + [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] ) (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>" @@ -379,7 +379,7 @@ (match_operand:VMUL 2 "register_operand" "w")))] "TARGET_SIMD" "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"; - [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] + [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] ) (define_insn "aarch64_rsqrte<mode>" @@ -1634,7 +1634,7 @@ (match_operand:VMUL 3 "register_operand" "0")))] "TARGET_SIMD" "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]" - [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")] + [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")] ) (define_insn "*aarch64_fma4_elt_to_64v2df" @@ -1712,7 +1712,7 @@ (match_operand:VMUL 3 "register_operand" "0")))] "TARGET_SIMD" "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]" - [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")] + [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")] ) (define_insn "*aarch64_fnma4_elt_to_64v2df" @@ -3101,20 +3101,18 @@ [(set_attr "type" "neon_fp_mul_<Vetype><q>")] ) -;; vmulxq_lane_f64 +;; vmulxq_lane -(define_insn "*aarch64_mulx_elt_to_64v2df" - [(set (match_operand:V2DF 0 "register_operand" "=w") - (unspec:V2DF - [(match_operand:V2DF 1 "register_operand" "w") - (vec_duplicate:V2DF - (match_operand:DF 2 "register_operand" "w"))] +(define_insn "*aarch64_mulx_elt_from_dup<mode>" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF + [(match_operand:VHSDF 1 "register_operand" "w") + (vec_duplicate:VHSDF + (match_operand:<VEL> 2 "register_operand" "w"))] UNSPEC_FMULX))] 
"TARGET_SIMD" - { - return "fmulx\t%0.2d, %1.2d, %2.d[0]"; - } - [(set_attr "type" "neon_fp_mul_d_scalar_q")] + "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"; + [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] ) ;; vmulxs_lane_f32, vmulxs_laneq_f32 diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b0d0c7cb19f..8b31e31d600 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26773,6 +26773,160 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) return __builtin_aarch64_fnmav8hf (__b, __c, __a); } +/* ARMv8.2-A FP16 lane vector intrinsics. */ + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfma_lane_f16 (float16x4_t __a, float16x4_t __b, + float16x4_t __c, const int __lane) +{ + return vfma_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmaq_lane_f16 (float16x8_t __a, float16x8_t __b, + float16x4_t __c, const int __lane) +{ + return vfmaq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfma_laneq_f16 (float16x4_t __a, float16x4_t __b, + float16x8_t __c, const int __lane) +{ + return vfma_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmaq_laneq_f16 (float16x8_t __a, float16x8_t __b, + float16x8_t __c, const int __lane) +{ + return vfmaq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfma_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) +{ + return vfma_f16 (__a, __b, vdup_n_f16 (__c)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) +{ + return vfmaq_f16 (__a, __b, vdupq_n_f16 
(__c)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfms_lane_f16 (float16x4_t __a, float16x4_t __b, + float16x4_t __c, const int __lane) +{ + return vfms_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmsq_lane_f16 (float16x8_t __a, float16x8_t __b, + float16x4_t __c, const int __lane) +{ + return vfmsq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfms_laneq_f16 (float16x4_t __a, float16x4_t __b, + float16x8_t __c, const int __lane) +{ + return vfms_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmsq_laneq_f16 (float16x8_t __a, float16x8_t __b, + float16x8_t __c, const int __lane) +{ + return vfmsq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfms_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) +{ + return vfms_f16 (__a, __b, vdup_n_f16 (__c)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) +{ + return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) +{ + return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) +{ + return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int 
__lane) +{ + return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) +{ + return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_n_f16 (float16x4_t __a, float16_t __b) +{ + return vmul_lane_f16 (__a, vdup_n_f16 (__b), 0); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_n_f16 (float16x8_t __a, float16_t __b) +{ + return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) +{ + return vmulx_f16 (__a, __aarch64_vdup_lane_f16 (__b, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) +{ + return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) +{ + return vmulx_f16 (__a, __aarch64_vdup_laneq_f16 (__b, __lane)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulxq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) +{ + return vmulxq_f16 (__a, __aarch64_vdupq_laneq_f16 (__b, __lane)); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmulx_n_f16 (float16x4_t __a, float16_t __b) +{ + return vmulx_f16 (__a, vdup_n_f16 (__b)); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulxq_n_f16 (float16x8_t __a, float16_t __b) +{ + return vmulxq_f16 (__a, vdupq_n_f16 (__b)); +} + #pragma GCC pop_options #undef 
__aarch64_vget_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 35190b4343b..8d4dc6cedd4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -218,7 +218,10 @@ (define_mode_iterator DX [DI DF]) ;; Modes available for <f>mul lane operations. -(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI + (V4HF "TARGET_SIMD_F16INST") + (V8HF "TARGET_SIMD_F16INST") + V2SF V4SF V2DF]) ;; Modes available for <f>mul lane operations changing lane count. (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) @@ -730,6 +733,7 @@ (V4HI "") (V8HI "") (V2SI "") (V4SI "") (DI "") (V2DI "") + (V4HF "f") (V8HF "f") (V2SF "f") (V4SF "f") (V2DF "f") (DF "f")]) @@ -738,6 +742,7 @@ (V4HI "") (V8HI "") (V2SI "") (V4SI "") (DI "") (V2DI "") + (V4HF "_fp") (V8HF "_fp") (V2SF "_fp") (V4SF "_fp") (V2DF "_fp") (DF "_fp") (SF "_fp")]) |