summary | refs | log | tree | commit | diff
path: root/gcc/config/arm/arm_neon.h
diff options
context:
space:
mode:
author: alalaw01 <alalaw01@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-09-08 18:48:47 +0000
committer: alalaw01 <alalaw01@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-09-08 18:48:47 +0000
commit: fb5f110d8371fcf4b9fe4f7f9c7b4ed3dd4336e5 (patch)
tree: b8c1636a40d4fb18a5a2f0e5502a287f735b11f3 /gcc/config/arm/arm_neon.h
parent: 114e7e56ea341a9c3fb2e34830be6342233eda1e (diff)
download: gcc-fb5f110d8371fcf4b9fe4f7f9c7b4ed3dd4336e5.tar.gz
[ARM] Remaining intrinsics
* config/arm/arm-builtins.c (VAR11, VAR12): New.
* config/arm/arm_neon_builtins.def (vcombine, vld2_dup, vld3_dup, vld4_dup): Add v4hf variant.
(vget_high, vget_low): Add v8hf variant.
(vld1, vst1, vst1_lane, vld2, vld2_lane, vst2, vst2_lane, vld3, vld3_lane, vst3, vst3_lane, vld4, vld4_lane, vst4, vst4_lane): Add v4hf and v8hf variants.
* config/arm/iterators.md (VD_LANE, VD_RE, VQ2, VQ_HS): New.
(VDX): Add V4HF.
(V_DOUBLE): Add case for V4HF.
(VQX): Add V8HF.
(V_HALF): Add case for V8HF.
(VDQX): Add V4HF, V8HF.
(V_elem, V_two_elem, V_three_elem, V_four_elem, V_cmp_result, V_uf_sclr, V_sz_elem, V_mode_nunits, q): Add cases for V4HF & V8HF.
* config/arm/neon.md (vec_set<mode>internal, vec_extract<mode>, neon_vget_lane<mode>_sext_internal, neon_vget_lane<mode>_zext_internal, vec_load_lanesoi<mode>, neon_vld2<mode>, vec_store_lanesoi<mode>, neon_vst2<mode>, vec_load_lanesci<mode>, neon_vld3<mode>, neon_vld3qa<mode>, neon_vld3qb<mode>, vec_store_lanesci<mode>, neon_vst3<mode>, neon_vst3qa<mode>, neon_vst3qb<mode>, vec_load_lanesxi<mode>, neon_vld4<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>, vec_store_lanesxi<mode>, neon_vst4<mode>, neon_vst4qa<mode>, neon_vst4qb<mode>): Change VQ iterator to VQ2.
(neon_vcreate, neon_vreinterpretv8qi<mode>, neon_vreinterpretv4hi<mode>, neon_vreinterpretv2si<mode>, neon_vreinterpretv2sf<mode>, neon_vreinterpretdi<mode>): Change VDX to VD_RE.
(neon_vld2_lane<mode>, neon_vst2_lane<mode>, neon_vld3_lane<mode>, neon_vst3_lane<mode>, neon_vld4_lane<mode>, neon_vst4_lane<mode>): Change VD iterator to VD_LANE, and VMQ iterator to VQ_HS.
* config/arm/arm_neon.h (float16x4x2_t, float16x8x2_t, float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t, vcombine_f16, vget_high_f16, vget_low_f16, vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16, vst1q_lane_f16, vld2_f16, vld2q_f16, vld2_lane_f16, vld2q_lane_f16, vld2_dup_f16, vst2_f16, vst2q_f16, vst2_lane_f16, vst2q_lane_f16, vld3_f16, vld3q_f16, vld3_lane_f16, vld3q_lane_f16, vld3_dup_f16, vst3_f16, vst3q_f16, vst3_lane_f16, vst3q_lane_f16, vld4_f16, vld4q_f16, vld4_lane_f16, vld4q_lane_f16, vld4_dup_f16, vst4_f16, vst4q_f16, vst4_lane_f16, vst4q_lane_f16): New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@227541 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/arm_neon.h')
-rw-r--r-- gcc/config/arm/arm_neon.h 380
1 files changed, 380 insertions, 0 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index b1c9cc76a4c..66622dfcfe2 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -166,6 +166,20 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x2_t
+{
+ float16x4_t val[2];
+} float16x4x2_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x2_t
+{
+ float16x8_t val[2];
+} float16x8x2_t;
+#endif
+
typedef struct float32x2x2_t
{
float32x2_t val[2];
@@ -292,6 +306,20 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x3_t
+{
+ float16x4_t val[3];
+} float16x4x3_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x3_t
+{
+ float16x8_t val[3];
+} float16x8x3_t;
+#endif
+
typedef struct float32x2x3_t
{
float32x2_t val[3];
@@ -418,6 +446,20 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x4x4_t
+{
+ float16x4_t val[4];
+} float16x4x4_t;
+#endif
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+typedef struct float16x8x4_t
+{
+ float16x8_t val[4];
+} float16x8x4_t;
+#endif
+
typedef struct float32x2x4_t
{
float32x2_t val[4];
@@ -6045,6 +6087,14 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_neon_vcombinev4hf (__a, __b);
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -6119,6 +6169,14 @@ vget_high_s64 (int64x2_t __a)
return (int64x1_t)__builtin_neon_vget_highv2di (__a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_high_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_highv8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
@@ -6179,6 +6237,14 @@ vget_low_s32 (int32x4_t __a)
return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vget_low_f16 (float16x8_t __a)
+{
+ return __builtin_neon_vget_lowv8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
@@ -8730,6 +8796,14 @@ vld1_s64 (const int64_t * __a)
return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v4hf (__a);
+}
+#endif
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * __a)
{
@@ -8804,6 +8878,14 @@ vld1q_s64 (const int64_t * __a)
return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_f16 (const float16_t * __a)
+{
+ return __builtin_neon_vld1v8hf (__a);
+}
+#endif
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * __a)
{
@@ -9208,6 +9290,14 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
__builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t * __a, float16x4_t __b)
+{
+ __builtin_neon_vst1v4hf (__a, __b);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t * __a, float32x2_t __b)
{
@@ -9282,6 +9372,14 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
__builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t * __a, float16x8_t __b)
+{
+ __builtin_neon_vst1v8hf (__a, __b);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * __a, float32x4_t __b)
{
@@ -9342,6 +9440,14 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
__builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hf (__a, __b, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
{
@@ -9416,6 +9522,14 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
__builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hf (__a, __b, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
{
@@ -9496,6 +9610,16 @@ vld2_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@@ -9594,6 +9718,16 @@ vld2q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_f16 (const float16_t * __a)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@@ -9669,6 +9803,17 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hf ( __a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
{
@@ -9741,6 +9886,17 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
{
@@ -9801,6 +9957,16 @@ vld2_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@@ -9896,6 +10062,15 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b)
__builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t __b)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t __b)
{
@@ -9982,6 +10157,15 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b)
__builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t __b)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
{
@@ -10045,6 +10229,15 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
__builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
+{
+ union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
{
@@ -10101,6 +10294,15 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
__builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
+{
+ union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
{
@@ -10153,6 +10355,16 @@ vld3_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@@ -10251,6 +10463,16 @@ vld3q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_f16 (const float16_t * __a)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@@ -10326,6 +10548,17 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
{
@@ -10398,6 +10631,17 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hf (__a, __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
{
@@ -10458,6 +10702,16 @@ vld3_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@@ -10553,6 +10807,15 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b)
__builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t __b)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t __b)
{
@@ -10639,6 +10902,15 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b)
__builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t __b)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
{
@@ -10702,6 +10974,15 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
__builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
+{
+ union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
{
@@ -10758,6 +11039,15 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
__builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
+{
+ union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
{
@@ -10810,6 +11100,16 @@ vld4_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@@ -10908,6 +11208,16 @@ vld4q_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_f16 (const float16_t * __a)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
@@ -10983,6 +11293,18 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hf (__a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
{
@@ -11055,6 +11377,18 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hf (__a,
+ __bu.__o, __c);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
{
@@ -11115,6 +11449,16 @@ vld4_dup_s32 (const int32_t * __a)
return __rv.__i;
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_f16 (const float16_t * __a)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hf (__a);
+ return __rv.__i;
+}
+#endif
+
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@@ -11210,6 +11554,15 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b)
__builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t __b)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t __b)
{
@@ -11296,6 +11649,15 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b)
__builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t __b)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hf (__a, __bu.__o);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
{
@@ -11359,6 +11721,15 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c)
__builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c)
+{
+ union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
{
@@ -11415,6 +11786,15 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c)
__builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
}
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c)
+{
+ union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hf (__a, __bu.__o, __c);
+}
+#endif
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
{