author     ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-06-30 15:19:45 +0000
committer  ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-06-30 15:19:45 +0000
commit     443405c9d2da2bf7641c1210c2a9b1987d833157 (patch)
tree       7aca1ace387a0f7a911df83b632b5d7d68b8100d /gcc/config/aarch64
parent     6e3a3de215d62a8c47a8799833d0a5f72db21f7d (diff)
[AArch64][2/2] (Re)Implement vcopy<q>_lane<q> intrinsics
2016-06-30  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
	    James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/arm_neon.h (vcopyq_lane_f32, vcopyq_lane_f64,
	vcopyq_lane_p8, vcopyq_lane_p16, vcopyq_lane_s8, vcopyq_lane_s16,
	vcopyq_lane_s32, vcopyq_lane_s64, vcopyq_lane_u8, vcopyq_lane_u16,
	vcopyq_lane_u32, vcopyq_lane_u64): Reimplement in C.
	(vcopy_lane_f32, vcopy_lane_f64, vcopy_lane_p8, vcopy_lane_p16,
	vcopy_lane_s8, vcopy_lane_s16, vcopy_lane_s32, vcopy_lane_s64,
	vcopy_lane_u8, vcopy_lane_u16, vcopy_lane_u32, vcopy_lane_u64,
	vcopy_laneq_f32, vcopy_laneq_f64, vcopy_laneq_p8, vcopy_laneq_p16,
	vcopy_laneq_s8, vcopy_laneq_s16, vcopy_laneq_s32, vcopy_laneq_s64,
	vcopy_laneq_u8, vcopy_laneq_u16, vcopy_laneq_u32, vcopy_laneq_u64,
	vcopyq_laneq_f32, vcopyq_laneq_f64, vcopyq_laneq_p8,
	vcopyq_laneq_p16, vcopyq_laneq_s8, vcopyq_laneq_s16,
	vcopyq_laneq_s32, vcopyq_laneq_s64, vcopyq_laneq_u8,
	vcopyq_laneq_u16, vcopyq_laneq_u32, vcopyq_laneq_u64): New
	intrinsics.
	* gcc.target/aarch64/vect_copy_lane_1.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237883 138bc75d-0d04-0410-961f-82ee72b054a4
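For reference, the semantics these intrinsics implement: the result is the first vector argument with lane __lane1 replaced by lane __lane2 of the second vector argument, i.e. a single AArch64 "ins" instruction. A minimal standalone demo (not part of the commit; the file layout and driver are illustrative) for a compiler that provides these ACLE intrinsics:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int32x4_t a = {10, 11, 12, 13};
  int32x2_t b = {20, 21};
  /* Copy lane 0 of b into lane 2 of a.  The lane numbers must be
     integer constant expressions in range for the vector widths.  */
  int32x4_t r = vcopyq_lane_s32 (a, 2, b, 0);
  printf ("%d %d %d %d\n", r[0], r[1], r[2], r[3]);  /* 10 11 20 13 */
  return 0;
}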
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/arm_neon.h  548
1 file changed, 392 insertions, 156 deletions
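Every new definition in the diff below follows one pattern: extract a lane from __b, insert it into __a. They build on the generic __aarch64_vget_lane_any and __aarch64_vset_lane_any helper macros defined earlier in arm_neon.h, outside this hunk. A rough sketch of that pattern, assuming the helpers reduce to GCC's vector subscripting plus a compile-time lane-bounds check (the sketch_ names are illustrative, not GCC's):

#include <arm_neon.h>

/* Illustrative stand-ins for __aarch64_vget_lane_any /
   __aarch64_vset_lane_any; the real macros also perform a
   compile-time lane-bounds check.  */
#define sketch_vget_lane_any(__vec, __index)  ((__vec)[__index])
#define sketch_vset_lane_any(__elem, __vec, __index) \
  __extension__ \
  ({ \
    __typeof__ (__vec) __copy = (__vec); \
    __copy[__index] = (__elem); \
    __copy; \
  })

/* The shape of every new intrinsic in this patch: read one lane,
   write one lane.  The intent (see the new vect_copy_lane_1.c test)
   is that GCC matches this back to a single "ins".  */
static inline int32x2_t
sketch_vcopy_lane_s32 (int32x2_t __a, const int __lane1,
		       int32x2_t __b, const int __lane2)
{
  return sketch_vset_lane_any (sketch_vget_lane_any (__b, __lane2),
			       __a, __lane1);
}

Expressing the operation in C rather than inline asm lets the compiler see through it, so lane copies can be combined with surrounding code instead of being treated as an opaque block.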
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index ebf6fa2b63e..4d5292eaed5 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5814,162 +5814,6 @@ vaddlvq_u32 (uint32x4_t a)
return result;
}
-#define vcopyq_lane_f32(a, b, c, d) \
- __extension__ \
- ({ \
- float32x4_t c_ = (c); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("ins %0.s[%2], %3.s[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_f64(a, b, c, d) \
- __extension__ \
- ({ \
- float64x2_t c_ = (c); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("ins %0.d[%2], %3.d[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_p8(a, b, c, d) \
- __extension__ \
- ({ \
- poly8x16_t c_ = (c); \
- poly8x16_t a_ = (a); \
- poly8x16_t result; \
- __asm__ ("ins %0.b[%2], %3.b[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_p16(a, b, c, d) \
- __extension__ \
- ({ \
- poly16x8_t c_ = (c); \
- poly16x8_t a_ = (a); \
- poly16x8_t result; \
- __asm__ ("ins %0.h[%2], %3.h[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_s8(a, b, c, d) \
- __extension__ \
- ({ \
- int8x16_t c_ = (c); \
- int8x16_t a_ = (a); \
- int8x16_t result; \
- __asm__ ("ins %0.b[%2], %3.b[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_s16(a, b, c, d) \
- __extension__ \
- ({ \
- int16x8_t c_ = (c); \
- int16x8_t a_ = (a); \
- int16x8_t result; \
- __asm__ ("ins %0.h[%2], %3.h[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_s32(a, b, c, d) \
- __extension__ \
- ({ \
- int32x4_t c_ = (c); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("ins %0.s[%2], %3.s[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_s64(a, b, c, d) \
- __extension__ \
- ({ \
- int64x2_t c_ = (c); \
- int64x2_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("ins %0.d[%2], %3.d[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_u8(a, b, c, d) \
- __extension__ \
- ({ \
- uint8x16_t c_ = (c); \
- uint8x16_t a_ = (a); \
- uint8x16_t result; \
- __asm__ ("ins %0.b[%2], %3.b[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_u16(a, b, c, d) \
- __extension__ \
- ({ \
- uint16x8_t c_ = (c); \
- uint16x8_t a_ = (a); \
- uint16x8_t result; \
- __asm__ ("ins %0.h[%2], %3.h[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_u32(a, b, c, d) \
- __extension__ \
- ({ \
- uint32x4_t c_ = (c); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("ins %0.s[%2], %3.s[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vcopyq_lane_u64(a, b, c, d) \
- __extension__ \
- ({ \
- uint64x2_t c_ = (c); \
- uint64x2_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("ins %0.d[%2], %3.d[%4]" \
- : "=w"(result) \
- : "0"(a_), "i"(b), "w"(c_), "i"(d) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
@@ -12356,6 +12200,398 @@ vcntq_u8 (uint8x16_t __a)
return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}
+/* vcopy_lane. */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcopy_lane_f32 (float32x2_t __a, const int __lane1,
+ float32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcopy_lane_f64 (float64x1_t __a, const int __lane1,
+ float64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vcopy_lane_p8 (poly8x8_t __a, const int __lane1,
+ poly8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
+ poly16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcopy_lane_s8 (int8x8_t __a, const int __lane1,
+ int8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcopy_lane_s16 (int16x4_t __a, const int __lane1,
+ int16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcopy_lane_s32 (int32x2_t __a, const int __lane1,
+ int32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vcopy_lane_s64 (int64x1_t __a, const int __lane1,
+ int64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcopy_lane_u8 (uint8x8_t __a, const int __lane1,
+ uint8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcopy_lane_u16 (uint16x4_t __a, const int __lane1,
+ uint16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcopy_lane_u32 (uint32x2_t __a, const int __lane1,
+ uint32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcopy_lane_u64 (uint64x1_t __a, const int __lane1,
+ uint64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+/* vcopy_laneq. */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcopy_laneq_f32 (float32x2_t __a, const int __lane1,
+ float32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcopy_laneq_f64 (float64x1_t __a, const int __lane1,
+ float64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vcopy_laneq_p8 (poly8x8_t __a, const int __lane1,
+ poly8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vcopy_laneq_p16 (poly16x4_t __a, const int __lane1,
+ poly16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcopy_laneq_s8 (int8x8_t __a, const int __lane1,
+ int8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcopy_laneq_s16 (int16x4_t __a, const int __lane1,
+ int16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcopy_laneq_s32 (int32x2_t __a, const int __lane1,
+ int32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vcopy_laneq_s64 (int64x1_t __a, const int __lane1,
+ int64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcopy_laneq_u8 (uint8x8_t __a, const int __lane1,
+ uint8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcopy_laneq_u16 (uint16x4_t __a, const int __lane1,
+ uint16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcopy_laneq_u32 (uint32x2_t __a, const int __lane1,
+ uint32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcopy_laneq_u64 (uint64x1_t __a, const int __lane1,
+ uint64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+/* vcopyq_lane. */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcopyq_lane_f32 (float32x4_t __a, const int __lane1,
+ float32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vcopyq_lane_f64 (float64x2_t __a, const int __lane1,
+ float64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vcopyq_lane_p8 (poly8x16_t __a, const int __lane1,
+ poly8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vcopyq_lane_p16 (poly16x8_t __a, const int __lane1,
+ poly16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vcopyq_lane_s8 (int8x16_t __a, const int __lane1,
+ int8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcopyq_lane_s16 (int16x8_t __a, const int __lane1,
+ int16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcopyq_lane_s32 (int32x4_t __a, const int __lane1,
+ int32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vcopyq_lane_s64 (int64x2_t __a, const int __lane1,
+ int64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcopyq_lane_u8 (uint8x16_t __a, const int __lane1,
+ uint8x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcopyq_lane_u16 (uint16x8_t __a, const int __lane1,
+ uint16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcopyq_lane_u32 (uint32x4_t __a, const int __lane1,
+ uint32x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcopyq_lane_u64 (uint64x2_t __a, const int __lane1,
+ uint64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+/* vcopyq_laneq. */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcopyq_laneq_f32 (float32x4_t __a, const int __lane1,
+ float32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vcopyq_laneq_f64 (float64x2_t __a, const int __lane1,
+ float64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1,
+ poly8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1,
+ poly16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
+ int8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcopyq_laneq_s16 (int16x8_t __a, const int __lane1,
+ int16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcopyq_laneq_s32 (int32x4_t __a, const int __lane1,
+ int32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vcopyq_laneq_s64 (int64x2_t __a, const int __lane1,
+ int64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1,
+ uint8x16_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1,
+ uint16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1,
+ uint32x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1,
+ uint64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
/* vcvt (double -> float). */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))