author    Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>  2012-07-05 16:45:18 +0000
committer Ramana Radhakrishnan <ramana@gcc.gnu.org>  2012-07-05 16:45:18 +0000
commit    378913094d2510e1b0ea4738e41de1054a701fa6 (patch)
tree      83768642a32f11702e2e8fe8e741cd07d1838947 /gcc/config/arm/arm_neon.h
parent    573234ac8bd662ec1d9d5a235f13b39312c6cc03 (diff)
download  gcc-378913094d2510e1b0ea4738e41de1054a701fa6.tar.gz
re PR rtl-optimization/49891 (ICE in redirect_jump_1)
2012-07-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/49891
PR target/51980
* gcc/testsuite/gcc.target/arm/neon/vtrnf32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vtrns32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vtrnu32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzipf32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzips32.c: Update.
* gcc/testsuite/gcc.target/arm/neon/vzipu32.c: Update.
2012-07-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Julian Brown <julian@codesourcery.com>
PR target/49891
PR target/51980
* config/arm/neon-gen.ml (return_by_ptr): Delete.
(print_function): Handle empty strings.
(return): Delete use of return_by_ptr.
(mask_shape_for_shuffle): New function.
(mask_elems): Likewise.
(shuffle_fn): Likewise.
(params): Simplify and remove use of return_by_ptr.
(get_shuffle): New function.
(print_variant): Update.
* config/arm/neon.ml (rev_elems): New function.
(permute_range): Likewise.
(zip_range): Likewise.
(uzip_range): Likewise.
(trn_range): Likewise.
(zip_elems): Likewise.
(uzip_elems): Likewise.
(trn_elems): Likewise.
(features): New enumeration Use_shuffle. Delete ReturnPtr.
(pf_su_8_16): New.
(suf_32): New.
(ops): Update entries for Vrev64, Vrev32, Vrev16, Vtr, Vzip, Vuzp.
* config/arm/arm_neon.h: Regenerate.
Co-Authored-By: Julian Brown <julian@codesourcery.com>
From-SVN: r189294
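For reference, the generic builtin this patch switches the intrinsics to is GCC's portable vector permute, __builtin_shuffle: it takes one or two vectors plus an integer mask vector of the same lane count, where each mask element selects a source lane, and indices in the two-operand form continue past the first operand into the second. A minimal standalone sketch of those semantics (my own illustration, not part of the patch; the v2u32 typedef is a hypothetical name) that runs on any GCC target with vector extensions:

#include <stdio.h>

/* Two 32-bit lanes, mirroring the shape of uint32x2_t in arm_neon.h.  */
typedef unsigned int v2u32 __attribute__ ((vector_size (8)));

int
main (void)
{
  v2u32 a = { 10, 11 };
  v2u32 b = { 20, 21 };

  /* One-operand form: result lane i is a[mask[i]].
     Mask { 1, 0 } reverses the vector, as in the new vrev64_u32.  */
  v2u32 rev = __builtin_shuffle (a, (v2u32) { 1, 0 });

  /* Two-operand form: indices 0..1 select from a, 2..3 from b.
     Masks { 0, 2 } and { 1, 3 } produce the two vtrn_u32 results.  */
  v2u32 lo = __builtin_shuffle (a, b, (v2u32) { 0, 2 });
  v2u32 hi = __builtin_shuffle (a, b, (v2u32) { 1, 3 });

  printf ("rev = { %u, %u }\n", rev[0], rev[1]);  /* { 11, 10 } */
  printf ("lo  = { %u, %u }\n", lo[0], lo[1]);    /* { 10, 20 } */
  printf ("hi  = { %u, %u }\n", hi[0], hi[1]);    /* { 11, 21 } */
  return 0;
}

Because __builtin_shuffle with a constant mask goes through the target's vec_perm expanders, the vectorizer and combine passes can see through these intrinsics, which is what the permute-mask machinery added to neon-gen.ml and neon.ml above generates.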
Diffstat (limited to 'gcc/config/arm/arm_neon.h')
-rw-r--r--  gcc/config/arm/arm_neon.h | 318
1 file changed, 186 insertions(+), 132 deletions(-)
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 0567895912b..b486d57be9c 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -7047,217 +7047,217 @@ vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vrev64_s8 (int8x8_t __a)
 {
-  return (int8x8_t)__builtin_neon_vrev64v8qi (__a, 1);
+  return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
 }

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vrev64_s16 (int16x4_t __a)
 {
-  return (int16x4_t)__builtin_neon_vrev64v4hi (__a, 1);
+  return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
 }

 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vrev64_s32 (int32x2_t __a)
 {
-  return (int32x2_t)__builtin_neon_vrev64v2si (__a, 1);
+  return (int32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
 }

 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vrev64_f32 (float32x2_t __a)
 {
-  return (float32x2_t)__builtin_neon_vrev64v2sf (__a, 3);
+  return (float32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
 }

 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrev64_u8 (uint8x8_t __a)
 {
-  return (uint8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 0);
+  return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
 }

 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vrev64_u16 (uint16x4_t __a)
 {
-  return (uint16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 0);
+  return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
 }

 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vrev64_u32 (uint32x2_t __a)
 {
-  return (uint32x2_t)__builtin_neon_vrev64v2si ((int32x2_t) __a, 0);
+  return (uint32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
 }

 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vrev64_p8 (poly8x8_t __a)
 {
-  return (poly8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 2);
+  return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
 }

 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vrev64_p16 (poly16x4_t __a)
 {
-  return (poly16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 2);
+  return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
 }

 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vrev64q_s8 (int8x16_t __a)
 {
-  return (int8x16_t)__builtin_neon_vrev64v16qi (__a, 1);
+  return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vrev64q_s16 (int16x8_t __a)
 {
-  return (int16x8_t)__builtin_neon_vrev64v8hi (__a, 1);
+  return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vrev64q_s32 (int32x4_t __a)
 {
-  return (int32x4_t)__builtin_neon_vrev64v4si (__a, 1);
+  return (int32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrev64q_f32 (float32x4_t __a)
 {
-  return (float32x4_t)__builtin_neon_vrev64v4sf (__a, 3);
+  return (float32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrev64q_u8 (uint8x16_t __a)
 {
-  return (uint8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 0);
+  return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
 }

 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vrev64q_u16 (uint16x8_t __a)
 {
-  return (uint16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 0);
+  return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vrev64q_u32 (uint32x4_t __a)
 {
-  return (uint32x4_t)__builtin_neon_vrev64v4si ((int32x4_t) __a, 0);
+  return (uint32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vrev64q_p8 (poly8x16_t __a)
 {
-  return (poly8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 2);
+  return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
 }

 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vrev64q_p16 (poly16x8_t __a)
 {
-  return (poly16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 2);
+  return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vrev32_s8 (int8x8_t __a)
 {
-  return (int8x8_t)__builtin_neon_vrev32v8qi (__a, 1);
+  return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vrev32_s16 (int16x4_t __a)
 {
-  return (int16x4_t)__builtin_neon_vrev32v4hi (__a, 1);
+  return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrev32_u8 (uint8x8_t __a)
 {
-  return (uint8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 0);
+  return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vrev32_u16 (uint16x4_t __a)
 {
-  return (uint16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 0);
+  return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vrev32_p8 (poly8x8_t __a)
 {
-  return (poly8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 2);
+  return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
 }

 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vrev32_p16 (poly16x4_t __a)
 {
-  return (poly16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 2);
+  return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
 }

 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vrev32q_s8 (int8x16_t __a)
 {
-  return (int8x16_t)__builtin_neon_vrev32v16qi (__a, 1);
+  return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vrev32q_s16 (int16x8_t __a)
 {
-  return (int16x8_t)__builtin_neon_vrev32v8hi (__a, 1);
+  return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrev32q_u8 (uint8x16_t __a)
 {
-  return (uint8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 0);
+  return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
 }

 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vrev32q_u16 (uint16x8_t __a)
 {
-  return (uint16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 0);
+  return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vrev32q_p8 (poly8x16_t __a)
 {
-  return (poly8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 2);
+  return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
 }

 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vrev32q_p16 (poly16x8_t __a)
 {
-  return (poly16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 2);
+  return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vrev16_s8 (int8x8_t __a)
 {
-  return (int8x8_t)__builtin_neon_vrev16v8qi (__a, 1);
+  return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrev16_u8 (uint8x8_t __a)
 {
-  return (uint8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 0);
+  return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vrev16_p8 (poly8x8_t __a)
 {
-  return (poly8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 2);
+  return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
 }

 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vrev16q_s8 (int8x16_t __a)
 {
-  return (int8x16_t)__builtin_neon_vrev16v16qi (__a, 1);
+  return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
 }

 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrev16q_u8 (uint8x16_t __a)
 {
-  return (uint8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 0);
+  return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
 }

 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vrev16q_p8 (poly8x16_t __a)
 {
-  return (poly8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 2);
+  return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
 }

 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -7396,7 +7396,8 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vtrn_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7404,23 +7405,8 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vtrn_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi (&__rv.val[0], __a, __b);
-  return __rv;
-}
-
-__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
-vtrn_s32 (int32x2_t __a, int32x2_t __b)
-{
-  int32x2x2_t __rv;
-  __builtin_neon_vtrnv2si (&__rv.val[0], __a, __b);
-  return __rv;
-}
-
-__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
-vtrn_f32 (float32x2_t __a, float32x2_t __b)
-{
-  float32x2x2_t __rv;
-  __builtin_neon_vtrnv2sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
   return __rv;
 }

@@ -7428,7 +7414,8 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7436,15 +7423,8 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
-}
-
-__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
-vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  uint32x2x2_t __rv;
-  __builtin_neon_vtrnv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
   return __rv;
 }

@@ -7452,7 +7432,8 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7460,7 +7441,35 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+  return __rv;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vtrn_s32 (int32x2_t __a, int32x2_t __b)
+{
+  int32x2x2_t __rv;
+  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+  return __rv;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vtrn_f32 (float32x2_t __a, float32x2_t __b)
+{
+  float32x2x2_t __rv;
+  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+  return __rv;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+  uint32x2x2_t __rv;
+  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
   return __rv;
 }

@@ -7468,7 +7477,8 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
   return __rv;
 }

@@ -7476,7 +7486,8 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7484,7 +7495,8 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __builtin_neon_vtrnv4si (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
   return __rv;
 }

@@ -7492,7 +7504,8 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __builtin_neon_vtrnv4sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
   return __rv;
 }

@@ -7500,7 +7513,8 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
   return __rv;
 }

@@ -7508,7 +7522,8 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7516,7 +7531,8 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __builtin_neon_vtrnv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
   return __rv;
 }

@@ -7524,7 +7540,8 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
   return __rv;
 }

@@ -7532,7 +7549,8 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
   return __rv;
 }

@@ -7540,7 +7558,8 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vzip_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __builtin_neon_vzipv8qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7548,23 +7567,8 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vzip_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __builtin_neon_vzipv4hi (&__rv.val[0], __a, __b);
-  return __rv;
-}
-
-__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
-vzip_s32 (int32x2_t __a, int32x2_t __b)
-{
-  int32x2x2_t __rv;
-  __builtin_neon_vzipv2si (&__rv.val[0], __a, __b);
-  return __rv;
-}
-
-__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
-vzip_f32 (float32x2_t __a, float32x2_t __b)
-{
-  float32x2x2_t __rv;
-  __builtin_neon_vzipv2sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
   return __rv;
 }

@@ -7572,7 +7576,8 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vzip_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7580,15 +7585,8 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vzip_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
-}
-
-__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
-vzip_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  uint32x2x2_t __rv;
-  __builtin_neon_vzipv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
   return __rv;
 }

@@ -7596,7 +7594,8 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vzip_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7604,7 +7603,35 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vzip_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+  return __rv;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vzip_s32 (int32x2_t __a, int32x2_t __b)
+{
+  int32x2x2_t __rv;
+  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+  return __rv;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vzip_f32 (float32x2_t __a, float32x2_t __b)
+{
+  float32x2x2_t __rv;
+  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+  return __rv;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vzip_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+  uint32x2x2_t __rv;
+  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
   return __rv;
 }

@@ -7612,7 +7639,8 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vzipq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __builtin_neon_vzipv16qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
   return __rv;
 }

@@ -7620,7 +7648,8 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vzipq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __builtin_neon_vzipv8hi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7628,7 +7657,8 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vzipq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __builtin_neon_vzipv4si (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
   return __rv;
 }

@@ -7636,7 +7666,8 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vzipq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __builtin_neon_vzipv4sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
   return __rv;
 }

@@ -7644,7 +7675,8 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
   return __rv;
 }

@@ -7652,7 +7684,8 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7660,7 +7693,8 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __builtin_neon_vzipv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
   return __rv;
 }

@@ -7668,7 +7702,8 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
   return __rv;
 }

@@ -7676,7 +7711,8 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
   return __rv;
 }

@@ -7684,7 +7720,8 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vuzp_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }

@@ -7692,7 +7729,8 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vuzp_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7700,7 +7738,8 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vuzp_s32 (int32x2_t __a, int32x2_t __b)
 {
   int32x2x2_t __rv;
-  __builtin_neon_vuzpv2si (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
   return __rv;
 }

@@ -7708,7 +7747,8 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vuzp_f32 (float32x2_t __a, float32x2_t __b)
 {
   float32x2x2_t __rv;
-  __builtin_neon_vuzpv2sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
   return __rv;
 }

@@ -7716,7 +7756,8 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }

@@ -7724,7 +7765,8 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7732,7 +7774,8 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
 {
   uint32x2x2_t __rv;
-  __builtin_neon_vuzpv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
   return __rv;
 }

@@ -7740,7 +7783,8 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }

@@ -7748,7 +7792,8 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7756,7 +7801,8 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
   return __rv;
 }

@@ -7764,7 +7810,8 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }

@@ -7772,7 +7819,8 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __builtin_neon_vuzpv4si (&__rv.val[0], __a, __b);
+  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7780,7 +7828,8 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __builtin_neon_vuzpv4sf (&__rv.val[0], __a, __b);
+  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7788,7 +7837,8 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
   return __rv;
 }

@@ -7796,7 +7846,8 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }

@@ -7804,7 +7855,8 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __builtin_neon_vuzpv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
   return __rv;
 }

@@ -7812,7 +7864,8 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
   return __rv;
 }

@@ -7820,7 +7873,8 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
   return __rv;
 }
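As a usage sketch (my own illustration, not part of the patch): on an ARM toolchain with NEON enabled, the regenerated vtrn_u32 now expands through __builtin_shuffle but keeps the documented transpose semantics, in the spirit of the gcc.target/arm/neon/vtrnu32.c test updated above.

/* Assumes an ARM compiler with NEON enabled, e.g. -mfpu=neon -mfloat-abi=softfp.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint32x2_t a = { 0, 1 };   /* a0, a1 */
  uint32x2_t b = { 2, 3 };   /* b0, b1 */
  uint32x2x2_t r = vtrn_u32 (a, b);

  /* Transpose of the 2x2 matrix [a; b]:
     val[0] = { a0, b0 } = { 0, 2 }, val[1] = { a1, b1 } = { 1, 3 },
     matching the { 0, 2 } and { 1, 3 } shuffle masks above.  */
  printf ("%u %u / %u %u\n",
          vget_lane_u32 (r.val[0], 0), vget_lane_u32 (r.val[0], 1),
          vget_lane_u32 (r.val[1], 0), vget_lane_u32 (r.val[1], 1));
  return 0;
}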