summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorYvan Roux <yvan.roux@linaro.org>2015-12-07 11:10:13 +0100
committerLinaro Code Review <review@review.linaro.org>2015-12-10 08:57:55 +0000
commit7f7da9776a2100eb6e64c142732c89ecc719d45d (patch)
tree1816dfab68bb947ceddcd67805d27d0108c98a0f /gcc/config
parent00196009c167a037d02c42708a1bdb70de9dbb78 (diff)
downloadgcc-7f7da9776a2100eb6e64c142732c89ecc719d45d.tar.gz
gcc/
Backport from trunk r230142. 2015-11-11 Charles Baylis <charles.baylis@linaro.org> PR target/63870 * config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerator qualifier_struct_load_store_lane_index. (builtin_arg): New enumerator NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX. (arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON argument qualifiers. (arm_expand_neon_builtin): Handle new NEON argument qualifier. * config/arm/arm.h (NEON_ENDIAN_LANE_N): New macro. gcc/ Backport from trunk r230143. 2015-11-11 Charles Baylis <charles.baylis@linaro.org> PR target/63870 * config/arm/arm-builtins.c: (arm_load1_qualifiers) Use qualifier_struct_load_store_lane_index. (arm_storestruct_lane_qualifiers) Likewise. * config/arm/neon.md: (neon_vld1_lane<mode>) Reverse lane numbers for big-endian. (neon_vst1_lane<mode>) Likewise. (neon_vld2_lane<mode>) Likewise. (neon_vst2_lane<mode>) Likewise. (neon_vld3_lane<mode>) Likewise. (neon_vst3_lane<mode>) Likewise. (neon_vld4_lane<mode>) Likewise. (neon_vst4_lane<mode>) Likewise. gcc/ Backport from trunk r230144. 2015-11-11 Charles Baylis <charles.baylis@linaro.org> PR target/63870 * config/arm/neon.md (neon_vld1_lane<mode>): Remove error for invalid lane number. (neon_vst1_lane<mode>): Likewise. (neon_vld2_lane<mode>): Likewise. (neon_vst2_lane<mode>): Likewise. (neon_vld3_lane<mode>): Likewise. (neon_vst3_lane<mode>): Likewise. (neon_vld4_lane<mode>): Likewise. (neon_vst4_lane<mode>): Likewise. gcc/ Backport from trunk r230203. 2015-11-12 Charles Baylis <charles.baylis@linaro.org> * config/arm/neon.md: (neon_vld2_lane<mode>): Remove unused max variable. (neon_vst2_lane<mode>): Likewise. (neon_vld3_lane<mode>): Likewise. (neon_vst3_lane<mode>): Likewise. (neon_vld4_lane<mode>): Likewise. (neon_vst4_lane<mode>): Likewise. gcc/testsuite/ Backport from trunk r231077. 2015-11-30 Charles Baylis <charles.baylis@linaro.org> PR target/63870 * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c (f_vld2_lane_f16): Remove xfails for arm targets. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f32_indices_1.c (f_vld2_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f64_indices_1.c (f_vld2_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_p8_indices_1.c (f_vld2_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s16_indices_1.c (f_vld2_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s32_indices_1.c (f_vld2_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s64_indices_1.c (f_vld2_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s8_indices_1.c (f_vld2_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u16_indices_1.c (f_vld2_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u32_indices_1.c (f_vld2_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u64_indices_1.c (f_vld2_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u8_indices_1.c (f_vld2_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c (f_vld2q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f32_indices_1.c (f_vld2q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f64_indices_1.c (f_vld2q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_p8_indices_1.c (f_vld2q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s16_indices_1.c (f_vld2q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s32_indices_1.c (f_vld2q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s64_indices_1.c (f_vld2q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s8_indices_1.c (f_vld2q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u16_indices_1.c (f_vld2q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u32_indices_1.c (f_vld2q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u64_indices_1.c (f_vld2q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u8_indices_1.c (f_vld2q_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c (f_vld3_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f32_indices_1.c (f_vld3_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f64_indices_1.c (f_vld3_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_p8_indices_1.c (f_vld3_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s16_indices_1.c (f_vld3_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s32_indices_1.c (f_vld3_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s64_indices_1.c (f_vld3_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s8_indices_1.c (f_vld3_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u16_indices_1.c (f_vld3_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u32_indices_1.c (f_vld3_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u64_indices_1.c (f_vld3_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u8_indices_1.c (f_vld3_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c (f_vld3q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f32_indices_1.c (f_vld3q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f64_indices_1.c (f_vld3q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_p8_indices_1.c (f_vld3q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s16_indices_1.c (f_vld3q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s32_indices_1.c (f_vld3q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s64_indices_1.c (f_vld3q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s8_indices_1.c (f_vld3q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u16_indices_1.c (f_vld3q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u32_indices_1.c (f_vld3q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u64_indices_1.c (f_vld3q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u8_indices_1.c (f_vld3q_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c (f_vld4_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f32_indices_1.c (f_vld4_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f64_indices_1.c (f_vld4_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_p8_indices_1.c (f_vld4_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s16_indices_1.c (f_vld4_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s32_indices_1.c (f_vld4_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s64_indices_1.c (f_vld4_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s8_indices_1.c (f_vld4_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u16_indices_1.c (f_vld4_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u32_indices_1.c (f_vld4_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u64_indices_1.c (f_vld4_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u8_indices_1.c (f_vld4_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c (f_vld4q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f32_indices_1.c (f_vld4q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f64_indices_1.c (f_vld4q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_p8_indices_1.c (f_vld4q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s16_indices_1.c (f_vld4q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s32_indices_1.c (f_vld4q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s64_indices_1.c (f_vld4q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_s8_indices_1.c (f_vld4q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u16_indices_1.c (f_vld4q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u32_indices_1.c (f_vld4q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u64_indices_1.c (f_vld4q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_u8_indices_1.c (f_vld4q_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c (f_vst2_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f32_indices_1.c (f_vst2_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f64_indices_1.c (f_vst2_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_p8_indices_1.c (f_vst2_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s16_indices_1.c (f_vst2_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s32_indices_1.c (f_vst2_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s64_indices_1.c (f_vst2_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_s8_indices_1.c (f_vst2_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u16_indices_1.c (f_vst2_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u32_indices_1.c (f_vst2_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u64_indices_1.c (f_vst2_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_u8_indices_1.c (f_vst2_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c (f_vst2q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f32_indices_1.c (f_vst2q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f64_indices_1.c (f_vst2q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_p8_indices_1.c (f_vst2q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s16_indices_1.c (f_vst2q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s32_indices_1.c (f_vst2q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s64_indices_1.c (f_vst2q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_s8_indices_1.c (f_vst2q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u16_indices_1.c (f_vst2q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u32_indices_1.c (f_vst2q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u64_indices_1.c (f_vst2q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_u8_indices_1.c (f_vst2q_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c (f_vst3_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f32_indices_1.c (f_vst3_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f64_indices_1.c (f_vst3_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_p8_indices_1.c (f_vst3_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s16_indices_1.c (f_vst3_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s32_indices_1.c (f_vst3_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s64_indices_1.c (f_vst3_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_s8_indices_1.c (f_vst3_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u16_indices_1.c (f_vst3_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u32_indices_1.c (f_vst3_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u64_indices_1.c (f_vst3_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_u8_indices_1.c (f_vst3_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c (f_vst3q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f32_indices_1.c (f_vst3q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f64_indices_1.c (f_vst3q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_p8_indices_1.c (f_vst3q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s16_indices_1.c (f_vst3q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s32_indices_1.c (f_vst3q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s64_indices_1.c (f_vst3q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_s8_indices_1.c (f_vst3q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u16_indices_1.c (f_vst3q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u32_indices_1.c (f_vst3q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u64_indices_1.c (f_vst3q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_u8_indices_1.c (f_vst3q_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c (f_vst4_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f32_indices_1.c (f_vst4_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f64_indices_1.c (f_vst4_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_p8_indices_1.c (f_vst4_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s16_indices_1.c (f_vst4_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s32_indices_1.c (f_vst4_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s64_indices_1.c (f_vst4_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_s8_indices_1.c (f_vst4_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u16_indices_1.c (f_vst4_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u32_indices_1.c (f_vst4_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u64_indices_1.c (f_vst4_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_u8_indices_1.c (f_vst4_lane_u8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c (f_vst4q_lane_f16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f32_indices_1.c (f_vst4q_lane_f32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f64_indices_1.c (f_vst4q_lane_f64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_p8_indices_1.c (f_vst4q_lane_p8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s16_indices_1.c (f_vst4q_lane_s16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s32_indices_1.c (f_vst4q_lane_s32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s64_indices_1.c (f_vst4q_lane_s64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_s8_indices_1.c (f_vst4q_lane_s8): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u16_indices_1.c (f_vst4q_lane_u16): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u32_indices_1.c (f_vst4q_lane_u32): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u64_indices_1.c (f_vst4q_lane_u64): Ditto. * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_u8_indices_1.c (f_vst4q_lane_u8): Ditto. Change-Id: I87e2d302ba2db3e8d7ad6097de86710ba36f22fb
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/arm/arm-builtins.c52
-rw-r--r--gcc/config/arm/arm.c1
-rw-r--r--gcc/config/arm/arm.h6
-rw-r--r--gcc/config/arm/neon.md136
4 files changed, 106 insertions, 89 deletions
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 8b26cb6d35c..e961ee8b424 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -91,7 +91,9 @@ enum arm_type_qualifiers
/* Polynomial types. */
qualifier_poly = 0x100,
/* Lane indices - must be within range of previous argument = a vector. */
- qualifier_lane_index = 0x200
+ qualifier_lane_index = 0x200,
+ /* Lane indices for single lane structure loads and stores. */
+ qualifier_struct_load_store_lane_index = 0x400
};
/* The qualifier_internal allows generation of a unary builtin from
@@ -174,7 +176,7 @@ arm_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
static enum arm_type_qualifiers
arm_load1_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_const_pointer_map_mode,
- qualifier_none, qualifier_immediate };
+ qualifier_none, qualifier_struct_load_store_lane_index };
#define LOAD1LANE_QUALIFIERS (arm_load1_lane_qualifiers)
/* The first argument (return type) of a store should be void type,
@@ -193,7 +195,7 @@ arm_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
static enum arm_type_qualifiers
arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer_map_mode,
- qualifier_none, qualifier_immediate };
+ qualifier_none, qualifier_struct_load_store_lane_index };
#define STORE1LANE_QUALIFIERS (arm_storestruct_lane_qualifiers)
#define v8qi_UP V8QImode
@@ -1987,6 +1989,7 @@ typedef enum {
NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT,
NEON_ARG_LANE_INDEX,
+ NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
NEON_ARG_MEMORY,
NEON_ARG_STOP
} builtin_arg;
@@ -2044,9 +2047,9 @@ neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
/* Expand a Neon builtin. */
static rtx
arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
- int icode, int have_retval, tree exp, ...)
+ int icode, int have_retval, tree exp,
+ builtin_arg *args)
{
- va_list ap;
rtx pat;
tree arg[SIMD_MAX_BUILTIN_ARGS];
rtx op[SIMD_MAX_BUILTIN_ARGS];
@@ -2061,13 +2064,11 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
|| !(*insn_data[icode].operand[0].predicate) (target, tmode)))
target = gen_reg_rtx (tmode);
- va_start (ap, exp);
-
formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
for (;;)
{
- builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
+ builtin_arg thisarg = args[argc];
if (thisarg == NEON_ARG_STOP)
break;
@@ -2103,6 +2104,18 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
break;
+ case NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
+ gcc_assert (argc > 1);
+ if (CONST_INT_P (op[argc]))
+ {
+ neon_lane_bounds (op[argc], 0,
+ GET_MODE_NUNITS (map_mode), exp);
+ /* Keep to GCC-vector-extension lane indices in the RTL. */
+ op[argc] =
+ GEN_INT (NEON_ENDIAN_LANE_N (map_mode, INTVAL (op[argc])));
+ }
+ goto constant_arg;
+
case NEON_ARG_LANE_INDEX:
/* Previous argument must be a vector, which this indexes. */
gcc_assert (argc > 0);
@@ -2113,19 +2126,22 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
}
/* Fall through - if the lane index isn't a constant then
the next case will error. */
+
case NEON_ARG_CONSTANT:
+constant_arg:
if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc]))
- error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, "
- "expected %<const int%>", argc + 1);
+ {
+ error ("%Kargument %d must be a constant immediate",
+ exp, argc + 1);
+ return const0_rtx;
+ }
break;
+
case NEON_ARG_MEMORY:
/* Check if expand failed. */
if (op[argc] == const0_rtx)
- {
- va_end (ap);
return 0;
- }
gcc_assert (MEM_P (op[argc]));
PUT_MODE (op[argc], mode[argc]);
/* ??? arm_neon.h uses the same built-in functions for signed
@@ -2146,8 +2162,6 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
}
}
- va_end (ap);
-
if (have_retval)
switch (argc)
{
@@ -2259,6 +2273,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
args[k] = NEON_ARG_LANE_INDEX;
+ else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
+ args[k] = NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
args[k] = NEON_ARG_CONSTANT;
else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
@@ -2284,11 +2300,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
the function is void, and a 1 if it is not. */
return arm_expand_neon_args
(target, d->mode, fcode, icode, !is_void, exp,
- args[1],
- args[2],
- args[3],
- args[4],
- NEON_ARG_STOP);
+ &args[1]);
}
/* Expand an expression EXP that calls a built-in function,
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index eed3bf96582..bfd664b1537 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -29814,4 +29814,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
*pri = tmp;
return;
}
+
#include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 64f0aae4e17..0cef4474f6f 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -285,6 +285,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_BPABI false
#endif
+/* Transform lane numbers on big endian targets. This is used to allow for the
+ endianness difference between NEON architectural lane numbers and those
+ used in RTL */
+#define NEON_ENDIAN_LANE_N(mode, n) \
+ (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
+
/* Support for a compile-time default CPU, et cetera. The rules are:
--with-arch is ignored if -march or -mcpu are specified.
--with-cpu is ignored if -march or -mcpu are specified, and is overridden
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e5a2b0f1c9a..62fb6daae99 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4253,6 +4253,9 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load1_1reg<q>")]
)
+;; The lane numbers in the RTL are in GCC lane order, having been flipped
+;; in arm_expand_neon_args. The lane numbers are restored to architectural
+;; lane order here.
(define_insn "neon_vld1_lane<mode>"
[(set (match_operand:VDX 0 "s_register_operand" "=w")
(unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
@@ -4261,10 +4264,9 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD1_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
- if (lane < 0 || lane >= max)
- error ("lane out of range");
+ operands[3] = GEN_INT (lane);
if (max == 1)
return "vld1.<V_sz_elem>\t%P0, %A1";
else
@@ -4273,6 +4275,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load1_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld1_lane<mode>"
[(set (match_operand:VQX 0 "s_register_operand" "=w")
(unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
@@ -4281,12 +4285,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD1_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ operands[3] = GEN_INT (lane);
int regno = REGNO (operands[0]);
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -4359,6 +4362,8 @@ if (BYTES_BIG_ENDIAN)
"vst1.<V_sz_elem>\t%h1, %A0"
[(set_attr "type" "neon_store1_1reg<q>")])
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst1_lane<mode>"
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_elem>
@@ -4367,10 +4372,9 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST1_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
- if (lane < 0 || lane >= max)
- error ("lane out of range");
+ operands[2] = GEN_INT (lane);
if (max == 1)
return "vst1.<V_sz_elem>\t{%P1}, %A0";
else
@@ -4379,6 +4383,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store1_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst1_lane<mode>"
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_elem>
@@ -4387,17 +4393,15 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST1_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[1]);
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
- operands[2] = GEN_INT (lane);
}
+ operands[2] = GEN_INT (lane);
operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
if (max == 2)
return "vst1.<V_sz_elem>\t{%P1}, %A0";
@@ -4448,6 +4452,8 @@ if (BYTES_BIG_ENDIAN)
"vld2.<V_sz_elem>\t%h0, %A1"
[(set_attr "type" "neon_load2_2reg_q")])
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld2_lane<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
(unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
@@ -4457,22 +4463,21 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
int regno = REGNO (operands[0]);
rtx ops[4];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = gen_rtx_REG (DImode, regno);
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = operands[1];
- ops[3] = operands[3];
+ ops[3] = GEN_INT (lane);
output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
return "";
}
[(set_attr "type" "neon_load2_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld2_lane<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
@@ -4482,13 +4487,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD2_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[0]);
rtx ops[4];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -4563,6 +4566,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store2_4reg<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst2_lane<mode>"
[(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_two_elem>
@@ -4572,22 +4577,21 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
int regno = REGNO (operands[1]);
rtx ops[4];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 2);
- ops[3] = operands[2];
+ ops[3] = GEN_INT (lane);
output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
return "";
}
[(set_attr "type" "neon_store2_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst2_lane<mode>"
[(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_two_elem>
@@ -4597,13 +4601,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST2_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[1]);
rtx ops[4];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -4707,6 +4709,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load3_3reg<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld3_lane<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
(unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
@@ -4716,17 +4720,14 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
int regno = REGNO (operands[0]);
rtx ops[5];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = gen_rtx_REG (DImode, regno);
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = operands[1];
- ops[4] = operands[3];
+ ops[4] = GEN_INT (lane);
output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
ops);
return "";
@@ -4734,6 +4735,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load3_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld3_lane<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
@@ -4743,13 +4746,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD3_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[0]);
rtx ops[5];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -4879,6 +4880,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store3_3reg<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
[(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_three_elem>
@@ -4888,17 +4891,14 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
int regno = REGNO (operands[1]);
rtx ops[5];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 2);
ops[3] = gen_rtx_REG (DImode, regno + 4);
- ops[4] = operands[2];
+ ops[4] = GEN_INT (lane);
output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
ops);
return "";
@@ -4906,6 +4906,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store3_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
[(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_three_elem>
@@ -4915,13 +4917,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST3_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[1]);
rtx ops[5];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -5029,6 +5029,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load4_4reg<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
@@ -5038,18 +5040,15 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
int regno = REGNO (operands[0]);
rtx ops[6];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = gen_rtx_REG (DImode, regno);
ops[1] = gen_rtx_REG (DImode, regno + 2);
ops[2] = gen_rtx_REG (DImode, regno + 4);
ops[3] = gen_rtx_REG (DImode, regno + 6);
ops[4] = operands[1];
- ops[5] = operands[3];
+ ops[5] = GEN_INT (lane);
output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
ops);
return "";
@@ -5057,6 +5056,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_load4_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
@@ -5066,13 +5067,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VLD4_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[0]);
rtx ops[6];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;
@@ -5209,6 +5208,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store4_4reg<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst4_lane<mode>"
[(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_four_elem>
@@ -5218,18 +5219,15 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
- HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
int regno = REGNO (operands[1]);
rtx ops[6];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
ops[0] = operands[0];
ops[1] = gen_rtx_REG (DImode, regno);
ops[2] = gen_rtx_REG (DImode, regno + 2);
ops[3] = gen_rtx_REG (DImode, regno + 4);
ops[4] = gen_rtx_REG (DImode, regno + 6);
- ops[5] = operands[2];
+ ops[5] = GEN_INT (lane);
output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
ops);
return "";
@@ -5237,6 +5235,8 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_store4_one_lane<q>")]
)
+;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
+;; here on big endian targets.
(define_insn "neon_vst4_lane<mode>"
[(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
(unspec:<V_four_elem>
@@ -5246,13 +5246,11 @@ if (BYTES_BIG_ENDIAN)
UNSPEC_VST4_LANE))]
"TARGET_NEON"
{
- HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
int regno = REGNO (operands[1]);
rtx ops[6];
- if (lane < 0 || lane >= max)
- error ("lane out of range");
- else if (lane >= max / 2)
+ if (lane >= max / 2)
{
lane -= max / 2;
regno += 2;