summaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target/aarch64
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-11-20 16:02:55 +0000
committerRichard Sandiford <richard.sandiford@linaro.org>2017-11-20 16:02:55 +0000
commitd58952aefb03632bbb5b441d5c0bd330711f0af1 (patch)
treed046e56bfbd6a40106ae6ab96fafc954f1dfc955 /gcc/testsuite/gcc.target/aarch64
parent648f8fc59b2cc39abd24f4c22388b346cdebcc31 (diff)
parent50221fae802a10fafe95e61d40504a58da33e98f (diff)
downloadgcc-linaro-dev/sve.tar.gz
Merge trunk into svelinaro-dev/sve
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64')
-rw-r--r--gcc/testsuite/gcc.target/aarch64/bsl-idiom.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c37
-rw-r--r--gcc/testsuite/gcc.target/aarch64/copysign-bsl.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_cap_4.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ext_1.c64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ext_2.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_1.c80
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_2.c80
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_3.c122
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c102
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c63
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c35
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c130
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c161
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c50
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c19
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c54
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C56
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_1.c52
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c52
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_2.c19
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mad_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c83
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c69
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c98
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c65
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c27
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c37
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c156
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c177
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c53
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c173
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c186
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c22
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mla_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mls_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_msb_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c29
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C59
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c (renamed from gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C)23
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_rev_1.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revb_1.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revh_1.c14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revw_1.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c134
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c155
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c23
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_1.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_10.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_11.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_12.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_2.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_3.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_4.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_5.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c39
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_6.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c43
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_7.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_8.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_9.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c40
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c40
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c84
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c87
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c116
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c111
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c129
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c75
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c58
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c69
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c22
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c56
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c45
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c60
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c59
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c176
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c112
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c102
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_2.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_3.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_4.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c4
272 files changed, 3993 insertions, 4414 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c
new file mode 100644
index 00000000000..8151387600f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine --save-temps" } */
+
+/* Test that we don't generate BSL when in DImode with values in integer
+ registers, and do generate it where we have values in floating-point
+ registers. This is useful, as it allows us to avoid register moves
+ in the general case.
+
+ We want:
+ eor x0, x0, x1
+ and x0, x0, x2
+ eor x0, x0, x1
+ ret
+
+ Rather than:
+ fmov d2, x0
+ fmov d0, x2
+ fmov d1, x1
+ bsl v0.8b, v2.8b, v1.8b
+ fmov x0, d0
+ ret */
+
+extern void abort (void);
+
+unsigned long long __attribute__ ((noinline))
+foo (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ return ((a ^ b) & c) ^ b;
+}
+
+unsigned long long __attribute__ ((noinline))
+foo2 (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ return ((a ^ b) & c) ^ a;
+}
+
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
+ : "=w"(V1) \
+ : "w"(V1) \
+ : /* No clobbers */);
+
+unsigned long long __attribute__ ((noinline))
+bar (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ force_simd (a);
+ force_simd (b);
+ force_simd (c);
+ c = ((a ^ b) & c) ^ b;
+ force_simd (c);
+ return c;
+}
+
+unsigned long long __attribute__ ((noinline))
+bar2 (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ force_simd (a);
+ force_simd (b);
+ force_simd (c);
+ c = ((a ^ b) & c) ^ a;
+ force_simd (c);
+ return c;
+}
+
+int
+main (int argc, char** argv)
+{
+ unsigned long long a = 0x0123456789abcdefULL;
+ unsigned long long b = 0xfedcba9876543210ULL;
+ unsigned long long c = 0xaabbccddeeff7777ULL;
+ if (foo (a, b, c) != bar (a, b, c))
+ abort ();
+ if (foo2 (a, b, c) != bar2 (a, b, c))
+ abort ();
+ return 0;
+}
+
+/* 2 BSL, 6 FMOV (to floating-point registers), and 2 FMOV (to general
+purpose registers) for the "bar" tests, which should still use BSL. */
+/* { dg-final { scan-assembler-times "bsl\tv\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmov\td\[0-9\]" 6 } } */
+/* { dg-final { scan-assembler-times "fmov\tx\[0-9\]" 2 } } */
+
+/* { dg-final { scan-assembler-not "bif\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bit\tv\[0-9\]" } } */
+
+/* We always match the idiom during combine. */
+/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_internal" 2 "combine" } } */
+/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_alt" 2 "combine" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
new file mode 100644
index 00000000000..d87f3290828
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+ v2di x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0, y[0] }
+#else
+ { y[0], 0 }
+#endif
+ ;
+ return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+ v2df x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0.0, y[0] }
+#else
+ { y[0], 0.0 }
+#endif
+ ;
+ return x;
+}
+
+/* Check that creating V2DI and V2DF vectors from a lane with a zero
+ makes use of the D-reg LDR rather than doing explicit lane inserts. */
+
+/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c
new file mode 100644
index 00000000000..0ec7109c738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we can generate DImode BSL when we are using
+ copysign. */
+
+double
+foo (double a, double b)
+{
+ return __builtin_copysign (a, b);
+}
+
+/* { dg-final { scan-assembler "b\(sl|it|if\)\tv\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
index cce88155aca..ae5b3797021 100644
--- a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
+++ b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
@@ -3,7 +3,7 @@
/* { dg-options "-O0 -gdwarf-2" } */
/* { dg-final { scan-assembler ".cfi_restore 30" } } */
/* { dg-final { scan-assembler ".cfi_restore 29" } } */
-/* { dg-final { scan-assembler ".cfi_def_cfa 31, 0" } } */
+/* { dg-final { scan-assembler ".cfi_def_cfa_offset 0" } } */
/* { dg-final { scan-assembler "ret" } } */
int bar (unsigned int);
diff --git a/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c
new file mode 100644
index 00000000000..3c31b340154
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+ v2di x = { y[0], y[1] };
+ return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+ v2df x = { y[0], y[1] };
+ return x;
+}
+
+/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF
+ values from consecutive memory into a 2-element vector by using
+ a Q-reg LDR. */
+
+/* { dg-final { scan-assembler-times "ldr\tq\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
new file mode 100644
index 00000000000..6810db3c54d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void
+construct_lane_1 (double *y, v2df *z)
+{
+ double y0 = y[0] + 1;
+ double y1 = y[1] + 2;
+ v2df x = {y0, y1};
+ z[2] = x;
+}
+
+void
+construct_lane_2 (long long *y, v2di *z)
+{
+ long long y0 = y[0] + 1;
+ long long y1 = y[1] + 2;
+ v2di x = {y0, y1};
+ z[2] = x;
+}
+
+/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF
+ values from consecutive memory into a 2-element vector by using
+ a Q-reg LDR. */
+
+/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
index b22828d621b..c3bf2f326d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
@@ -36,7 +36,7 @@ LOOP (double)
/* { dg-final { scan-assembler-times {\tstr\td[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 4 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 4 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
index a176d9ce251..4651c70afda 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
@@ -1,11 +1,11 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define N 32
/* Simple condition reduction. */
-int
+int __attribute__ ((noinline, noclone))
condition_reduction (int *a, int min_v)
{
int last = 66; /* High start value. */
@@ -17,6 +17,4 @@ condition_reduction (int *a, int min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
index 8e6444e4239..0dcba03b61c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_1.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
int a[N] = {
- 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
int ret = condition_reduction (a, 1);
if (ret != 17)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
index dcae41f5425..381cbd17577 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
@@ -1,15 +1,17 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
#if !defined(TYPE)
-#define TYPE unsigned int
+#define TYPE uint32_t
#endif
#define N 254
/* Non-simple condition reduction. */
-TYPE
+TYPE __attribute__ ((noinline, noclone))
condition_reduction (TYPE *a, TYPE min_v)
{
TYPE last = 65;
@@ -21,7 +23,4 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
index 0503ba36c3d..0d5187ba3ae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_2.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
unsigned int a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, (N-32)*sizeof (int));
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (int));
unsigned int ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
index 1061194a08e..90a3b938593 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE unsigned char
+#define TYPE uint8_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
index 90c3e4a0cf3..f90fbfc5e9b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_3.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
unsigned char a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, N-32);
+ __builtin_memset (a + 32, 43, N - 32);
unsigned char ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
index 698d958693a..dc01b21c273 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE short
+#define TYPE int16_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
index d0337ab300d..e17199f3672 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
@@ -5,7 +5,7 @@
extern void abort (void) __attribute__ ((noreturn));
-int
+int __attribute__ ((optimize (1)))
main (void)
{
short a[N] = {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
index 655f95f410a..aef2a80c68f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE long
+#define TYPE uint64_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
index 573787233d8..e251db0bb76 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_5.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
long a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, (N-32)*sizeof (long));
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (long));
long ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
index bf1bc1a346a..93fec6396a2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
@@ -1,5 +1,5 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define N 32
@@ -9,7 +9,7 @@
/* Non-integer data types. */
-TYPE
+TYPE __attribute__ ((noinline, noclone))
condition_reduction (TYPE *a, TYPE min_v)
{
TYPE last = 0;
@@ -21,8 +21,4 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
index 4c760daba89..c204ed4c4f0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_6.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
float a[N] = {
- 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
- 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
- 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
- 31.111, 32.322
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
};
float ret = condition_reduction (a, 16.7);
- if (ret != (float)10.6)
- abort ();
+ if (ret != (float) 10.6)
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
index 12e53b75e8a..d232a87e41d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
@@ -1,11 +1,7 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_clastb_6.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
index d0001a923e8..2f87a4766e0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_7.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
double a[N] = {
- 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
- 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
- 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
- 31.111, 32.322
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
};
double ret = condition_reduction (a, 16.7);
- if (ret != (double)10.6)
- abort ();
+ if (ret != 10.6)
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
index 4937e7f10e5..3f30a527cae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
@@ -1,15 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef signed char v32qi __attribute__((vector_size(32)));
+typedef signed char vnx16qi __attribute__((vector_size(32)));
-v32qi
-foo (v32qi x, v32qi y)
+vnx16qi
+foo (vnx16qi x, vnx16qi y)
{
- return (v32qi) { -1, 0, 0, -1, -1, -1, 0, 0,
- -1, -1, -1, -1, 0, 0, 0, 0,
- -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y;
+ return (vnx16qi) { -1, 0, 0, -1, -1, -1, 0, 0,
+ -1, -1, -1, -1, 0, 0, 0, 0,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
index 3de4a8ccd00..ec8a0ab9d69 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
@@ -1,13 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef short v16hi __attribute__((vector_size(32)));
+typedef short vnx8hi __attribute__((vector_size(32)));
-v16hi
-foo (v16hi x, v16hi y)
+vnx8hi
+foo (vnx8hi x, vnx8hi y)
{
- return (v16hi) { -1, 0, 0, -1, -1, -1, 0, 0,
- -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y;
+ return (vnx8hi) { -1, 0, 0, -1, -1, -1, 0, 0,
+ -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
index 8185f7baa76..ab1429d4e40 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
@@ -1,12 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef int v8si __attribute__((vector_size(32)));
+typedef int vnx4si __attribute__((vector_size(32)));
-v8si
-foo (v8si x, v8si y)
+vnx4si
+foo (vnx4si x, vnx4si y)
{
- return (v8si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y;
+ return (vnx4si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
index b15da8a59e2..3ad39b9df7d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
@@ -1,12 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef long long v4di __attribute__((vector_size(32)));
+typedef long long vnx2di __attribute__((vector_size(32)));
-v4di
-foo (v4di x, v4di y)
+vnx2di
+foo (vnx2di x, vnx2di y)
{
- return (v4di) { -1, 0, 0, -1 } ? x : y;
+ return (vnx2di) { -1, 0, 0, -1 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
index ea977207226..8df86eb6b1b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X) X, X
#define MASK_4(X) MASK_2 (X), MASK_2 (X)
@@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X) MASK_8 (X), MASK_8 (X)
#define MASK_32(X) MASK_16 (X), MASK_16 (X)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define DUP_LANE(TYPE, NUNITS, INDEX) \
TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \
@@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4, 0) \
- T (v4di, 4, 2) \
- T (v4di, 4, 3) \
- T (v8si, 8, 0) \
- T (v8si, 8, 5) \
- T (v8si, 8, 7) \
- T (v16hi, 16, 0) \
- T (v16hi, 16, 6) \
- T (v16hi, 16, 15) \
- T (v32qi, 32, 0) \
- T (v32qi, 32, 19) \
- T (v32qi, 32, 31) \
- T (v4df, 4, 0) \
- T (v4df, 4, 2) \
- T (v4df, 4, 3) \
- T (v8sf, 8, 0) \
- T (v8sf, 8, 5) \
- T (v8sf, 8, 7) \
- T (v16hf, 16, 0) \
- T (v16hf, 16, 6) \
- T (v16hf, 16, 15) \
+ T (vnx2di, 4, 0) \
+ T (vnx2di, 4, 2) \
+ T (vnx2di, 4, 3) \
+ T (vnx4si, 8, 0) \
+ T (vnx4si, 8, 5) \
+ T (vnx4si, 8, 7) \
+ T (vnx8hi, 16, 0) \
+ T (vnx8hi, 16, 6) \
+ T (vnx8hi, 16, 15) \
+ T (vnx16qi, 32, 0) \
+ T (vnx16qi, 32, 19) \
+ T (vnx16qi, 32, 31) \
+ T (vnx2df, 4, 0) \
+ T (vnx2df, 4, 2) \
+ T (vnx2df, 4, 3) \
+ T (vnx4sf, 8, 0) \
+ T (vnx4sf, 8, 5) \
+ T (vnx4sf, 8, 7) \
+ T (vnx8hf, 16, 0) \
+ T (vnx8hf, 16, 6) \
+ T (vnx8hf, 16, 15) \
TEST_ALL (DUP_LANE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
index 1ec51aa2eaf..05bd6dc8f65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X) X, X + 1
#define MASK_4(X) MASK_2 (X), MASK_2 (X + 2)
@@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X) MASK_8 (X), MASK_8 (X + 8)
#define MASK_32(X) MASK_16 (X), MASK_16 (X + 16)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define DUP_LANE(TYPE, NUNITS, INDEX) \
TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \
@@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4, 1) \
- T (v4di, 4, 2) \
- T (v4di, 4, 3) \
- T (v8si, 8, 1) \
- T (v8si, 8, 5) \
- T (v8si, 8, 7) \
- T (v16hi, 16, 1) \
- T (v16hi, 16, 6) \
- T (v16hi, 16, 15) \
- T (v32qi, 32, 1) \
- T (v32qi, 32, 19) \
- T (v32qi, 32, 31) \
- T (v4df, 4, 1) \
- T (v4df, 4, 2) \
- T (v4df, 4, 3) \
- T (v8sf, 8, 1) \
- T (v8sf, 8, 5) \
- T (v8sf, 8, 7) \
- T (v16hf, 16, 1) \
- T (v16hf, 16, 6) \
- T (v16hf, 16, 15) \
+ T (vnx2di, 4, 1) \
+ T (vnx2di, 4, 2) \
+ T (vnx2di, 4, 3) \
+ T (vnx4si, 8, 1) \
+ T (vnx4si, 8, 5) \
+ T (vnx4si, 8, 7) \
+ T (vnx8hi, 16, 1) \
+ T (vnx8hi, 16, 6) \
+ T (vnx8hi, 16, 15) \
+ T (vnx16qi, 32, 1) \
+ T (vnx16qi, 32, 19) \
+ T (vnx16qi, 32, 31) \
+ T (vnx2df, 4, 1) \
+ T (vnx2df, 4, 2) \
+ T (vnx2df, 4, 3) \
+ T (vnx4sf, 8, 1) \
+ T (vnx4sf, 8, 5) \
+ T (vnx4sf, 8, 7) \
+ T (vnx8hf, 16, 1) \
+ T (vnx8hf, 16, 6) \
+ T (vnx8hf, 16, 15) \
TEST_ALL (DUP_LANE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
index b93574e50f7..047d4c59651 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
@@ -1,16 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef int v8si __attribute__((vector_size (32)));
+typedef int vnx4si __attribute__((vector_size (32)));
void
foo (void)
{
- register v8si x asm ("z0");
- register v8si y asm ("z1");
+ register vnx4si x asm ("z0");
+ register vnx4si y asm ("z1");
asm volatile ("" : "=w" (y));
- x = __builtin_shuffle (y, y, (v8si) { 1, 2, 3, 4, 5, 6, 7, 8 });
+ x = __builtin_shuffle (y, y, (vnx4si) { 1, 2, 3, 4, 5, 6, 7, 8 });
asm volatile ("" :: "w" (x));
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
index 1ba277ffa6d..f9cd8d2998e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,39 +20,39 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (int64_t, v4di, 0) \
- T (int64_t, v4di, 1) \
- T (int64_t, v4di, 2) \
- T (int64_t, v4di, 3) \
- T (int32_t, v8si, 0) \
- T (int32_t, v8si, 1) \
- T (int32_t, v8si, 3) \
- T (int32_t, v8si, 4) \
- T (int32_t, v8si, 7) \
- T (int16_t, v16hi, 0) \
- T (int16_t, v16hi, 1) \
- T (int16_t, v16hi, 7) \
- T (int16_t, v16hi, 8) \
- T (int16_t, v16hi, 15) \
- T (int8_t, v32qi, 0) \
- T (int8_t, v32qi, 1) \
- T (int8_t, v32qi, 15) \
- T (int8_t, v32qi, 16) \
- T (int8_t, v32qi, 31) \
- T (double, v4df, 0) \
- T (double, v4df, 1) \
- T (double, v4df, 2) \
- T (double, v4df, 3) \
- T (float, v8sf, 0) \
- T (float, v8sf, 1) \
- T (float, v8sf, 3) \
- T (float, v8sf, 4) \
- T (float, v8sf, 7) \
- T (_Float16, v16hf, 0) \
- T (_Float16, v16hf, 1) \
- T (_Float16, v16hf, 7) \
- T (_Float16, v16hf, 8) \
- T (_Float16, v16hf, 15)
+ T (int64_t, vnx2di, 0) \
+ T (int64_t, vnx2di, 1) \
+ T (int64_t, vnx2di, 2) \
+ T (int64_t, vnx2di, 3) \
+ T (int32_t, vnx4si, 0) \
+ T (int32_t, vnx4si, 1) \
+ T (int32_t, vnx4si, 3) \
+ T (int32_t, vnx4si, 4) \
+ T (int32_t, vnx4si, 7) \
+ T (int16_t, vnx8hi, 0) \
+ T (int16_t, vnx8hi, 1) \
+ T (int16_t, vnx8hi, 7) \
+ T (int16_t, vnx8hi, 8) \
+ T (int16_t, vnx8hi, 15) \
+ T (int8_t, vnx16qi, 0) \
+ T (int8_t, vnx16qi, 1) \
+ T (int8_t, vnx16qi, 15) \
+ T (int8_t, vnx16qi, 16) \
+ T (int8_t, vnx16qi, 31) \
+ T (double, vnx2df, 0) \
+ T (double, vnx2df, 1) \
+ T (double, vnx2df, 2) \
+ T (double, vnx2df, 3) \
+ T (float, vnx4sf, 0) \
+ T (float, vnx4sf, 1) \
+ T (float, vnx4sf, 3) \
+ T (float, vnx4sf, 4) \
+ T (float, vnx4sf, 7) \
+ T (_Float16, vnx8hf, 0) \
+ T (_Float16, vnx8hf, 1) \
+ T (_Float16, vnx8hf, 7) \
+ T (_Float16, vnx8hf, 8) \
+ T (_Float16, vnx8hf, 15)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
index b163f28ef28..717546997b3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v8di __attribute__((vector_size (64)));
-typedef int32_t v16si __attribute__((vector_size (64)));
-typedef int16_t v32hi __attribute__((vector_size (64)));
-typedef int8_t v64qi __attribute__((vector_size (64)));
-typedef double v8df __attribute__((vector_size (64)));
-typedef float v16sf __attribute__((vector_size (64)));
-typedef _Float16 v32hf __attribute__((vector_size (64)));
+typedef int64_t vnx4di __attribute__((vector_size (64)));
+typedef int32_t vnx8si __attribute__((vector_size (64)));
+typedef int16_t vnx16hi __attribute__((vector_size (64)));
+typedef int8_t vnx32qi __attribute__((vector_size (64)));
+typedef double vnx4df __attribute__((vector_size (64)));
+typedef float vnx8sf __attribute__((vector_size (64)));
+typedef _Float16 vnx16hf __attribute__((vector_size (64)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,39 +20,39 @@ typedef _Float16 v32hf __attribute__((vector_size (64)));
}
#define TEST_ALL(T) \
- T (int64_t, v8di, 0) \
- T (int64_t, v8di, 1) \
- T (int64_t, v8di, 2) \
- T (int64_t, v8di, 7) \
- T (int32_t, v16si, 0) \
- T (int32_t, v16si, 1) \
- T (int32_t, v16si, 3) \
- T (int32_t, v16si, 4) \
- T (int32_t, v16si, 15) \
- T (int16_t, v32hi, 0) \
- T (int16_t, v32hi, 1) \
- T (int16_t, v32hi, 7) \
- T (int16_t, v32hi, 8) \
- T (int16_t, v32hi, 31) \
- T (int8_t, v64qi, 0) \
- T (int8_t, v64qi, 1) \
- T (int8_t, v64qi, 15) \
- T (int8_t, v64qi, 16) \
- T (int8_t, v64qi, 63) \
- T (double, v8df, 0) \
- T (double, v8df, 1) \
- T (double, v8df, 2) \
- T (double, v8df, 7) \
- T (float, v16sf, 0) \
- T (float, v16sf, 1) \
- T (float, v16sf, 3) \
- T (float, v16sf, 4) \
- T (float, v16sf, 15) \
- T (_Float16, v32hf, 0) \
- T (_Float16, v32hf, 1) \
- T (_Float16, v32hf, 7) \
- T (_Float16, v32hf, 8) \
- T (_Float16, v32hf, 31)
+ T (int64_t, vnx4di, 0) \
+ T (int64_t, vnx4di, 1) \
+ T (int64_t, vnx4di, 2) \
+ T (int64_t, vnx4di, 7) \
+ T (int32_t, vnx8si, 0) \
+ T (int32_t, vnx8si, 1) \
+ T (int32_t, vnx8si, 3) \
+ T (int32_t, vnx8si, 4) \
+ T (int32_t, vnx8si, 15) \
+ T (int16_t, vnx16hi, 0) \
+ T (int16_t, vnx16hi, 1) \
+ T (int16_t, vnx16hi, 7) \
+ T (int16_t, vnx16hi, 8) \
+ T (int16_t, vnx16hi, 31) \
+ T (int8_t, vnx32qi, 0) \
+ T (int8_t, vnx32qi, 1) \
+ T (int8_t, vnx32qi, 15) \
+ T (int8_t, vnx32qi, 16) \
+ T (int8_t, vnx32qi, 63) \
+ T (double, vnx4df, 0) \
+ T (double, vnx4df, 1) \
+ T (double, vnx4df, 2) \
+ T (double, vnx4df, 7) \
+ T (float, vnx8sf, 0) \
+ T (float, vnx8sf, 1) \
+ T (float, vnx8sf, 3) \
+ T (float, vnx8sf, 4) \
+ T (float, vnx8sf, 15) \
+ T (_Float16, vnx16hf, 0) \
+ T (_Float16, vnx16hf, 1) \
+ T (_Float16, vnx16hf, 7) \
+ T (_Float16, vnx16hf, 8) \
+ T (_Float16, vnx16hf, 31)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
index 87ac2351768..19a22cdd7b7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v16di __attribute__((vector_size (128)));
-typedef int32_t v32si __attribute__((vector_size (128)));
-typedef int16_t v64hi __attribute__((vector_size (128)));
-typedef int8_t v128qi __attribute__((vector_size (128)));
-typedef double v16df __attribute__((vector_size (128)));
-typedef float v32sf __attribute__((vector_size (128)));
-typedef _Float16 v64hf __attribute__((vector_size (128)));
+typedef int64_t vnx8di __attribute__((vector_size (128)));
+typedef int32_t vnx16si __attribute__((vector_size (128)));
+typedef int16_t vnx32hi __attribute__((vector_size (128)));
+typedef int8_t vnx64qi __attribute__((vector_size (128)));
+typedef double vnx8df __attribute__((vector_size (128)));
+typedef float vnx16sf __attribute__((vector_size (128)));
+typedef _Float16 vnx32hf __attribute__((vector_size (128)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,60 +20,60 @@ typedef _Float16 v64hf __attribute__((vector_size (128)));
}
#define TEST_ALL(T) \
- T (int64_t, v16di, 0) \
- T (int64_t, v16di, 1) \
- T (int64_t, v16di, 2) \
- T (int64_t, v16di, 7) \
- T (int64_t, v16di, 8) \
- T (int64_t, v16di, 9) \
- T (int64_t, v16di, 15) \
- T (int32_t, v32si, 0) \
- T (int32_t, v32si, 1) \
- T (int32_t, v32si, 3) \
- T (int32_t, v32si, 4) \
- T (int32_t, v32si, 15) \
- T (int32_t, v32si, 16) \
- T (int32_t, v32si, 21) \
- T (int32_t, v32si, 31) \
- T (int16_t, v64hi, 0) \
- T (int16_t, v64hi, 1) \
- T (int16_t, v64hi, 7) \
- T (int16_t, v64hi, 8) \
- T (int16_t, v64hi, 31) \
- T (int16_t, v64hi, 32) \
- T (int16_t, v64hi, 47) \
- T (int16_t, v64hi, 63) \
- T (int8_t, v128qi, 0) \
- T (int8_t, v128qi, 1) \
- T (int8_t, v128qi, 15) \
- T (int8_t, v128qi, 16) \
- T (int8_t, v128qi, 63) \
- T (int8_t, v128qi, 64) \
- T (int8_t, v128qi, 100) \
- T (int8_t, v128qi, 127) \
- T (double, v16df, 0) \
- T (double, v16df, 1) \
- T (double, v16df, 2) \
- T (double, v16df, 7) \
- T (double, v16df, 8) \
- T (double, v16df, 9) \
- T (double, v16df, 15) \
- T (float, v32sf, 0) \
- T (float, v32sf, 1) \
- T (float, v32sf, 3) \
- T (float, v32sf, 4) \
- T (float, v32sf, 15) \
- T (float, v32sf, 16) \
- T (float, v32sf, 21) \
- T (float, v32sf, 31) \
- T (_Float16, v64hf, 0) \
- T (_Float16, v64hf, 1) \
- T (_Float16, v64hf, 7) \
- T (_Float16, v64hf, 8) \
- T (_Float16, v64hf, 31) \
- T (_Float16, v64hf, 32) \
- T (_Float16, v64hf, 47) \
- T (_Float16, v64hf, 63)
+ T (int64_t, vnx8di, 0) \
+ T (int64_t, vnx8di, 1) \
+ T (int64_t, vnx8di, 2) \
+ T (int64_t, vnx8di, 7) \
+ T (int64_t, vnx8di, 8) \
+ T (int64_t, vnx8di, 9) \
+ T (int64_t, vnx8di, 15) \
+ T (int32_t, vnx16si, 0) \
+ T (int32_t, vnx16si, 1) \
+ T (int32_t, vnx16si, 3) \
+ T (int32_t, vnx16si, 4) \
+ T (int32_t, vnx16si, 15) \
+ T (int32_t, vnx16si, 16) \
+ T (int32_t, vnx16si, 21) \
+ T (int32_t, vnx16si, 31) \
+ T (int16_t, vnx32hi, 0) \
+ T (int16_t, vnx32hi, 1) \
+ T (int16_t, vnx32hi, 7) \
+ T (int16_t, vnx32hi, 8) \
+ T (int16_t, vnx32hi, 31) \
+ T (int16_t, vnx32hi, 32) \
+ T (int16_t, vnx32hi, 47) \
+ T (int16_t, vnx32hi, 63) \
+ T (int8_t, vnx64qi, 0) \
+ T (int8_t, vnx64qi, 1) \
+ T (int8_t, vnx64qi, 15) \
+ T (int8_t, vnx64qi, 16) \
+ T (int8_t, vnx64qi, 63) \
+ T (int8_t, vnx64qi, 64) \
+ T (int8_t, vnx64qi, 100) \
+ T (int8_t, vnx64qi, 127) \
+ T (double, vnx8df, 0) \
+ T (double, vnx8df, 1) \
+ T (double, vnx8df, 2) \
+ T (double, vnx8df, 7) \
+ T (double, vnx8df, 8) \
+ T (double, vnx8df, 9) \
+ T (double, vnx8df, 15) \
+ T (float, vnx16sf, 0) \
+ T (float, vnx16sf, 1) \
+ T (float, vnx16sf, 3) \
+ T (float, vnx16sf, 4) \
+ T (float, vnx16sf, 15) \
+ T (float, vnx16sf, 16) \
+ T (float, vnx16sf, 21) \
+ T (float, vnx16sf, 31) \
+ T (_Float16, vnx32hf, 0) \
+ T (_Float16, vnx32hf, 1) \
+ T (_Float16, vnx32hf, 7) \
+ T (_Float16, vnx32hf, 8) \
+ T (_Float16, vnx32hf, 31) \
+ T (_Float16, vnx32hf, 32) \
+ T (_Float16, vnx32hf, 47) \
+ T (_Float16, vnx32hf, 63)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
index b193726ea0a..5934b2dfb12 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vdiv_##TYPE (TYPE *x, TYPE y) \
@@ -29,9 +29,9 @@ void vdivr_##TYPE (TYPE *x, TYPE y) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
index 2b1dbb087bc..7b1575f9ee4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
index d5e4df266bf..381af4c8517 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
index c3f2c8a5823..744d0bb7bcc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
index 30e1895c8d5..e1251bd9cf6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
index 84a95187314..238bd852117 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
index dcc4811f1d8..f258a7454da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
index 7a89399f4be..4d859d4b0a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
index 6c95b0abc8e..2510a6f2831 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options " -O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
index 096a969d756..6ed5c06bd51 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
@@ -1,72 +1,32 @@
/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, unsigned short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, unsigned char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load64s(signed long * restrict dst, signed long * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32s(signed int * restrict dst, signed int * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16s(signed short * restrict dst, signed short * restrict src, unsigned short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8s(signed char * restrict dst, signed char * restrict src, unsigned char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_double(double * restrict dst, double * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_float(float * restrict dst, float * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c
deleted file mode 100644
index b31b4508114..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, signed short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, signed char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load64s(signed long * restrict dst, signed long * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32s(signed int * restrict dst, signed int * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16s(signed short * restrict dst, signed short * restrict src, signed short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8s(signed char * restrict dst, signed char * restrict src, signed char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_double(double * restrict dst, double * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_float(float * restrict dst, float * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c
deleted file mode 100644
index d8a85396eb4..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void
-f (double *restrict a, double *restrict b, short *c, int *d, int n)
-{
- for (int i = 0; i < n; i++)
- a[i] = b[c[i] + d[i]];
-}
-
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+.h,} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s,} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d,} 4 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+.d,} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
index 9b62b12904e..4e348db3bf1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
@@ -1,72 +1,10 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-void gather_loadu64_s16(unsigned long * restrict dst, unsigned long * restrict src,
- short int * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-void gather_loadu64_u16(unsigned long * restrict dst, unsigned long * restrict src,
- unsigned short int * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
+#include "sve_gather_load_1.c"
-void gather_loadd_s16(double * restrict dst, double * restrict src,
- short * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_u16(double * restrict dst, double * restrict src,
- unsigned short * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadu64_s32(unsigned long * restrict dst, unsigned long * restrict src,
- int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadu64_u32(unsigned long * restrict dst, unsigned long * restrict src,
- unsigned int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_s32(double * restrict dst, double * restrict src,
- int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_u32(double * restrict dst, double * restrict src,
- unsigned int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-/* At present we only use unpacks for the 32/64 combinations. */
-/* { dg-final { scan-assembler-times {\tpunpklo\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\tpunpkhi\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */
-
-/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
index 0a8f802ce56..a113a0faeb9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
@@ -1,45 +1,32 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- DATA_TYPE __attribute__ ((noinline)) \
- NAME (char *data, INDEX_TYPE *indices, int n) \
- { \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += *(DATA_TYPE *) (data + indices[i]); \
- return sum; \
- }
+#include <stdint.h>
-#define TEST32(NAME, DATA_TYPE) \
- TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \
- TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \
- TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \
- TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \
- TEST_LOOP (NAME ## _s16, DATA_TYPE, signed short) \
- TEST_LOOP (NAME ## _s32, DATA_TYPE, signed int)
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
-#define TEST64(NAME, DATA_TYPE) \
- TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \
- TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \
- TEST_LOOP (NAME ## _s16, DATA_TYPE, short) \
- TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \
- TEST_LOOP (NAME ## _s32, DATA_TYPE, int) \
- TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \
- TEST_LOOP (NAME ## _s64, DATA_TYPE, long) \
- TEST_LOOP (NAME ## _u64, DATA_TYPE, unsigned long)
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \
+ }
-TEST32 (f_s32, int)
-TEST32 (f_u32, unsigned int)
-TEST32 (f_f32, float)
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
-TEST64 (f_s64, long)
-TEST64 (f_u64, unsigned long)
-TEST64 (f_f64, double)
+TEST_ALL (TEST_LOOP)
-/* (4 + 2 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]} 21 } } */
-/* (4 + 2 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 21 } } */
-/* (8 + 8 + 4 + 4 + 2 + 2 + 1 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 90 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c
deleted file mode 100644
index baa90d5d5fc..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_gather_load_3.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- { \
- INDEX_TYPE indices[N]; \
- DATA_TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \
- j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \
- sum += data[j / sizeof (DATA_TYPE)]; \
- indices[i] = j; \
- } \
- DATA_TYPE res = NAME ((char *) data, indices, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST32 (f_s32, int)
- TEST32 (f_u32, unsigned int)
- TEST32 (f_f32, float)
-
- TEST64 (f_s64, long)
- TEST64 (f_u64, unsigned long)
- TEST64 (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
index 4d0da987d30..5382e523689 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
@@ -1,18 +1,10 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TEST_LOOP(NAME, TYPE) \
- TYPE __attribute__ ((noinline)) \
- NAME (TYPE **indices, int n) \
- { \
- TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += *indices[i]; \
- return sum; \
- }
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-TEST_LOOP (f_s64, long)
-TEST_LOOP (f_u64, unsigned long)
-TEST_LOOP (f_f64, double)
+#include "sve_gather_load_3.c"
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c
deleted file mode 100644
index 00d3dea6acd..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_gather_load_4.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE *ptrs[N]; \
- TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- ptrs[i] = &data[i * 3 / 2]; \
- sum += *ptrs[i]; \
- } \
- TYPE res = NAME (ptrs, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s64, long)
- TEST_LOOP (f_u64, unsigned long)
- TEST_LOOP (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
index 0aaf9553a11..8e4f689243b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
@@ -1,113 +1,23 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE)\
-void gather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i] = src[i * STRIDE];\
-}
-
-#define GATHER_LOAD2(OBJTYPE,STRIDETYPE)\
-void gather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i] = src[i * stride];\
-}
-
-#define GATHER_LOAD3(OBJTYPE,STRIDETYPE)\
-void gather_load3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\
- OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- d1[i] = src[0 + (i * STRIDE)];\
- d2[i] = src[1 + (i * STRIDE)];\
- d3[i] = src[2 + (i * STRIDE)];\
- d4[i] = src[3 + (i * STRIDE)];\
- d5[i] = src[4 + (i * STRIDE)];\
- }\
-}
-
-#define GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE)\
-void gather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += 1;\
- src += STRIDE;\
- }\
-}
-
-#define GATHER_LOAD5(OBJTYPE,STRIDETYPE)\
-void gather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += 1;\
- src += stride;\
- }\
-}
-
-GATHER_LOAD1 (double, long, 5)
-GATHER_LOAD1 (double, long, 8)
-GATHER_LOAD1 (double, long, 21)
-GATHER_LOAD1 (double, long, 1009)
-
-GATHER_LOAD1 (float, int, 5)
-GATHER_LOAD1 (float, int, 8)
-GATHER_LOAD1 (float, int, 21)
-GATHER_LOAD1 (float, int, 1009)
-
-GATHER_LOAD2 (double, long)
-GATHER_LOAD2 (float, int)
-
-GATHER_LOAD3 (double, long)
-GATHER_LOAD3 (float, int)
-
-GATHER_LOAD4 (double, long, 5)
-
-/* NOTE: We can't vectorize GATHER_LOAD4 (float, int, 5) because we can't prove
- that the offsets used for the gather load won't overflow. */
-
-GATHER_LOAD5 (double, long)
-GATHER_LOAD5 (float, int)
-
-/* Widened forms. */
-GATHER_LOAD1 (double, int, 5)
-GATHER_LOAD1 (double, int, 8)
-GATHER_LOAD1 (double, short, 5)
-GATHER_LOAD1 (double, short, 8)
-
-GATHER_LOAD1 (float, short, 5)
-GATHER_LOAD1 (float, short, 8)
-
-GATHER_LOAD2 (double, int)
-GATHER_LOAD2 (float, short)
-
-GATHER_LOAD4 (double, int, 5)
-GATHER_LOAD4 (float, short, 5)
-
-GATHER_LOAD5 (double, int)
-
-/* TODO: We generate abysmal code for this even though we don't use gathers. */
-/*GATHER_LOAD5 (float, short)*/
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+#include <stdint.h>
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict *src, \
+ int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += *src[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c
deleted file mode 100644
index 7608f9b569b..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_gather_load_5.c"
-
-#define NUM_DST_ELEMS 13
-#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE)
-
-#define TEST_GATHER_LOAD_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- src[i * STRIDE] = i;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE \
- (dst, src, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_GATHER_LOAD_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- src[i * STRIDE] = i;\
- FUN##OBJTYPE##STRIDETYPE \
- (dst, src, STRIDE, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON1 (gather_load1, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD2(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON2 (gather_load2, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD3(OBJTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\
- memset (real_dst1, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst2, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst3, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst4, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst5, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst1 = &real_dst1[1];\
- OBJTYPE *dst2 = &real_dst2[1];\
- OBJTYPE *dst3 = &real_dst3[1];\
- OBJTYPE *dst4 = &real_dst4[1];\
- OBJTYPE *dst5 = &real_dst5[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\
- src[i] = i;\
- gather_load3s5##OBJTYPE##STRIDETYPE \
- (dst1, dst2, dst3, dst4, dst5, src, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst1[i] != base)\
- abort ();\
- if (dst2[i] != (base + 1))\
- abort ();\
- if (dst3[i] != (base + 2))\
- abort ();\
- if (dst4[i] != (base + 3))\
- abort ();\
- if (dst5[i] != (base + 4))\
- abort ();\
- }\
-}
-
-#define TEST_GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON1 (gather_load4, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD5(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON2 (gather_load5, OBJTYPE, STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_GATHER_LOAD1 (double, long, 5);
- TEST_GATHER_LOAD1 (double, long, 8);
- TEST_GATHER_LOAD1 (double, long, 21);
-
- TEST_GATHER_LOAD1 (float, int, 5);
- TEST_GATHER_LOAD1 (float, int, 8);
- TEST_GATHER_LOAD1 (float, int, 21);
-
- TEST_GATHER_LOAD2 (double, long, 5);
- TEST_GATHER_LOAD2 (double, long, 8);
- TEST_GATHER_LOAD2 (double, long, 21);
-
- TEST_GATHER_LOAD2 (float, int, 5);
- TEST_GATHER_LOAD2 (float, int, 8);
- TEST_GATHER_LOAD2 (float, int, 21);
-
- TEST_GATHER_LOAD3 (double, long);
- TEST_GATHER_LOAD3 (float, int);
-
- TEST_GATHER_LOAD4 (double, long, 5);
-
- TEST_GATHER_LOAD5 (double, long, 5);
- TEST_GATHER_LOAD5 (float, int, 5);
-
- /* Widened forms. */
- TEST_GATHER_LOAD1 (double, int, 5)
- TEST_GATHER_LOAD1 (double, int, 8)
- TEST_GATHER_LOAD1 (double, short, 5)
- TEST_GATHER_LOAD1 (double, short, 8)
-
- TEST_GATHER_LOAD1 (float, short, 5)
- TEST_GATHER_LOAD1 (float, short, 8)
-
- TEST_GATHER_LOAD2 (double, int, 5);
- TEST_GATHER_LOAD2 (double, int, 8);
- TEST_GATHER_LOAD2 (double, int, 21);
-
- TEST_GATHER_LOAD2 (float, short, 5);
- TEST_GATHER_LOAD2 (float, short, 8);
- TEST_GATHER_LOAD2 (float, short, 21);
-
- TEST_GATHER_LOAD4 (double, int, 5);
- TEST_GATHER_LOAD4 (float, short, 5);
-
- TEST_GATHER_LOAD5 (double, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
index 68b0b4d59b6..745e00f1e50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
@@ -1,14 +1,36 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-foo (double *__restrict y, double *__restrict x1,
- double *__restrict x2, int m)
-{
- for (int i = 0; i < 256; ++i)
- y[i * m] = x1[i * m] + x2[i * m];
-}
-
-/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
-/* { dg-final { scan-assembler-not {\torr\tz[0-9]+} } } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] = src[(INDEX##BITS) (indices[i] | mask)]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
index 788aeb08df2..8f2dfb75149 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
@@ -1,15 +1,15 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-void
-foo (double *x, int m)
-{
- for (int i = 0; i < 256; ++i)
- x[i * m] += x[i * m];
-}
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
-/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
+#include "sve_gather_load_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c
deleted file mode 100644
index 0c0cf73be55..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-f (int *__restrict a,
- int *__restrict b,
- int *__restrict c,
- int count)
-{
- for (int i = 0; i < count; ++i)
- a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 2]
- + c[i * 5] + c[i * 5 + 3]);
-}
-
-/* There must be a final scalar iteration because b[(count - 1) * 4 + 3]
- is not accessed by the original code. */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */
-/* { dg-final { scan-assembler {\tldr\t} } } */
-/* { dg-final { scan-assembler {\tstr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c
deleted file mode 100644
index dad798c8106..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-f (int *__restrict a,
- int *__restrict b,
- int *__restrict c,
- int count)
-{
- for (int i = 0; i < count; ++i)
- a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 3]
- + c[i * 5] + c[i * 5 + 3]);
-}
-
-/* There's no need for a scalar tail here. */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */
-/* { dg-final { scan-assembler-not {\tldr\t} } } */
-/* { dg-final { scan-assembler-not {\tstr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c
new file mode 100644
index 00000000000..9c4bb37f04e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
+
+#define SIZE (15 * 8 + 3)
+
+#define DEF_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \
+void __attribute__ ((noinline, noclone)) \
+set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *restrict out, \
+ SIGNED TYPE *restrict in) \
+{ \
+ SIGNED ITERTYPE i; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ out[i] = in[i]; \
+ } \
+} \
+void __attribute__ ((noinline, noclone)) \
+set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *restrict out, \
+ SIGNED TYPE *restrict in, \
+ SIGNED ITERTYPE n) \
+{ \
+ SIGNED ITERTYPE i; \
+ for (i = 0; i < n; i++) \
+ { \
+ out[i] = in[i]; \
+ } \
+}
+
+#define TEST_TYPE(T, SIGNED, TYPE) \
+ T (SIGNED, TYPE, char) \
+ T (SIGNED, TYPE, short) \
+ T (SIGNED, TYPE, int) \
+ T (SIGNED, TYPE, long)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, signed, long) \
+ TEST_TYPE (T, unsigned, long) \
+ TEST_TYPE (T, signed, int) \
+ TEST_TYPE (T, unsigned, int) \
+ TEST_TYPE (T, signed, short) \
+ TEST_TYPE (T, unsigned, short) \
+ TEST_TYPE (T, signed, char) \
+ TEST_TYPE (T, unsigned, char)
+
+TEST_ALL (DEF_INDEX_OFFSET)
+
+/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c
new file mode 100644
index 00000000000..276d259ac3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "sve_index_offset_1.c"
+
+#define TEST_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \
+{ \
+ SIGNED TYPE out[SIZE + 1]; \
+ SIGNED TYPE in1[SIZE + 1]; \
+ SIGNED TYPE in2[SIZE + 1]; \
+ for (int i = 0; i < SIZE + 1; ++i) \
+ { \
+ in1[i] = (i * 4) ^ i; \
+ in2[i] = (i * 2) ^ i; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ out[SIZE] = 42; \
+ set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \
+ if (0 != __builtin_memcmp (out, in1, SIZE * sizeof (TYPE))) \
+ __builtin_abort (); \
+ set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \
+ if (0 != __builtin_memcmp (out, in2, SIZE * sizeof (TYPE))) \
+ __builtin_abort (); \
+ if (out[SIZE] != 42) \
+ __builtin_abort (); \
+}
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (TEST_INDEX_OFFSET);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c
deleted file mode 100644
index 949449cde9f..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" } */
-
-#define SIZE 15*8+3
-
-#define INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE) \
-void set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *__restrict out, \
- SIGNED TYPE *__restrict in) \
-{ \
- SIGNED ITERTYPE i; \
- for (i = 0; i < SIZE; i++) \
- { \
- out[i] = in[i]; \
- } \
-} \
-void set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *__restrict out, \
- SIGNED TYPE *__restrict in, \
- SIGNED ITERTYPE n) \
-{\
- SIGNED ITERTYPE i;\
- for (i = 0; i < n; i++)\
- {\
- out[i] = in[i];\
- }\
-}
-
-#define INDEX_OFFSET_TEST(SIGNED, TYPE)\
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long)
-
-INDEX_OFFSET_TEST (signed, long)
-INDEX_OFFSET_TEST (unsigned, long)
-INDEX_OFFSET_TEST (signed, int)
-INDEX_OFFSET_TEST (unsigned, int)
-INDEX_OFFSET_TEST (signed, short)
-INDEX_OFFSET_TEST (unsigned, short)
-INDEX_OFFSET_TEST (signed, char)
-INDEX_OFFSET_TEST (unsigned, char)
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c
deleted file mode 100644
index d6b2646798c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve" } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */
-
-#include "sve_indexoffset_1.c"
-
-#include <string.h>
-
-#define CALL_INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE)\
-{\
- SIGNED TYPE out[SIZE + 1];\
- SIGNED TYPE in1[SIZE + 1];\
- SIGNED TYPE in2[SIZE + 1];\
- for (int i = 0; i < SIZE + 1; ++i)\
- {\
- in1[i] = (i * 4) ^ i;\
- in2[i] = (i * 2) ^ i;\
- }\
- out[SIZE] = 42;\
- set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \
- if (0 != memcmp (out, in1, SIZE * sizeof (TYPE)))\
- return 1;\
- set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \
- if (0 != memcmp (out, in2, SIZE * sizeof (TYPE)))\
- return 1;\
- if (out[SIZE] != 42)\
- return 1;\
-}
-
-#define CALL_INDEX_OFFSET_TEST(SIGNED, TYPE)\
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long)
-
-int
-main (void)
-{
- CALL_INDEX_OFFSET_TEST (signed, long)
- CALL_INDEX_OFFSET_TEST (unsigned, long)
- CALL_INDEX_OFFSET_TEST (signed, int)
- CALL_INDEX_OFFSET_TEST (unsigned, int)
- CALL_INDEX_OFFSET_TEST (signed, short)
- CALL_INDEX_OFFSET_TEST (unsigned, short)
- CALL_INDEX_OFFSET_TEST (signed, char)
- CALL_INDEX_OFFSET_TEST (unsigned, char)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C
deleted file mode 100644
index 4c196684364..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C
+++ /dev/null
@@ -1,56 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
-
-#include <stdint.h>
-
-#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
-
-#define DEF_LOAD_BROADCAST(TYPE)\
-void set_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++)\
- a[i] = *b;\
-}\
-
-#define DEF_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\
-void set_##TYPE##SUFFIX (TYPE *__restrict__ a)\
-{\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++)\
- a[i] = IMM;\
-}\
-
-/* --- VALID --- */
-
-DEF_LOAD_BROADCAST (int8_t)
-DEF_LOAD_BROADCAST (int16_t)
-DEF_LOAD_BROADCAST (int32_t)
-DEF_LOAD_BROADCAST (int64_t)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 129, imm_129)
-DEF_LOAD_BROADCAST_IMM (int32_t, 129, imm_129)
-DEF_LOAD_BROADCAST_IMM (int64_t, 129, imm_129)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130)
-DEF_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130)
-DEF_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234)
-DEF_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 0xFEDC, imm_0xFEDC)
-DEF_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC)
-
-DEF_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678)
-
-DEF_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678)
-
-DEF_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), imm_FEBA716B12371765)
-
-/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */
-/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C
deleted file mode 100644
index 8e954f3e32c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C
+++ /dev/null
@@ -1,64 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
-
-#include "sve_ld1r_1.C"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#define TEST_LOAD_BROADCAST(TYPE,IMM)\
- {\
- TYPE v[NUM_ELEMS (TYPE)];\
- TYPE temp = 0;\
- set_##TYPE (v, IMM);\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- temp += v[i];\
- result += temp;\
- }\
-
-#define TEST_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\
- {\
- TYPE v[NUM_ELEMS (TYPE)];\
- TYPE temp = 0;\
- set_##TYPE##SUFFIX (v);\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- temp += v[i];\
- result += temp;\
- }\
-
-int main (int argc, char **argv)
-{
- long long int result = 0;
-
- TEST_LOAD_BROADCAST_IMM (int16_t, 129, imm_129)
- TEST_LOAD_BROADCAST_IMM (int32_t, 129, imm_129)
- TEST_LOAD_BROADCAST_IMM (int64_t, 129, imm_129)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130)
- TEST_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130)
- TEST_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234)
- TEST_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, int16_t (0xFEDC), imm_0xFEDC)
- TEST_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC)
-
- TEST_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678)
-
- TEST_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678)
-
- TEST_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765),
- imm_FEBA716B12371765)
-
- if (result != int64_t (6717319005707226880))
- {
- fprintf (stderr, "result = %lld\n", result);
- abort ();
- }
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c
new file mode 100644
index 00000000000..89d5f4289de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
+
+#define DEF_LOAD_BROADCAST(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ set_##TYPE (TYPE *restrict a, TYPE *restrict b) \
+ { \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ a[i] = *b; \
+ }
+
+#define DEF_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \
+ void __attribute__ ((noinline, noclone)) \
+ set_##TYPE##_##SUFFIX (TYPE *a) \
+ { \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ a[i] = IMM; \
+ }
+
+#define FOR_EACH_LOAD_BROADCAST(T) \
+ T (int8_t) \
+ T (int16_t) \
+ T (int32_t) \
+ T (int64_t)
+
+#define FOR_EACH_LOAD_BROADCAST_IMM(T) \
+ T (int16_t, 129, imm_129) \
+ T (int32_t, 129, imm_129) \
+ T (int64_t, 129, imm_129) \
+ \
+ T (int16_t, -130, imm_m130) \
+ T (int32_t, -130, imm_m130) \
+ T (int64_t, -130, imm_m130) \
+ \
+ T (int16_t, 0x1234, imm_0x1234) \
+ T (int32_t, 0x1234, imm_0x1234) \
+ T (int64_t, 0x1234, imm_0x1234) \
+ \
+ T (int16_t, 0xFEDC, imm_0xFEDC) \
+ T (int32_t, 0xFEDC, imm_0xFEDC) \
+ T (int64_t, 0xFEDC, imm_0xFEDC) \
+ \
+ T (int32_t, 0x12345678, imm_0x12345678) \
+ T (int64_t, 0x12345678, imm_0x12345678) \
+ \
+ T (int32_t, 0xF2345678, imm_0xF2345678) \
+ T (int64_t, 0xF2345678, imm_0xF2345678) \
+ \
+ T (int64_t, (int64_t) 0xFEBA716B12371765, imm_FEBA716B12371765)
+
+FOR_EACH_LOAD_BROADCAST (DEF_LOAD_BROADCAST)
+FOR_EACH_LOAD_BROADCAST_IMM (DEF_LOAD_BROADCAST_IMM)
+
+/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */
+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c
new file mode 100644
index 00000000000..510b2eca517
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
+
+#include "sve_ld1r_2.c"
+
+#define TEST_LOAD_BROADCAST(TYPE) \
+ { \
+ TYPE v[NUM_ELEMS (TYPE)]; \
+ TYPE val = 99; \
+ set_##TYPE (v, &val); \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ if (v[i] != (TYPE) 99) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define TEST_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \
+ { \
+ TYPE v[NUM_ELEMS (TYPE)]; \
+ set_##TYPE##_##SUFFIX (v); \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \
+ { \
+ if (v[i] != (TYPE) IMM) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ FOR_EACH_LOAD_BROADCAST (TEST_LOAD_BROADCAST)
+ FOR_EACH_LOAD_BROADCAST_IMM (TEST_LOAD_BROADCAST_IMM)
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
index 2d92708fbd2..407d1277c50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
@@ -1,19 +1,41 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-int
-liveloop (int start, int n, int *x)
-{
- int i = start;
- int j;
+#include <stdint.h>
- for (j = 0; j < n; ++j)
- {
- i += 1;
- x[j] = i;
- }
- return i;
-}
+#define EXTRACT_LAST(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ test_##TYPE (TYPE *x, int n, TYPE value) \
+ { \
+ TYPE last; \
+ for (int j = 0; j < n; ++j) \
+ { \
+ last = x[j]; \
+ x[j] = last * value; \
+ } \
+ return last; \
+ }
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Using a fully-masked loop" 1 "vect" } } */
+#define TEST_ALL(T) \
+ T (uint8_t) \
+ T (uint16_t) \
+ T (uint32_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (EXTRACT_LAST)
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].b, } 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].h, } 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].s, } 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].d, } 4 } } */
+
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
index 99f0be353aa..2a1f6df4788 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
@@ -1,29 +1,35 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-extern void abort(void);
-#include <string.h>
-
#include "sve_live_1.c"
-#define MAX 62
-#define START 27
-
-int main (void)
+#define N 107
+#define OP 70
+
+#define TEST_LOOP(TYPE) \
+ { \
+ TYPE a[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + (i % 3); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ TYPE expected = a[N - 1]; \
+ TYPE res = test_##TYPE (a, N, OP); \
+ if (res != expected) \
+ __builtin_abort (); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE old = i * 2 + (i % 3); \
+ if (a[i] != (TYPE) (old * (TYPE) OP)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
{
- int a[MAX];
- int i;
-
- memset (a, 0, MAX*sizeof (int));
-
- int ret = liveloop (START, MAX, a);
-
- if (ret != 89)
- abort ();
-
- for (i=0; i<MAX; i++)
- {
- if (a[i] != i+START+1)
- abort ();
- }
-} \ No newline at end of file
+ TEST_ALL (TEST_LOOP);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2.c
deleted file mode 100644
index 06d95fa8ea6..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
-
-int
-liveloop (int start, int n, int * __restrict__ x, char * __restrict__ y)
-{
- int i = start;
- int j;
-
- for (j = 0; j < n; ++j)
- {
- i += 1;
- x[j] = y[j] + 1;
- }
- return i;
-}
-
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Can't use a fully-masked loop because ncopies is greater than 1" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c
deleted file mode 100644
index e7924e020cb..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-extern void abort(void);
-#include <string.h>
-#include <stdio.h>
-
-#include "sve_live_2.c"
-
-#define MAX 193
-#define START 84
-
-int main (void)
-{
- int a[MAX];
- char b[MAX];
- int i;
-
- memset (a, 0, MAX*sizeof (int));
- memset (b, 23, MAX*sizeof (char));
-
- int ret = liveloop (START, MAX, a, b);
-
- if (ret != 277)
- abort ();
-
- for (i=0; i<MAX; i++)
- {
- if (a[i] != 24)
- abort ();
- }
-} \ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
index 0bc757907cf..882da83237e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__ ((vector_size (32)));
-typedef int32_t v8si __attribute__ ((vector_size (32)));
-typedef int16_t v16hi __attribute__ ((vector_size (32)));
-typedef int8_t v32qi __attribute__ ((vector_size (32)));
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
#define TEST_TYPE(TYPE) \
void sve_load_##TYPE##_neg9 (TYPE *a) \
@@ -45,10 +45,10 @@ typedef int8_t v32qi __attribute__ ((vector_size (32)));
asm volatile ("" :: "w" (x)); \
}
-TEST_TYPE (v4di)
-TEST_TYPE (v8si)
-TEST_TYPE (v16hi)
-TEST_TYPE (v32qi)
+TEST_TYPE (vnx2di)
+TEST_TYPE (vnx4si)
+TEST_TYPE (vnx8hi)
+TEST_TYPE (vnx16qi)
/* { dg-final { scan-assembler-times {\tsub\tx[0-9]+, x0, #288\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
index a0ced0d9be4..78cfc7a9bd8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
@@ -1,11 +1,11 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */
void
-f (unsigned int *restrict a, unsigned char *restrict b, int n)
+f (unsigned int *restrict a, signed char *restrict b, signed char mask, int n)
{
for (int i = 0; i < n; ++i)
- a[i] += b[i];
+ a[i] += (signed char) (b[i] | mask);
}
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
index 00731d995c8..51732b03784 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
@@ -1,12 +1,7 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */
-void
-f (unsigned int *restrict a, unsigned char *restrict b, int n)
-{
- for (int i = 0; i < n; ++i)
- a[i] += b[i];
-}
+#include "sve_load_const_offset_2.c"
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
index 9163702db1d..f1c37d388f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
@@ -3,65 +3,65 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__ ((vector_size (32)));
-typedef int32_t v8si __attribute__ ((vector_size (32)));
-typedef int16_t v16hi __attribute__ ((vector_size (32)));
-typedef int8_t v32qi __attribute__ ((vector_size (32)));
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
void sve_load_64_u_lsl (uint64_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v4di *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
}
void sve_load_64_s_lsl (int64_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v4di *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
}
void sve_load_32_u_lsl (uint32_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v8si *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
}
void sve_load_32_s_lsl (int32_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v8si *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
}
void sve_load_16_z_lsl (uint16_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v16hi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
}
void sve_load_16_s_lsl (int16_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v16hi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
}
void sve_load_8_z (uint8_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v32qi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
}
void sve_load_8_s (int8_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v32qi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
}
/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
index 2d11a221e93..0f918a4155f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
@@ -10,7 +10,10 @@
{ \
TYPE a[N]; \
for (int i = 0; i < N; ++i) \
- a[i] = i * i + i % 5; \
+ { \
+ a[i] = i * i + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
test_##TYPE##_##NAME (a, BASE, N); \
for (int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (TEST_LOOP)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
index ccb20b4191f..551b451495d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
/* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
index 4d47bce14fd..469e3c670d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
@@ -1,37 +1,52 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
#include <stdint.h>
-#define INVALID_INDEX(TYPE) ((TYPE) 107)
-#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE))
-#define ODD(VAL) (VAL & 0x1)
-
-/* TODO: This is a bit ugly for floating point types as it involves FP<>INT
- conversions, but I can't find another way of auto-vectorizing the code to
- make use of SVE gather instructions. */
-#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
-void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\
- LOOKUPTYPE *__restrict lookup,\
- INDEXTYPE *__restrict index, int n)\
-{\
- int i;\
- for (i = 0; i < n; ++i)\
- {\
- INDEXTYPE x = index[i];\
- if (IS_VALID_INDEX (INDEXTYPE, x))\
- x = lookup[x];\
- out[i] = x;\
- }\
-}\
-
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int32_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int64_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t)
-DEF_MASK_GATHER_LOAD (float, float, int32_t)
-DEF_MASK_GATHER_LOAD (double, double, int64_t)
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 1 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 2 } } */
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c
deleted file mode 100644
index 89ccf3e35a4..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_mask_gather_load_1.c"
-
-#include <stdio.h>
-
-extern void abort ();
-
-/* TODO: Support widening forms of gather loads and test them here. */
-
-#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE))
-
-#define INDEX_VEC_INIT(INDEXTYPE)\
- INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (INDEXTYPE)];\
-
-#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (LOOKUPTYPE)];\
- OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (INDEXTYPE); i++)\
- {\
- lookup_##LOOKUPTYPE [i] = i * 2;\
- index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\
- }\
- }
-
-#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\
- (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\
- NUM_ELEMS (INDEXTYPE));\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\
- break;\
- else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (INDEXTYPE))\
- break;\
- }\
- if (i < NUM_ELEMS (OUTTYPE))\
- {\
- fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\
- i, (int) out_##OUTTYPE[i]);\
- abort ();\
- }\
- }
-
-int main()
-{
- INDEX_VEC_INIT (int32_t)
- INDEX_VEC_INIT (int64_t)
- INDEX_VEC_INIT (uint32_t)
- INDEX_VEC_INIT (uint64_t)
-
- VEC_INIT (int32_t, int32_t, int32_t)
- VEC_INIT (int64_t, int64_t, int64_t)
- VEC_INIT (uint32_t, uint32_t, uint32_t)
- VEC_INIT (uint64_t, uint64_t, uint64_t)
- VEC_INIT (float, float, int32_t)
- VEC_INIT (double, double, int64_t)
-
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int32_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int64_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t)
- TEST_MASK_GATHER_LOAD (float, float, int32_t)
- TEST_MASK_GATHER_LOAD (double, double, int64_t)
-
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
index 48db58ffefd..8dd48462b51 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
@@ -1,60 +1,19 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
-#include <stdint.h>
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-#define NUM_ELEMS(TYPE) (4 * (32 / sizeof (TYPE)))
-#define INVALID_INDEX(TYPE) ((TYPE) 107)
-#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE))
+#include "sve_mask_gather_load_1.c"
-/* TODO: This is a bit ugly for floating point types as it involves FP<>INT
- conversions, but I can't find another way of auto-vectorizing the code to
- make use of SVE gather instructions. */
-#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
-void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\
- LOOKUPTYPE *__restrict lookup,\
- INDEXTYPE *__restrict index, INDEXTYPE n)\
-{\
- INDEXTYPE i;\
- for (i = 0; i < n; ++i)\
- {\
- LOOKUPTYPE x = index[i];\
- if (IS_VALID_INDEX (LOOKUPTYPE, x))\
- x = lookup[x];\
- out[i] = x;\
- }\
-}\
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int8_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int8_t)
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int32_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t)
-
-/* At present we only use predicate unpacks when the index type is
- half the size of the result type. */
-/* { dg-final { scan-assembler-times "\tpunpklo\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-/* { dg-final { scan-assembler-times "\tpunpkhi\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 28 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c
deleted file mode 100644
index c5280546206..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_mask_gather_load_2.c"
-
-#include <stdio.h>
-
-extern void abort ();
-
-#define ODD(VAL) (VAL & 0x1)
-#define INDEX_VEC_INIT(INDEXTYPE)\
- INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (int8_t)];\
-
-#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (OUTTYPE)];\
- OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- lookup_##LOOKUPTYPE [i] = i * 2;\
- index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\
- }\
- }
-
-#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\
- (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\
- NUM_ELEMS (OUTTYPE));\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\
- break;\
- else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (OUTTYPE))\
- break;\
- }\
- if (i < NUM_ELEMS (OUTTYPE))\
- {\
- fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\
- i, (int) out_##OUTTYPE[i]);\
- abort ();\
- }\
- }
-
-int main()
-{
- INDEX_VEC_INIT (int8_t)
- INDEX_VEC_INIT (int16_t)
- INDEX_VEC_INIT (int32_t)
- INDEX_VEC_INIT (uint8_t)
- INDEX_VEC_INIT (uint16_t)
- INDEX_VEC_INIT (uint32_t)
-
- {
- VEC_INIT (int32_t, int32_t, int8_t)
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int8_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int8_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int8_t)
- }
- {
- VEC_INIT (int32_t, int32_t, int16_t)
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int16_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int16_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int16_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int32_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int32_t)
- }
- {
- VEC_INIT (uint32_t, uint32_t, uint8_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint8_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t)
- }
- {
- VEC_INIT (uint32_t, uint32_t, uint16_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint16_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint32_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t)
- }
-
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
index 2965760e058..b370f532f2c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
@@ -1,29 +1,52 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- DATA_TYPE __attribute__ ((noinline)) \
- NAME (char *data, INDEX_TYPE *indices, signed char n) \
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \
{ \
- DATA_TYPE sum = 0; \
- for (signed char i = 0; i < n; ++i) \
- { \
- INDEX_TYPE index = indices[i]; \
- sum += (index & 16 ? *(DATA_TYPE *) (data + index) : 1); \
- } \
- return sum; \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \
}
-TEST_LOOP (f_s32, int, unsigned int)
-TEST_LOOP (f_u32, unsigned int, unsigned int)
-TEST_LOOP (f_f32, float, unsigned int)
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
-TEST_LOOP (f_s64_s64, long, long)
-TEST_LOOP (f_s64_u64, long, unsigned long)
-TEST_LOOP (f_u64_s64, unsigned long, long)
-TEST_LOOP (f_u64_u64, unsigned long, unsigned long)
-TEST_LOOP (f_f64_s64, double, long)
-TEST_LOOP (f_f64_u64, double, unsigned long)
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 3 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c
deleted file mode 100644
index aa73c81ffca..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_mask_gather_load_3.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- { \
- INDEX_TYPE indices[N]; \
- DATA_TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \
- j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \
- if (j & 16) \
- sum += data[j / sizeof (DATA_TYPE)]; \
- else \
- sum += 1; \
- indices[i] = j; \
- } \
- DATA_TYPE res = NAME ((char *) data, indices, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s32, int, unsigned int)
- TEST_LOOP (f_u32, unsigned int, unsigned int)
- TEST_LOOP (f_f32, float, unsigned int)
-
- TEST_LOOP (f_s64_s64, long, long)
- TEST_LOOP (f_s64_u64, long, unsigned long)
- TEST_LOOP (f_u64_s64, unsigned long, long)
- TEST_LOOP (f_u64_u64, unsigned long, unsigned long)
- TEST_LOOP (f_f64_s64, double, long)
- TEST_LOOP (f_f64_u64, double, unsigned long)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
index 38bb5275e59..0464e9343a3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
@@ -1,18 +1,19 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
-#define TEST_LOOP(NAME, TYPE) \
- TYPE __attribute__ ((noinline)) \
- NAME (TYPE **indices, long *mask, int n) \
- { \
- TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += mask[i] ? *indices[i] : 1; \
- return sum; \
- }
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-TEST_LOOP (f_s64, long)
-TEST_LOOP (f_u64, unsigned long)
-TEST_LOOP (f_f64, double)
+#include "sve_mask_gather_load_3.c"
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c
deleted file mode 100644
index 8a6320a002c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_mask_gather_load_4.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE *ptrs[N]; \
- TYPE data[N * 2]; \
- long mask[N]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- mask[i] = i & 15; \
- ptrs[i] = &data[i * 3 / 2]; \
- sum += mask[i] ? *ptrs[i] : 1; \
- } \
- TYPE res = NAME (ptrs, mask, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s64, long)
- TEST_LOOP (f_u64, unsigned long)
- TEST_LOOP (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
index abb38e40f72..831d594654a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
@@ -1,120 +1,38 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-
-#define MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mgather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- dst[i] = src[i * STRIDE];\
-}
-
-#define MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * stride])\
- dst[i] = src[i * stride];\
-}
-
-#define MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\
- OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\
- MASKTYPE * restrict masks, STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- {\
- d1[i] = src[0 + (i * STRIDE)];\
- d2[i] = src[1 + (i * STRIDE)];\
- d3[i] = src[2 + (i * STRIDE)];\
- d4[i] = src[3 + (i * STRIDE)];\
- d5[i] = src[4 + (i * STRIDE)];\
- }\
-}
-
-#define MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mgather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * STRIDE])\
- *dst = *src;\
- dst += 1;\
- src += STRIDE;\
- }\
-}
-
-#define MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * stride])\
- *dst = *src;\
- dst += 1;\
- src += stride;\
- }\
-}
-
-MASK_GATHER_LOAD1 (double, long, long, 5)
-MASK_GATHER_LOAD1 (double, long, long, 8)
-MASK_GATHER_LOAD1 (double, long, long, 21)
-MASK_GATHER_LOAD1 (double, long, long, 1009)
-
-MASK_GATHER_LOAD1 (float, int, int, 5)
-MASK_GATHER_LOAD1 (float, int, int, 8)
-MASK_GATHER_LOAD1 (float, int, int, 21)
-MASK_GATHER_LOAD1 (float, int, int, 1009)
-
-MASK_GATHER_LOAD2 (double, long, long)
-MASK_GATHER_LOAD2 (float, int, int)
-
-MASK_GATHER_LOAD3 (double, long, long)
-MASK_GATHER_LOAD3 (float, int, int)
-
-MASK_GATHER_LOAD4 (double, long, long, 5)
-
-/* NOTE: We can't vectorize MASK_GATHER_LOAD4 (float, int, int, 5) because we
- can't prove that the offsets used for the gather load won't overflow. */
-
-MASK_GATHER_LOAD5 (double, long, long)
-MASK_GATHER_LOAD5 (float, int, int)
-
-/* Widened forms. */
-MASK_GATHER_LOAD1 (double, long, int, 5)
-MASK_GATHER_LOAD1 (double, long, int, 8)
-MASK_GATHER_LOAD1 (double, long, short, 5)
-MASK_GATHER_LOAD1 (double, long, short, 8)
-
-MASK_GATHER_LOAD1 (float, int, short, 5)
-MASK_GATHER_LOAD1 (float, int, short, 8)
-
-MASK_GATHER_LOAD2 (double, long, int)
-MASK_GATHER_LOAD2 (float, int, short)
-
-MASK_GATHER_LOAD4 (double, long, int, 5)
-MASK_GATHER_LOAD4 (float, int, short, 5)
-
-MASK_GATHER_LOAD5 (double, long, int)
-
-/* Loads including masks. */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 34 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 20 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 6 } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += *src[i]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t) \
+ T (DATA_TYPE, uint64_t) \
+ T (DATA_TYPE, double)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c
deleted file mode 100644
index 445c47f23ac..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_mask_gather_load_5.c"
-
-#define NUM_DST_ELEMS 13
-#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE)
-
-#define MASKED_VALUE 3
-
-#define TEST_MASK_GATHER_LOAD_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_dst[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- src[i * STRIDE] = i;\
- dst[i] = MASKED_VALUE;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- FUN##OBJTYPE##STRIDETYPE##STRIDE \
- (dst, src, masks, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_GATHER_LOAD_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_dst[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- src[i * STRIDE] = i;\
- dst[i] = MASKED_VALUE;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- FUN##OBJTYPE##STRIDETYPE \
- (dst, src, masks, STRIDE, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load1, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load2, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (5)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (5)) * sizeof (MASKTYPE));\
- real_dst1[0] = real_dst2[0] = real_dst3[0] = real_dst4[0] = real_dst5[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst1 = &real_dst1[1];\
- OBJTYPE *dst2 = &real_dst2[1];\
- OBJTYPE *dst3 = &real_dst3[1];\
- OBJTYPE *dst4 = &real_dst4[1];\
- OBJTYPE *dst5 = &real_dst5[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\
- src[i] = i;\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- dst1[i] = MASKED_VALUE;\
- dst2[i] = MASKED_VALUE;\
- dst3[i] = MASKED_VALUE;\
- dst4[i] = MASKED_VALUE;\
- dst5[i] = MASKED_VALUE;\
- masks[i * 5] = i & 0x1;\
- }\
- mgather_load3s5##OBJTYPE##STRIDETYPE \
- (dst1, dst2, dst3, dst4, dst5, src, masks, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst1[i] != (masks[base] ? base : MASKED_VALUE))\
- abort ();\
- if (dst2[i] != (masks[base] ? (base + 1) : MASKED_VALUE))\
- abort ();\
- if (dst3[i] != (masks[base] ? (base + 2) : MASKED_VALUE))\
- abort ();\
- if (dst4[i] != (masks[base] ? (base + 3) : MASKED_VALUE))\
- abort ();\
- if (dst5[i] != (masks[base] ? (base + 4) : MASKED_VALUE))\
- abort ();\
- }\
-}
-
-#define TEST_MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load4, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load5, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-int main ()
-{
- TEST_MASK_GATHER_LOAD1 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD1 (double, long, long, 8);
- TEST_MASK_GATHER_LOAD1 (double, long, long, 21);
-
- TEST_MASK_GATHER_LOAD1 (float, int, int, 5);
- TEST_MASK_GATHER_LOAD1 (float, int, int, 8);
- TEST_MASK_GATHER_LOAD1 (float, int, int, 21);
-
- TEST_MASK_GATHER_LOAD2 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD2 (double, long, long, 8);
- TEST_MASK_GATHER_LOAD2 (double, long, long, 21);
-
- TEST_MASK_GATHER_LOAD3 (double, long, long);
- TEST_MASK_GATHER_LOAD3 (float, int, int);
-
- TEST_MASK_GATHER_LOAD4 (double, long, long, 5);
-
- TEST_MASK_GATHER_LOAD5 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD5 (float, int, int, 5);
-
- /* Widened forms. */
- TEST_MASK_GATHER_LOAD1 (double, long, int, 5)
- TEST_MASK_GATHER_LOAD1 (double, long, int, 8)
- TEST_MASK_GATHER_LOAD1 (double, long, short, 5)
- TEST_MASK_GATHER_LOAD1 (double, long, short, 8)
-
- TEST_MASK_GATHER_LOAD1 (float, int, short, 5)
- TEST_MASK_GATHER_LOAD1 (float, int, short, 8)
-
- TEST_MASK_GATHER_LOAD2 (double, long, int, 5);
- TEST_MASK_GATHER_LOAD2 (double, long, int, 8);
- TEST_MASK_GATHER_LOAD2 (double, long, int, 21);
-
- TEST_MASK_GATHER_LOAD4 (double, long, int, 5);
- TEST_MASK_GATHER_LOAD4 (float, int, short, 5);
-
- TEST_MASK_GATHER_LOAD5 (double, long, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c
new file mode 100644
index 00000000000..64eb0c46278
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c
@@ -0,0 +1,38 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, int32_t) \
+ T (DATA_TYPE, uint64_t, int32_t) \
+ T (DATA_TYPE, double, int32_t) \
+ T (DATA_TYPE, int64_t, uint32_t) \
+ T (DATA_TYPE, uint64_t, uint32_t) \
+ T (DATA_TYPE, double, uint32_t)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 18 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c
new file mode 100644
index 00000000000..4a8b38e13af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c
@@ -0,0 +1,53 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int16_t, int32_t) \
+ T (DATA_TYPE, uint16_t, int32_t) \
+ T (DATA_TYPE, _Float16, int32_t) \
+ T (DATA_TYPE, int16_t, uint32_t) \
+ T (DATA_TYPE, uint16_t, uint32_t) \
+ T (DATA_TYPE, _Float16, uint32_t)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, int64_t) \
+ T (DATA_TYPE, uint32_t, int64_t) \
+ T (DATA_TYPE, float, int64_t) \
+ T (DATA_TYPE, int32_t, uint64_t) \
+ T (DATA_TYPE, uint32_t, uint64_t) \
+ T (DATA_TYPE, float, uint64_t)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 18 } } */
+
+/* Also used for the TEST32 indices. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 36 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
index a7f2995a6cd..562bdb720de 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
@@ -1,124 +1,51 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-
-#define MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mscatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- dst[i * STRIDE] = src[i];\
-}
-
-#define MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * stride])\
- dst[i * stride] = src[i];\
-}
-
-#define MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
- OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
- MASKTYPE * restrict masks, STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- {\
- dst[0 + (i * STRIDE)] = s1[i];\
- dst[1 + (i * STRIDE)] = s2[i];\
- dst[2 + (i * STRIDE)] = s3[i];\
- dst[3 + (i * STRIDE)] = s4[i];\
- dst[4 + (i * STRIDE)] = s5[i];\
- }\
-}
-
-#define MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mscatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * STRIDE])\
- *dst = *src;\
- dst += STRIDE;\
- src += 1;\
- }\
-}
-
-#define MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * stride])\
- *dst = *src;\
- dst += stride;\
- src += 1;\
- }\
-}
-
-MASK_SCATTER_STORE1 (double, long, long, 5)
-MASK_SCATTER_STORE1 (double, long, long, 8)
-MASK_SCATTER_STORE1 (double, long, long, 21)
-MASK_SCATTER_STORE1 (double, long, long, 1009)
-
-MASK_SCATTER_STORE1 (float, int, int, 5)
-
-MASK_SCATTER_STORE1 (float, int, int, 8)
-MASK_SCATTER_STORE1 (float, int, int, 21)
-MASK_SCATTER_STORE1 (float, int, int, 1009)
-
-MASK_SCATTER_STORE2 (double, long, long)
-MASK_SCATTER_STORE2 (float, int, int)
-
-MASK_SCATTER_STORE3 (double, long, long)
-MASK_SCATTER_STORE3 (float, int, int)
-
-MASK_SCATTER_STORE4 (double, long, long, 5)
-/* NOTE: We can't vectorize MASK_SCATTER_STORE4 (float, int, int, 3) because we
- can't prove that the offsets used for the gather load won't overflow. */
-
-MASK_SCATTER_STORE5 (double, long, long)
-MASK_SCATTER_STORE5 (float, int, int)
-
-/* Widened forms. */
-MASK_SCATTER_STORE1 (double, long, int, 5)
-MASK_SCATTER_STORE1 (double, long, int, 8)
-MASK_SCATTER_STORE1 (double, long, short, 5)
-MASK_SCATTER_STORE1 (double, long, short, 8)
-
-MASK_SCATTER_STORE1 (float, int, short, 5)
-MASK_SCATTER_STORE1 (float, int, short, 8)
-
-MASK_SCATTER_STORE2 (double, long, int)
-MASK_SCATTER_STORE2 (float, int, short)
-
-MASK_SCATTER_STORE4 (double, long, int, 5)
-MASK_SCATTER_STORE4 (float, int, short, 5)
-
-MASK_SCATTER_STORE5 (double, long, int)
-
-/* Gather loads are for the masks. */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 15 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 8 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
-
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *restrict cmp1, CMP_TYPE *restrict cmp2, \
+ INDEX##BITS *restrict indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c
deleted file mode 100644
index 3222d420763..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-#include <stdio.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_mask_scatter_store_1.c"
-
-#define NUM_SRC_ELEMS 13
-#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE)
-
-#define MASKED_VALUE 3
-
-#define TEST_MASK_SCATTER_STORE_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\
- memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_src[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- src[i] = i;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\
- dst[i] = MASKED_VALUE;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, masks, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_SCATTER_STORE_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\
- memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_src[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- src[i] = i;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\
- dst[i] = MASKED_VALUE;\
- FUN##OBJTYPE##STRIDETYPE (dst, src, masks, STRIDE, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store1, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store2, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (5)];\
- memset (masks, 0, (NUM_DST_ELEMS (5)) * sizeof (MASKTYPE));\
- real_src1[0] = real_src2[0] = real_src3[0] = real_src4[0] = real_src5[0] = 0;\
- OBJTYPE *src1 = &real_src1[1];\
- OBJTYPE *src2 = &real_src2[1];\
- OBJTYPE *src3 = &real_src3[1];\
- OBJTYPE *src4 = &real_src4[1];\
- OBJTYPE *src5 = &real_src5[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- src1[i] = base;\
- src2[i] = base + 1;\
- src3[i] = base + 2;\
- src4[i] = base + 3;\
- src5[i] = base + 4;\
- masks[i * 5] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\
- dst[i] = MASKED_VALUE;\
- mscatter_store3s5##OBJTYPE##STRIDETYPE \
- (dst, src1, src2, src3, src4, src5, masks, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst[base] != (masks[i * 5] ? base : MASKED_VALUE))\
- abort ();\
- if (dst[base + 1] != (masks[i * 5] ? (base + 1) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 2] != (masks[i * 5] ? (base + 2) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 3] != (masks[i * 5] ? (base + 3) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 4] != (masks[i * 5] ? (base + 4) : MASKED_VALUE))\
- abort ();\
- }\
-}
-
-#define TEST_MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store4, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store5, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_MASK_SCATTER_STORE1 (double, long, long, 5);
-
- TEST_MASK_SCATTER_STORE1 (double, long, long, 8);
- TEST_MASK_SCATTER_STORE1 (double, long, long, 21);
-
- TEST_MASK_SCATTER_STORE1 (float, int, int, 5);
- TEST_MASK_SCATTER_STORE1 (float, int, int, 8);
- TEST_MASK_SCATTER_STORE1 (float, int, int, 21);
-
- TEST_MASK_SCATTER_STORE2 (double, long, long, 5);
- TEST_MASK_SCATTER_STORE2 (double, long, long, 8);
- TEST_MASK_SCATTER_STORE2 (double, long, long, 21);
-
- TEST_MASK_SCATTER_STORE2 (float, int, int, 5);
- TEST_MASK_SCATTER_STORE2 (float, int, int, 8);
- TEST_MASK_SCATTER_STORE2 (float, int, int, 21);
-
- TEST_MASK_SCATTER_STORE3 (double, long, long);
- TEST_MASK_SCATTER_STORE3 (float, int, int);
-
- TEST_MASK_SCATTER_STORE4 (double, long, long, 5);
-
- TEST_MASK_SCATTER_STORE5 (double, long, long, 5);
- TEST_MASK_SCATTER_STORE5 (float, int, int, 5);
-
- /* Widened forms. */
- TEST_MASK_SCATTER_STORE1 (double, long, int, 5)
- TEST_MASK_SCATTER_STORE1 (double, long, int, 8)
- TEST_MASK_SCATTER_STORE1 (double, long, short, 5)
- TEST_MASK_SCATTER_STORE1 (double, long, short, 8)
-
- TEST_MASK_SCATTER_STORE1 (float, int, short, 5)
- TEST_MASK_SCATTER_STORE1 (float, int, short, 8)
-
- TEST_MASK_SCATTER_STORE2 (double, long, int, 5);
- TEST_MASK_SCATTER_STORE2 (double, long, int, 8);
- TEST_MASK_SCATTER_STORE2 (double, long, int, 21);
-
- TEST_MASK_SCATTER_STORE2 (float, int, short, 5);
- TEST_MASK_SCATTER_STORE2 (float, int, short, 8);
- TEST_MASK_SCATTER_STORE2 (float, int, short, 21);
-
- TEST_MASK_SCATTER_STORE4 (double, long, int, 5);
- TEST_MASK_SCATTER_STORE4 (float, int, short, 5);
-
- TEST_MASK_SCATTER_STORE5 (double, long, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c
new file mode 100644
index 00000000000..c0f291673dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_mask_scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
index 4a6247db978..9eff539c1d8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
index 626b78c29e1..72086145290 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_1.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 2; ++i) \
in[i] = i * 9 / 2; \
@@ -27,11 +26,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
index 0004e673d49..fe69b96e35a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -30,6 +30,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -46,10 +47,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
index 86219b4a191..a9784676efb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_2.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 3; ++i) \
in[i] = i * 9 / 2; \
@@ -29,11 +28,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
index 5f784e7dd36..b8bdd51459f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -31,6 +31,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -47,10 +48,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
index 51bd38e2890..f168d656af9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_3.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 4; ++i) \
in[i] = i * 9 / 2; \
@@ -30,11 +29,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
index 6608558d3ff..2b319229d1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
index 003cf650d7d..a81c647004f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
index a6161f31536..b6e3f55d7e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
index 75a3e43f267..da97e2795a9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
index e87ad0bc074..c3884b0b074 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
index 966968d4b91..9af479f478d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
@@ -2,16 +2,19 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 2] = src[i]; \
- dest[i * 2 + 1] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 2] = value; \
+ dest[i * 2 + 1] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -31,6 +34,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -47,10 +51,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
index fd48a4c96f9..f472e1da01d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_1.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 2; ++i) \
out[i] = i * 9 / 2; \
- NAME##_2 (out, in, mask, N); \
+ NAME##_2 (out, in, mask, 17, N); \
for (int i = 0; i < N * 2; ++i) \
{ \
- OUTTYPE if_true = in[i / 2]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 2] + 17); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 2] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
index 5359c6a457a..b817a095abe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
@@ -2,17 +2,20 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 3] = src[i]; \
- dest[i * 3 + 1] = src[i]; \
- dest[i * 3 + 2] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 3] = value; \
+ dest[i * 3 + 1] = value; \
+ dest[i * 3 + 2] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -32,6 +35,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -48,10 +52,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
index f8845ebd7ec..c1771d52298 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_2.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 3; ++i) \
out[i] = i * 9 / 2; \
- NAME##_3 (out, in, mask, N); \
+ NAME##_3 (out, in, mask, 11, N); \
for (int i = 0; i < N * 3; ++i) \
{ \
- OUTTYPE if_true = in[i / 3]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 3] + 11); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 3] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
index cc614847e7e..d604bd77efe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
@@ -1,19 +1,22 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 4] = src[i]; \
- dest[i * 4 + 1] = src[i]; \
- dest[i * 4 + 2] = src[i]; \
- dest[i * 4 + 3] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 4] = value; \
+ dest[i * 4 + 1] = value; \
+ dest[i * 4 + 2] = value; \
+ dest[i * 4 + 3] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -33,6 +36,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -49,10 +53,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
index f845818fa4d..cbac3da9db2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_3.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 4; ++i) \
out[i] = i * 9 / 2; \
- NAME##_4 (out, in, mask, N); \
+ NAME##_4 (out, in, mask, 42, N); \
for (int i = 0; i < N * 4; ++i) \
{ \
- OUTTYPE if_true = in[i / 4]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 4] + 42); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 4] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
index ac2df82c539..9b4e75554f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
@@ -2,7 +2,7 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -32,6 +32,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
index a4d705e38ba..a2e671de3d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
index b7cc1dba087..fb4454a1426 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
index a38375af017..756263253c0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
@@ -3,9 +3,9 @@
void sve_copy_rr (void)
{
- typedef int v8si __attribute__((vector_size(32)));
- register v8si x asm ("z1");
- register v8si y asm ("z2");
+ typedef int vnx4si __attribute__((vector_size(32)));
+ register vnx4si x asm ("z1");
+ register vnx4si y asm ("z2");
asm volatile ("#foo" : "=w" (x));
y = x;
asm volatile ("#foo" :: "w" (y));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
index fc05837a920..38aab512376 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
/* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
index 8f50308ebd5..a87fdd2aed2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
@@ -1,36 +1,39 @@
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-#define TEST(NAME, TYPE, ITYPE) \
+#include <stdint.h>
+
+#define TEST(NAME, TYPE) \
void \
- NAME##1 (TYPE *x, ITYPE n) \
+ NAME##1 (TYPE *x, int n) \
{ \
- for (ITYPE i = 0; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
x[i] += 1; \
} \
TYPE NAME##_array[1024]; \
void \
NAME##2 (void) \
{ \
- for (ITYPE i = 1; i < 200; ++i) \
+ for (int i = 1; i < 200; ++i) \
NAME##_array[i] += 1; \
}
-TEST (sc, signed char, unsigned char)
-TEST (uc, unsigned char, unsigned char)
-TEST (ss, signed short, unsigned short)
-TEST (us, unsigned short, signed short)
-TEST (si, signed int, signed int)
-TEST (ui, unsigned int, unsigned int)
-TEST (sl, signed long, unsigned long)
-TEST (ul, unsigned long, signed long)
-TEST (f, float, int)
-TEST (d, double, long)
+TEST (s8, int8_t)
+TEST (u8, uint8_t)
+TEST (s16, int16_t)
+TEST (u16, uint16_t)
+TEST (s32, int32_t)
+TEST (u32, uint32_t)
+TEST (s64, int64_t)
+TEST (u64, uint64_t)
+TEST (f16, _Float16)
+TEST (f32, float)
+TEST (f64, double)
/* No scalar memory accesses. */
/* { dg-final { scan-assembler-not {[wx][0-9]*, \[} } } */
/* 2 for each NAME##1 test, one in the header and one in the main loop
and 1 for each NAME##2 test, in the main loop only. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 6 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 9 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 9 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
index a39f8241f46..23b1b2a51e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
@@ -9,7 +9,7 @@
int x[N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (void)
{
unsigned int v = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
index 1ebaeea2bb9..6ed98ec075c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_1.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
foo ();
@@ -14,7 +12,7 @@ main (void)
{
if (x[i] != (i < START || i >= END ? 0 : (i - START) * 5))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
index 9ef8c7f85e4..af1a5aaa0ec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
@@ -9,7 +9,7 @@
int x[N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (void)
{
for (unsigned int i = START; i < END; ++i)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
index b3e56bbbb7c..5565c32a888 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_2.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
foo ();
@@ -14,7 +12,7 @@ main (void)
{
if (x[i] != (i < START || i >= END ? 0 : i))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
index 97a29f18361..a2602e781a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
@@ -9,7 +9,7 @@
int x[MAX_START][N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (int start)
{
for (int i = start; i < start + COUNT; ++i)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
index 9851c1cce64..ee8061a1163 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_3.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
for (int start = 0; start < MAX_START; ++start)
@@ -16,7 +14,7 @@ main (void)
{
if (x[start][i] != (i < start || i >= start + COUNT ? 0 : i))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
}
return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
index e5c55877341..6ab089522fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
@@ -6,7 +6,7 @@
#define START 1
#define END 505
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (double *x)
{
double v = 10.0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
index 60be4a038de..3764457ffcc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
@@ -1,17 +1,18 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -fno-vect-cost-model" } */
-/* { dg-options "-Ofast -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" { target aarch64_sve256_hw } } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx" } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_4.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
double x[END + 1];
for (int i = 0; i < END + 1; ++i)
- x[i] = i;
+ {
+ x[i] = i;
+ asm volatile ("" ::: "memory");
+ }
foo (x);
for (int i = 0; i < END + 1; ++i)
{
@@ -22,7 +23,7 @@ main (void)
expected = 10 + (i - START) * 5;
if (x[i] != expected)
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C
deleted file mode 100644
index 53e10bcea01..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <math.h>
-
-#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3)
-
-#define DEF_REDUC_PLUS(TYPE)\
-TYPE reduc_plus_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- TYPE r = 0, q = 3;\
- for (int i = 0; i < NUM_ELEMS(TYPE); i++)\
- {\
- r += a[i];\
- q -= b[i];\
- }\
- return r * q;\
-}\
-
-DEF_REDUC_PLUS (float)
-DEF_REDUC_PLUS (double)
-
-#define DEF_REDUC_MAXMIN(TYPE,FUN)\
-TYPE reduc_##FUN (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- TYPE r = a[0], q = b[0];\
- for (int i = 0; i < NUM_ELEMS(TYPE); i++)\
- {\
- r = FUN (a[i], r);\
- q = FUN (b[i], q);\
- }\
- return r * q;\
-}\
-
-DEF_REDUC_MAXMIN (float, fmaxf)
-DEF_REDUC_MAXMIN (double, fmax)
-DEF_REDUC_MAXMIN (float, fminf)
-DEF_REDUC_MAXMIN (double, fmin)
-
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */
-
-/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c
new file mode 100644
index 00000000000..eb3e7e656d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define NUM_ELEMS(TYPE) ((int)(5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *a, TYPE *b) \
+ { \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS(TYPE); i++) \
+ { \
+ r += a[i]; \
+ q -= b[i]; \
+ } \
+ return r * q; \
+ }
+
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C
deleted file mode 100644
index 769d25165ea..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_reduc_strict_1.C"
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-#define DEF_INIT_VECTOR(TYPE)\
- TYPE a_##TYPE[NUM_ELEMS (TYPE)];\
- TYPE b_##TYPE[NUM_ELEMS (TYPE)];\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- {\
- a_##TYPE[i] = (i * 2) * (i & 1 ? 1 : -1);\
- b_##TYPE[i] = (i * 3) * (i & 1 ? 1 : -1);\
- }
-
-#define TEST_REDUC_PLUS(RES,TYPE) (RES) += reduc_plus_##TYPE (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MAX(RES,TYPE) (RES) += reduc_fmax (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MAXF(RES,TYPE) (RES) += reduc_fmaxf (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MIN(RES,TYPE) (RES) += reduc_fmin (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MINF(RES,TYPE) (RES) += reduc_fminf (a_##TYPE, b_##TYPE);
-
-int main ()
-{
- double result = 0.0;
- DEF_INIT_VECTOR (float)
- DEF_INIT_VECTOR (double)
-
- TEST_REDUC_PLUS (result, float)
- TEST_REDUC_PLUS (result, double)
-
- TEST_REDUC_MINF (result, float)
- TEST_REDUC_MIN (result, double)
-
- TEST_REDUC_MAXF (result, float)
- TEST_REDUC_MAX (result, double)
-
- if (result != double (1356996))
- {
- fprintf (stderr, "result = %1.16lf\n", result);
- abort ();
- }
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c
new file mode 100644
index 00000000000..4c810d4a337
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c
@@ -0,0 +1,29 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_reduc_strict_1.c"
+
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NUM_ELEMS (TYPE)]; \
+ TYPE b[NUM_ELEMS (TYPE)]; \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
+ b[i] = (i * 0.3) * (i & 1 ? 1 : -1); \
+ r += a[i]; \
+ q -= b[i]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ TYPE res = reduc_plus_##TYPE (a, b); \
+ if (res != r * q) \
+ __builtin_abort (); \
+ }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C
deleted file mode 100644
index 542918abeb8..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile } */
-/* FIXME: With -O3 we don't generate reductions as the compiler unrolls the outer loop
- and processes the rows in parallel, performing in order reductions on the inner loop. */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <math.h>
-
-#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3)
-
-/* TODO: Test with inner loop = n * NUM_ELEMS(TYPE). */
-#define DEF_REDUC_PLUS(TYPE)\
-void reduc_plus_##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\
-{\
- for (int i = 0; i < n; i++)\
- {\
- r[i] = 0;\
- for (int j = 0; j < NUM_ELEMS(TYPE); j++)\
- r[i] += a[i][j];\
- }\
-}\
-
-DEF_REDUC_PLUS (float)
-DEF_REDUC_PLUS (double)
-
-#define DEF_REDUC_MAXMIN(TYPE,FUN)\
-void reduc_##FUN (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\
-{\
- for (int i = 0; i < n; i++)\
- {\
- r[i] = a[i][0];\
- for (int j = 0; j < NUM_ELEMS(TYPE); j++)\
- r[i] = FUN (a[i][j], r[i]);\
- }\
-}\
-
-DEF_REDUC_MAXMIN (float, fmaxf)
-DEF_REDUC_MAXMIN (double, fmax)
-DEF_REDUC_MAXMIN (float, fminf)
-DEF_REDUC_MAXMIN (double, fmin)
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c
new file mode 100644
index 00000000000..672be8f793e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define NUM_ELEMS(TYPE) ((int) (5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \
+ TYPE *restrict r, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ { \
+ r[i] = 0; \
+ for (int j = 0; j < NUM_ELEMS(TYPE); j++) \
+ r[i] += a[i][j]; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C
deleted file mode 100644
index 86a930c7d33..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C
+++ /dev/null
@@ -1,59 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_reduc_strict_2.C"
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-#define NROWS 5
-
-#define DEF_INIT_VECTOR(TYPE)\
- TYPE mat_##TYPE[NROWS][NUM_ELEMS (TYPE)];\
- TYPE r_##TYPE[NROWS];\
- for (int i = 0; i < NROWS; i++)\
- for (int j = 0; j < NUM_ELEMS (TYPE); j++ )\
- mat_##TYPE[i][j] = i + (j * 2) * (j & 1 ? 1 : -1);\
-
-#define TEST_REDUC_PLUS(TYPE) reduc_plus_##TYPE (mat_##TYPE, r_##TYPE, NROWS);
-#define TEST_REDUC_MAXF reduc_fmaxf (mat_float, r_float, NROWS);
-#define TEST_REDUC_MAX reduc_fmax (mat_double, r_double, NROWS);
-#define TEST_REDUC_MINF reduc_fminf (mat_float, r_float, NROWS);
-#define TEST_REDUC_MIN reduc_fmin (mat_double, r_double, NROWS);
-
-#define SUM_VECTOR(RES, TYPE)\
- for (int i = 0; i < NROWS; i++)\
- (RES) += r_##TYPE[i];
-
-#define SUM_FLOAT_RESULT(RES)\
- SUM_VECTOR (RES, float);\
- SUM_VECTOR (RES, double);\
-
-int main ()
-{
- double resultF = 0.0;
- DEF_INIT_VECTOR (float)
- DEF_INIT_VECTOR (double)
-
- TEST_REDUC_PLUS (float)
- TEST_REDUC_PLUS (double)
- SUM_FLOAT_RESULT (resultF);
-
- TEST_REDUC_MAXF
- TEST_REDUC_MAX
- SUM_FLOAT_RESULT (resultF);
-
- TEST_REDUC_MINF
- TEST_REDUC_MIN
- SUM_FLOAT_RESULT (resultF);
-
- if (resultF != double (2460))
- {
- fprintf (stderr, "resultF = %1.16lf\n", resultF);
- abort ();
- }
-
- return 0;
-}
-
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c
new file mode 100644
index 00000000000..4741e6acb14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c
@@ -0,0 +1,31 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_reduc_strict_2.c"
+
+#define NROWS 5
+
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NROWS][NUM_ELEMS (TYPE)]; \
+ TYPE r[NROWS]; \
+ TYPE expected[NROWS] = {}; \
+ for (int i = 0; i < NROWS; ++i) \
+ for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \
+ { \
+ a[i][j] = (i * 0.1 + j * 0.6) * (j & 1 ? 1 : -1); \
+ expected[i] += a[i][j]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ reduc_plus_##TYPE (a, r, NROWS); \
+ for (int i = 0; i < NROWS; ++i) \
+ if (r[i] != expected[i]) \
+ __builtin_abort (); \
+ }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c
index 338aa614b47..ebed8e697c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c
@@ -1,12 +1,13 @@
/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */
double mat[100][4];
double mat2[100][8];
double mat3[100][12];
double mat4[100][3];
-double slp_reduc_plus (int n)
+double
+slp_reduc_plus (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -19,7 +20,8 @@ double slp_reduc_plus (int n)
return tmp;
}
-double slp_reduc_plus2 (int n)
+double
+slp_reduc_plus2 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -36,7 +38,8 @@ double slp_reduc_plus2 (int n)
return tmp;
}
-double slp_reduc_plus3 (int n)
+double
+slp_reduc_plus3 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -57,7 +60,8 @@ double slp_reduc_plus3 (int n)
return tmp;
}
-void slp_non_chained_reduc (int n, double * __restrict__ out)
+void
+slp_non_chained_reduc (int n, double * restrict out)
{
for (int i = 0; i < 3; i++)
out[i] = 0;
@@ -73,7 +77,8 @@ void slp_non_chained_reduc (int n, double * __restrict__ out)
/* Strict FP reductions shouldn't be used for the outer loops, only the
inner loops. */
-float double_reduc1 (float (*__restrict__ i)[16])
+float
+double_reduc1 (float (*restrict i)[16])
{
float l = 0;
@@ -83,7 +88,8 @@ float double_reduc1 (float (*__restrict__ i)[16])
return l;
}
-float double_reduc2 (float *__restrict__ i)
+float
+double_reduc2 (float *restrict i)
{
float l = 0;
@@ -98,7 +104,8 @@ float double_reduc2 (float *__restrict__ i)
return l;
}
-float double_reduc3 (float *__restrict__ i, float *__restrict__ j)
+float
+double_reduc3 (float *restrict i, float *restrict j)
{
float k = 0, l = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c
new file mode 100644
index 00000000000..7c4290a2dc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(TYPE, NUNITS) \
+ TYPE permute_##TYPE (TYPE values1, TYPE values2) \
+ { \
+ return __builtin_shuffle \
+ (values1, values2, \
+ ((INDEX_##NUNITS) { MASK_##NUNITS (0, NUNITS) })); \
+ }
+
+#define TEST_ALL(T) \
+ T (vnx16qi, 32) \
+ T (vnx8hi, 16) \
+ T (vnx4si, 8) \
+ T (vnx2di, 4) \
+ T (vnx8hf, 16) \
+ T (vnx4sf, 8) \
+ T (vnx2df, 4)
+
+TEST_ALL (PERMUTE)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
index 9307200fb05..709fd3b37b4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
@@ -3,7 +3,7 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -11,7 +11,7 @@ typedef int8_t v32qi __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_32 v32qi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -22,9 +22,9 @@ typedef int8_t v32qi __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v32qi, 32, 2) \
- T (v32qi, 32, 4) \
- T (v32qi, 32, 8)
+ T (vnx16qi, 32, 2) \
+ T (vnx16qi, 32, 4) \
+ T (vnx16qi, 32, 8)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
index fb238373c4e..fe3533cf6db 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
@@ -3,8 +3,8 @@
#include <stdint.h>
-typedef uint16_t v16hi __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef uint16_t vnx8hi __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -12,7 +12,7 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_16 v16hi
+#define INDEX_16 vnx8hi
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -23,10 +23,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v16hi, 16, 2) \
- T (v16hi, 16, 4) \
- T (v16hf, 16, 2) \
- T (v16hf, 16, 4)
+ T (vnx8hi, 16, 2) \
+ T (vnx8hi, 16, 4) \
+ T (vnx8hf, 16, 2) \
+ T (vnx8hf, 16, 4)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
index 4834e2c2b01..a6b95f52880 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
@@ -3,14 +3,14 @@
#include <stdint.h>
-typedef uint32_t v8si __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
+typedef uint32_t vnx4si __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
-#define INDEX_8 v8si
+#define INDEX_8 vnx4si
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -21,8 +21,8 @@ typedef float v8sf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v8si, 8, 2) \
- T (v8sf, 8, 2)
+ T (vnx4si, 8, 2) \
+ T (vnx4sf, 8, 2)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
index 2270be2bd29..43a7e831cae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
@@ -1,109 +1,31 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE)\
-void scatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i * STRIDE] = src[i];\
-}
-
-#define SCATTER_STORE2(OBJTYPE,STRIDETYPE)\
-void scatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i * stride] = src[i];\
-}
-
-#define SCATTER_STORE3(OBJTYPE,STRIDETYPE)\
-void scatter_store3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
- OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
- STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- dst[0 + (i * STRIDE)] = s1[i];\
- dst[4 + (i * STRIDE)] = s5[i];\
- dst[1 + (i * STRIDE)] = s2[i];\
- dst[2 + (i * STRIDE)] = s3[i];\
- dst[3 + (i * STRIDE)] = s4[i];\
- }\
-}
-
-#define SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE)\
-void scatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += STRIDE;\
- src += 1;\
- }\
-}
-
-#define SCATTER_STORE5(OBJTYPE,STRIDETYPE)\
-void scatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += stride;\
- src += 1;\
- }\
-}
-
-SCATTER_STORE1 (double, long, 5)
-SCATTER_STORE1 (double, long, 8)
-SCATTER_STORE1 (double, long, 21)
-SCATTER_STORE1 (double, long, 1009)
-
-SCATTER_STORE1 (float, int, 5)
-SCATTER_STORE1 (float, int, 8)
-SCATTER_STORE1 (float, int, 21)
-SCATTER_STORE1 (float, int, 1009)
-
-SCATTER_STORE2 (double, long)
-SCATTER_STORE2 (float, int)
-
-SCATTER_STORE3 (double, long)
-SCATTER_STORE3 (float, int)
-
-SCATTER_STORE4 (double, long, 5)
-/* NOTE: We can't vectorize SCATTER_STORE4 (float, int, 5) because we can't
- prove that the offsets used for the gather load won't overflow. */
-
-SCATTER_STORE5 (double, long)
-SCATTER_STORE5 (float, int)
-
-/* Widened forms. */
-SCATTER_STORE1 (double, int, 5)
-SCATTER_STORE1 (double, int, 8)
-SCATTER_STORE1 (double, short, 5)
-SCATTER_STORE1 (double, short, 8)
-
-SCATTER_STORE1 (float, short, 5)
-SCATTER_STORE1 (float, short, 8)
-
-SCATTER_STORE2 (double, int)
-SCATTER_STORE2 (float, short)
-
-SCATTER_STORE4 (double, int, 5)
-SCATTER_STORE4 (float, short, 5)
-
-SCATTER_STORE5 (double, int)
-
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c
deleted file mode 100644
index 4d8cddc510f..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_scatter_store_1.c"
-
-#define NUM_SRC_ELEMS 13
-#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE)
-
-#define TEST_SCATTER_STORE_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- src[i] = i;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- src[i] = i;\
- FUN##OBJTYPE##STRIDETYPE (dst, src, STRIDE, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON1 (scatter_store1, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE2(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON2 (scatter_store2, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE3(OBJTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\
- __attribute__((aligned (32)));\
- memset (real_src1, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src2, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src3, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src4, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src5, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (5)) * sizeof (OBJTYPE));\
- OBJTYPE *src1 = &real_src1[1];\
- OBJTYPE *src2 = &real_src2[1];\
- OBJTYPE *src3 = &real_src3[1];\
- OBJTYPE *src4 = &real_src4[1];\
- OBJTYPE *src5 = &real_src5[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- src1[i] = base;\
- src2[i] = base + 1;\
- src3[i] = base + 2;\
- src4[i] = base + 3;\
- src5[i] = base + 4;\
- }\
- scatter_store3s5##OBJTYPE##STRIDETYPE \
- (dst, src1, src2, src3, src4, src5, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON1 (scatter_store4, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE5(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON2 (scatter_store5, OBJTYPE, STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_SCATTER_STORE1 (double, long, 5);
- TEST_SCATTER_STORE1 (double, long, 8);
- TEST_SCATTER_STORE1 (double, long, 21);
-
- TEST_SCATTER_STORE1 (float, int, 5);
- TEST_SCATTER_STORE1 (float, int, 8);
- TEST_SCATTER_STORE1 (float, int, 21);
-
- TEST_SCATTER_STORE2 (double, long, 5);
- TEST_SCATTER_STORE2 (double, long, 8);
- TEST_SCATTER_STORE2 (double, long, 21);
-
- TEST_SCATTER_STORE2 (float, int, 5);
- TEST_SCATTER_STORE2 (float, int, 8);
- TEST_SCATTER_STORE2 (float, int, 21);
-
- TEST_SCATTER_STORE3 (double, long);
- TEST_SCATTER_STORE3 (float, int);
-
- TEST_SCATTER_STORE4 (double, long, 5);
-
- TEST_SCATTER_STORE5 (double, long, 5);
- TEST_SCATTER_STORE5 (float, int, 5);
-
- /* Widened forms. */
- TEST_SCATTER_STORE1 (double, int, 5)
- TEST_SCATTER_STORE1 (double, int, 8)
- TEST_SCATTER_STORE1 (double, short, 5)
- TEST_SCATTER_STORE1 (double, short, 8)
-
- TEST_SCATTER_STORE1 (float, short, 5)
- TEST_SCATTER_STORE1 (float, short, 8)
-
- TEST_SCATTER_STORE2 (double, int, 5);
- TEST_SCATTER_STORE2 (double, int, 8);
- TEST_SCATTER_STORE2 (double, int, 21);
-
- TEST_SCATTER_STORE2 (float, short, 5);
- TEST_SCATTER_STORE2 (float, short, 8);
- TEST_SCATTER_STORE2 (float, short, 21);
-
- TEST_SCATTER_STORE4 (double, int, 5);
- TEST_SCATTER_STORE4 (float, short, 5);
-
- TEST_SCATTER_STORE5 (double, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c
new file mode 100644
index 00000000000..dcc96f07fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c
new file mode 100644
index 00000000000..d09c4015aa0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c
new file mode 100644
index 00000000000..c4f2dae481b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_scatter_store_3.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c
new file mode 100644
index 00000000000..7b117bc0b2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c
@@ -0,0 +1,23 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
+ int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *dest[i] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c
new file mode 100644
index 00000000000..14e68267c9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c
@@ -0,0 +1,36 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[(INDEX##BITS) (indices[i] | mask)] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c
new file mode 100644
index 00000000000..89e2d305c29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "sve_scatter_store_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
index 460359e4be3..23327a7a152 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -23,15 +23,18 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
TEST_ALL (VEC_PERM)
-/* We should use one DUP for each of the 8-, 16- and 32-bit types.
- We should use two DUPs for each of the three 64-bit types. */
+/* We should use one DUP for each of the 8-, 16- and 32-bit types,
+ although we currently use LD1RW for _Float16. We should use two
+ DUPs for each of the three 64-bit types. */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
/* { dg-final { scan-assembler-not {\tzip2\t} } } */
@@ -39,17 +42,18 @@ TEST_ALL (VEC_PERM)
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-not {\tstr} } } */
+/* { dg-final { scan-assembler-times {\tstr} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
index 7dd3640966a..0c10d934259 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
index c1aeaf9b06e..08cad65ab63 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
@@ -47,7 +47,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
index 3db5769deed..ce6060a52df 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE1, TYPE2) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \
TYPE2 *restrict b, int n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
index c302ef6fb76..aa49952b470 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
@@ -38,7 +38,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
index 9afe7e59ef2..77bf7b72454 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
@@ -6,7 +6,7 @@
#define N1 (19 * 2)
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b) \
{ \
for (int i = 0; i < N1; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
index 8c854d4207c..e926de602bd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
@@ -46,7 +46,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
index f3ecbd7adbc..ff3046e127d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
@@ -5,7 +5,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
TYPE res = 0; \
@@ -26,6 +26,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -35,7 +36,7 @@ TEST_ALL (VEC_PERM)
/* ??? We don't treat the uint loops as SLP. */
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
@@ -43,7 +44,7 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
@@ -51,6 +52,7 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-not {\tfadd\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
index 282f1ae2310..2824073cf14 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_slp_13.c"
@@ -21,7 +21,7 @@
__builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
index 6c1b38277ec..3971acde999 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[2] = { 3, 11 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, b[0], b[1], N / 2); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
index 3e71596021f..ba3506ab4e4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -23,13 +23,14 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
@@ -39,14 +40,14 @@ TEST_ALL (VEC_PERM)
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
index 7d4d5e8ca3d..c0411459b94 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[2] = { 10, 17 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 2); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
index 3ac0eebf422..326630f421f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -25,6 +25,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -33,7 +34,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 8-bit type. */
/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
/* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double. */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 12 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 13 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
@@ -49,14 +50,14 @@ TEST_ALL (VEC_PERM)
and stores each. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 12 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
index 7306355b873..de33f41c2c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[4] = { 41, 25, 31, 62 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 4); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
index b0890fd934b..32c14ebe4bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -29,6 +29,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -36,7 +37,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type
and 8 for double. */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 26 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 28 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
@@ -55,21 +56,21 @@ TEST_ALL (VEC_PERM)
ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away)
ZIP1 ZIP2 ZIP1 ZIP2
ZIP1 ZIP2 ZIP1 ZIP2. */
-/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 35 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */
/* The loop should be fully-masked. The 32-bit types need two loads
and stores each and the 64-bit types need four. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 6 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 6 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 12 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 12 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 12 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 24 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
index 2eb2a5ff07e..e0fe656859d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 8); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
index 0f8cf624e20..e0bacb0cad8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -27,6 +27,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -37,9 +38,9 @@ TEST_ALL (VEC_PERM)
/* ??? At present we don't treat the int8_t and int16_t loops as
reductions. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tld2b\t} } } */
@@ -52,12 +53,14 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
-/* Should be 4, if we used reductions for int8_t and int16_t. */
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
index 476b40cb0e9..bb5421700da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
@@ -5,25 +5,30 @@
#define N (141 * 2)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[2] = { 40, 22 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 2); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- for (unsigned int i = 0; i < N; i += 2) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[2] = { 40, 22 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 2); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ for (unsigned int i = 0; i < N; i += 2) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
index 8cdceb57dc6..b3bdb04e2ab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -30,6 +30,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
index a9ca327c907..e2ad116f91d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
@@ -5,27 +5,32 @@
#define N (77 * 3)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[3] = { 40, 22, 75 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 3); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- TYPE x2 = 75; \
- for (unsigned int i = 0; i < N; i += 3) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- x2 += a[i + 2]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1] || x2 != b[2]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[3] = { 40, 22, 75 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 3); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ TYPE x2 = 75; \
+ for (unsigned int i = 0; i < N; i += 3) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ x2 += a[i + 2]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1] || x2 != b[2])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
index 4dc9fafcdde..372c7575cdb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -33,6 +33,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -45,9 +46,9 @@ TEST_ALL (VEC_PERM)
/* ??? At present we don't treat the int8_t and int16_t loops as
reductions. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */
/* { dg-final { scan-assembler-not {\tld4b\t} } } */
@@ -60,12 +61,14 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
-/* Should be 4, if we used reductions for int8_t and int16_t. */
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
index 12446972fde..5a8bf99bc5b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
@@ -5,29 +5,34 @@
#define N (54 * 4)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[4] = { 40, 22, 75, 19 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 4); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- TYPE x2 = 75; \
- TYPE x3 = 19; \
- for (unsigned int i = 0; i < N; i += 4) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- x2 += a[i + 2]; \
- x3 += a[i + 3]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[4] = { 40, 22, 75, 19 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 4); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ TYPE x2 = 75; \
+ TYPE x3 = 19; \
+ for (unsigned int i = 0; i < N; i += 4) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ x2 += a[i + 2]; \
+ x3 += a[i + 3]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
index caae4528d82..d57457fbef0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
index 2717ca62de1..09a6d648c52 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
@@ -37,7 +37,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
index af06270b6f2..65e1cb8f044 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE1, TYPE2) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \
TYPE2 *restrict b, int n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
index 0bde3b6ea03..3e69a48580b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
@@ -32,7 +32,7 @@
} \
}
-int
+int __attribute__ ((noinline, noclone))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
index 25f3047444e..db35711a193 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
@@ -21,6 +21,6 @@ FPTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit)\
SPEC_FP_LOOP (uint32_t, uint32_t, double)
-/* { dg-final { scan-tree-dump-times "Not vectorized: Multiple ncopies not supported" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "not vectorized: ncopies is greater than 1" 1 "vect" } } */
/* { dg-final { scan-assembler-not "brka\tp\[0-9\]*.b, p\[0-9\]*\/z, p\[0-9\]*.b" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
index 4765b22f014..1b71687a257 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
@@ -41,4 +41,4 @@ SPEC_LOOP (uint64_t, uint16_t)
SPEC_LOOP (uint64_t, uint32_t)
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "Speculative loop mask load/stores not supported" "vect" } } */
+/* { dg-final { scan-tree-dump "speculative mask loads not supported" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
index 3e7367cd9fa..1a48f7b6080 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
@@ -3,50 +3,50 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int8_t v32qi __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
void sve_store_64_z_lsl (uint64_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v4di *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
}
void sve_store_64_s_lsl (int64_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v4di *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
}
void sve_store_32_z_lsl (uint32_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v8si *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
}
void sve_store_32_s_lsl (int32_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v8si *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
}
void sve_store_16_z_lsl (uint16_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v16hi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
}
void sve_store_16_s_lsl (int16_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v16hi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
}
/* ??? The other argument order leads to a redundant move. */
void sve_store_8_z (unsigned long i, uint8_t *a)
{
- asm volatile ("" : "=w" (*(v32qi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
}
void sve_store_8_s (signed long i, int8_t *a)
{
- asm volatile ("" : "=w" (*(v32qi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
}
/* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c
new file mode 100644
index 00000000000..b940ba9d4de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ INDEX##BITS stride, INDEX##BITS n) \
+ { \
+ for (INDEX##BITS i = 0; i < n; ++i) \
+ dest[i] += src[i * stride]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 8) \
+ T (DATA_TYPE, 16) \
+ T (DATA_TYPE, 32) \
+ T (DATA_TYPE, 64)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c
new file mode 100644
index 00000000000..a834989091d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_strided_load_1.c"
+
+/* 8 and 16 bits are signed because the multiplication promotes to int.
+ Using uxtw for all 9 would be OK. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+ so we vectorize the offset calculation. This means that the
+ 64-bit version needs two copies. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c
new file mode 100644
index 00000000000..8f0bfdd4bb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ OTHER_TYPE *restrict other, \
+ OTHER_TYPE mask, \
+ int stride, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] = src[i * stride] + (OTHER_TYPE) (other[i] | mask); \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, int16_t) \
+ T (uint32_t, int16_t) \
+ T (float, int16_t) \
+ T (int64_t, int32_t) \
+ T (uint64_t, int32_t) \
+ T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c
new file mode 100644
index 00000000000..b7dc12fb3c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c
new file mode 100644
index 00000000000..6cbcc963595
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, long n) \
+ { \
+ for (long i = 0; i < n; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c
new file mode 100644
index 00000000000..aaf743b3d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */
+
+#include "sve_strided_load_5.c"
+
+/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c
new file mode 100644
index 00000000000..ddf6667e8c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src) \
+ { \
+ for (long i = 0; i < 1000; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c
new file mode 100644
index 00000000000..788aeb08df2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+void
+foo (double *x, int m)
+{
+ for (int i = 0; i < 256; ++i)
+ x[i * m] += x[i * m];
+}
+
+/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c
new file mode 100644
index 00000000000..4f84b3fdec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ INDEX##BITS stride, INDEX##BITS n) \
+ { \
+ for (INDEX##BITS i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 8) \
+ T (DATA_TYPE, 16) \
+ T (DATA_TYPE, 32) \
+ T (DATA_TYPE, 64)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c
new file mode 100644
index 00000000000..1a8df604ead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_strided_store_1.c"
+
+/* 8 and 16 bits are signed because the multiplication promotes to int.
+ Using uxtw for all 9 would be OK. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+ so we vectorize the offset calculation. This means that the
+ 64-bit version needs two copies. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c
new file mode 100644
index 00000000000..19454565f97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+  f_##DATA_TYPE##_##OTHER_TYPE (DATA_TYPE *restrict dest,		\
+ DATA_TYPE *restrict src, \
+ OTHER_TYPE *restrict other, \
+ OTHER_TYPE mask, \
+ int stride, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + (OTHER_TYPE) (other[i] | mask); \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, int16_t) \
+ T (uint32_t, int16_t) \
+ T (float, int16_t) \
+ T (int64_t, int32_t) \
+ T (uint64_t, int32_t) \
+ T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c
new file mode 100644
index 00000000000..23f1329c69b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c
new file mode 100644
index 00000000000..68f2a539c27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, long n) \
+ { \
+ for (long i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c
new file mode 100644
index 00000000000..da124b7348b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */
+
+#include "sve_strided_store_5.c"
+
+/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c
new file mode 100644
index 00000000000..a76ac359f01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src) \
+ { \
+ for (long i = 0; i < 1000; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
index bb23f9886c6..e9ac4790c7b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
@@ -1,32 +1,35 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[2]; } v64qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[2]; } vnx32qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[2]; } v32hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[2]; } vnx16hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[2]; } v16si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[2]; } vnx8si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[2]; } v8di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[2]; } vnx4di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[2]; } v16sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[2]; } vnx16hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[2]; } v8df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[2]; } vnx8sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[2]; } vnx4df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f1_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
} \
@@ -54,63 +57,73 @@ typedef struct { v4df a[2]; } v8df;
asm volatile ("# %0" :: "w" (x)); \
}
-TEST_TYPE (v64qi, z0, z2)
-TEST_TYPE (v32hi, z5, z7)
-TEST_TYPE (v16si, z10, z12)
-TEST_TYPE (v8di, z15, z17)
-TEST_TYPE (v16sf, z20, z23)
-TEST_TYPE (v8df, z28, z30)
+TEST_TYPE (vnx32qi, z0, z2)
+TEST_TYPE (vnx16hi, z5, z7)
+TEST_TYPE (vnx8si, z10, z12)
+TEST_TYPE (vnx4di, z15, z17)
+TEST_TYPE (vnx16hf, z18, z20)
+TEST_TYPE (vnx8sf, z21, z23)
+TEST_TYPE (vnx4df, z28, z30)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v64qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */
-/* { dg-final { scan-assembler { test v64qi 2 z0, z0, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz5.h, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32hi 1 z5\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */
/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */
-/* { dg-final { scan-assembler { test v32hi 2 z5, z5, z7\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz5.h, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz10.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz11.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16si 1 z10\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */
/* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */
-/* { dg-final { scan-assembler { test v16si 2 z10, z10, z12\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz10.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz11.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz15.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v8di 1 z15\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */
-/* { dg-final { scan-assembler { test v8di 2 z15, z15, z17\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz15.d, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
-/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16sf 1 z20\n} } } */
-/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
-/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
-/* { dg-final { scan-assembler { test v16sf 2 z20, z20, z23\n} } } */
-/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz20.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz21.d, z19.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16hf 2 z18, z18, z20\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 1 z21\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z22.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 2 z21, z21, z23\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz29.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v8df 1 z28\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */
-/* { dg-final { scan-assembler { test v8df 2 z28, z28, z30\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz29.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
index d36aa75483a..faf503c35e1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
@@ -1,51 +1,55 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[3]; } v96qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[3]; } vnx48qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[3]; } v48hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[3]; } vnx24hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[3]; } v24si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[3]; } vnx12si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[3]; } v12di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[3]; } vnx6di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[3]; } v24sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[3]; } vnx24hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[3]; } v12df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[3]; } vnx12sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[3]; } vnx6df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
}
-TEST_TYPE (v96qi, z0, z3)
-TEST_TYPE (v48hi, z6, z2)
-TEST_TYPE (v24si, z12, z15)
-TEST_TYPE (v12di, z16, z13)
-TEST_TYPE (v24sf, z20, z23)
-TEST_TYPE (v12df, z26, z29)
+TEST_TYPE (vnx48qi, z0, z3)
+TEST_TYPE (vnx24hi, z6, z2)
+TEST_TYPE (vnx12si, z12, z15)
+TEST_TYPE (vnx6di, z16, z13)
+TEST_TYPE (vnx24hf, z18, z1)
+TEST_TYPE (vnx12sf, z20, z23)
+TEST_TYPE (vnx6df, z26, z29)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v96qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */
-/* { dg-final { scan-assembler { test v96qi 2 z0, z0, z3\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -53,11 +57,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v48hi 1 z6\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
-/* { dg-final { scan-assembler { test v48hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -65,11 +69,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1w\tz12.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v24si 1 z12\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */
-/* { dg-final { scan-assembler { test v24si 2 z12, z12, z15\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -77,23 +81,35 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz17.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v12di 1 z16\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */
-/* { dg-final { scan-assembler { test v12di 2 z16, z16, z13\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx24hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */
+/* { dg-final { scan-assembler { test vnx24hf 2 z18, z18, z1\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
+
/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v24sf 1 z20\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */
/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */
-/* { dg-final { scan-assembler { test v24sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -101,11 +117,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v12df 1 z26\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */
/* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */
-/* { dg-final { scan-assembler { test v12df 2 z26, z26, z29\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz27.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
index d97d6973359..101a33701a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
@@ -1,53 +1,57 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[4]; } v128qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[4]; } vnx64qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[4]; } v64hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[4]; } vnx32hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[4]; } v32si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[4]; } vnx16si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[4]; } v16di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[4]; } vnx8di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[4]; } v32sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[4]; } vnx32hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[4]; } v16df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[4]; } vnx16sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[4]; } vnx8df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
}
-TEST_TYPE (v128qi, z0, z4)
-TEST_TYPE (v64hi, z6, z2)
-TEST_TYPE (v32si, z12, z16)
-TEST_TYPE (v16di, z17, z13)
-TEST_TYPE (v32sf, z20, z16)
-TEST_TYPE (v16df, z24, z28)
+TEST_TYPE (vnx64qi, z0, z4)
+TEST_TYPE (vnx32hi, z6, z2)
+TEST_TYPE (vnx16si, z12, z16)
+TEST_TYPE (vnx8di, z17, z13)
+TEST_TYPE (vnx32hf, z18, z1)
+TEST_TYPE (vnx16sf, z20, z16)
+TEST_TYPE (vnx8df, z24, z28)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz3.b, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v128qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */
-/* { dg-final { scan-assembler { test v128qi 2 z0, z0, z4\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 2 z0, z0, z4\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -57,12 +61,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz9.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v64hi 1 z6\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */
-/* { dg-final { scan-assembler { test v64hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -72,12 +76,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz15.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32si 1 z12\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */
-/* { dg-final { scan-assembler { test v32si 2 z12, z12, z16\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -87,27 +91,42 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz19.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz20.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16di 1 z17\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
-/* { dg-final { scan-assembler { test v16di 2 z17, z17, z13\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz19.d, p[0-7], \[x0, #6, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz20.d, p[0-7], \[x0, #7, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz21.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z21.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32hf 2 z18, z18, z1\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz21.h, p[0-7], \[x0, #7, mul vl\]\n} } } */
+
/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz23.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32sf 1 z20\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */
-/* { dg-final { scan-assembler { test v32sf 2 z20, z20, z16\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -117,12 +136,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1d\tz25.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16df 1 z24\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */
/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */
-/* { dg-final { scan-assembler { test v16df 2 z24, z24, z28\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz24.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz25.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #6, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c
new file mode 100644
index 00000000000..40ec0481e84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c
@@ -0,0 +1,116 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[2]; } vnx32qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[2]; } vnx16hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[2]; } vnx8si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[2]; } vnx4di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[2]; } vnx8sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[2]; } vnx4df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f1_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ } \
+ /* This must compile, but we don't care how. */ \
+ void \
+ f2_##TYPE (TYPE *a) \
+ { \
+ TYPE x = a[0]; \
+ x.a[0][3] = 1; \
+ x.a[1][2] = 12; \
+ asm volatile ("# %0" :: "w" (x)); \
+ } \
+ void \
+ f3_##TYPE (TYPE *a, int i) \
+ { \
+ TYPE x = a[0]; \
+ x.a[0][i] = 1; \
+ asm volatile ("# %0" :: "w" (x)); \
+ } \
+ void \
+ f4_##TYPE (TYPE *a, int i, int j) \
+ { \
+ TYPE x = a[0]; \
+ x.a[i][j] = 44; \
+ asm volatile ("# %0" :: "w" (x)); \
+ }
+
+TEST_TYPE (vnx32qi, z0, z2)
+TEST_TYPE (vnx16hi, z5, z7)
+TEST_TYPE (vnx8si, z10, z12)
+TEST_TYPE (vnx4di, z15, z17)
+TEST_TYPE (vnx8sf, z20, z23)
+TEST_TYPE (vnx4df, z28, z30)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz5, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz5, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz10, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz11, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz10, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz11, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz15, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz16, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz28, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz29, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz29, \[x0, #3, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c
new file mode 100644
index 00000000000..ee04c3e0f23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c
@@ -0,0 +1,111 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[3]; } vnx48qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[3]; } vnx24hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[3]; } vnx12si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[3]; } vnx6di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[3]; } vnx12sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[3]; } vnx6df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ }
+
+TEST_TYPE (vnx48qi, z0, z3)
+TEST_TYPE (vnx24hi, z6, z2)
+TEST_TYPE (vnx12si, z12, z15)
+TEST_TYPE (vnx6di, z16, z13)
+TEST_TYPE (vnx12sf, z20, z23)
+TEST_TYPE (vnx6df, z26, z29)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz16, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz17, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz26, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz28, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #5, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c
new file mode 100644
index 00000000000..8bfd9f6d1af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c
@@ -0,0 +1,129 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[4]; } vnx64qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[4]; } vnx32hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[4]; } vnx16si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[4]; } vnx8di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[4]; } vnx16sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[4]; } vnx8df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ }
+
+TEST_TYPE (vnx64qi, z0, z4)
+TEST_TYPE (vnx32hi, z6, z2)
+TEST_TYPE (vnx16si, z12, z16)
+TEST_TYPE (vnx8di, z17, z13)
+TEST_TYPE (vnx16sf, z20, z16)
+TEST_TYPE (vnx8df, z24, z28)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz3, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 2 z0, z0, z4\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz3, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz9, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz9, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz15, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz17, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz19, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz19, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz23, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz23, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz24, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz25, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz26, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz24, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz25, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #7, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
index 6d7b5fecbce..3405bd76eb1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#ifndef TYPE
#define TYPE unsigned char
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
index 7ae718ada2c..dff9e963e06 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#define ITYPE long
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
index 5ab3ff68bda..611cbbda078 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#define ITYPE long
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
index 6771938131b..80e69463e18 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
@@ -1,13 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE float
-#define ITYPE int
+#define TYPE _Float16
+#define ITYPE short
#include "sve_struct_vect_7.c"
-/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
index f9c129801fc..bfab53d9b6b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
@@ -1,6 +1,6 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE float
-#define ITYPE int
+#define TYPE _Float16
+#define ITYPE short
#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
index 37c11b3b29a..47279e0a80e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
@@ -1,13 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE double
-#define ITYPE long
+#define TYPE float
+#define ITYPE int
#include "sve_struct_vect_7.c"
-/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
index c7ed3fe2806..74007a938b7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
@@ -1,6 +1,6 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE double
-#define ITYPE long
+#define TYPE float
+#define ITYPE int
#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
index 3e3b9d733e4..5ebf5d8ee38 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
@@ -1,66 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-
-#define TYPE unsigned char
-#define NAME(X) qi_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned short
-#define NAME(X) hi_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned int
-#define NAME(X) si_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned long
-#define NAME(X) di_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE float
-#define NAME(X) sf_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
-#define NAME(X) df_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-
-/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+#define ITYPE long
+#include "sve_struct_vect_7.c"
+
+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c
new file mode 100644
index 00000000000..6fb5329913b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c
@@ -0,0 +1,6 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE double
+#define ITYPE long
+#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
index c3e81f500e0..46126e841dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
@@ -1,7 +1,47 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#define TYPE unsigned char
+#define NAME(X) qi_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned short
+#define NAME(X) hi_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned int
+#define NAME(X) si_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned long
+#define NAME(X) di_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE _Float16
+#define NAME(X) hf_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE float
+#define NAME(X) sf_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE double
+#define NAME(X) df_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +50,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
index 635910e11a0..c1ccf7f09bb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
@@ -1,7 +1,7 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#include "sve_struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +10,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
index 9afc0708fb1..61985f98974 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
@@ -1,7 +1,7 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#include "sve_struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +10,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
index 80c99961791..6dd2878c552 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
@@ -1,47 +1,32 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048 --save-temps" } */
-#define N 2000
+#include "sve_struct_vect_14.c"
-#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src) \
- { \
- for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 2]; \
- }
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-#define TEST(NAME) \
- TEST_LOOP (NAME##_i8, signed char) \
- TEST_LOOP (NAME##_i16, unsigned short) \
- TEST_LOOP (NAME##_f32, float) \
- TEST_LOOP (NAME##_f64, double)
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-TEST (test)
+/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* Check the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
-
-/* Check the scalar tail. */
-/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */
-/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */
-/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */
-/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-
-/* The only branches should be in the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c
deleted file mode 100644
index 970c6de6f08..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-#include "sve_struct_vect_17.c"
-
-volatile int x;
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 2]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 2; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2 + in[i * 2]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
- }
-
-int
-main (void)
-{
- TEST (test);
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
index 90e0b53c7df..fd0ce83ffac 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
@@ -4,11 +4,11 @@
#define N 2000
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 4]; \
+ dest[i] += src[i * 3]; \
}
#define TEST(NAME) \
@@ -21,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
index f7db5aea413..6467fa23b83 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
@@ -3,28 +3,32 @@
#include "sve_struct_vect_18.c"
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 4]; \
+ TYPE in[N * 3]; \
for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 4; ++i) \
- in[i] = i * 9 / 2; \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
NAME (out, in); \
for (int i = 0; i < N; ++i) \
{ \
- TYPE expected = i * 7 / 2 + in[i * 4]; \
+ TYPE expected = i * 7 / 2 + in[i * 3]; \
if (out[i] != expected) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
index 3430459a2f3..2a099d05d65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
@@ -2,11 +2,11 @@
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
{ \
for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 2]; \
+ dest[i] += src[i * 3]; \
}
#define TEST(NAME) \
@@ -19,16 +19,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
index 94593cef684..f9bf095d3a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
@@ -3,37 +3,41 @@
#include "sve_struct_vect_19.c"
-volatile int x;
-
#define N 1000
#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 2]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
- { \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 2; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 2]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
- } \
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 3]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 3]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
index 1f99c676586..a94142f2c9e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
@@ -1,10 +1,8 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#include "sve_struct_vect_1.c"
-extern void abort() __attribute__((noreturn));
-
TYPE a[N], b[N], c[N], d[N], e[N * 4];
void __attribute__ ((noinline, noclone))
@@ -19,10 +17,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step)
{
for (int i = 0; i < n; ++i)
if (array[i] != (TYPE) (base + step * i))
- abort ();
+ __builtin_abort ();
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
init_array (e, 2 * N, 11, 5);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
index 8e5a96361f6..0d51808552e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
index aad0e104379..3a2907f4ad9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
@@ -1,12 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+#define N 2000
+
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
- for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 4]; \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 2]; \
}
#define TEST(NAME) \
@@ -19,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -41,7 +43,5 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* Each function should have three branches: one directly to the exit
- (n <= 0), one to the single scalar epilogue iteration (n == 1),
- and one branch-back for the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
+/* The only branches should be in the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
index 3be63364455..de563c98c1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
@@ -3,37 +3,32 @@
#include "sve_struct_vect_20.c"
-volatile int x;
-
-#define N 1000
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 4]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
+ TYPE in[N * 2]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
{ \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 4; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 4]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+ TYPE expected = i * 7 / 2 + in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
index ac3a7dd2383..bb29747b0c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
@@ -1,14 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-#define N 2000
-
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
{ \
- for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 3]; \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 2]; \
}
#define TEST(NAME) \
@@ -21,16 +19,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -43,5 +41,7 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* The only branches should be in the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
+/* Each function should have three branches: one directly to the exit
+ (n <= 0), one to the single scalar epilogue iteration (n == 1),
+ and one branch-back for the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
index 94d72d1835a..6f9a4e3dc32 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
@@ -3,28 +3,41 @@
#include "sve_struct_vect_21.c"
-volatile int x;
+#define N 1000
#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 3]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 3; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2 + in[i * 3]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 2]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
index c17766c7d23..8ee25a0e279 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
@@ -1,12 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+#define N 2000
+
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
- for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 3]; \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 4]; \
}
#define TEST(NAME) \
@@ -19,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -41,7 +43,5 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* Each function should have three branches: one directly to the exit
- (n <= 0), one to the single scalar epilogue iteration (n == 1),
- and one branch-back for the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
+/* The only branches should be in the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
index 550364b16d1..1c3699292c0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
@@ -3,37 +3,32 @@
#include "sve_struct_vect_22.c"
-volatile int x;
-
-#define N 1000
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 3]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
+ TYPE in[N * 4]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
{ \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 3; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 3]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+ TYPE expected = i * 7 / 2 + in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c
new file mode 100644
index 00000000000..7542e531624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 4]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, signed char) \
+ TEST_LOOP (NAME##_i16, unsigned short) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* Check the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
+
+/* Check the scalar tail. */
+/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */
+/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */
+/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */
+/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
+
+/* Each function should have three branches: one directly to the exit
+ (n <= 0), one to the single scalar epilogue iteration (n == 1),
+ and one branch-back for the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c
new file mode 100644
index 00000000000..83f13dd46cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_struct_vect_23.c"
+
+#define N 1000
+
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 4]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
index 6229b78b72e..0da23e144af 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
index 3a29ae16701..b1e37e536e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
index 7703dc6c043..74a5bd3233b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
index 0c526365829..af20d763bdd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
index 4ea2cff9dd0..a8aedd188c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
index efc1c9d2e2c..4b1f8cd341a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE float
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
index f0d56e87dcc..22ba35ff702 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE float
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
index ff445c1fbb0..981c9d31950 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
index b0b685c0789..dbcbae8259f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
index 9712f89d171..8067d5ed169 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#ifndef TYPE
#define TYPE unsigned char
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
index 5cfb7559a5c..8cc1993e997 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#include "sve_struct_vect_7.c"
#define N 93
-extern void abort() __attribute__((noreturn));
-
TYPE a[N], b[N], c[N], d[N], e[N * 4];
void __attribute__ ((noinline, noclone))
@@ -21,10 +19,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step)
{
for (int i = 0; i < n; ++i)
if (array[i] != (TYPE) (base + step * i))
- abort ();
+ __builtin_abort ();
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
init_array (e, 2 * N, 11, 5);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
index 57cb93de5d9..e807179a6a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#define ITYPE short
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
index 59005a2f05b..954043fa874 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#define ITYPE short
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
index d897d556d05..a167a7b2caf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#define ITYPE int
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
index ab694b4a971..4b94d383fec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#define ITYPE int
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
index 0c7b887d232..754b188a206 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
@@ -7,13 +7,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) X, Y + X
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS) \
TYPE permute_##TYPE (TYPE values1, TYPE values2) \
@@ -35,13 +35,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4) \
- T (v8si, 8) \
- T (v16hi, 16) \
- T (v32qi, 32) \
- T (v4df, 4) \
- T (v8sf, 8) \
- T (v16hf, 16)
+ T (vnx2di, 4) \
+ T (vnx4si, 8) \
+ T (vnx8hi, 16) \
+ T (vnx16qi, 32) \
+ T (vnx2df, 4) \
+ T (vnx4sf, 8) \
+ T (vnx8hf, 16)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
index 4d345cf81e9..303276a64cf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
@@ -3,12 +3,12 @@
#include <stdint.h>
-#define UNPACK(TYPED, TYPES) \
-void __attribute__ ((noinline, noclone)) \
-unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \
-{ \
- for (int i = 0; i < size; i++) \
- d[i] = s[i] + 1; \
+#define UNPACK(TYPED, TYPES) \
+void __attribute__ ((noinline, noclone)) \
+unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, TYPES mask, int size) \
+{ \
+ for (int i = 0; i < size; i++) \
+ d[i] = (TYPES) (s[i] | mask); \
}
#define TEST_ALL(T) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
index d183408d124..da29eda1434 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
@@ -14,9 +14,9 @@
arrays[i] = (i - 10) * 3; \
asm volatile ("" ::: "memory"); \
} \
- unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \
+ unpack_##TYPED##_##TYPES (arrayd, arrays, 7, ARRAY_SIZE); \
for (int i = 0; i < ARRAY_SIZE; i++) \
- if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \
+ if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) | 7)) \
__builtin_abort (); \
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
index fa8de963264..8c927873340 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
@@ -8,7 +8,7 @@ void __attribute__ ((noinline, noclone)) \
unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \
{ \
for (int i = 0; i < size; i++) \
- d[i] = s[i] + 1; \
+ d[i] = (TYPES) (s[i] + 1); \
}
#define TEST_ALL(T) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
index 3fa66220f17..d2df061e88d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
@@ -16,7 +16,7 @@
} \
unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \
for (int i = 0; i < ARRAY_SIZE; i++) \
- if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \
+ if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) + 1)) \
__builtin_abort (); \
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
index aaa4fdccbf0..36048f03f99 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define UZP1(TYPE, MASK) \
TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \
@@ -18,18 +18,18 @@ TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \
}
-UZP1 (v4di, ((v4di) { 0, 2, 4, 6 }));
-UZP1 (v8si, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
-UZP1 (v16hi, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30 }));
-UZP1 (v32qi, ((v32qi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30,
- 32, 34, 36, 38, 40, 42, 44, 46,
- 48, 50, 52, 54, 56, 58, 60, 62 }));
-UZP1 (v4df, ((v4di) { 0, 2, 4, 6 }));
-UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
-UZP1 (v16hf, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30 }));
+UZP1 (vnx2di, ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4si, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hi, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30 }));
+UZP1 (vnx16qi, ((vnx16qi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62 }));
+UZP1 (vnx2df, ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4sf, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hf, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30 }));
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
index d35dad0ffca..622f0d10f5f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
@@ -16,48 +16,48 @@
int main (void)
{
- TEST_UZP1 (v4di,
- ((v4di) { 4, 6, 12, 36 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_UZP1 (v8si,
- ((v8si) { 3, 5, 7, 9, 33, 35, 37, 39 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_UZP1 (v16hi,
- ((v16hi) { 3, 5, 7, 9, 11, 13, 15, 17,
- 33, 35, 37, 39, 41, 43, 45, 47 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_UZP1 (v32qi,
- ((v32qi) { 4, 6, 4, 6, 4, 6, 4, 6,
- 4, 6, 4, 6, 4, 6, 4, 6,
- 12, 36, 12, 36, 12, 36, 12, 36,
- 12, 36, 12, 36, 12, 36, 12, 36 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_UZP1 (v4df,
- ((v4df) { 4.0, 6.0, 12.0, 36.0 }),
- ((v4df) { 4.0, 5.0, 6.0, 7.0 }),
- ((v4df) { 12.0, 24.0, 36.0, 48.0 }));
- TEST_UZP1 (v8sf,
- ((v8sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }),
- ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
- ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
- TEST_UZP1 (v16hf,
- ((v16hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0,
- 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_UZP1 (vnx2di,
+ ((vnx2di) { 4, 6, 12, 36 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_UZP1 (vnx4si,
+ ((vnx4si) { 3, 5, 7, 9, 33, 35, 37, 39 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_UZP1 (vnx8hi,
+ ((vnx8hi) { 3, 5, 7, 9, 11, 13, 15, 17,
+ 33, 35, 37, 39, 41, 43, 45, 47 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_UZP1 (vnx16qi,
+ ((vnx16qi) { 4, 6, 4, 6, 4, 6, 4, 6,
+ 4, 6, 4, 6, 4, 6, 4, 6,
+ 12, 36, 12, 36, 12, 36, 12, 36,
+ 12, 36, 12, 36, 12, 36, 12, 36 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_UZP1 (vnx2df,
+ ((vnx2df) { 4.0, 6.0, 12.0, 36.0 }),
+ ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+ ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+ TEST_UZP1 (vnx4sf,
+ ((vnx4sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }),
+ ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+ ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+ TEST_UZP1 (vnx8hf,
+ ((vnx8hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0,
+ 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
index 1bb84d80eb0..a9e4a63fb4d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define UZP2(TYPE, MASK) \
TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \
@@ -17,18 +17,18 @@ TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-UZP2 (v4di, ((v4di) { 1, 3, 5, 7 }));
-UZP2 (v8si, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
-UZP2 (v16hi, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31 }));
-UZP2 (v32qi, ((v32qi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31,
- 33, 35, 37, 39, 41, 43, 45, 47,
- 49, 51, 53, 55, 57, 59, 61, 63 }));
-UZP2 (v4df, ((v4di) { 1, 3, 5, 7 }));
-UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
-UZP2 (v16hf, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31 }));
+UZP2 (vnx2di, ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4si, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hi, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31 }));
+UZP2 (vnx16qi, ((vnx16qi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31,
+ 33, 35, 37, 39, 41, 43, 45, 47,
+ 49, 51, 53, 55, 57, 59, 61, 63 }));
+UZP2 (vnx2df, ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4sf, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hf, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31 }));
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
index d7a241c1258..05d82fe08c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
@@ -16,48 +16,48 @@
int main (void)
{
- TEST_UZP2 (v4di,
- ((v4di) { 5, 7, 24, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_UZP2 (v8si,
- ((v8si) { 4, 6, 8, 10, 34, 36, 38, 40 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_UZP2 (v16hi,
- ((v16hi) { 4, 6, 8, 10, 12, 14, 16, 18,
- 34, 36, 38, 40, 42, 44, 46, 48 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_UZP2 (v32qi,
- ((v32qi) { 5, 7, 5, 7, 5, 7, 5, 7,
- 5, 7, 5, 7, 5, 7, 5, 7,
- 24, 48, 24, 48, 24, 48, 24, 48,
- 24, 48, 24, 48, 24, 48, 24, 48 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_UZP2 (v4df,
- ((v4df) { 5.0, 7.0, 24.0, 48.0 }),
- ((v4df) { 4.0, 5.0, 6.0, 7.0 }),
- ((v4df) { 12.0, 24.0, 36.0, 48.0 }));
- TEST_UZP2 (v8sf,
- ((v8sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }),
- ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
- ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
- TEST_UZP2 (v16hf,
- ((v16hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0,
- 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_UZP2 (vnx2di,
+ ((vnx2di) { 5, 7, 24, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_UZP2 (vnx4si,
+ ((vnx4si) { 4, 6, 8, 10, 34, 36, 38, 40 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_UZP2 (vnx8hi,
+ ((vnx8hi) { 4, 6, 8, 10, 12, 14, 16, 18,
+ 34, 36, 38, 40, 42, 44, 46, 48 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_UZP2 (vnx16qi,
+ ((vnx16qi) { 5, 7, 5, 7, 5, 7, 5, 7,
+ 5, 7, 5, 7, 5, 7, 5, 7,
+ 24, 48, 24, 48, 24, 48, 24, 48,
+ 24, 48, 24, 48, 24, 48, 24, 48 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_UZP2 (vnx2df,
+ ((vnx2df) { 5.0, 7.0, 24.0, 48.0 }),
+ ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+ ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+ TEST_UZP2 (vnx4sf,
+ ((vnx4sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }),
+ ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+ ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+ TEST_UZP2 (vnx8hf,
+ ((vnx8hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0,
+ 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
index 958dce4262d..74acc7983b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
@@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
-/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x[0-9]+, 10, 16} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 10\n} 2 } } */
/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
index 54d592d8ef1..f915e90b12e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
@@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, int n, int m)
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
-/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x[0-9]+, 10, 32} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tx[0-9]+, x[0-9]+, 10\n} 2 } } */
/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
/* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */
/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
index 9be09546c80..d0febc69533 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
typedef uint8_t v32qu __attribute__((vector_size(32)));
typedef uint16_t v16hu __attribute__((vector_size(32)));
@@ -30,10 +30,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \
}
#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
- T (v32qi, COND, SUFFIX) \
- T (v16hi, COND, SUFFIX) \
- T (v8si, COND, SUFFIX) \
- T (v4di, COND, SUFFIX)
+ T (vnx16qi, COND, SUFFIX) \
+ T (vnx8hi, COND, SUFFIX) \
+ T (vnx4si, COND, SUFFIX) \
+ T (vnx2di, COND, SUFFIX)
#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
T (v32qu, COND, SUFFIX) \
@@ -54,10 +54,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \
TEST_COND_VAR_ALL (T, !=, ne)
#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
- T (v32qi, COND, IMM, SUFFIX) \
- T (v16hi, COND, IMM, SUFFIX) \
- T (v8si, COND, IMM, SUFFIX) \
- T (v4di, COND, IMM, SUFFIX)
+ T (vnx16qi, COND, IMM, SUFFIX) \
+ T (vnx8hi, COND, IMM, SUFFIX) \
+ T (vnx4si, COND, IMM, SUFFIX) \
+ T (vnx2di, COND, IMM, SUFFIX)
#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \
T (v32qu, COND, IMM, SUFFIX) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
index 3b7c3e75775..d94cbb37b6a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
@@ -1,57 +1,41 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include <stdint.h>
#include <stdbool.h>
-#define VEC_BOOL_CMPNE(VARTYPE, INDUCTYPE) \
-void \
-vec_bool_cmpne##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
- INDUCTYPE start, INDUCTYPE n, \
- INDUCTYPE mask) \
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
+ INDUCTYPE start, \
+ INDUCTYPE n, \
+ INDUCTYPE mask) \
{ \
- INDUCTYPE i; \
- for (i = 0; i < n; i++) \
+ for (INDUCTYPE i = 0; i < n; i++) \
{ \
bool lhs = i >= start; \
bool rhs = (i & mask) != 0x3D; \
- if (lhs != rhs) \
+ if (lhs OP rhs) \
dst[i] = src[i]; \
} \
}
-#define VEC_BOOL_CMPEQ(VARTYPE, INDUCTYPE) \
-void \
-vec_bool_cmpeq##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
- INDUCTYPE start, INDUCTYPE n, \
- INDUCTYPE mask) \
-{ \
- INDUCTYPE i; \
- for (i = 0; i < n; i++) \
- { \
- bool lhs = i >= start; \
- bool rhs = (i & mask) != 0x3D; \
- if (lhs == rhs) \
- dst[i] = src[i]; \
- } \
-}
+#define TEST_OP(T, NAME, OP) \
+ T (NAME, OP, uint8_t, uint8_t) \
+ T (NAME, OP, uint16_t, uint16_t) \
+ T (NAME, OP, uint32_t, uint32_t) \
+ T (NAME, OP, uint64_t, uint64_t) \
+ T (NAME, OP, float, uint32_t) \
+ T (NAME, OP, double, uint64_t)
-VEC_BOOL_CMPNE (uint8_t, uint8_t)
-VEC_BOOL_CMPNE (uint16_t, uint16_t)
-VEC_BOOL_CMPNE (uint32_t, uint32_t)
-VEC_BOOL_CMPNE (uint64_t, uint64_t)
-VEC_BOOL_CMPNE (float, uint32_t)
-VEC_BOOL_CMPNE (double, uint64_t)
+#define TEST_ALL(T) \
+ TEST_OP (T, cmpeq, ==) \
+ TEST_OP (T, cmpne, !=)
-VEC_BOOL_CMPEQ (uint8_t, uint8_t)
-VEC_BOOL_CMPEQ (uint16_t, uint16_t)
-VEC_BOOL_CMPEQ (uint32_t, uint32_t)
-VEC_BOOL_CMPEQ (uint64_t, uint64_t)
-VEC_BOOL_CMPEQ (float, uint32_t)
-VEC_BOOL_CMPEQ (double, uint64_t)
+TEST_ALL (VEC_BOOL)
-/* Both CMPNE and CMPEQ loops will contain an exclusive predicate or. */
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or. */
/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
-/* CMPEQ will also contain a masked predicate not operation, which gets
+/* cmpeq will also contain a masked predicate not operation, which gets
folded to BIC. */
/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-7]/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
index 8c341c0e932..092aa386c60 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
@@ -3,32 +3,9 @@
#include "sve_vec_bool_cmp_1.c"
-extern void abort (void);
-
#define N 103
-#define TEST_VEC_BOOL_CMPNE(VARTYPE,INDUCTYPE) \
-{ \
- INDUCTYPE i; \
- VARTYPE src[N]; \
- VARTYPE dst[N]; \
- for (i = 0; i < N; i++) \
- { \
- src[i] = i; \
- dst[i] = i * 2; \
- } \
- vec_bool_cmpne##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \
- for (i = 0; i < 13; i++) \
- if (dst[i] != i) \
- abort (); \
- for (i = 13; i < N; i++) \
- if (i != 0x3D && dst[i] != (i * 2)) \
- abort (); \
- else if (i == 0x3D && dst[i] != 0x3D) \
- abort (); \
-}
-
-#define TEST_VEC_BOOL_CMPEQ(VARTYPE,INDUCTYPE) \
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \
{ \
INDUCTYPE i; \
VARTYPE src[N]; \
@@ -37,36 +14,24 @@ extern void abort (void);
{ \
src[i] = i; \
dst[i] = i * 2; \
+ asm volatile ("" ::: "memory"); \
} \
- vec_bool_cmpeq##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \
+ vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13, \
+ 97, 0xFF); \
for (i = 0; i < 13; i++) \
- if (dst[i] != (i * 2)) \
- abort (); \
+ if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2)) \
+ __builtin_abort (); \
for (i = 13; i < 97; i++) \
- if (i != 0x3D && dst[i] != i) \
- abort (); \
- else if (i == 0x3D && dst[i] != (0x3D) * 2) \
- abort (); \
+ if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2)) \
+ __builtin_abort (); \
for (i = 97; i < N; i++) \
if (dst[i] != (i * 2)) \
- abort (); \
+ __builtin_abort (); \
}
-int main ()
+int __attribute__ ((optimize (1)))
+main ()
{
- TEST_VEC_BOOL_CMPNE (uint8_t, uint8_t);
- TEST_VEC_BOOL_CMPNE (uint16_t, uint16_t);
- TEST_VEC_BOOL_CMPNE (uint32_t, uint32_t);
- TEST_VEC_BOOL_CMPNE (uint64_t, uint64_t);
- TEST_VEC_BOOL_CMPNE (float, uint32_t);
- TEST_VEC_BOOL_CMPNE (double, uint64_t);
-
- TEST_VEC_BOOL_CMPEQ (uint8_t, uint8_t);
- TEST_VEC_BOOL_CMPEQ (uint16_t, uint16_t);
- TEST_VEC_BOOL_CMPEQ (uint32_t, uint32_t);
- TEST_VEC_BOOL_CMPEQ (uint64_t, uint64_t);
- TEST_VEC_BOOL_CMPEQ (float, uint32_t);
- TEST_VEC_BOOL_CMPEQ (double, uint64_t);
-
+ TEST_ALL (TEST_VEC_BOOL)
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
index 3d5b584e9e5..95b278e58f5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
@@ -1,10 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef unsigned int v8si __attribute__ ((vector_size(32)));
+typedef unsigned int vnx4si __attribute__ ((vector_size(32)));
void
-f (v8si *ptr, int x)
+f (vnx4si *ptr, int x)
{
- *ptr += (v8si) { x, x, 1, 2, 3, x, x, 4 };
+ *ptr += (vnx4si) { x, x, 1, 2, 3, x, x, 4 };
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
index ae8542f2c75..31283fcf424 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM(TYPE, MASKTYPE) \
TYPE __attribute__ ((noinline, noclone)) \
@@ -18,13 +18,13 @@ vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask) \
return __builtin_shuffle (values1, values2, mask); \
}
-VEC_PERM (v4di, v4di);
-VEC_PERM (v8si, v8si);
-VEC_PERM (v16hi, v16hi);
-VEC_PERM (v32qi, v32qi);
-VEC_PERM (v4df, v4di);
-VEC_PERM (v8sf, v8si);
-VEC_PERM (v16hf, v16hi);
+VEC_PERM (vnx2di, vnx2di);
+VEC_PERM (vnx4si, vnx4si);
+VEC_PERM (vnx8hi, vnx8hi);
+VEC_PERM (vnx16qi, vnx16qi);
+VEC_PERM (vnx2df, vnx2di);
+VEC_PERM (vnx4sf, vnx4si);
+VEC_PERM (vnx8hf, vnx8hi);
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
index 6ab82250d4c..1b98389d996 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
@@ -19,93 +19,93 @@
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 36, 7, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }),
- ((v4di) { 1 + (8 * 1), 6 + (8 * 3),
- 3 + (8 * 1), 7 + (8 * 5) }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
- ((v8si) { 9 + (16 * 2), 13 + (16 * 5),
- 15 + (16 * 1), 7 + (16 * 0),
- 6 + (16 * 8), 5 + (16 * 2),
- 4 + (16 * 3), 10 + (16 * 2) }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34,
- 7, 48, 3, 35, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }),
- ((v16hi) { 9 + (32 * 2), 13 + (32 * 2),
- 15 + (32 * 8), 7 + (32 * 9),
- 25 + (32 * 4), 26 + (32 * 3),
- 27 + (32 * 1), 17 + (32 * 2),
- 4 + (32 * 6), 31 + (32 * 7),
- 0 + (32 * 8), 18 + (32 * 9),
- 6 + (32 * 6), 5 + (32 * 7),
- 4 + (32 * 2), 10 + (32 * 2) }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }),
- ((v32qi) { 5 + (64 * 3), 6 + (64 * 1),
- 7 + (64 * 2), 8 + (64 * 1),
- 9 + (64 * 3), 10 + (64 * 1),
- 28 + (64 * 3), 29 + (64 * 3),
- 30 + (64 * 1), 31 + (64 * 1),
- 32 + (64 * 3), 33 + (64 * 2),
- 54 + (64 * 2), 55 + (64 * 2),
- 56 + (64 * 1), 61 + (64 * 2),
- 5 + (64 * 2), 6 + (64 * 1),
- 7 + (64 * 2), 8 + (64 * 2),
- 9 + (64 * 2), 10 + (64 * 1),
- 28 + (64 * 3), 29 + (64 * 1),
- 30 + (64 * 3), 31 + (64 * 3),
- 32 + (64 * 1), 33 + (64 * 1),
- 54 + (64 * 2), 55 + (64 * 2),
- 56 + (64 * 2), 61 + (64 * 2) }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 36.1, 7.1, 48.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4df) { 12.1, 24.1, 36.1, 48.1 }),
- ((v4di) { 1 + (8 * 3), 6 + (8 * 10),
- 3 + (8 * 8), 7 + (8 * 2) }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8sf) { 33.2, 34.2, 35.2, 36.2,
- 37.2, 38.2, 39.2, 40.2 }),
- ((v8si) { 9 + (16 * 1), 13 + (16 * 5),
- 15 + (16 * 4), 7 + (16 * 4),
- 6 + (16 * 3), 5 + (16 * 2),
- 4 + (16 * 1), 10 + (16 * 0) }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
- 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
- ((v16hi) { 9 + (32 * 2), 13 + (32 * 2),
- 15 + (32 * 8), 7 + (32 * 9),
- 25 + (32 * 4), 26 + (32 * 3),
- 27 + (32 * 1), 17 + (32 * 2),
- 4 + (32 * 6), 31 + (32 * 7),
- 0 + (32 * 8), 18 + (32 * 9),
- 6 + (32 * 6), 5 + (32 * 7),
- 4 + (32 * 2), 10 + (32 * 2) }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 36, 7, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }),
+ ((vnx2di) { 1 + (8 * 1), 6 + (8 * 3),
+ 3 + (8 * 1), 7 + (8 * 5) }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+ ((vnx4si) { 9 + (16 * 2), 13 + (16 * 5),
+ 15 + (16 * 1), 7 + (16 * 0),
+ 6 + (16 * 8), 5 + (16 * 2),
+ 4 + (16 * 3), 10 + (16 * 2) }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+ 7, 48, 3, 35, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }),
+ ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+ 15 + (32 * 8), 7 + (32 * 9),
+ 25 + (32 * 4), 26 + (32 * 3),
+ 27 + (32 * 1), 17 + (32 * 2),
+ 4 + (32 * 6), 31 + (32 * 7),
+ 0 + (32 * 8), 18 + (32 * 9),
+ 6 + (32 * 6), 5 + (32 * 7),
+ 4 + (32 * 2), 10 + (32 * 2) }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }),
+ ((vnx16qi) { 5 + (64 * 3), 6 + (64 * 1),
+ 7 + (64 * 2), 8 + (64 * 1),
+ 9 + (64 * 3), 10 + (64 * 1),
+ 28 + (64 * 3), 29 + (64 * 3),
+ 30 + (64 * 1), 31 + (64 * 1),
+ 32 + (64 * 3), 33 + (64 * 2),
+ 54 + (64 * 2), 55 + (64 * 2),
+ 56 + (64 * 1), 61 + (64 * 2),
+ 5 + (64 * 2), 6 + (64 * 1),
+ 7 + (64 * 2), 8 + (64 * 2),
+ 9 + (64 * 2), 10 + (64 * 1),
+ 28 + (64 * 3), 29 + (64 * 1),
+ 30 + (64 * 3), 31 + (64 * 3),
+ 32 + (64 * 1), 33 + (64 * 1),
+ 54 + (64 * 2), 55 + (64 * 2),
+ 56 + (64 * 2), 61 + (64 * 2) }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+ ((vnx2di) { 1 + (8 * 3), 6 + (8 * 10),
+ 3 + (8 * 8), 7 + (8 * 2) }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+ 37.2, 38.2, 39.2, 40.2 }),
+ ((vnx4si) { 9 + (16 * 1), 13 + (16 * 5),
+ 15 + (16 * 4), 7 + (16 * 4),
+ 6 + (16 * 3), 5 + (16 * 2),
+ 4 + (16 * 1), 10 + (16 * 0) }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+ 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+ ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+ 15 + (32 * 8), 7 + (32 * 9),
+ 25 + (32 * 4), 26 + (32 * 3),
+ 27 + (32 * 1), 17 + (32 * 2),
+ 4 + (32 * 6), 31 + (32 * 7),
+ 0 + (32 * 8), 18 + (32 * 9),
+ 6 + (32 * 6), 5 + (32 * 7),
+ 4 + (32 * 2), 10 + (32 * 2) }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
index 4d46ff02192..a551ffa9b49 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
@@ -19,61 +19,61 @@
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 36, 7, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }),
- ((v4di) { 1, 6, 3, 7 }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34,
- 7, 48, 3, 35, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }),
- ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61,
- 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61 }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 36.1, 7.1, 48.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4df) { 12.1, 24.1, 36.1, 48.1 }),
- ((v4di) { 1, 6, 3, 7 }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8sf) { 33.2, 34.2, 35.2, 36.2,
- 37.2, 38.2, 39.2, 40.2 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
- 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 36, 7, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }),
+ ((vnx2di) { 1, 6, 3, 7 }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+ 7, 48, 3, 35, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }),
+ ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61,
+ 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61 }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+ ((vnx2di) { 1, 6, 3, 7 }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+ 37.2, 38.2, 39.2, 40.2 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+ 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
index 31cff7ab113..4c3df975bab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_reverse_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
index 342b1ddb44d..9a9300509ab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[N]; \
for (unsigned int i = 0; i < N; ++i) \
- b[i] = i * 2 + i % 5; \
+ { \
+ b[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_reverse_##TYPE (a, b, N); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -19,7 +22,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
index 4f70abd35e5..8b4901b1014 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_zip_##TYPE (TYPE *restrict a, TYPE *restrict b, \
TYPE *restrict c, long n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
index 14d66f99383..c47b4050ae2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
@@ -12,6 +12,7 @@
{ \
b[i] = i * 2 + i % 5; \
c[i] = i * 3; \
+ asm volatile ("" ::: "memory"); \
} \
vec_zip_##TYPE (a, b, c, N / 8); \
for (unsigned int i = 0; i < N / 2; ++i) \
@@ -23,7 +24,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
index 5fbd59f08bd..c08ad23868c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_uzp_##TYPE (TYPE *restrict a, TYPE *restrict b, \
TYPE *restrict c, long n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
index 404429208a0..a096b6c5353 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[N], c[N]; \
for (unsigned int i = 0; i < N; ++i) \
- c[i] = i * 2 + i % 5; \
+ { \
+ c[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_uzp_##TYPE (a, b, c, N / 8); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -19,7 +22,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
index e76b3bc5abb..7b470cb04e2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_CONST(TYPE, MASK) \
TYPE __attribute__ ((noinline, noclone)) \
@@ -18,18 +18,18 @@ vec_perm_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_CONST (v4di, ((v4di) { 4, 3, 6, 1 }));
-VEC_PERM_CONST (v8si, ((v8si) { 3, 9, 11, 12, 2, 4, 4, 2 }));
-VEC_PERM_CONST (v16hi, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0,
- 22, 1, 8, 9, 3, 24, 15, 1 }));
-VEC_PERM_CONST (v32qi, ((v32qi) { 13, 31, 11, 2, 48, 28, 3, 4,
- 54, 11, 30, 1, 0, 61, 2, 3,
- 4, 5, 11, 63, 24, 11, 42, 39,
- 2, 57, 22, 11, 6, 16, 18, 21 }));
-VEC_PERM_CONST (v4df, ((v4di) { 7, 3, 2, 1 }));
-VEC_PERM_CONST (v8sf, ((v8si) { 1, 9, 13, 11, 2, 5, 4, 2 }));
-VEC_PERM_CONST (v16hf, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0,
- 22, 1, 8, 9, 3, 24, 15, 1 }));
+VEC_PERM_CONST (vnx2di, ((vnx2di) { 4, 3, 6, 1 }));
+VEC_PERM_CONST (vnx4si, ((vnx4si) { 3, 9, 11, 12, 2, 4, 4, 2 }));
+VEC_PERM_CONST (vnx8hi, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0,
+ 22, 1, 8, 9, 3, 24, 15, 1 }));
+VEC_PERM_CONST (vnx16qi, ((vnx16qi) { 13, 31, 11, 2, 48, 28, 3, 4,
+ 54, 11, 30, 1, 0, 61, 2, 3,
+ 4, 5, 11, 63, 24, 11, 42, 39,
+ 2, 57, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_CONST (vnx2df, ((vnx2di) { 7, 3, 2, 1 }));
+VEC_PERM_CONST (vnx4sf, ((vnx4si) { 1, 9, 13, 11, 2, 5, 4, 2 }));
+VEC_PERM_CONST (vnx8hf, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0,
+ 22, 1, 8, 9, 3, 24, 15, 1 }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
index b4f82091f7c..d397c3d6670 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_CONST_OVERRUN(TYPE, MASK) \
TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \
@@ -17,50 +17,50 @@ TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_CONST_OVERRUN (v4di, ((v4di) { 4 + (8 * 1), 3 + (8 * 1),
- 6 + (8 * 2), 1 + (8 * 3) }));
-VEC_PERM_CONST_OVERRUN (v8si, ((v8si) { 3 + (16 * 3), 9 + (16 * 4),
- 11 + (16 * 5), 12 + (16 * 3),
- 2 + (16 * 2), 4 + (16 * 1),
- 4 + (16 * 2), 2 + (16 * 1) }));
-VEC_PERM_CONST_OVERRUN (v16hi, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1),
- 5 + (32 * 3), 4 + (32 * 3),
- 21 + (32 * 1), 12 + (32 * 3),
- 13 + (32 * 3), 0 + (32 * 1),
- 22 + (32 * 2), 1 + (32 * 2),
- 8 + (32 * 2), 9 + (32 * 1),
- 3 + (32 * 2), 24 + (32 * 2),
- 15 + (32 * 1), 1 + (32 * 1) }));
-VEC_PERM_CONST_OVERRUN (v32qi, ((v32qi) { 13 + (64 * 2), 31 + (64 * 2),
- 11 + (64 * 2), 2 + (64 * 1),
- 48 + (64 * 1), 28 + (64 * 2),
- 3 + (64 * 2), 4 + (64 * 3),
- 54 + (64 * 1), 11 + (64 * 2),
- 30 + (64 * 2), 1 + (64 * 1),
- 0 + (64 * 1), 61 + (64 * 2),
- 2 + (64 * 3), 3 + (64 * 2),
- 4 + (64 * 3), 5 + (64 * 3),
- 11 + (64 * 3), 63 + (64 * 1),
- 24 + (64 * 1), 11 + (64 * 3),
- 42 + (64 * 3), 39 + (64 * 2),
- 2 + (64 * 2), 57 + (64 * 3),
- 22 + (64 * 3), 11 + (64 * 2),
- 6 + (64 * 2), 16 + (64 * 2),
- 18 + (64 * 2), 21 + (64 * 3) }));
-VEC_PERM_CONST_OVERRUN (v4df, ((v4di) { 7 + (8 * 1), 3 + (8 * 3),
- 2 + (8 * 5), 1 + (8 * 3) }));
-VEC_PERM_CONST_OVERRUN (v8sf, ((v8si) { 1 + (16 * 1), 9 + (16 * 2),
- 13 + (16 * 2), 11 + (16 * 3),
- 2 + (16 * 2), 5 + (16 * 2),
- 4 + (16 * 4), 2 + (16 * 3) }));
-VEC_PERM_CONST_OVERRUN (v16hf, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1),
- 5 + (32 * 3), 4 + (32 * 3),
- 21 + (32 * 1), 12 + (32 * 3),
- 13 + (32 * 3), 0 + (32 * 1),
- 22 + (32 * 2), 1 + (32 * 2),
- 8 + (32 * 2), 9 + (32 * 1),
- 3 + (32 * 2), 24 + (32 * 2),
- 15 + (32 * 1), 1 + (32 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx2di, ((vnx2di) { 4 + (8 * 1), 3 + (8 * 1),
+ 6 + (8 * 2), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4si, ((vnx4si) { 3 + (16 * 3), 9 + (16 * 4),
+ 11 + (16 * 5), 12 + (16 * 3),
+ 2 + (16 * 2), 4 + (16 * 1),
+ 4 + (16 * 2), 2 + (16 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx8hi, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1),
+ 5 + (32 * 3), 4 + (32 * 3),
+ 21 + (32 * 1), 12 + (32 * 3),
+ 13 + (32 * 3), 0 + (32 * 1),
+ 22 + (32 * 2), 1 + (32 * 2),
+ 8 + (32 * 2), 9 + (32 * 1),
+ 3 + (32 * 2), 24 + (32 * 2),
+ 15 + (32 * 1), 1 + (32 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx16qi, ((vnx16qi) { 13 + (64 * 2), 31 + (64 * 2),
+ 11 + (64 * 2), 2 + (64 * 1),
+ 48 + (64 * 1), 28 + (64 * 2),
+ 3 + (64 * 2), 4 + (64 * 3),
+ 54 + (64 * 1), 11 + (64 * 2),
+ 30 + (64 * 2), 1 + (64 * 1),
+ 0 + (64 * 1), 61 + (64 * 2),
+ 2 + (64 * 3), 3 + (64 * 2),
+ 4 + (64 * 3), 5 + (64 * 3),
+ 11 + (64 * 3), 63 + (64 * 1),
+ 24 + (64 * 1), 11 + (64 * 3),
+ 42 + (64 * 3), 39 + (64 * 2),
+ 2 + (64 * 2), 57 + (64 * 3),
+ 22 + (64 * 3), 11 + (64 * 2),
+ 6 + (64 * 2), 16 + (64 * 2),
+ 18 + (64 * 2), 21 + (64 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx2df, ((vnx2di) { 7 + (8 * 1), 3 + (8 * 3),
+ 2 + (8 * 5), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4sf, ((vnx4si) { 1 + (16 * 1), 9 + (16 * 2),
+ 13 + (16 * 2), 11 + (16 * 3),
+ 2 + (16 * 2), 5 + (16 * 2),
+ 4 + (16 * 4), 2 + (16 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx8hf, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1),
+ 5 + (32 * 3), 4 + (32 * 3),
+ 21 + (32 * 1), 12 + (32 * 3),
+ 13 + (32 * 3), 0 + (32 * 1),
+ 22 + (32 * 2), 1 + (32 * 2),
+ 8 + (32 * 2), 9 + (32 * 1),
+ 3 + (32 * 2), 24 + (32 * 2),
+ 15 + (32 * 1), 1 + (32 * 1) }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
index 7324c1da0a4..a0214880dbe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
@@ -22,49 +22,49 @@
int main (void)
{
- TEST_VEC_PERM (v4di,
- ((v4di) { 12, 7, 36, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_VEC_PERM (v8si,
- ((v8si) { 6, 34, 36, 37, 5, 7, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_VEC_PERM (v16hi,
- ((v16hi) { 11, 44, 8, 7, 38, 15, 16, 3,
- 39, 4, 11, 12, 6, 41, 18, 4 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_VEC_PERM (v32qi,
- ((v32qi) { 5, 7, 7, 6, 12, 4, 7, 4,
- 36, 7, 6, 5, 4, 24, 6, 7,
- 4, 5, 7, 48, 4, 7, 36, 48,
- 6, 24, 6, 7, 6, 4, 6, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_VEC_PERM (v4df,
- ((v4df) { 48.5, 7.5, 6.5, 5.5 }),
- ((v4df) { 4.5, 5.5, 6.5, 7.5 }),
- ((v4df) { 12.5, 24.5, 36.5, 48.5 }));
- TEST_VEC_PERM (v8sf,
- ((v8sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }),
- ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
- ((v8sf) { 33.5, 34.5, 35.5, 36.5,
- 37.5, 38.5, 39.5, 40.5 }));
- TEST_VEC_PERM (v16hf,
- ((v16hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0,
- 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
- 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_VEC_PERM (vnx2di,
+ ((vnx2di) { 12, 7, 36, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx4si,
+ ((vnx4si) { 6, 34, 36, 37, 5, 7, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_VEC_PERM (vnx8hi,
+ ((vnx8hi) { 11, 44, 8, 7, 38, 15, 16, 3,
+ 39, 4, 11, 12, 6, 41, 18, 4 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_VEC_PERM (vnx16qi,
+ ((vnx16qi) { 5, 7, 7, 6, 12, 4, 7, 4,
+ 36, 7, 6, 5, 4, 24, 6, 7,
+ 4, 5, 7, 48, 4, 7, 36, 48,
+ 6, 24, 6, 7, 6, 4, 6, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx2df,
+ ((vnx2df) { 48.5, 7.5, 6.5, 5.5 }),
+ ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+ ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+ TEST_VEC_PERM (vnx4sf,
+ ((vnx4sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }),
+ ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+ ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+ 37.5, 38.5, 39.5, 40.5 }));
+ TEST_VEC_PERM (vnx8hf,
+ ((vnx8hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0,
+ 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+ 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
index a4efb4fea79..beabf272f11 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_SINGLE(TYPE, MASK) \
TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \
@@ -17,18 +17,18 @@ TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_SINGLE (v4di, ((v4di) { 0, 3, 2, 1 }));
-VEC_PERM_SINGLE (v8si, ((v8si) { 3, 7, 1, 0, 2, 4, 4, 2 }));
-VEC_PERM_SINGLE (v16hi, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0,
- 1, 1, 8, 9, 3, 14, 15, 1 }));
-VEC_PERM_SINGLE (v32qi, ((v32qi) { 13, 21, 11, 2, 8, 28, 3, 4,
- 14, 11, 30, 1, 0, 31, 2, 3,
- 4, 5, 11, 23, 24, 11, 12, 9,
- 2, 7, 22, 11, 6, 16, 18, 21 }));
-VEC_PERM_SINGLE (v4df, ((v4di) { 3, 3, 1, 1 }));
-VEC_PERM_SINGLE (v8sf, ((v8si) { 4, 5, 6, 0, 2, 7, 4, 2 }));
-VEC_PERM_SINGLE (v16hf, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0,
- 1, 1, 8, 9, 3, 14, 15, 1 }));
+VEC_PERM_SINGLE (vnx2di, ((vnx2di) { 0, 3, 2, 1 }));
+VEC_PERM_SINGLE (vnx4si, ((vnx4si) { 3, 7, 1, 0, 2, 4, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hi, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0,
+ 1, 1, 8, 9, 3, 14, 15, 1 }));
+VEC_PERM_SINGLE (vnx16qi, ((vnx16qi) { 13, 21, 11, 2, 8, 28, 3, 4,
+ 14, 11, 30, 1, 0, 31, 2, 3,
+ 4, 5, 11, 23, 24, 11, 12, 9,
+ 2, 7, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_SINGLE (vnx2df, ((vnx2di) { 3, 3, 1, 1 }));
+VEC_PERM_SINGLE (vnx4sf, ((vnx4si) { 4, 5, 6, 0, 2, 7, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hf, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0,
+ 1, 1, 8, 9, 3, 14, 15, 1 }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
index fbae30c8d1c..aa443563182 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
@@ -17,49 +17,49 @@
int main (void)
{
- TEST_VEC_PERM (v4di,
- ((v4di) { 4, 7, 6, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_VEC_PERM (v8si,
- ((v8si) { 6, 10, 4, 3, 5, 7, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_VEC_PERM (v16hi,
- ((v16hi) { 11, 10, 8, 7, 14, 15, 16, 3,
- 4, 4, 11, 12, 6, 17, 18, 4 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_VEC_PERM (v32qi,
- ((v32qi) { 5, 5, 7, 6, 4, 4, 7, 4,
- 6, 7, 6, 5, 4, 7, 6, 7,
- 4, 5, 7, 7, 4, 7, 4, 5,
- 6, 7, 6, 7, 6, 4, 6, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_VEC_PERM (v4df,
- ((v4df) { 7.5, 7.5, 5.5, 5.5 }),
- ((v4df) { 4.5, 5.5, 6.5, 7.5 }),
- ((v4df) { 12.5, 24.5, 36.5, 48.5 }));
- TEST_VEC_PERM (v8sf,
- ((v8sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }),
- ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
- ((v8sf) { 33.5, 34.5, 35.5, 36.5,
- 37.5, 38.5, 39.5, 40.5 }));
- TEST_VEC_PERM (v16hf,
- ((v16hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0,
- 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_VEC_PERM (vnx2di,
+ ((vnx2di) { 4, 7, 6, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx4si,
+ ((vnx4si) { 6, 10, 4, 3, 5, 7, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_VEC_PERM (vnx8hi,
+ ((vnx8hi) { 11, 10, 8, 7, 14, 15, 16, 3,
+ 4, 4, 11, 12, 6, 17, 18, 4 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_VEC_PERM (vnx16qi,
+ ((vnx16qi) { 5, 5, 7, 6, 4, 4, 7, 4,
+ 6, 7, 6, 5, 4, 7, 6, 7,
+ 4, 5, 7, 7, 4, 7, 4, 5,
+ 6, 7, 6, 7, 6, 4, 6, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx2df,
+ ((vnx2df) { 7.5, 7.5, 5.5, 5.5 }),
+ ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+ ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+ TEST_VEC_PERM (vnx4sf,
+ ((vnx4sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }),
+ ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+ ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+ 37.5, 38.5, 39.5, 40.5 }));
+ TEST_VEC_PERM (vnx8hf,
+ ((vnx8hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0,
+ 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
index a82b57dc378..c4abc2de551 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM(TYPE, MASKTYPE) \
TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \
@@ -17,13 +17,13 @@ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \
return __builtin_shuffle (values, mask); \
}
-VEC_PERM (v4di, v4di)
-VEC_PERM (v8si, v8si)
-VEC_PERM (v16hi, v16hi)
-VEC_PERM (v32qi, v32qi)
-VEC_PERM (v4df, v4di)
-VEC_PERM (v8sf, v8si)
-VEC_PERM (v16hf, v16hi)
+VEC_PERM (vnx2di, vnx2di)
+VEC_PERM (vnx4si, vnx4si)
+VEC_PERM (vnx8hi, vnx8hi)
+VEC_PERM (vnx16qi, vnx16qi)
+VEC_PERM (vnx2df, vnx2di)
+VEC_PERM (vnx4sf, vnx4si)
+VEC_PERM (vnx8hf, vnx8hi)
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
index 539c99d4f61..fd73bc9652f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
@@ -18,48 +18,48 @@ extern void abort (void);
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 6, 7, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 1, 6, 3, 5 }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 4, 8, 10, 10, 9, 8, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 12, 13, 14, 4,
- 7, 18, 3, 5, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 4, 5, 6, 7, 4, 5,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 4, 5, 6, 7, 4, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61,
- 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61 }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 6.1, 7.1, 5.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4di) { 1, 6, 3, 5 }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0,
- 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 6, 7, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 1, 6, 3, 5 }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 4, 8, 10, 10, 9, 8, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 12, 13, 14, 4,
+ 7, 18, 3, 5, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 4, 5, 6, 7, 4, 5,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 4, 5, 6, 7, 4, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61,
+ 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61 }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 6.1, 7.1, 5.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2di) { 1, 6, 3, 5 }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0,
+ 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
index c54db87fa21..2a268a447e3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, int n) \
-{ \
- for (int i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, int n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
index 62f82cc43f4..2f0f0f49e12 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
-{ \
- for (unsigned int i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
+ { \
+ for (unsigned int i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
index ace7ebc5a0f..026a8195238 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, long n) \
-{ \
- for (long i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, int64_t n) \
+ { \
+ for (int64_t i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, long n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
index 0717eac1ff6..d71b141b431 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, unsigned long n) \
-{ \
- for (unsigned long i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, uint64_t n) \
+ { \
+ for (uint64_t i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned long n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-times {\tuqdec} 2 } } */
/* { dg-final { scan-assembler-times {\tuqdecb\tx[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c
deleted file mode 100644
index ead821b43ca..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b)
- goto l1;
- return c;
-}
-
-/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c
deleted file mode 100644
index 1a3502a0f94..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b < 32767)
- goto l1;
-return c;
-}
-
-/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c
deleted file mode 100644
index 125fc31a464..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b < 32766)
- goto l1;
-return c;
-}
-
-/* { dg-final { scan-assembler-not {\tmov\tx[0-9], 65536\n} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9], 0\n} } } */
-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9], x[0-9], x[0-9], ne\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
index 918313f62bd..c84b88a2e70 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
@@ -7,13 +7,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) X, Y + X
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y)
@@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 4, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 8, Y)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS) \
TYPE permute_##TYPE (TYPE values1, TYPE values2) \
@@ -36,13 +36,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4) \
- T (v8si, 8) \
- T (v16hi, 16) \
- T (v32qi, 32) \
- T (v4df, 4) \
- T (v8sf, 8) \
- T (v16hf, 16)
+ T (vnx2di, 4) \
+ T (vnx4si, 8) \
+ T (vnx8hi, 16) \
+ T (vnx16qi, 32) \
+ T (vnx2df, 4) \
+ T (vnx4sf, 8) \
+ T (vnx8hf, 16)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
index c7c15ee5c4a..aecf8262706 100644
--- a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
+++ b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
@@ -49,4 +49,6 @@ f12 (void)
return sum;
}
-/* { dg-final { scan-assembler-not "sp" } } */
+/* Fails for fixed-length SVE because we lack a vec_init pattern.
+ A later patch fixes this in generic code. */
+/* { dg-final { scan-assembler-not "sp" { xfail { aarch64_sve && { ! vect_variable_length } } } } } */