Diffstat (limited to 'gcc/testsuite/gcc.target')
-rw-r--r--gcc/testsuite/gcc.target/aarch64/bsl-idiom.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c37
-rw-r--r--gcc/testsuite/gcc.target/aarch64/copysign-bsl.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_cap_4.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ext_1.c64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ext_2.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_1.c80
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_2.c80
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_extract_3.c122
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c102
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c63
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c35
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c130
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c161
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c50
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c19
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c54
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C56
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C64
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_1.c52
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c52
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_2.c19
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mad_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c83
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c69
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c98
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c65
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c27
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c37
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c156
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c177
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c53
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c173
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c186
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c22
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mla_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mls_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_msb_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c29
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C59
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c (renamed from gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C)23
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_rev_1.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revb_1.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revh_1.c14
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_revw_1.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c134
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c155
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c10
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c23
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_1.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_10.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_11.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_12.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_2.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_3.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_4.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_5.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c39
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_6.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c43
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_7.c13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_8.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_9.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c40
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c40
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c33
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c84
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c87
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c116
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c111
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c129
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c75
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c58
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c69
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c22
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c56
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c47
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c45
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c60
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c59
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c176
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c112
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c7
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c102
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c88
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c86
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_2.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_3.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_4.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c18
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c4
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c41
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c37
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c46
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c38
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c40
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c43
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c96
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union.c22
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c55
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c68
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-4.x40
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-5.x36
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-6.x45
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-7.x36
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-8.x39
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-9.x42
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/bitfield-and-union.x (renamed from gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c)19
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/cmse-13.x7
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/cmse-5.x7
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/cmse-7.x7
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/cmse-8.x7
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c41
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c37
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c46
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c38
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c40
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c43
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union.c20
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c11
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c13
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c11
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c13
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c14
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c13
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c13
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c10
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c55
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c68
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/union-1.x54
-rw-r--r--gcc/testsuite/gcc.target/arm/cmse/union-2.x67
-rw-r--r--gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c1
-rw-r--r--gcc/testsuite/gcc.target/arm/lp1189445.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/pr54300.C2
-rw-r--r--gcc/testsuite/gcc.target/arm/pr67989.C3
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-1.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-pr82855.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gf2p8affineqb-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gf2p8mulb-2.c76
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-gf2p8affineqb-2.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-gf2p8mulb-2.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/force-indirect-call-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/force-indirect-call-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/force-indirect-call-3.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/gfni-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/gfni-2.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/gfni-3.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/gfni-4.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr80425-3.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr81706.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82002-2a.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82002-2b.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82941-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82941-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82942-1.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82942-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-3.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-4.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-5.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-6.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82990-7.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c7
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c3
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/stack-check-12.c2
-rw-r--r--gcc/testsuite/gcc.target/mips/pr82981.c13
-rw-r--r--gcc/testsuite/gcc.target/powerpc/builtin-vec-sums-be-int.c16
-rw-r--r--gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c13
-rw-r--r--gcc/testsuite/gcc.target/powerpc/builtins-6-p9-runnable.c1046
-rw-r--r--gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c342
-rw-r--r--gcc/testsuite/gcc.target/powerpc/float128-hw4.c135
-rw-r--r--gcc/testsuite/gcc.target/powerpc/float128-minmax.c15
-rw-r--r--gcc/testsuite/gcc.target/powerpc/p9-xxbr-1.c11
-rw-r--r--gcc/testsuite/gcc.target/powerpc/p9-xxbr-3.c99
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr82748-1.c82
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr82748-2.c46
-rw-r--r--gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c36
-rw-r--r--gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c36
-rw-r--r--gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c57
-rw-r--r--gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c57
-rw-r--r--gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c6
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c5
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c2
-rw-r--r--gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-2.c2
390 files changed, 6917 insertions, 5460 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c
new file mode 100644
index 00000000000..8151387600f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine --save-temps" } */
+
+/* Test that we don't generate BSL when in DImode with values in integer
+ registers, and do generate it where we have values in floating-point
+ registers. This is useful, as it allows us to avoid register moves
+ in the general case.
+
+ We want:
+ eor x0, x0, x1
+ and x0, x0, x2
+ eor x0, x0, x1
+ ret
+
+ Rather than:
+ fmov d2, x0
+ fmov d0, x2
+ fmov d1, x1
+ bsl v0.8b, v2.8b, v1.8b
+ fmov x0, d0
+ ret */
+
+extern void abort (void);
+
+unsigned long long __attribute__ ((noinline))
+foo (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ return ((a ^ b) & c) ^ b;
+}
+
+unsigned long long __attribute__ ((noinline))
+foo2 (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ return ((a ^ b) & c) ^ a;
+}
+
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
+ : "=w"(V1) \
+ : "w"(V1) \
+ : /* No clobbers */);
+
+unsigned long long __attribute__ ((noinline))
+bar (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ force_simd (a);
+ force_simd (b);
+ force_simd (c);
+ c = ((a ^ b) & c) ^ b;
+ force_simd (c);
+ return c;
+}
+
+unsigned long long __attribute__ ((noinline))
+bar2 (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ force_simd (a);
+ force_simd (b);
+ force_simd (c);
+ c = ((a ^ b) & c) ^ a;
+ force_simd (c);
+ return c;
+}
+
+int
+main (int argc, char** argv)
+{
+ unsigned long long a = 0x0123456789abcdefULL;
+ unsigned long long b = 0xfedcba9876543210ULL;
+ unsigned long long c = 0xaabbccddeeff7777ULL;
+ if (foo (a, b, c) != bar (a, b, c))
+ abort ();
+ if (foo2 (a, b, c) != bar2 (a, b, c))
+ abort ();
+ return 0;
+}
+
+/* 2 BSL, 6 FMOV (to floating-point registers), and 2 FMOV (to general
+purpose registers) for the "bar" tests, which should still use BSL. */
+/* { dg-final { scan-assembler-times "bsl\tv\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "fmov\td\[0-9\]" 6 } } */
+/* { dg-final { scan-assembler-times "fmov\tx\[0-9\]" 2 } } */
+
+/* { dg-final { scan-assembler-not "bif\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bit\tv\[0-9\]" } } */
+
+/* We always match the idiom during combine. */
+/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_internal" 2 "combine" } } */
+/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_alt" 2 "combine" } } */
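The idiom matched above is the classic XOR/AND bit-select. A minimal standalone sketch (not part of the patch, plain C for illustration) showing that ((a ^ b) & c) ^ b picks each bit from a where the selector c is 1 and from b where it is 0, which is exactly the operation BSL performs with c as the selector:

#include <assert.h>

/* Bitwise select: take bits from A where the mask C is 1 and from B
   where C is 0.  This is the same select that foo () above expresses
   with XOR and AND.  */
static unsigned long long
bit_select (unsigned long long a, unsigned long long b, unsigned long long c)
{
  return (a & c) | (b & ~c);
}

int
main (void)
{
  unsigned long long a = 0x0123456789abcdefULL;
  unsigned long long b = 0xfedcba9876543210ULL;
  unsigned long long c = 0xaabbccddeeff7777ULL;
  assert ((((a ^ b) & c) ^ b) == bit_select (a, b, c));
  return 0;
}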
diff --git a/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
new file mode 100644
index 00000000000..d87f3290828
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+ v2di x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0, y[0] }
+#else
+ { y[0], 0 }
+#endif
+ ;
+ return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+ v2df x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0.0, y[0] }
+#else
+ { y[0], 0.0 }
+#endif
+ ;
+ return x;
+}
+
+/* Check that creating V2DI and V2DF vectors from a lane with a zero
+ makes use of the D-reg LDR rather than doing explicit lane inserts. */
+
+/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c
new file mode 100644
index 00000000000..0ec7109c738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we can generate DImode BSL when we are using
+ copysign. */
+
+double
+foo (double a, double b)
+{
+ return __builtin_copysign (a, b);
+}
+
+/* { dg-final { scan-assembler "b\(sl|it|if\)\tv\[0-9\]" } } */
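A rough sketch (not part of the patch; assumes the IEEE binary64 layout) of why copysign reduces to a single bit-select: only the sign bit is taken from b, so the whole operation is a select with a constant sign-bit mask, which can be emitted as one BSL/BIT/BIF:

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

/* copysign (a, b) as a bit-select: the constant mask selects the sign
   bit from B and everything else from A.  */
static double
copysign_bits (double a, double b)
{
  uint64_t ua, ub, mask = UINT64_C (1) << 63;
  memcpy (&ua, &a, sizeof ua);
  memcpy (&ub, &b, sizeof ub);
  uint64_t ur = (ub & mask) | (ua & ~mask);
  double r;
  memcpy (&r, &ur, sizeof r);
  return r;
}

int
main (void)
{
  assert (copysign_bits (3.5, -1.0) == copysign (3.5, -1.0));
  assert (copysign_bits (-2.25, 1.0) == copysign (-2.25, 1.0));
  return 0;
}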
diff --git a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
index cce88155aca..ae5b3797021 100644
--- a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
+++ b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
@@ -3,7 +3,7 @@
/* { dg-options "-O0 -gdwarf-2" } */
/* { dg-final { scan-assembler ".cfi_restore 30" } } */
/* { dg-final { scan-assembler ".cfi_restore 29" } } */
-/* { dg-final { scan-assembler ".cfi_def_cfa 31, 0" } } */
+/* { dg-final { scan-assembler ".cfi_def_cfa_offset 0" } } */
/* { dg-final { scan-assembler "ret" } } */
int bar (unsigned int);
diff --git a/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c
new file mode 100644
index 00000000000..3c31b340154
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+ v2di x = { y[0], y[1] };
+ return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+ v2df x = { y[0], y[1] };
+ return x;
+}
+
+/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF
+ values from consecutive memory into a 2-element vector by using
+ a Q-reg LDR. */
+
+/* { dg-final { scan-assembler-times "ldr\tq\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
new file mode 100644
index 00000000000..6810db3c54d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void
+construct_lane_1 (double *y, v2df *z)
+{
+ double y0 = y[0] + 1;
+ double y1 = y[1] + 2;
+ v2df x = {y0, y1};
+ z[2] = x;
+}
+
+void
+construct_lane_2 (long long *y, v2di *z)
+{
+ long long y0 = y[0] + 1;
+ long long y1 = y[1] + 2;
+ v2di x = {y0, y1};
+ z[2] = x;
+}
+
+/* Both vectors are constructed from values that are already live in
+ registers, so building them and storing them to memory should collapse
+ into a store pair (STP of D registers for the FP case, STP of X
+ registers for the integer case) rather than explicit lane inserts. */
+
+/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
index b22828d621b..c3bf2f326d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c
@@ -36,7 +36,7 @@ LOOP (double)
/* { dg-final { scan-assembler-times {\tstr\td[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 4 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 4 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
index a176d9ce251..4651c70afda 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
@@ -1,11 +1,11 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define N 32
/* Simple condition reduction. */
-int
+int __attribute__ ((noinline, noclone))
condition_reduction (int *a, int min_v)
{
int last = 66; /* High start value. */
@@ -17,6 +17,4 @@ condition_reduction (int *a, int min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
index 8e6444e4239..0dcba03b61c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_1.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
int a[N] = {
- 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
int ret = condition_reduction (a, 1);
if (ret != 17)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
index dcae41f5425..381cbd17577 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
@@ -1,15 +1,17 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
#if !defined(TYPE)
-#define TYPE unsigned int
+#define TYPE uint32_t
#endif
#define N 254
/* Non-simple condition reduction. */
-TYPE
+TYPE __attribute__ ((noinline, noclone))
condition_reduction (TYPE *a, TYPE min_v)
{
TYPE last = 65;
@@ -21,7 +23,4 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
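The body of the reduction loop is not visible in the hunk above. As a sketch of the general shape these clastb tests exercise (written here for illustration only, and consistent with the result checked in sve_clastb_2_run.c), the scalar code is a conditional reduction whose final value the vectorizer extracts with CLASTB:

#include <stdint.h>

/* Conditional reduction: LAST is overwritten by every element that
   satisfies the condition, so the result is the last matching element.
   When vectorized for SVE, that "last active element" is extracted
   with a predicated CLASTB instruction.  */
uint32_t
condition_reduction_sketch (uint32_t *a, uint32_t min_v, int n)
{
  uint32_t last = 65;
  for (int i = 0; i < n; i++)
    if (a[i] < min_v)
      last = a[i];
  return last;
}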
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
index 0503ba36c3d..0d5187ba3ae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_2.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
unsigned int a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, (N-32)*sizeof (int));
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (int));
unsigned int ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
index 1061194a08e..90a3b938593 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE unsigned char
+#define TYPE uint8_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
index 90c3e4a0cf3..f90fbfc5e9b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_3.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
unsigned char a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, N-32);
+ __builtin_memset (a + 32, 43, N - 32);
unsigned char ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
index 698d958693a..dc01b21c273 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE short
+#define TYPE int16_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
index d0337ab300d..e17199f3672 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
@@ -5,7 +5,7 @@
extern void abort (void) __attribute__ ((noreturn));
-int
+int __attribute__ ((optimize (1)))
main (void)
{
short a[N] = {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
index 655f95f410a..aef2a80c68f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
@@ -1,11 +1,8 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE long
+#define TYPE uint64_t
#include "sve_clastb_2.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
index 573787233d8..e251db0bb76 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
@@ -1,25 +1,23 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_5.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
long a[N] = {
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
};
- __builtin_memset (a+32, 43, (N-32)*sizeof (long));
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (long));
long ret = condition_reduction (a, 16);
if (ret != 10)
- abort ();
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
index bf1bc1a346a..93fec6396a2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
@@ -1,5 +1,5 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define N 32
@@ -9,7 +9,7 @@
/* Non-integer data types. */
-TYPE
+TYPE __attribute__ ((noinline, noclone))
condition_reduction (TYPE *a, TYPE min_v)
{
TYPE last = 0;
@@ -21,8 +21,4 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
index 4c760daba89..c204ed4c4f0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_6.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
float a[N] = {
- 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
- 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
- 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
- 31.111, 32.322
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
};
float ret = condition_reduction (a, 16.7);
- if (ret != (float)10.6)
- abort ();
+ if (ret != (float) 10.6)
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
index 12e53b75e8a..d232a87e41d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
@@ -1,11 +1,7 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_clastb_6.c"
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
index d0001a923e8..2f87a4766e0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
@@ -1,24 +1,22 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_clastb_7.c"
-extern void abort (void) __attribute__ ((noreturn));
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
double a[N] = {
- 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
- 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
- 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
- 31.111, 32.322
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
};
double ret = condition_reduction (a, 16.7);
- if (ret != (double)10.6)
- abort ();
+ if (ret != 10.6)
+ __builtin_abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
index 4937e7f10e5..3f30a527cae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C
@@ -1,15 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef signed char v32qi __attribute__((vector_size(32)));
+typedef signed char vnx16qi __attribute__((vector_size(32)));
-v32qi
-foo (v32qi x, v32qi y)
+vnx16qi
+foo (vnx16qi x, vnx16qi y)
{
- return (v32qi) { -1, 0, 0, -1, -1, -1, 0, 0,
- -1, -1, -1, -1, 0, 0, 0, 0,
- -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y;
+ return (vnx16qi) { -1, 0, 0, -1, -1, -1, 0, 0,
+ -1, -1, -1, -1, 0, 0, 0, 0,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
index 3de4a8ccd00..ec8a0ab9d69 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C
@@ -1,13 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef short v16hi __attribute__((vector_size(32)));
+typedef short vnx8hi __attribute__((vector_size(32)));
-v16hi
-foo (v16hi x, v16hi y)
+vnx8hi
+foo (vnx8hi x, vnx8hi y)
{
- return (v16hi) { -1, 0, 0, -1, -1, -1, 0, 0,
- -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y;
+ return (vnx8hi) { -1, 0, 0, -1, -1, -1, 0, 0,
+ -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
index 8185f7baa76..ab1429d4e40 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C
@@ -1,12 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef int v8si __attribute__((vector_size(32)));
+typedef int vnx4si __attribute__((vector_size(32)));
-v8si
-foo (v8si x, v8si y)
+vnx4si
+foo (vnx4si x, vnx4si y)
{
- return (v8si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y;
+ return (vnx4si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
index b15da8a59e2..3ad39b9df7d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C
@@ -1,12 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef long long v4di __attribute__((vector_size(32)));
+typedef long long vnx2di __attribute__((vector_size(32)));
-v4di
-foo (v4di x, v4di y)
+vnx2di
+foo (vnx2di x, vnx2di y)
{
- return (v4di) { -1, 0, 0, -1 } ? x : y;
+ return (vnx2di) { -1, 0, 0, -1 } ? x : y;
}
/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
index ea977207226..8df86eb6b1b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X) X, X
#define MASK_4(X) MASK_2 (X), MASK_2 (X)
@@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X) MASK_8 (X), MASK_8 (X)
#define MASK_32(X) MASK_16 (X), MASK_16 (X)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define DUP_LANE(TYPE, NUNITS, INDEX) \
TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \
@@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4, 0) \
- T (v4di, 4, 2) \
- T (v4di, 4, 3) \
- T (v8si, 8, 0) \
- T (v8si, 8, 5) \
- T (v8si, 8, 7) \
- T (v16hi, 16, 0) \
- T (v16hi, 16, 6) \
- T (v16hi, 16, 15) \
- T (v32qi, 32, 0) \
- T (v32qi, 32, 19) \
- T (v32qi, 32, 31) \
- T (v4df, 4, 0) \
- T (v4df, 4, 2) \
- T (v4df, 4, 3) \
- T (v8sf, 8, 0) \
- T (v8sf, 8, 5) \
- T (v8sf, 8, 7) \
- T (v16hf, 16, 0) \
- T (v16hf, 16, 6) \
- T (v16hf, 16, 15) \
+ T (vnx2di, 4, 0) \
+ T (vnx2di, 4, 2) \
+ T (vnx2di, 4, 3) \
+ T (vnx4si, 8, 0) \
+ T (vnx4si, 8, 5) \
+ T (vnx4si, 8, 7) \
+ T (vnx8hi, 16, 0) \
+ T (vnx8hi, 16, 6) \
+ T (vnx8hi, 16, 15) \
+ T (vnx16qi, 32, 0) \
+ T (vnx16qi, 32, 19) \
+ T (vnx16qi, 32, 31) \
+ T (vnx2df, 4, 0) \
+ T (vnx2df, 4, 2) \
+ T (vnx2df, 4, 3) \
+ T (vnx4sf, 8, 0) \
+ T (vnx4sf, 8, 5) \
+ T (vnx4sf, 8, 7) \
+ T (vnx8hf, 16, 0) \
+ T (vnx8hf, 16, 6) \
+ T (vnx8hf, 16, 15) \
TEST_ALL (DUP_LANE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
index 1ec51aa2eaf..05bd6dc8f65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X) X, X + 1
#define MASK_4(X) MASK_2 (X), MASK_2 (X + 2)
@@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X) MASK_8 (X), MASK_8 (X + 8)
#define MASK_32(X) MASK_16 (X), MASK_16 (X + 16)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define DUP_LANE(TYPE, NUNITS, INDEX) \
TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \
@@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4, 1) \
- T (v4di, 4, 2) \
- T (v4di, 4, 3) \
- T (v8si, 8, 1) \
- T (v8si, 8, 5) \
- T (v8si, 8, 7) \
- T (v16hi, 16, 1) \
- T (v16hi, 16, 6) \
- T (v16hi, 16, 15) \
- T (v32qi, 32, 1) \
- T (v32qi, 32, 19) \
- T (v32qi, 32, 31) \
- T (v4df, 4, 1) \
- T (v4df, 4, 2) \
- T (v4df, 4, 3) \
- T (v8sf, 8, 1) \
- T (v8sf, 8, 5) \
- T (v8sf, 8, 7) \
- T (v16hf, 16, 1) \
- T (v16hf, 16, 6) \
- T (v16hf, 16, 15) \
+ T (vnx2di, 4, 1) \
+ T (vnx2di, 4, 2) \
+ T (vnx2di, 4, 3) \
+ T (vnx4si, 8, 1) \
+ T (vnx4si, 8, 5) \
+ T (vnx4si, 8, 7) \
+ T (vnx8hi, 16, 1) \
+ T (vnx8hi, 16, 6) \
+ T (vnx8hi, 16, 15) \
+ T (vnx16qi, 32, 1) \
+ T (vnx16qi, 32, 19) \
+ T (vnx16qi, 32, 31) \
+ T (vnx2df, 4, 1) \
+ T (vnx2df, 4, 2) \
+ T (vnx2df, 4, 3) \
+ T (vnx4sf, 8, 1) \
+ T (vnx4sf, 8, 5) \
+ T (vnx4sf, 8, 7) \
+ T (vnx8hf, 16, 1) \
+ T (vnx8hf, 16, 6) \
+ T (vnx8hf, 16, 15) \
TEST_ALL (DUP_LANE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
index b93574e50f7..047d4c59651 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c
@@ -1,16 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef int v8si __attribute__((vector_size (32)));
+typedef int vnx4si __attribute__((vector_size (32)));
void
foo (void)
{
- register v8si x asm ("z0");
- register v8si y asm ("z1");
+ register vnx4si x asm ("z0");
+ register vnx4si y asm ("z1");
asm volatile ("" : "=w" (y));
- x = __builtin_shuffle (y, y, (v8si) { 1, 2, 3, 4, 5, 6, 7, 8 });
+ x = __builtin_shuffle (y, y, (vnx4si) { 1, 2, 3, 4, 5, 6, 7, 8 });
asm volatile ("" :: "w" (x));
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
index 1ba277ffa6d..f9cd8d2998e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,39 +20,39 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (int64_t, v4di, 0) \
- T (int64_t, v4di, 1) \
- T (int64_t, v4di, 2) \
- T (int64_t, v4di, 3) \
- T (int32_t, v8si, 0) \
- T (int32_t, v8si, 1) \
- T (int32_t, v8si, 3) \
- T (int32_t, v8si, 4) \
- T (int32_t, v8si, 7) \
- T (int16_t, v16hi, 0) \
- T (int16_t, v16hi, 1) \
- T (int16_t, v16hi, 7) \
- T (int16_t, v16hi, 8) \
- T (int16_t, v16hi, 15) \
- T (int8_t, v32qi, 0) \
- T (int8_t, v32qi, 1) \
- T (int8_t, v32qi, 15) \
- T (int8_t, v32qi, 16) \
- T (int8_t, v32qi, 31) \
- T (double, v4df, 0) \
- T (double, v4df, 1) \
- T (double, v4df, 2) \
- T (double, v4df, 3) \
- T (float, v8sf, 0) \
- T (float, v8sf, 1) \
- T (float, v8sf, 3) \
- T (float, v8sf, 4) \
- T (float, v8sf, 7) \
- T (_Float16, v16hf, 0) \
- T (_Float16, v16hf, 1) \
- T (_Float16, v16hf, 7) \
- T (_Float16, v16hf, 8) \
- T (_Float16, v16hf, 15)
+ T (int64_t, vnx2di, 0) \
+ T (int64_t, vnx2di, 1) \
+ T (int64_t, vnx2di, 2) \
+ T (int64_t, vnx2di, 3) \
+ T (int32_t, vnx4si, 0) \
+ T (int32_t, vnx4si, 1) \
+ T (int32_t, vnx4si, 3) \
+ T (int32_t, vnx4si, 4) \
+ T (int32_t, vnx4si, 7) \
+ T (int16_t, vnx8hi, 0) \
+ T (int16_t, vnx8hi, 1) \
+ T (int16_t, vnx8hi, 7) \
+ T (int16_t, vnx8hi, 8) \
+ T (int16_t, vnx8hi, 15) \
+ T (int8_t, vnx16qi, 0) \
+ T (int8_t, vnx16qi, 1) \
+ T (int8_t, vnx16qi, 15) \
+ T (int8_t, vnx16qi, 16) \
+ T (int8_t, vnx16qi, 31) \
+ T (double, vnx2df, 0) \
+ T (double, vnx2df, 1) \
+ T (double, vnx2df, 2) \
+ T (double, vnx2df, 3) \
+ T (float, vnx4sf, 0) \
+ T (float, vnx4sf, 1) \
+ T (float, vnx4sf, 3) \
+ T (float, vnx4sf, 4) \
+ T (float, vnx4sf, 7) \
+ T (_Float16, vnx8hf, 0) \
+ T (_Float16, vnx8hf, 1) \
+ T (_Float16, vnx8hf, 7) \
+ T (_Float16, vnx8hf, 8) \
+ T (_Float16, vnx8hf, 15)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
index b163f28ef28..717546997b3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v8di __attribute__((vector_size (64)));
-typedef int32_t v16si __attribute__((vector_size (64)));
-typedef int16_t v32hi __attribute__((vector_size (64)));
-typedef int8_t v64qi __attribute__((vector_size (64)));
-typedef double v8df __attribute__((vector_size (64)));
-typedef float v16sf __attribute__((vector_size (64)));
-typedef _Float16 v32hf __attribute__((vector_size (64)));
+typedef int64_t vnx4di __attribute__((vector_size (64)));
+typedef int32_t vnx8si __attribute__((vector_size (64)));
+typedef int16_t vnx16hi __attribute__((vector_size (64)));
+typedef int8_t vnx32qi __attribute__((vector_size (64)));
+typedef double vnx4df __attribute__((vector_size (64)));
+typedef float vnx8sf __attribute__((vector_size (64)));
+typedef _Float16 vnx16hf __attribute__((vector_size (64)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,39 +20,39 @@ typedef _Float16 v32hf __attribute__((vector_size (64)));
}
#define TEST_ALL(T) \
- T (int64_t, v8di, 0) \
- T (int64_t, v8di, 1) \
- T (int64_t, v8di, 2) \
- T (int64_t, v8di, 7) \
- T (int32_t, v16si, 0) \
- T (int32_t, v16si, 1) \
- T (int32_t, v16si, 3) \
- T (int32_t, v16si, 4) \
- T (int32_t, v16si, 15) \
- T (int16_t, v32hi, 0) \
- T (int16_t, v32hi, 1) \
- T (int16_t, v32hi, 7) \
- T (int16_t, v32hi, 8) \
- T (int16_t, v32hi, 31) \
- T (int8_t, v64qi, 0) \
- T (int8_t, v64qi, 1) \
- T (int8_t, v64qi, 15) \
- T (int8_t, v64qi, 16) \
- T (int8_t, v64qi, 63) \
- T (double, v8df, 0) \
- T (double, v8df, 1) \
- T (double, v8df, 2) \
- T (double, v8df, 7) \
- T (float, v16sf, 0) \
- T (float, v16sf, 1) \
- T (float, v16sf, 3) \
- T (float, v16sf, 4) \
- T (float, v16sf, 15) \
- T (_Float16, v32hf, 0) \
- T (_Float16, v32hf, 1) \
- T (_Float16, v32hf, 7) \
- T (_Float16, v32hf, 8) \
- T (_Float16, v32hf, 31)
+ T (int64_t, vnx4di, 0) \
+ T (int64_t, vnx4di, 1) \
+ T (int64_t, vnx4di, 2) \
+ T (int64_t, vnx4di, 7) \
+ T (int32_t, vnx8si, 0) \
+ T (int32_t, vnx8si, 1) \
+ T (int32_t, vnx8si, 3) \
+ T (int32_t, vnx8si, 4) \
+ T (int32_t, vnx8si, 15) \
+ T (int16_t, vnx16hi, 0) \
+ T (int16_t, vnx16hi, 1) \
+ T (int16_t, vnx16hi, 7) \
+ T (int16_t, vnx16hi, 8) \
+ T (int16_t, vnx16hi, 31) \
+ T (int8_t, vnx32qi, 0) \
+ T (int8_t, vnx32qi, 1) \
+ T (int8_t, vnx32qi, 15) \
+ T (int8_t, vnx32qi, 16) \
+ T (int8_t, vnx32qi, 63) \
+ T (double, vnx4df, 0) \
+ T (double, vnx4df, 1) \
+ T (double, vnx4df, 2) \
+ T (double, vnx4df, 7) \
+ T (float, vnx8sf, 0) \
+ T (float, vnx8sf, 1) \
+ T (float, vnx8sf, 3) \
+ T (float, vnx8sf, 4) \
+ T (float, vnx8sf, 15) \
+ T (_Float16, vnx16hf, 0) \
+ T (_Float16, vnx16hf, 1) \
+ T (_Float16, vnx16hf, 7) \
+ T (_Float16, vnx16hf, 8) \
+ T (_Float16, vnx16hf, 31)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
index 87ac2351768..19a22cdd7b7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v16di __attribute__((vector_size (128)));
-typedef int32_t v32si __attribute__((vector_size (128)));
-typedef int16_t v64hi __attribute__((vector_size (128)));
-typedef int8_t v128qi __attribute__((vector_size (128)));
-typedef double v16df __attribute__((vector_size (128)));
-typedef float v32sf __attribute__((vector_size (128)));
-typedef _Float16 v64hf __attribute__((vector_size (128)));
+typedef int64_t vnx8di __attribute__((vector_size (128)));
+typedef int32_t vnx16si __attribute__((vector_size (128)));
+typedef int16_t vnx32hi __attribute__((vector_size (128)));
+typedef int8_t vnx64qi __attribute__((vector_size (128)));
+typedef double vnx8df __attribute__((vector_size (128)));
+typedef float vnx16sf __attribute__((vector_size (128)));
+typedef _Float16 vnx32hf __attribute__((vector_size (128)));
#define EXTRACT(ELT_TYPE, TYPE, INDEX) \
ELT_TYPE permute_##TYPE##_##INDEX (void) \
@@ -20,60 +20,60 @@ typedef _Float16 v64hf __attribute__((vector_size (128)));
}
#define TEST_ALL(T) \
- T (int64_t, v16di, 0) \
- T (int64_t, v16di, 1) \
- T (int64_t, v16di, 2) \
- T (int64_t, v16di, 7) \
- T (int64_t, v16di, 8) \
- T (int64_t, v16di, 9) \
- T (int64_t, v16di, 15) \
- T (int32_t, v32si, 0) \
- T (int32_t, v32si, 1) \
- T (int32_t, v32si, 3) \
- T (int32_t, v32si, 4) \
- T (int32_t, v32si, 15) \
- T (int32_t, v32si, 16) \
- T (int32_t, v32si, 21) \
- T (int32_t, v32si, 31) \
- T (int16_t, v64hi, 0) \
- T (int16_t, v64hi, 1) \
- T (int16_t, v64hi, 7) \
- T (int16_t, v64hi, 8) \
- T (int16_t, v64hi, 31) \
- T (int16_t, v64hi, 32) \
- T (int16_t, v64hi, 47) \
- T (int16_t, v64hi, 63) \
- T (int8_t, v128qi, 0) \
- T (int8_t, v128qi, 1) \
- T (int8_t, v128qi, 15) \
- T (int8_t, v128qi, 16) \
- T (int8_t, v128qi, 63) \
- T (int8_t, v128qi, 64) \
- T (int8_t, v128qi, 100) \
- T (int8_t, v128qi, 127) \
- T (double, v16df, 0) \
- T (double, v16df, 1) \
- T (double, v16df, 2) \
- T (double, v16df, 7) \
- T (double, v16df, 8) \
- T (double, v16df, 9) \
- T (double, v16df, 15) \
- T (float, v32sf, 0) \
- T (float, v32sf, 1) \
- T (float, v32sf, 3) \
- T (float, v32sf, 4) \
- T (float, v32sf, 15) \
- T (float, v32sf, 16) \
- T (float, v32sf, 21) \
- T (float, v32sf, 31) \
- T (_Float16, v64hf, 0) \
- T (_Float16, v64hf, 1) \
- T (_Float16, v64hf, 7) \
- T (_Float16, v64hf, 8) \
- T (_Float16, v64hf, 31) \
- T (_Float16, v64hf, 32) \
- T (_Float16, v64hf, 47) \
- T (_Float16, v64hf, 63)
+ T (int64_t, vnx8di, 0) \
+ T (int64_t, vnx8di, 1) \
+ T (int64_t, vnx8di, 2) \
+ T (int64_t, vnx8di, 7) \
+ T (int64_t, vnx8di, 8) \
+ T (int64_t, vnx8di, 9) \
+ T (int64_t, vnx8di, 15) \
+ T (int32_t, vnx16si, 0) \
+ T (int32_t, vnx16si, 1) \
+ T (int32_t, vnx16si, 3) \
+ T (int32_t, vnx16si, 4) \
+ T (int32_t, vnx16si, 15) \
+ T (int32_t, vnx16si, 16) \
+ T (int32_t, vnx16si, 21) \
+ T (int32_t, vnx16si, 31) \
+ T (int16_t, vnx32hi, 0) \
+ T (int16_t, vnx32hi, 1) \
+ T (int16_t, vnx32hi, 7) \
+ T (int16_t, vnx32hi, 8) \
+ T (int16_t, vnx32hi, 31) \
+ T (int16_t, vnx32hi, 32) \
+ T (int16_t, vnx32hi, 47) \
+ T (int16_t, vnx32hi, 63) \
+ T (int8_t, vnx64qi, 0) \
+ T (int8_t, vnx64qi, 1) \
+ T (int8_t, vnx64qi, 15) \
+ T (int8_t, vnx64qi, 16) \
+ T (int8_t, vnx64qi, 63) \
+ T (int8_t, vnx64qi, 64) \
+ T (int8_t, vnx64qi, 100) \
+ T (int8_t, vnx64qi, 127) \
+ T (double, vnx8df, 0) \
+ T (double, vnx8df, 1) \
+ T (double, vnx8df, 2) \
+ T (double, vnx8df, 7) \
+ T (double, vnx8df, 8) \
+ T (double, vnx8df, 9) \
+ T (double, vnx8df, 15) \
+ T (float, vnx16sf, 0) \
+ T (float, vnx16sf, 1) \
+ T (float, vnx16sf, 3) \
+ T (float, vnx16sf, 4) \
+ T (float, vnx16sf, 15) \
+ T (float, vnx16sf, 16) \
+ T (float, vnx16sf, 21) \
+ T (float, vnx16sf, 31) \
+ T (_Float16, vnx32hf, 0) \
+ T (_Float16, vnx32hf, 1) \
+ T (_Float16, vnx32hf, 7) \
+ T (_Float16, vnx32hf, 8) \
+ T (_Float16, vnx32hf, 31) \
+ T (_Float16, vnx32hf, 32) \
+ T (_Float16, vnx32hf, 47) \
+ T (_Float16, vnx32hf, 63)
TEST_ALL (EXTRACT)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
index b193726ea0a..5934b2dfb12 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vdiv_##TYPE (TYPE *x, TYPE y) \
@@ -29,9 +29,9 @@ void vdivr_##TYPE (TYPE *x, TYPE y) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
index 2b1dbb087bc..7b1575f9ee4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
index d5e4df266bf..381af4c8517 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
index c3f2c8a5823..744d0bb7bcc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
index 30e1895c8d5..e1251bd9cf6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
index 84a95187314..238bd852117 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
index dcc4811f1d8..f258a7454da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
index 7a89399f4be..4d859d4b0a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
index 6c95b0abc8e..2510a6f2831 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c
@@ -1,9 +1,9 @@
/* { dg-do assemble } */
/* { dg-options " -O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-typedef _Float16 v16hf __attribute__((vector_size(32)));
-typedef float v8sf __attribute__((vector_size(32)));
-typedef double v4df __attribute__((vector_size(32)));
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v16hf)
-DO_OP (v8sf)
-DO_OP (v4df)
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
index 096a969d756..6ed5c06bd51 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c
@@ -1,72 +1,32 @@
/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, unsigned short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, unsigned char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load64s(signed long * restrict dst, signed long * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32s(signed int * restrict dst, signed int * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16s(signed short * restrict dst, signed short * restrict src, unsigned short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8s(signed char * restrict dst, signed char * restrict src, unsigned char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_double(double * restrict dst, double * restrict src, unsigned long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_float(float * restrict dst, float * restrict src, unsigned int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c
deleted file mode 100644
index b31b4508114..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, signed short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, signed char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load64s(signed long * restrict dst, signed long * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load32s(signed int * restrict dst, signed int * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load16s(signed short * restrict dst, signed short * restrict src, signed short * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load8s(signed char * restrict dst, signed char * restrict src, signed char * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_double(double * restrict dst, double * restrict src, signed long * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_load_float(float * restrict dst, float * restrict src, signed int * restrict indices, int count)
-{
- for (int i=0; i<count; i++)
- dst[i] = src[indices[i]];
-}
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 3 } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */
-/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c
deleted file mode 100644
index d8a85396eb4..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-void
-f (double *restrict a, double *restrict b, short *c, int *d, int n)
-{
- for (int i = 0; i < n; i++)
- a[i] = b[c[i] + d[i]];
-}
-
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+.h,} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s,} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d,} 4 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+.d,} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
index 9b62b12904e..4e348db3bf1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c
@@ -1,72 +1,10 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-void gather_loadu64_s16(unsigned long * restrict dst, unsigned long * restrict src,
- short int * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-void gather_loadu64_u16(unsigned long * restrict dst, unsigned long * restrict src,
- unsigned short int * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
+#include "sve_gather_load_1.c"
-void gather_loadd_s16(double * restrict dst, double * restrict src,
- short * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_u16(double * restrict dst, double * restrict src,
- unsigned short * restrict indices, short n)
-{
- for (short i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadu64_s32(unsigned long * restrict dst, unsigned long * restrict src,
- int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadu64_u32(unsigned long * restrict dst, unsigned long * restrict src,
- unsigned int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_s32(double * restrict dst, double * restrict src,
- int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-void gather_loadd_u32(double * restrict dst, double * restrict src,
- unsigned int * restrict indices, int n)
-{
- for (int i=0; i<n; i++)
- dst[i] = src[indices[i]];
-}
-
-/* At present we only use unpacks for the 32/64 combinations. */
-/* { dg-final { scan-assembler-times {\tpunpklo\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\tpunpkhi\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */
-
-/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
index 0a8f802ce56..a113a0faeb9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c
@@ -1,45 +1,32 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- DATA_TYPE __attribute__ ((noinline)) \
- NAME (char *data, INDEX_TYPE *indices, int n) \
- { \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += *(DATA_TYPE *) (data + indices[i]); \
- return sum; \
- }
+#include <stdint.h>
-#define TEST32(NAME, DATA_TYPE) \
- TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \
- TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \
- TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \
- TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \
- TEST_LOOP (NAME ## _s16, DATA_TYPE, signed short) \
- TEST_LOOP (NAME ## _s32, DATA_TYPE, signed int)
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
-#define TEST64(NAME, DATA_TYPE) \
- TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \
- TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \
- TEST_LOOP (NAME ## _s16, DATA_TYPE, short) \
- TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \
- TEST_LOOP (NAME ## _s32, DATA_TYPE, int) \
- TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \
- TEST_LOOP (NAME ## _s64, DATA_TYPE, long) \
- TEST_LOOP (NAME ## _u64, DATA_TYPE, unsigned long)
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \
+ }
-TEST32 (f_s32, int)
-TEST32 (f_u32, unsigned int)
-TEST32 (f_f32, float)
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
-TEST64 (f_s64, long)
-TEST64 (f_u64, unsigned long)
-TEST64 (f_f64, double)
+TEST_ALL (TEST_LOOP)
-/* (4 + 2 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]} 21 } } */
-/* (4 + 2 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 21 } } */
-/* (8 + 8 + 4 + 4 + 2 + 2 + 1 + 1) * 3 */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 90 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c
deleted file mode 100644
index baa90d5d5fc..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_gather_load_3.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- { \
- INDEX_TYPE indices[N]; \
- DATA_TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \
- j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \
- sum += data[j / sizeof (DATA_TYPE)]; \
- indices[i] = j; \
- } \
- DATA_TYPE res = NAME ((char *) data, indices, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST32 (f_s32, int)
- TEST32 (f_u32, unsigned int)
- TEST32 (f_f32, float)
-
- TEST64 (f_s64, long)
- TEST64 (f_u64, unsigned long)
- TEST64 (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
index 4d0da987d30..5382e523689 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c
@@ -1,18 +1,10 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TEST_LOOP(NAME, TYPE) \
- TYPE __attribute__ ((noinline)) \
- NAME (TYPE **indices, int n) \
- { \
- TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += *indices[i]; \
- return sum; \
- }
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-TEST_LOOP (f_s64, long)
-TEST_LOOP (f_u64, unsigned long)
-TEST_LOOP (f_f64, double)
+#include "sve_gather_load_3.c"
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c
deleted file mode 100644
index 00d3dea6acd..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_gather_load_4.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE *ptrs[N]; \
- TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- ptrs[i] = &data[i * 3 / 2]; \
- sum += *ptrs[i]; \
- } \
- TYPE res = NAME (ptrs, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s64, long)
- TEST_LOOP (f_u64, unsigned long)
- TEST_LOOP (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
index 0aaf9553a11..8e4f689243b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c
@@ -1,113 +1,23 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE)\
-void gather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i] = src[i * STRIDE];\
-}
-
-#define GATHER_LOAD2(OBJTYPE,STRIDETYPE)\
-void gather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i] = src[i * stride];\
-}
-
-#define GATHER_LOAD3(OBJTYPE,STRIDETYPE)\
-void gather_load3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\
- OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- d1[i] = src[0 + (i * STRIDE)];\
- d2[i] = src[1 + (i * STRIDE)];\
- d3[i] = src[2 + (i * STRIDE)];\
- d4[i] = src[3 + (i * STRIDE)];\
- d5[i] = src[4 + (i * STRIDE)];\
- }\
-}
-
-#define GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE)\
-void gather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += 1;\
- src += STRIDE;\
- }\
-}
-
-#define GATHER_LOAD5(OBJTYPE,STRIDETYPE)\
-void gather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += 1;\
- src += stride;\
- }\
-}
-
-GATHER_LOAD1 (double, long, 5)
-GATHER_LOAD1 (double, long, 8)
-GATHER_LOAD1 (double, long, 21)
-GATHER_LOAD1 (double, long, 1009)
-
-GATHER_LOAD1 (float, int, 5)
-GATHER_LOAD1 (float, int, 8)
-GATHER_LOAD1 (float, int, 21)
-GATHER_LOAD1 (float, int, 1009)
-
-GATHER_LOAD2 (double, long)
-GATHER_LOAD2 (float, int)
-
-GATHER_LOAD3 (double, long)
-GATHER_LOAD3 (float, int)
-
-GATHER_LOAD4 (double, long, 5)
-
-/* NOTE: We can't vectorize GATHER_LOAD4 (float, int, 5) because we can't prove
- that the offsets used for the gather load won't overflow. */
-
-GATHER_LOAD5 (double, long)
-GATHER_LOAD5 (float, int)
-
-/* Widened forms. */
-GATHER_LOAD1 (double, int, 5)
-GATHER_LOAD1 (double, int, 8)
-GATHER_LOAD1 (double, short, 5)
-GATHER_LOAD1 (double, short, 8)
-
-GATHER_LOAD1 (float, short, 5)
-GATHER_LOAD1 (float, short, 8)
-
-GATHER_LOAD2 (double, int)
-GATHER_LOAD2 (float, short)
-
-GATHER_LOAD4 (double, int, 5)
-GATHER_LOAD4 (float, short, 5)
-
-GATHER_LOAD5 (double, int)
-
-/* TODO: We generate abysmal code for this even though we don't use gathers. */
-/*GATHER_LOAD5 (float, short)*/
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+#include <stdint.h>
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict *src, \
+ int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] += *src[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c
deleted file mode 100644
index 7608f9b569b..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_gather_load_5.c"
-
-#define NUM_DST_ELEMS 13
-#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE)
-
-#define TEST_GATHER_LOAD_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- src[i * STRIDE] = i;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE \
- (dst, src, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_GATHER_LOAD_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- src[i * STRIDE] = i;\
- FUN##OBJTYPE##STRIDETYPE \
- (dst, src, STRIDE, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON1 (gather_load1, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD2(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON2 (gather_load2, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD3(OBJTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\
- memset (real_dst1, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst2, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst3, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst4, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst5, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst1 = &real_dst1[1];\
- OBJTYPE *dst2 = &real_dst2[1];\
- OBJTYPE *dst3 = &real_dst3[1];\
- OBJTYPE *dst4 = &real_dst4[1];\
- OBJTYPE *dst5 = &real_dst5[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\
- src[i] = i;\
- gather_load3s5##OBJTYPE##STRIDETYPE \
- (dst1, dst2, dst3, dst4, dst5, src, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst1[i] != base)\
- abort ();\
- if (dst2[i] != (base + 1))\
- abort ();\
- if (dst3[i] != (base + 2))\
- abort ();\
- if (dst4[i] != (base + 3))\
- abort ();\
- if (dst5[i] != (base + 4))\
- abort ();\
- }\
-}
-
-#define TEST_GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON1 (gather_load4, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_GATHER_LOAD5(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_GATHER_LOAD_COMMON2 (gather_load5, OBJTYPE, STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_GATHER_LOAD1 (double, long, 5);
- TEST_GATHER_LOAD1 (double, long, 8);
- TEST_GATHER_LOAD1 (double, long, 21);
-
- TEST_GATHER_LOAD1 (float, int, 5);
- TEST_GATHER_LOAD1 (float, int, 8);
- TEST_GATHER_LOAD1 (float, int, 21);
-
- TEST_GATHER_LOAD2 (double, long, 5);
- TEST_GATHER_LOAD2 (double, long, 8);
- TEST_GATHER_LOAD2 (double, long, 21);
-
- TEST_GATHER_LOAD2 (float, int, 5);
- TEST_GATHER_LOAD2 (float, int, 8);
- TEST_GATHER_LOAD2 (float, int, 21);
-
- TEST_GATHER_LOAD3 (double, long);
- TEST_GATHER_LOAD3 (float, int);
-
- TEST_GATHER_LOAD4 (double, long, 5);
-
- TEST_GATHER_LOAD5 (double, long, 5);
- TEST_GATHER_LOAD5 (float, int, 5);
-
- /* Widened forms. */
- TEST_GATHER_LOAD1 (double, int, 5)
- TEST_GATHER_LOAD1 (double, int, 8)
- TEST_GATHER_LOAD1 (double, short, 5)
- TEST_GATHER_LOAD1 (double, short, 8)
-
- TEST_GATHER_LOAD1 (float, short, 5)
- TEST_GATHER_LOAD1 (float, short, 8)
-
- TEST_GATHER_LOAD2 (double, int, 5);
- TEST_GATHER_LOAD2 (double, int, 8);
- TEST_GATHER_LOAD2 (double, int, 21);
-
- TEST_GATHER_LOAD2 (float, short, 5);
- TEST_GATHER_LOAD2 (float, short, 8);
- TEST_GATHER_LOAD2 (float, short, 21);
-
- TEST_GATHER_LOAD4 (double, int, 5);
- TEST_GATHER_LOAD4 (float, short, 5);
-
- TEST_GATHER_LOAD5 (double, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
index 68b0b4d59b6..745e00f1e50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c
@@ -1,14 +1,36 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-foo (double *__restrict y, double *__restrict x1,
- double *__restrict x2, int m)
-{
- for (int i = 0; i < 256; ++i)
- y[i * m] = x1[i * m] + x2[i * m];
-}
-
-/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
-/* { dg-final { scan-assembler-not {\torr\tz[0-9]+} } } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[i] = src[(INDEX##BITS) (indices[i] | mask)]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
index 788aeb08df2..8f2dfb75149 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c
@@ -1,15 +1,15 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-void
-foo (double *x, int m)
-{
- for (int i = 0; i < 256; ++i)
- x[i * m] += x[i * m];
-}
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
-/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
+#include "sve_gather_load_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c
deleted file mode 100644
index 0c0cf73be55..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-f (int *__restrict a,
- int *__restrict b,
- int *__restrict c,
- int count)
-{
- for (int i = 0; i < count; ++i)
- a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 2]
- + c[i * 5] + c[i * 5 + 3]);
-}
-
-/* There must be a final scalar iteration because b[(count - 1) * 4 + 3]
- is not accessed by the original code. */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */
-/* { dg-final { scan-assembler {\tldr\t} } } */
-/* { dg-final { scan-assembler {\tstr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c
deleted file mode 100644
index dad798c8106..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-void
-f (int *__restrict a,
- int *__restrict b,
- int *__restrict c,
- int count)
-{
- for (int i = 0; i < count; ++i)
- a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 3]
- + c[i * 5] + c[i * 5 + 3]);
-}
-
-/* There's no need for a scalar tail here. */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */
-/* { dg-final { scan-assembler-not {\tldr\t} } } */
-/* { dg-final { scan-assembler-not {\tstr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c
new file mode 100644
index 00000000000..9c4bb37f04e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
+
+#define SIZE (15 * 8 + 3)
+
+#define DEF_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \
+void __attribute__ ((noinline, noclone)) \
+set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *restrict out, \
+ SIGNED TYPE *restrict in) \
+{ \
+ SIGNED ITERTYPE i; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ out[i] = in[i]; \
+ } \
+} \
+void __attribute__ ((noinline, noclone)) \
+set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *restrict out, \
+ SIGNED TYPE *restrict in, \
+ SIGNED ITERTYPE n) \
+{ \
+ SIGNED ITERTYPE i; \
+ for (i = 0; i < n; i++) \
+ { \
+ out[i] = in[i]; \
+ } \
+}
+
+#define TEST_TYPE(T, SIGNED, TYPE) \
+ T (SIGNED, TYPE, char) \
+ T (SIGNED, TYPE, short) \
+ T (SIGNED, TYPE, int) \
+ T (SIGNED, TYPE, long)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, signed, long) \
+ TEST_TYPE (T, unsigned, long) \
+ TEST_TYPE (T, signed, int) \
+ TEST_TYPE (T, unsigned, int) \
+ TEST_TYPE (T, signed, short) \
+ TEST_TYPE (T, unsigned, short) \
+ TEST_TYPE (T, signed, char) \
+ TEST_TYPE (T, unsigned, char)
+
+TEST_ALL (DEF_INDEX_OFFSET)
+
+/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
+/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c
new file mode 100644
index 00000000000..276d259ac3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "sve_index_offset_1.c"
+
+#define TEST_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \
+{ \
+ SIGNED TYPE out[SIZE + 1]; \
+ SIGNED TYPE in1[SIZE + 1]; \
+ SIGNED TYPE in2[SIZE + 1]; \
+ for (int i = 0; i < SIZE + 1; ++i) \
+ { \
+ in1[i] = (i * 4) ^ i; \
+ in2[i] = (i * 2) ^ i; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ out[SIZE] = 42; \
+ set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \
+ if (0 != __builtin_memcmp (out, in1, SIZE * sizeof (TYPE))) \
+ __builtin_abort (); \
+ set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \
+ if (0 != __builtin_memcmp (out, in2, SIZE * sizeof (TYPE))) \
+ __builtin_abort (); \
+ if (out[SIZE] != 42) \
+ __builtin_abort (); \
+}
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (TEST_INDEX_OFFSET);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c
deleted file mode 100644
index 949449cde9f..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" } */
-
-#define SIZE 15*8+3
-
-#define INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE) \
-void set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *__restrict out, \
- SIGNED TYPE *__restrict in) \
-{ \
- SIGNED ITERTYPE i; \
- for (i = 0; i < SIZE; i++) \
- { \
- out[i] = in[i]; \
- } \
-} \
-void set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *__restrict out, \
- SIGNED TYPE *__restrict in, \
- SIGNED ITERTYPE n) \
-{\
- SIGNED ITERTYPE i;\
- for (i = 0; i < n; i++)\
- {\
- out[i] = in[i];\
- }\
-}
-
-#define INDEX_OFFSET_TEST(SIGNED, TYPE)\
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \
- INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long)
-
-INDEX_OFFSET_TEST (signed, long)
-INDEX_OFFSET_TEST (unsigned, long)
-INDEX_OFFSET_TEST (signed, int)
-INDEX_OFFSET_TEST (unsigned, int)
-INDEX_OFFSET_TEST (signed, short)
-INDEX_OFFSET_TEST (unsigned, short)
-INDEX_OFFSET_TEST (signed, char)
-INDEX_OFFSET_TEST (unsigned, char)
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
-/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c
deleted file mode 100644
index d6b2646798c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve" } */
-/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */
-
-#include "sve_indexoffset_1.c"
-
-#include <string.h>
-
-#define CALL_INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE)\
-{\
- SIGNED TYPE out[SIZE + 1];\
- SIGNED TYPE in1[SIZE + 1];\
- SIGNED TYPE in2[SIZE + 1];\
- for (int i = 0; i < SIZE + 1; ++i)\
- {\
- in1[i] = (i * 4) ^ i;\
- in2[i] = (i * 2) ^ i;\
- }\
- out[SIZE] = 42;\
- set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \
- if (0 != memcmp (out, in1, SIZE * sizeof (TYPE)))\
- return 1;\
- set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \
- if (0 != memcmp (out, in2, SIZE * sizeof (TYPE)))\
- return 1;\
- if (out[SIZE] != 42)\
- return 1;\
-}
-
-#define CALL_INDEX_OFFSET_TEST(SIGNED, TYPE)\
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \
- CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long)
-
-int
-main (void)
-{
- CALL_INDEX_OFFSET_TEST (signed, long)
- CALL_INDEX_OFFSET_TEST (unsigned, long)
- CALL_INDEX_OFFSET_TEST (signed, int)
- CALL_INDEX_OFFSET_TEST (unsigned, int)
- CALL_INDEX_OFFSET_TEST (signed, short)
- CALL_INDEX_OFFSET_TEST (unsigned, short)
- CALL_INDEX_OFFSET_TEST (signed, char)
- CALL_INDEX_OFFSET_TEST (unsigned, char)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C
deleted file mode 100644
index 4c196684364..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C
+++ /dev/null
@@ -1,56 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
-
-#include <stdint.h>
-
-#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
-
-#define DEF_LOAD_BROADCAST(TYPE)\
-void set_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++)\
- a[i] = *b;\
-}\
-
-#define DEF_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\
-void set_##TYPE##SUFFIX (TYPE *__restrict__ a)\
-{\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++)\
- a[i] = IMM;\
-}\
-
-/* --- VALID --- */
-
-DEF_LOAD_BROADCAST (int8_t)
-DEF_LOAD_BROADCAST (int16_t)
-DEF_LOAD_BROADCAST (int32_t)
-DEF_LOAD_BROADCAST (int64_t)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 129, imm_129)
-DEF_LOAD_BROADCAST_IMM (int32_t, 129, imm_129)
-DEF_LOAD_BROADCAST_IMM (int64_t, 129, imm_129)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130)
-DEF_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130)
-DEF_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234)
-DEF_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234)
-
-DEF_LOAD_BROADCAST_IMM (int16_t, 0xFEDC, imm_0xFEDC)
-DEF_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC)
-
-DEF_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678)
-
-DEF_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678)
-DEF_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678)
-
-DEF_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), imm_FEBA716B12371765)
-
-/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */
-/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C
deleted file mode 100644
index 8e954f3e32c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C
+++ /dev/null
@@ -1,64 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
-
-#include "sve_ld1r_1.C"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#define TEST_LOAD_BROADCAST(TYPE,IMM)\
- {\
- TYPE v[NUM_ELEMS (TYPE)];\
- TYPE temp = 0;\
- set_##TYPE (v, IMM);\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- temp += v[i];\
- result += temp;\
- }\
-
-#define TEST_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\
- {\
- TYPE v[NUM_ELEMS (TYPE)];\
- TYPE temp = 0;\
- set_##TYPE##SUFFIX (v);\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- temp += v[i];\
- result += temp;\
- }\
-
-int main (int argc, char **argv)
-{
- long long int result = 0;
-
- TEST_LOAD_BROADCAST_IMM (int16_t, 129, imm_129)
- TEST_LOAD_BROADCAST_IMM (int32_t, 129, imm_129)
- TEST_LOAD_BROADCAST_IMM (int64_t, 129, imm_129)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130)
- TEST_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130)
- TEST_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234)
- TEST_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234)
-
- TEST_LOAD_BROADCAST_IMM (int16_t, int16_t (0xFEDC), imm_0xFEDC)
- TEST_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC)
-
- TEST_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678)
-
- TEST_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678)
- TEST_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678)
-
- TEST_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765),
- imm_FEBA716B12371765)
-
- if (result != int64_t (6717319005707226880))
- {
- fprintf (stderr, "result = %lld\n", result);
- abort ();
- }
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c
new file mode 100644
index 00000000000..89d5f4289de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
+
+#define DEF_LOAD_BROADCAST(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ set_##TYPE (TYPE *restrict a, TYPE *restrict b) \
+ { \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ a[i] = *b; \
+ }
+
+#define DEF_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \
+ void __attribute__ ((noinline, noclone)) \
+ set_##TYPE##_##SUFFIX (TYPE *a) \
+ { \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ a[i] = IMM; \
+ }
+
+#define FOR_EACH_LOAD_BROADCAST(T) \
+ T (int8_t) \
+ T (int16_t) \
+ T (int32_t) \
+ T (int64_t)
+
+#define FOR_EACH_LOAD_BROADCAST_IMM(T) \
+ T (int16_t, 129, imm_129) \
+ T (int32_t, 129, imm_129) \
+ T (int64_t, 129, imm_129) \
+ \
+ T (int16_t, -130, imm_m130) \
+ T (int32_t, -130, imm_m130) \
+ T (int64_t, -130, imm_m130) \
+ \
+ T (int16_t, 0x1234, imm_0x1234) \
+ T (int32_t, 0x1234, imm_0x1234) \
+ T (int64_t, 0x1234, imm_0x1234) \
+ \
+ T (int16_t, 0xFEDC, imm_0xFEDC) \
+ T (int32_t, 0xFEDC, imm_0xFEDC) \
+ T (int64_t, 0xFEDC, imm_0xFEDC) \
+ \
+ T (int32_t, 0x12345678, imm_0x12345678) \
+ T (int64_t, 0x12345678, imm_0x12345678) \
+ \
+ T (int32_t, 0xF2345678, imm_0xF2345678) \
+ T (int64_t, 0xF2345678, imm_0xF2345678) \
+ \
+ T (int64_t, (int64_t) 0xFEBA716B12371765, imm_FEBA716B12371765)
+
+FOR_EACH_LOAD_BROADCAST (DEF_LOAD_BROADCAST)
+FOR_EACH_LOAD_BROADCAST_IMM (DEF_LOAD_BROADCAST_IMM)
+
+/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */
+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */
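For reference, each DEF_LOAD_BROADCAST_IMM instantiation above expands mechanically into a small store loop; for example DEF_LOAD_BROADCAST_IMM (int16_t, 129, imm_129) produces (a hand expansion, not part of the patch):

void __attribute__ ((noinline, noclone))
set_int16_t_imm_129 (int16_t *a)
{
  /* NUM_ELEMS (int16_t) is 1024 / sizeof (int16_t) = 512.  */
  for (int i = 0; i < NUM_ELEMS (int16_t); i++)
    a[i] = 129;
}

The expected ld1r counts then follow directly from the lists above: one pointer broadcast per element size plus the immediate variants, i.e. 1 for .b, 1 + 4 = 5 for .h, 1 + 6 = 7 for .s and 1 + 7 = 8 for .d.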
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c
new file mode 100644
index 00000000000..510b2eca517
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */
+
+#include "sve_ld1r_2.c"
+
+#define TEST_LOAD_BROADCAST(TYPE) \
+ { \
+ TYPE v[NUM_ELEMS (TYPE)]; \
+ TYPE val = 99; \
+ set_##TYPE (v, &val); \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ if (v[i] != (TYPE) 99) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define TEST_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \
+ { \
+ TYPE v[NUM_ELEMS (TYPE)]; \
+ set_##TYPE##_##SUFFIX (v); \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)			\
+ { \
+ if (v[i] != (TYPE) IMM) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ FOR_EACH_LOAD_BROADCAST (TEST_LOAD_BROADCAST)
+ FOR_EACH_LOAD_BROADCAST_IMM (TEST_LOAD_BROADCAST_IMM)
+
+ return 0;
+}
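The new run tests in this patch share the structure shown above: main is compiled with optimize (1) so that only the noinline/noclone routines under test are auto-vectorized, and the empty asm statements with a "memory" clobber act as optimization barriers that force the arrays to be written and re-read through memory rather than folded at compile time. A minimal sketch of that idiom, using a hypothetical copy_int32 routine in place of the generated test functions:

#include <stdint.h>

#define N 64

void __attribute__ ((noinline, noclone))
copy_int32 (int32_t *dst, int32_t *src, int n)
{
  /* Stand-in for a routine that the compile test would vectorize.  */
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];
}

int __attribute__ ((optimize (1)))
main (void)
{
  int32_t in[N], out[N];
  for (int i = 0; i < N; ++i)
    {
      in[i] = i * 3;
      asm volatile ("" ::: "memory");   /* keep the set-up loop scalar and in memory */
    }
  copy_int32 (out, in, N);
  for (int i = 0; i < N; ++i)
    {
      if (out[i] != i * 3)              /* compare against the scalar expectation */
        __builtin_abort ();
      asm volatile ("" ::: "memory");
    }
  return 0;
}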
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
index 2d92708fbd2..407d1277c50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c
@@ -1,19 +1,41 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-int
-liveloop (int start, int n, int *x)
-{
- int i = start;
- int j;
+#include <stdint.h>
- for (j = 0; j < n; ++j)
- {
- i += 1;
- x[j] = i;
- }
- return i;
-}
+#define EXTRACT_LAST(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ test_##TYPE (TYPE *x, int n, TYPE value) \
+ { \
+ TYPE last; \
+ for (int j = 0; j < n; ++j) \
+ { \
+ last = x[j]; \
+ x[j] = last * value; \
+ } \
+ return last; \
+ }
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Using a fully-masked loop" 1 "vect" } } */
+#define TEST_ALL(T) \
+ T (uint8_t) \
+ T (uint16_t) \
+ T (uint32_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (EXTRACT_LAST)
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].b, } 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].h, } 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].s, } 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].d, } 4 } } */
+
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
index 99f0be353aa..2a1f6df4788 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c
@@ -1,29 +1,35 @@
/* { dg-do run { target { aarch64_sve_hw } } } */
/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-extern void abort(void);
-#include <string.h>
-
#include "sve_live_1.c"
-#define MAX 62
-#define START 27
-
-int main (void)
+#define N 107
+#define OP 70
+
+#define TEST_LOOP(TYPE) \
+ { \
+ TYPE a[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + (i % 3); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ TYPE expected = a[N - 1]; \
+ TYPE res = test_##TYPE (a, N, OP); \
+ if (res != expected) \
+ __builtin_abort (); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE old = i * 2 + (i % 3); \
+ if (a[i] != (TYPE) (old * (TYPE) OP)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
{
- int a[MAX];
- int i;
-
- memset (a, 0, MAX*sizeof (int));
-
- int ret = liveloop (START, MAX, a);
-
- if (ret != 89)
- abort ();
-
- for (i=0; i<MAX; i++)
- {
- if (a[i] != i+START+1)
- abort ();
- }
-} \ No newline at end of file
+ TEST_ALL (TEST_LOOP);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2.c
deleted file mode 100644
index 06d95fa8ea6..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */
-
-int
-liveloop (int start, int n, int * __restrict__ x, char * __restrict__ y)
-{
- int i = start;
- int j;
-
- for (j = 0; j < n; ++j)
- {
- i += 1;
- x[j] = y[j] + 1;
- }
- return i;
-}
-
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Can't use a fully-masked loop because ncopies is greater than 1" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c
deleted file mode 100644
index e7924e020cb..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-extern void abort(void);
-#include <string.h>
-#include <stdio.h>
-
-#include "sve_live_2.c"
-
-#define MAX 193
-#define START 84
-
-int main (void)
-{
- int a[MAX];
- char b[MAX];
- int i;
-
- memset (a, 0, MAX*sizeof (int));
- memset (b, 23, MAX*sizeof (char));
-
- int ret = liveloop (START, MAX, a, b);
-
- if (ret != 277)
- abort ();
-
- for (i=0; i<MAX; i++)
- {
- if (a[i] != 24)
- abort ();
- }
-} \ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
index 0bc757907cf..882da83237e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__ ((vector_size (32)));
-typedef int32_t v8si __attribute__ ((vector_size (32)));
-typedef int16_t v16hi __attribute__ ((vector_size (32)));
-typedef int8_t v32qi __attribute__ ((vector_size (32)));
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
#define TEST_TYPE(TYPE) \
void sve_load_##TYPE##_neg9 (TYPE *a) \
@@ -45,10 +45,10 @@ typedef int8_t v32qi __attribute__ ((vector_size (32)));
asm volatile ("" :: "w" (x)); \
}
-TEST_TYPE (v4di)
-TEST_TYPE (v8si)
-TEST_TYPE (v16hi)
-TEST_TYPE (v32qi)
+TEST_TYPE (vnx2di)
+TEST_TYPE (vnx4si)
+TEST_TYPE (vnx8hi)
+TEST_TYPE (vnx16qi)
/* { dg-final { scan-assembler-times {\tsub\tx[0-9]+, x0, #288\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
index a0ced0d9be4..78cfc7a9bd8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c
@@ -1,11 +1,11 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */
void
-f (unsigned int *restrict a, unsigned char *restrict b, int n)
+f (unsigned int *restrict a, signed char *restrict b, signed char mask, int n)
{
for (int i = 0; i < n; ++i)
- a[i] += b[i];
+ a[i] += (signed char) (b[i] | mask);
}
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
index 00731d995c8..51732b03784 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c
@@ -1,12 +1,7 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */
-void
-f (unsigned int *restrict a, unsigned char *restrict b, int n)
-{
- for (int i = 0; i < n; ++i)
- a[i] += b[i];
-}
+#include "sve_load_const_offset_2.c"
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
index 9163702db1d..f1c37d388f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c
@@ -3,65 +3,65 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__ ((vector_size (32)));
-typedef int32_t v8si __attribute__ ((vector_size (32)));
-typedef int16_t v16hi __attribute__ ((vector_size (32)));
-typedef int8_t v32qi __attribute__ ((vector_size (32)));
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
void sve_load_64_u_lsl (uint64_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v4di *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
}
void sve_load_64_s_lsl (int64_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v4di *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
}
void sve_load_32_u_lsl (uint32_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v8si *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
}
void sve_load_32_s_lsl (int32_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v8si *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
}
void sve_load_16_z_lsl (uint16_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v16hi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
}
void sve_load_16_s_lsl (int16_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v16hi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
}
void sve_load_8_z (uint8_t *a)
{
register unsigned long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v32qi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
}
void sve_load_8_s (int8_t *a)
{
register long i asm("x1");
asm volatile ("" : "=r" (i));
- asm volatile ("" :: "w" (*(v32qi *)&a[i]));
+ asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
}
/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
index 2d11a221e93..0f918a4155f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c
@@ -10,7 +10,10 @@
{ \
TYPE a[N]; \
for (int i = 0; i < N; ++i) \
- a[i] = i * i + i % 5; \
+ { \
+ a[i] = i * i + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
test_##TYPE##_##NAME (a, BASE, N); \
for (int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (TEST_LOOP)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
index ccb20b4191f..551b451495d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
/* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
index 4d47bce14fd..469e3c670d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c
@@ -1,37 +1,52 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
#include <stdint.h>
-#define INVALID_INDEX(TYPE) ((TYPE) 107)
-#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE))
-#define ODD(VAL) (VAL & 0x1)
-
-/* TODO: This is a bit ugly for floating point types as it involves FP<>INT
- conversions, but I can't find another way of auto-vectorizing the code to
- make use of SVE gather instructions. */
-#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
-void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\
- LOOKUPTYPE *__restrict lookup,\
- INDEXTYPE *__restrict index, int n)\
-{\
- int i;\
- for (i = 0; i < n; ++i)\
- {\
- INDEXTYPE x = index[i];\
- if (IS_VALID_INDEX (INDEXTYPE, x))\
- x = lookup[x];\
- out[i] = x;\
- }\
-}\
-
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int32_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int64_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t)
-DEF_MASK_GATHER_LOAD (float, float, int32_t)
-DEF_MASK_GATHER_LOAD (double, double, int64_t)
-
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 1 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 2 } } */
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c
deleted file mode 100644
index 89ccf3e35a4..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_mask_gather_load_1.c"
-
-#include <stdio.h>
-
-extern void abort ();
-
-/* TODO: Support widening forms of gather loads and test them here. */
-
-#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE))
-
-#define INDEX_VEC_INIT(INDEXTYPE)\
- INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (INDEXTYPE)];\
-
-#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (LOOKUPTYPE)];\
- OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (INDEXTYPE); i++)\
- {\
- lookup_##LOOKUPTYPE [i] = i * 2;\
- index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\
- }\
- }
-
-#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\
- (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\
- NUM_ELEMS (INDEXTYPE));\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\
- break;\
- else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (INDEXTYPE))\
- break;\
- }\
- if (i < NUM_ELEMS (OUTTYPE))\
- {\
- fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\
- i, (int) out_##OUTTYPE[i]);\
- abort ();\
- }\
- }
-
-int main()
-{
- INDEX_VEC_INIT (int32_t)
- INDEX_VEC_INIT (int64_t)
- INDEX_VEC_INIT (uint32_t)
- INDEX_VEC_INIT (uint64_t)
-
- VEC_INIT (int32_t, int32_t, int32_t)
- VEC_INIT (int64_t, int64_t, int64_t)
- VEC_INIT (uint32_t, uint32_t, uint32_t)
- VEC_INIT (uint64_t, uint64_t, uint64_t)
- VEC_INIT (float, float, int32_t)
- VEC_INIT (double, double, int64_t)
-
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int32_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int64_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t)
- TEST_MASK_GATHER_LOAD (float, float, int32_t)
- TEST_MASK_GATHER_LOAD (double, double, int64_t)
-
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
index 48db58ffefd..8dd48462b51 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
@@ -1,60 +1,19 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
-#include <stdint.h>
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-#define NUM_ELEMS(TYPE) (4 * (32 / sizeof (TYPE)))
-#define INVALID_INDEX(TYPE) ((TYPE) 107)
-#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE))
+#include "sve_mask_gather_load_1.c"
-/* TODO: This is a bit ugly for floating point types as it involves FP<>INT
- conversions, but I can't find another way of auto-vectorizing the code to
- make use of SVE gather instructions. */
-#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
-void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\
- LOOKUPTYPE *__restrict lookup,\
- INDEXTYPE *__restrict index, INDEXTYPE n)\
-{\
- INDEXTYPE i;\
- for (i = 0; i < n; ++i)\
- {\
- LOOKUPTYPE x = index[i];\
- if (IS_VALID_INDEX (LOOKUPTYPE, x))\
- x = lookup[x];\
- out[i] = x;\
- }\
-}\
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int8_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int8_t)
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int32_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t)
-
-/* At present we only use predicate unpacks when the index type is
- half the size of the result type. */
-/* { dg-final { scan-assembler-times "\tpunpklo\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-/* { dg-final { scan-assembler-times "\tpunpkhi\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 28 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c
deleted file mode 100644
index c5280546206..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_mask_gather_load_2.c"
-
-#include <stdio.h>
-
-extern void abort ();
-
-#define ODD(VAL) (VAL & 0x1)
-#define INDEX_VEC_INIT(INDEXTYPE)\
- INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (int8_t)];\
-
-#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (OUTTYPE)];\
- OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- lookup_##LOOKUPTYPE [i] = i * 2;\
- index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\
- }\
- }
-
-#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
- fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\
- (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\
- NUM_ELEMS (OUTTYPE));\
- {\
- int i;\
- for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\
- {\
- if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\
- break;\
- else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (OUTTYPE))\
- break;\
- }\
- if (i < NUM_ELEMS (OUTTYPE))\
- {\
- fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\
- i, (int) out_##OUTTYPE[i]);\
- abort ();\
- }\
- }
-
-int main()
-{
- INDEX_VEC_INIT (int8_t)
- INDEX_VEC_INIT (int16_t)
- INDEX_VEC_INIT (int32_t)
- INDEX_VEC_INIT (uint8_t)
- INDEX_VEC_INIT (uint16_t)
- INDEX_VEC_INIT (uint32_t)
-
- {
- VEC_INIT (int32_t, int32_t, int8_t)
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int8_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int8_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int8_t)
- }
- {
- VEC_INIT (int32_t, int32_t, int16_t)
- TEST_MASK_GATHER_LOAD (int32_t, int32_t, int16_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int16_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int16_t)
- }
- {
- VEC_INIT (int64_t, int64_t, int32_t)
- TEST_MASK_GATHER_LOAD (int64_t, int64_t, int32_t)
- }
- {
- VEC_INIT (uint32_t, uint32_t, uint8_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint8_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t)
- }
- {
- VEC_INIT (uint32_t, uint32_t, uint16_t)
- TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint16_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t)
- }
- {
- VEC_INIT (uint64_t, uint64_t, uint32_t)
- TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t)
- }
-
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
index 2965760e058..b370f532f2c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c
@@ -1,29 +1,52 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- DATA_TYPE __attribute__ ((noinline)) \
- NAME (char *data, INDEX_TYPE *indices, signed char n) \
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \
{ \
- DATA_TYPE sum = 0; \
- for (signed char i = 0; i < n; ++i) \
- { \
- INDEX_TYPE index = indices[i]; \
- sum += (index & 16 ? *(DATA_TYPE *) (data + index) : 1); \
- } \
- return sum; \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \
}
-TEST_LOOP (f_s32, int, unsigned int)
-TEST_LOOP (f_u32, unsigned int, unsigned int)
-TEST_LOOP (f_f32, float, unsigned int)
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
-TEST_LOOP (f_s64_s64, long, long)
-TEST_LOOP (f_s64_u64, long, unsigned long)
-TEST_LOOP (f_u64_s64, unsigned long, long)
-TEST_LOOP (f_u64_u64, unsigned long, unsigned long)
-TEST_LOOP (f_f64_s64, double, long)
-TEST_LOOP (f_f64_u64, double, unsigned long)
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 3 } } */
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c
deleted file mode 100644
index aa73c81ffca..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_mask_gather_load_3.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \
- { \
- INDEX_TYPE indices[N]; \
- DATA_TYPE data[N * 2]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- DATA_TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \
- j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \
- if (j & 16) \
- sum += data[j / sizeof (DATA_TYPE)]; \
- else \
- sum += 1; \
- indices[i] = j; \
- } \
- DATA_TYPE res = NAME ((char *) data, indices, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s32, int, unsigned int)
- TEST_LOOP (f_u32, unsigned int, unsigned int)
- TEST_LOOP (f_f32, float, unsigned int)
-
- TEST_LOOP (f_s64_s64, long, long)
- TEST_LOOP (f_s64_u64, long, unsigned long)
- TEST_LOOP (f_u64_s64, unsigned long, long)
- TEST_LOOP (f_u64_u64, unsigned long, unsigned long)
- TEST_LOOP (f_f64_s64, double, long)
- TEST_LOOP (f_f64_u64, double, unsigned long)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
index 38bb5275e59..0464e9343a3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c
@@ -1,18 +1,19 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
-#define TEST_LOOP(NAME, TYPE) \
- TYPE __attribute__ ((noinline)) \
- NAME (TYPE **indices, long *mask, int n) \
- { \
- TYPE sum = 0; \
- for (int i = 0; i < n; ++i) \
- sum += mask[i] ? *indices[i] : 1; \
- return sum; \
- }
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-TEST_LOOP (f_s64, long)
-TEST_LOOP (f_u64, unsigned long)
-TEST_LOOP (f_f64, double)
+#include "sve_mask_gather_load_3.c"
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c
deleted file mode 100644
index 8a6320a002c..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
-
-#include "sve_mask_gather_load_4.c"
-
-extern void abort (void);
-
-#define N 57
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE *ptrs[N]; \
- TYPE data[N * 2]; \
- long mask[N]; \
- for (int i = 0; i < N * 2; ++i) \
- data[i] = (i / 2) * 4 + i % 2; \
- TYPE sum = 0; \
- for (int i = 0; i < N; ++i) \
- { \
- mask[i] = i & 15; \
- ptrs[i] = &data[i * 3 / 2]; \
- sum += mask[i] ? *ptrs[i] : 1; \
- } \
- TYPE res = NAME (ptrs, mask, N); \
- if (res != sum) \
- abort (); \
- }
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_LOOP (f_s64, long)
- TEST_LOOP (f_u64, unsigned long)
- TEST_LOOP (f_f64, double)
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
index abb38e40f72..831d594654a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c
@@ -1,120 +1,38 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-
-#define MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mgather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- dst[i] = src[i * STRIDE];\
-}
-
-#define MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * stride])\
- dst[i] = src[i * stride];\
-}
-
-#define MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\
- OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\
- MASKTYPE * restrict masks, STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- {\
- d1[i] = src[0 + (i * STRIDE)];\
- d2[i] = src[1 + (i * STRIDE)];\
- d3[i] = src[2 + (i * STRIDE)];\
- d4[i] = src[3 + (i * STRIDE)];\
- d5[i] = src[4 + (i * STRIDE)];\
- }\
-}
-
-#define MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mgather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * STRIDE])\
- *dst = *src;\
- dst += 1;\
- src += STRIDE;\
- }\
-}
-
-#define MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mgather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * stride])\
- *dst = *src;\
- dst += 1;\
- src += stride;\
- }\
-}
-
-MASK_GATHER_LOAD1 (double, long, long, 5)
-MASK_GATHER_LOAD1 (double, long, long, 8)
-MASK_GATHER_LOAD1 (double, long, long, 21)
-MASK_GATHER_LOAD1 (double, long, long, 1009)
-
-MASK_GATHER_LOAD1 (float, int, int, 5)
-MASK_GATHER_LOAD1 (float, int, int, 8)
-MASK_GATHER_LOAD1 (float, int, int, 21)
-MASK_GATHER_LOAD1 (float, int, int, 1009)
-
-MASK_GATHER_LOAD2 (double, long, long)
-MASK_GATHER_LOAD2 (float, int, int)
-
-MASK_GATHER_LOAD3 (double, long, long)
-MASK_GATHER_LOAD3 (float, int, int)
-
-MASK_GATHER_LOAD4 (double, long, long, 5)
-
-/* NOTE: We can't vectorize MASK_GATHER_LOAD4 (float, int, int, 5) because we
- can't prove that the offsets used for the gather load won't overflow. */
-
-MASK_GATHER_LOAD5 (double, long, long)
-MASK_GATHER_LOAD5 (float, int, int)
-
-/* Widened forms. */
-MASK_GATHER_LOAD1 (double, long, int, 5)
-MASK_GATHER_LOAD1 (double, long, int, 8)
-MASK_GATHER_LOAD1 (double, long, short, 5)
-MASK_GATHER_LOAD1 (double, long, short, 8)
-
-MASK_GATHER_LOAD1 (float, int, short, 5)
-MASK_GATHER_LOAD1 (float, int, short, 8)
-
-MASK_GATHER_LOAD2 (double, long, int)
-MASK_GATHER_LOAD2 (float, int, short)
-
-MASK_GATHER_LOAD4 (double, long, int, 5)
-MASK_GATHER_LOAD4 (float, int, short, 5)
-
-MASK_GATHER_LOAD5 (double, long, int)
-
-/* Loads including masks. */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 34 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 20 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 6 } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += *src[i]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t) \
+ T (DATA_TYPE, uint64_t) \
+ T (DATA_TYPE, double)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c
deleted file mode 100644
index 445c47f23ac..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_mask_gather_load_5.c"
-
-#define NUM_DST_ELEMS 13
-#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE)
-
-#define MASKED_VALUE 3
-
-#define TEST_MASK_GATHER_LOAD_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_dst[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- src[i * STRIDE] = i;\
- dst[i] = MASKED_VALUE;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- FUN##OBJTYPE##STRIDETYPE##STRIDE \
- (dst, src, masks, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_GATHER_LOAD_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_dst[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- src[i * STRIDE] = i;\
- dst[i] = MASKED_VALUE;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- FUN##OBJTYPE##STRIDETYPE \
- (dst, src, masks, STRIDE, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load1, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load2, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_SRC_ELEMS (5)];\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\
- memset (masks, 0, (NUM_SRC_ELEMS (5)) * sizeof (MASKTYPE));\
- real_dst1[0] = real_dst2[0] = real_dst3[0] = real_dst4[0] = real_dst5[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst1 = &real_dst1[1];\
- OBJTYPE *dst2 = &real_dst2[1];\
- OBJTYPE *dst3 = &real_dst3[1];\
- OBJTYPE *dst4 = &real_dst4[1];\
- OBJTYPE *dst5 = &real_dst5[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\
- src[i] = i;\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- dst1[i] = MASKED_VALUE;\
- dst2[i] = MASKED_VALUE;\
- dst3[i] = MASKED_VALUE;\
- dst4[i] = MASKED_VALUE;\
- dst5[i] = MASKED_VALUE;\
- masks[i * 5] = i & 0x1;\
- }\
- mgather_load3s5##OBJTYPE##STRIDETYPE \
- (dst1, dst2, dst3, dst4, dst5, src, masks, NUM_DST_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst1[i] != (masks[base] ? base : MASKED_VALUE))\
- abort ();\
- if (dst2[i] != (masks[base] ? (base + 1) : MASKED_VALUE))\
- abort ();\
- if (dst3[i] != (masks[base] ? (base + 2) : MASKED_VALUE))\
- abort ();\
- if (dst4[i] != (masks[base] ? (base + 3) : MASKED_VALUE))\
- abort ();\
- if (dst5[i] != (masks[base] ? (base + 4) : MASKED_VALUE))\
- abort ();\
- }\
-}
-
-#define TEST_MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load4, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load5, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-int main ()
-{
- TEST_MASK_GATHER_LOAD1 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD1 (double, long, long, 8);
- TEST_MASK_GATHER_LOAD1 (double, long, long, 21);
-
- TEST_MASK_GATHER_LOAD1 (float, int, int, 5);
- TEST_MASK_GATHER_LOAD1 (float, int, int, 8);
- TEST_MASK_GATHER_LOAD1 (float, int, int, 21);
-
- TEST_MASK_GATHER_LOAD2 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD2 (double, long, long, 8);
- TEST_MASK_GATHER_LOAD2 (double, long, long, 21);
-
- TEST_MASK_GATHER_LOAD3 (double, long, long);
- TEST_MASK_GATHER_LOAD3 (float, int, int);
-
- TEST_MASK_GATHER_LOAD4 (double, long, long, 5);
-
- TEST_MASK_GATHER_LOAD5 (double, long, long, 5);
- TEST_MASK_GATHER_LOAD5 (float, int, int, 5);
-
- /* Widened forms. */
- TEST_MASK_GATHER_LOAD1 (double, long, int, 5)
- TEST_MASK_GATHER_LOAD1 (double, long, int, 8)
- TEST_MASK_GATHER_LOAD1 (double, long, short, 5)
- TEST_MASK_GATHER_LOAD1 (double, long, short, 8)
-
- TEST_MASK_GATHER_LOAD1 (float, int, short, 5)
- TEST_MASK_GATHER_LOAD1 (float, int, short, 8)
-
- TEST_MASK_GATHER_LOAD2 (double, long, int, 5);
- TEST_MASK_GATHER_LOAD2 (double, long, int, 8);
- TEST_MASK_GATHER_LOAD2 (double, long, int, 21);
-
- TEST_MASK_GATHER_LOAD4 (double, long, int, 5);
- TEST_MASK_GATHER_LOAD4 (float, int, short, 5);
-
- TEST_MASK_GATHER_LOAD5 (double, long, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c
new file mode 100644
index 00000000000..64eb0c46278
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c
@@ -0,0 +1,38 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, int32_t) \
+ T (DATA_TYPE, uint64_t, int32_t) \
+ T (DATA_TYPE, double, int32_t) \
+ T (DATA_TYPE, int64_t, uint32_t) \
+ T (DATA_TYPE, uint64_t, uint32_t) \
+ T (DATA_TYPE, double, uint32_t)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 18 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c
new file mode 100644
index 00000000000..4a8b38e13af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c
@@ -0,0 +1,53 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[i] += src[indices[i]]; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int16_t, int32_t) \
+ T (DATA_TYPE, uint16_t, int32_t) \
+ T (DATA_TYPE, _Float16, int32_t) \
+ T (DATA_TYPE, int16_t, uint32_t) \
+ T (DATA_TYPE, uint16_t, uint32_t) \
+ T (DATA_TYPE, _Float16, uint32_t)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, int64_t) \
+ T (DATA_TYPE, uint32_t, int64_t) \
+ T (DATA_TYPE, float, int64_t) \
+ T (DATA_TYPE, int32_t, uint64_t) \
+ T (DATA_TYPE, uint32_t, uint64_t) \
+ T (DATA_TYPE, float, uint64_t)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 18 } } */
+
+/* Also used for the TEST32 indices. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 36 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
index a7f2995a6cd..562bdb720de 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
@@ -1,124 +1,51 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-
-#define MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mscatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- dst[i * STRIDE] = src[i];\
-}
-
-#define MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * stride])\
- dst[i * stride] = src[i];\
-}
-
-#define MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
- OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
- MASKTYPE * restrict masks, STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- if (masks[i * STRIDE])\
- {\
- dst[0 + (i * STRIDE)] = s1[i];\
- dst[1 + (i * STRIDE)] = s2[i];\
- dst[2 + (i * STRIDE)] = s3[i];\
- dst[3 + (i * STRIDE)] = s4[i];\
- dst[4 + (i * STRIDE)] = s5[i];\
- }\
-}
-
-#define MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-void mscatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * STRIDE])\
- *dst = *src;\
- dst += STRIDE;\
- src += 1;\
- }\
-}
-
-#define MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE)\
-void mscatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- MASKTYPE * restrict masks,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- if (masks[i * stride])\
- *dst = *src;\
- dst += stride;\
- src += 1;\
- }\
-}
-
-MASK_SCATTER_STORE1 (double, long, long, 5)
-MASK_SCATTER_STORE1 (double, long, long, 8)
-MASK_SCATTER_STORE1 (double, long, long, 21)
-MASK_SCATTER_STORE1 (double, long, long, 1009)
-
-MASK_SCATTER_STORE1 (float, int, int, 5)
-
-MASK_SCATTER_STORE1 (float, int, int, 8)
-MASK_SCATTER_STORE1 (float, int, int, 21)
-MASK_SCATTER_STORE1 (float, int, int, 1009)
-
-MASK_SCATTER_STORE2 (double, long, long)
-MASK_SCATTER_STORE2 (float, int, int)
-
-MASK_SCATTER_STORE3 (double, long, long)
-MASK_SCATTER_STORE3 (float, int, int)
-
-MASK_SCATTER_STORE4 (double, long, long, 5)
-/* NOTE: We can't vectorize MASK_SCATTER_STORE4 (float, int, int, 3) because we
- can't prove that the offsets used for the gather load won't overflow. */
-
-MASK_SCATTER_STORE5 (double, long, long)
-MASK_SCATTER_STORE5 (float, int, int)
-
-/* Widened forms. */
-MASK_SCATTER_STORE1 (double, long, int, 5)
-MASK_SCATTER_STORE1 (double, long, int, 8)
-MASK_SCATTER_STORE1 (double, long, short, 5)
-MASK_SCATTER_STORE1 (double, long, short, 8)
-
-MASK_SCATTER_STORE1 (float, int, short, 5)
-MASK_SCATTER_STORE1 (float, int, short, 8)
-
-MASK_SCATTER_STORE2 (double, long, int)
-MASK_SCATTER_STORE2 (float, int, short)
-
-MASK_SCATTER_STORE4 (double, long, int, 5)
-MASK_SCATTER_STORE4 (float, int, short, 5)
-
-MASK_SCATTER_STORE5 (double, long, int)
-
-/* Gather loads are for the masks. */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 15 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 8 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
-
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \
+ void \
+ f_##DATA_TYPE##_##CMP_TYPE \
+ (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ CMP_TYPE *restrict cmp1, CMP_TYPE *restrict cmp2, \
+ INDEX##BITS *restrict indices, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ if (cmp1[i] == cmp2[i]) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST32(T, DATA_TYPE) \
+ T (DATA_TYPE, int32_t, 32) \
+ T (DATA_TYPE, uint32_t, 32) \
+ T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE) \
+ T (DATA_TYPE, int64_t, 64) \
+ T (DATA_TYPE, uint64_t, 64) \
+ T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T) \
+ TEST32 (T, int32_t) \
+ TEST32 (T, uint32_t) \
+ TEST32 (T, float) \
+ TEST64 (T, int64_t) \
+ TEST64 (T, uint64_t) \
+ TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c
deleted file mode 100644
index 3222d420763..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-#include <stdio.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_mask_scatter_store_1.c"
-
-#define NUM_SRC_ELEMS 13
-#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE)
-
-#define MASKED_VALUE 3
-
-#define TEST_MASK_SCATTER_STORE_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\
- memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_src[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- src[i] = i;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\
- dst[i] = MASKED_VALUE;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, masks, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_SCATTER_STORE_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\
- memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\
- real_src[0] = 0;\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- src[i] = i;\
- masks[i * STRIDE] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\
- dst[i] = MASKED_VALUE;\
- FUN##OBJTYPE##STRIDETYPE (dst, src, masks, STRIDE, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\
- abort ();\
-}
-
-#define TEST_MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store1, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store2, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\
- __attribute__((aligned (32)));\
- MASKTYPE masks[NUM_DST_ELEMS (5)];\
- memset (masks, 0, (NUM_DST_ELEMS (5)) * sizeof (MASKTYPE));\
- real_src1[0] = real_src2[0] = real_src3[0] = real_src4[0] = real_src5[0] = 0;\
- OBJTYPE *src1 = &real_src1[1];\
- OBJTYPE *src2 = &real_src2[1];\
- OBJTYPE *src3 = &real_src3[1];\
- OBJTYPE *src4 = &real_src4[1];\
- OBJTYPE *src5 = &real_src5[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- src1[i] = base;\
- src2[i] = base + 1;\
- src3[i] = base + 2;\
- src4[i] = base + 3;\
- src5[i] = base + 4;\
- masks[i * 5] = i & 0x1;\
- }\
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\
- dst[i] = MASKED_VALUE;\
- mscatter_store3s5##OBJTYPE##STRIDETYPE \
- (dst, src1, src2, src3, src4, src5, masks, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- if (dst[base] != (masks[i * 5] ? base : MASKED_VALUE))\
- abort ();\
- if (dst[base + 1] != (masks[i * 5] ? (base + 1) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 2] != (masks[i * 5] ? (base + 2) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 3] != (masks[i * 5] ? (base + 3) : MASKED_VALUE))\
- abort ();\
- if (dst[base + 4] != (masks[i * 5] ? (base + 4) : MASKED_VALUE))\
- abort ();\
- }\
-}
-
-#define TEST_MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store4, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-#define TEST_MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \
- TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store5, OBJTYPE, MASKTYPE, \
- STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_MASK_SCATTER_STORE1 (double, long, long, 5);
-
- TEST_MASK_SCATTER_STORE1 (double, long, long, 8);
- TEST_MASK_SCATTER_STORE1 (double, long, long, 21);
-
- TEST_MASK_SCATTER_STORE1 (float, int, int, 5);
- TEST_MASK_SCATTER_STORE1 (float, int, int, 8);
- TEST_MASK_SCATTER_STORE1 (float, int, int, 21);
-
- TEST_MASK_SCATTER_STORE2 (double, long, long, 5);
- TEST_MASK_SCATTER_STORE2 (double, long, long, 8);
- TEST_MASK_SCATTER_STORE2 (double, long, long, 21);
-
- TEST_MASK_SCATTER_STORE2 (float, int, int, 5);
- TEST_MASK_SCATTER_STORE2 (float, int, int, 8);
- TEST_MASK_SCATTER_STORE2 (float, int, int, 21);
-
- TEST_MASK_SCATTER_STORE3 (double, long, long);
- TEST_MASK_SCATTER_STORE3 (float, int, int);
-
- TEST_MASK_SCATTER_STORE4 (double, long, long, 5);
-
- TEST_MASK_SCATTER_STORE5 (double, long, long, 5);
- TEST_MASK_SCATTER_STORE5 (float, int, int, 5);
-
- /* Widened forms. */
- TEST_MASK_SCATTER_STORE1 (double, long, int, 5)
- TEST_MASK_SCATTER_STORE1 (double, long, int, 8)
- TEST_MASK_SCATTER_STORE1 (double, long, short, 5)
- TEST_MASK_SCATTER_STORE1 (double, long, short, 8)
-
- TEST_MASK_SCATTER_STORE1 (float, int, short, 5)
- TEST_MASK_SCATTER_STORE1 (float, int, short, 8)
-
- TEST_MASK_SCATTER_STORE2 (double, long, int, 5);
- TEST_MASK_SCATTER_STORE2 (double, long, int, 8);
- TEST_MASK_SCATTER_STORE2 (double, long, int, 21);
-
- TEST_MASK_SCATTER_STORE2 (float, int, short, 5);
- TEST_MASK_SCATTER_STORE2 (float, int, short, 8);
- TEST_MASK_SCATTER_STORE2 (float, int, short, 21);
-
- TEST_MASK_SCATTER_STORE4 (double, long, int, 5);
- TEST_MASK_SCATTER_STORE4 (float, int, short, 5);
-
- TEST_MASK_SCATTER_STORE5 (double, long, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c
new file mode 100644
index 00000000000..c0f291673dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_mask_scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
index 4a6247db978..9eff539c1d8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
index 626b78c29e1..72086145290 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_1.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 2; ++i) \
in[i] = i * 9 / 2; \
@@ -27,11 +26,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
index 0004e673d49..fe69b96e35a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -30,6 +30,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -46,10 +47,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
index 86219b4a191..a9784676efb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_2.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 3; ++i) \
in[i] = i * 9 / 2; \
@@ -29,11 +28,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
index 5f784e7dd36..b8bdd51459f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -31,6 +31,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -47,10 +48,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
index 51bd38e2890..f168d656af9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_load_3.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,6 +15,7 @@ volatile int x;
{ \
out[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 4; ++i) \
in[i] = i * 9 / 2; \
@@ -30,11 +29,11 @@ volatile int x;
OUTTYPE if_false = i * 7 / 2; \
if (out[i] != (mask[i] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
index 6608558d3ff..2b319229d1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
index 003cf650d7d..a81c647004f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -44,10 +45,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
Out 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
index a6161f31536..b6e3f55d7e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
index 75a3e43f267..da97e2795a9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
index e87ad0bc074..c3884b0b074 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -28,6 +28,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
index 966968d4b91..9af479f478d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c
@@ -2,16 +2,19 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 2] = src[i]; \
- dest[i * 2 + 1] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 2] = value; \
+ dest[i * 2 + 1] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -31,6 +34,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -47,10 +51,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
index fd48a4c96f9..f472e1da01d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_1.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 2; ++i) \
out[i] = i * 9 / 2; \
- NAME##_2 (out, in, mask, N); \
+ NAME##_2 (out, in, mask, 17, N); \
for (int i = 0; i < N * 2; ++i) \
{ \
- OUTTYPE if_true = in[i / 2]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 2] + 17); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 2] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
index 5359c6a457a..b817a095abe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c
@@ -2,17 +2,20 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 3] = src[i]; \
- dest[i * 3 + 1] = src[i]; \
- dest[i * 3 + 2] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 3] = value; \
+ dest[i * 3 + 1] = value; \
+ dest[i * 3 + 2] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -32,6 +35,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -48,10 +52,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for _Float16)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
index f8845ebd7ec..c1771d52298 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_2.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 3; ++i) \
out[i] = i * 9 / 2; \
- NAME##_3 (out, in, mask, N); \
+ NAME##_3 (out, in, mask, 11, N); \
for (int i = 0; i < N * 3; ++i) \
{ \
- OUTTYPE if_true = in[i / 3]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 3] + 11); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 3] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
index cc614847e7e..d604bd77efe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c
@@ -1,19 +1,22 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
- MASKTYPE *__restrict cond, int n) \
+ MASKTYPE *__restrict cond, INTYPE bias, int n) \
{ \
for (int i = 0; i < n; ++i) \
- if (cond[i]) \
- { \
- dest[i * 4] = src[i]; \
- dest[i * 4 + 1] = src[i]; \
- dest[i * 4 + 2] = src[i]; \
- dest[i * 4 + 3] = src[i]; \
- } \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 4] = value; \
+ dest[i * 4 + 1] = value; \
+ dest[i * 4 + 2] = value; \
+ dest[i * 4 + 3] = value; \
+ } \
+ } \
}
#define TEST2(NAME, OUTTYPE, INTYPE) \
@@ -33,6 +36,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
@@ -49,10 +53,10 @@ TEST (test)
/* Mask | 8 16 32 64
-------+------------
In 8 | 2 2 2 2
- 16 | 2 1 1 1
+ 16 | 2 1 1 1 x2 (for half float)
32 | 2 1 1 1
64 | 2 1 1 1. */
-/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 23 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 28 } } */
/* Mask | 8 16 32 64
-------+------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
index f845818fa4d..cbac3da9db2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#include "sve_mask_struct_store_3.c"
#define N 100
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
{ \
@@ -17,21 +15,22 @@ volatile int x;
{ \
in[i] = i * 7 / 2; \
mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
} \
for (int i = 0; i < N * 4; ++i) \
out[i] = i * 9 / 2; \
- NAME##_4 (out, in, mask, N); \
+ NAME##_4 (out, in, mask, 42, N); \
for (int i = 0; i < N * 4; ++i) \
{ \
- OUTTYPE if_true = in[i / 4]; \
+ OUTTYPE if_true = (INTYPE) (in[i / 4] + 42); \
OUTTYPE if_false = i * 9 / 2; \
if (out[i] != (mask[i / 4] ? if_true : if_false)) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
index ac2df82c539..9b4e75554f9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c
@@ -2,7 +2,7 @@
/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, int n) \
{ \
@@ -32,6 +32,7 @@
TEST1 (NAME##_i16, unsigned short) \
TEST1 (NAME##_i32, int) \
TEST1 (NAME##_i64, unsigned long) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
index a4d705e38ba..a2e671de3d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
index b7cc1dba087..fb4454a1426 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
index a38375af017..756263253c0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c
@@ -3,9 +3,9 @@
void sve_copy_rr (void)
{
- typedef int v8si __attribute__((vector_size(32)));
- register v8si x asm ("z1");
- register v8si y asm ("z2");
+ typedef int vnx4si __attribute__((vector_size(32)));
+ register vnx4si x asm ("z1");
+ register vnx4si y asm ("z2");
asm volatile ("#foo" : "=w" (x));
y = x;
asm volatile ("#foo" :: "w" (y));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
index fc05837a920..38aab512376 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
#define DO_OP(TYPE) \
void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
@@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \
*x = dst; \
}
-DO_OP (v32qi)
-DO_OP (v16hi)
-DO_OP (v8si)
-DO_OP (v4di)
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
/* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
/* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
index 8f50308ebd5..a87fdd2aed2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c
@@ -1,36 +1,39 @@
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-#define TEST(NAME, TYPE, ITYPE) \
+#include <stdint.h>
+
+#define TEST(NAME, TYPE) \
void \
- NAME##1 (TYPE *x, ITYPE n) \
+ NAME##1 (TYPE *x, int n) \
{ \
- for (ITYPE i = 0; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
x[i] += 1; \
} \
TYPE NAME##_array[1024]; \
void \
NAME##2 (void) \
{ \
- for (ITYPE i = 1; i < 200; ++i) \
+ for (int i = 1; i < 200; ++i) \
NAME##_array[i] += 1; \
}
-TEST (sc, signed char, unsigned char)
-TEST (uc, unsigned char, unsigned char)
-TEST (ss, signed short, unsigned short)
-TEST (us, unsigned short, signed short)
-TEST (si, signed int, signed int)
-TEST (ui, unsigned int, unsigned int)
-TEST (sl, signed long, unsigned long)
-TEST (ul, unsigned long, signed long)
-TEST (f, float, int)
-TEST (d, double, long)
+TEST (s8, int8_t)
+TEST (u8, uint8_t)
+TEST (s16, int16_t)
+TEST (u16, uint16_t)
+TEST (s32, int32_t)
+TEST (u32, uint32_t)
+TEST (s64, int64_t)
+TEST (u64, uint64_t)
+TEST (f16, _Float16)
+TEST (f32, float)
+TEST (f64, double)
/* No scalar memory accesses. */
/* { dg-final { scan-assembler-not {[wx][0-9]*, \[} } } */
/* 2 for each NAME##1 test, one in the header and one in the main loop
and 1 for each NAME##2 test, in the main loop only. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 6 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 9 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 9 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
index a39f8241f46..23b1b2a51e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c
@@ -9,7 +9,7 @@
int x[N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (void)
{
unsigned int v = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
index 1ebaeea2bb9..6ed98ec075c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_1.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
foo ();
@@ -14,7 +12,7 @@ main (void)
{
if (x[i] != (i < START || i >= END ? 0 : (i - START) * 5))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
index 9ef8c7f85e4..af1a5aaa0ec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c
@@ -9,7 +9,7 @@
int x[N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (void)
{
for (unsigned int i = START; i < END; ++i)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
index b3e56bbbb7c..5565c32a888 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_2.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
foo ();
@@ -14,7 +12,7 @@ main (void)
{
if (x[i] != (i < START || i >= END ? 0 : i))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
index 97a29f18361..a2602e781a1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c
@@ -9,7 +9,7 @@
int x[MAX_START][N] __attribute__((aligned(32)));
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (int start)
{
for (int i = start; i < start + COUNT; ++i)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
index 9851c1cce64..ee8061a1163 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */
-/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_3.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
for (int start = 0; start < MAX_START; ++start)
@@ -16,7 +14,7 @@ main (void)
{
if (x[start][i] != (i < start || i >= start + COUNT ? 0 : i))
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
}
return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
index e5c55877341..6ab089522fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c
@@ -6,7 +6,7 @@
#define START 1
#define END 505
-void __attribute__((weak))
+void __attribute__((noinline, noclone))
foo (double *x)
{
double v = 10.0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
index 60be4a038de..3764457ffcc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c
@@ -1,17 +1,18 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -fno-vect-cost-model" } */
-/* { dg-options "-Ofast -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" { target aarch64_sve256_hw } } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx" } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
#include "sve_peel_ind_4.c"
-volatile int y;
-
-int
+int __attribute__ ((optimize (1)))
main (void)
{
double x[END + 1];
for (int i = 0; i < END + 1; ++i)
- x[i] = i;
+ {
+ x[i] = i;
+ asm volatile ("" ::: "memory");
+ }
foo (x);
for (int i = 0; i < END + 1; ++i)
{
@@ -22,7 +23,7 @@ main (void)
expected = 10 + (i - START) * 5;
if (x[i] != expected)
__builtin_abort ();
- y++;
+ asm volatile ("" ::: "memory");
}
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C
deleted file mode 100644
index 53e10bcea01..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <math.h>
-
-#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3)
-
-#define DEF_REDUC_PLUS(TYPE)\
-TYPE reduc_plus_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- TYPE r = 0, q = 3;\
- for (int i = 0; i < NUM_ELEMS(TYPE); i++)\
- {\
- r += a[i];\
- q -= b[i];\
- }\
- return r * q;\
-}\
-
-DEF_REDUC_PLUS (float)
-DEF_REDUC_PLUS (double)
-
-#define DEF_REDUC_MAXMIN(TYPE,FUN)\
-TYPE reduc_##FUN (TYPE *__restrict__ a, TYPE *__restrict__ b)\
-{\
- TYPE r = a[0], q = b[0];\
- for (int i = 0; i < NUM_ELEMS(TYPE); i++)\
- {\
- r = FUN (a[i], r);\
- q = FUN (b[i], q);\
- }\
- return r * q;\
-}\
-
-DEF_REDUC_MAXMIN (float, fmaxf)
-DEF_REDUC_MAXMIN (double, fmax)
-DEF_REDUC_MAXMIN (float, fminf)
-DEF_REDUC_MAXMIN (double, fmin)
-
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */
-
-/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c
new file mode 100644
index 00000000000..eb3e7e656d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define NUM_ELEMS(TYPE) ((int)(5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *a, TYPE *b) \
+ { \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS(TYPE); i++) \
+ { \
+ r += a[i]; \
+ q -= b[i]; \
+ } \
+ return r * q; \
+ }
+
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C
deleted file mode 100644
index 769d25165ea..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_reduc_strict_1.C"
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-#define DEF_INIT_VECTOR(TYPE)\
- TYPE a_##TYPE[NUM_ELEMS (TYPE)];\
- TYPE b_##TYPE[NUM_ELEMS (TYPE)];\
- for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
- {\
- a_##TYPE[i] = (i * 2) * (i & 1 ? 1 : -1);\
- b_##TYPE[i] = (i * 3) * (i & 1 ? 1 : -1);\
- }
-
-#define TEST_REDUC_PLUS(RES,TYPE) (RES) += reduc_plus_##TYPE (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MAX(RES,TYPE) (RES) += reduc_fmax (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MAXF(RES,TYPE) (RES) += reduc_fmaxf (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MIN(RES,TYPE) (RES) += reduc_fmin (a_##TYPE, b_##TYPE);
-#define TEST_REDUC_MINF(RES,TYPE) (RES) += reduc_fminf (a_##TYPE, b_##TYPE);
-
-int main ()
-{
- double result = 0.0;
- DEF_INIT_VECTOR (float)
- DEF_INIT_VECTOR (double)
-
- TEST_REDUC_PLUS (result, float)
- TEST_REDUC_PLUS (result, double)
-
- TEST_REDUC_MINF (result, float)
- TEST_REDUC_MIN (result, double)
-
- TEST_REDUC_MAXF (result, float)
- TEST_REDUC_MAX (result, double)
-
- if (result != double (1356996))
- {
- fprintf (stderr, "result = %1.16lf\n", result);
- abort ();
- }
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c
new file mode 100644
index 00000000000..4c810d4a337
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c
@@ -0,0 +1,29 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_reduc_strict_1.c"
+
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NUM_ELEMS (TYPE)]; \
+ TYPE b[NUM_ELEMS (TYPE)]; \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
+ b[i] = (i * 0.3) * (i & 1 ? 1 : -1); \
+ r += a[i]; \
+ q -= b[i]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ TYPE res = reduc_plus_##TYPE (a, b); \
+ if (res != r * q) \
+ __builtin_abort (); \
+ }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C
deleted file mode 100644
index 542918abeb8..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile } */
-/* FIXME: With -O3 we don't generate reductions as the compiler unrolls the outer loop
- and processes the rows in parallel, performing in order reductions on the inner loop. */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <math.h>
-
-#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3)
-
-/* TODO: Test with inner loop = n * NUM_ELEMS(TYPE). */
-#define DEF_REDUC_PLUS(TYPE)\
-void reduc_plus_##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\
-{\
- for (int i = 0; i < n; i++)\
- {\
- r[i] = 0;\
- for (int j = 0; j < NUM_ELEMS(TYPE); j++)\
- r[i] += a[i][j];\
- }\
-}\
-
-DEF_REDUC_PLUS (float)
-DEF_REDUC_PLUS (double)
-
-#define DEF_REDUC_MAXMIN(TYPE,FUN)\
-void reduc_##FUN (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\
-{\
- for (int i = 0; i < n; i++)\
- {\
- r[i] = a[i][0];\
- for (int j = 0; j < NUM_ELEMS(TYPE); j++)\
- r[i] = FUN (a[i][j], r[i]);\
- }\
-}\
-
-DEF_REDUC_MAXMIN (float, fmaxf)
-DEF_REDUC_MAXMIN (double, fmax)
-DEF_REDUC_MAXMIN (float, fminf)
-DEF_REDUC_MAXMIN (double, fmin)
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c
new file mode 100644
index 00000000000..672be8f793e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define NUM_ELEMS(TYPE) ((int) (5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \
+ TYPE *restrict r, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ { \
+ r[i] = 0; \
+ for (int j = 0; j < NUM_ELEMS(TYPE); j++) \
+ r[i] += a[i][j]; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C
deleted file mode 100644
index 86a930c7d33..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C
+++ /dev/null
@@ -1,59 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include "sve_reduc_strict_2.C"
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-#define NROWS 5
-
-#define DEF_INIT_VECTOR(TYPE)\
- TYPE mat_##TYPE[NROWS][NUM_ELEMS (TYPE)];\
- TYPE r_##TYPE[NROWS];\
- for (int i = 0; i < NROWS; i++)\
- for (int j = 0; j < NUM_ELEMS (TYPE); j++ )\
- mat_##TYPE[i][j] = i + (j * 2) * (j & 1 ? 1 : -1);\
-
-#define TEST_REDUC_PLUS(TYPE) reduc_plus_##TYPE (mat_##TYPE, r_##TYPE, NROWS);
-#define TEST_REDUC_MAXF reduc_fmaxf (mat_float, r_float, NROWS);
-#define TEST_REDUC_MAX reduc_fmax (mat_double, r_double, NROWS);
-#define TEST_REDUC_MINF reduc_fminf (mat_float, r_float, NROWS);
-#define TEST_REDUC_MIN reduc_fmin (mat_double, r_double, NROWS);
-
-#define SUM_VECTOR(RES, TYPE)\
- for (int i = 0; i < NROWS; i++)\
- (RES) += r_##TYPE[i];
-
-#define SUM_FLOAT_RESULT(RES)\
- SUM_VECTOR (RES, float);\
- SUM_VECTOR (RES, double);\
-
-int main ()
-{
- double resultF = 0.0;
- DEF_INIT_VECTOR (float)
- DEF_INIT_VECTOR (double)
-
- TEST_REDUC_PLUS (float)
- TEST_REDUC_PLUS (double)
- SUM_FLOAT_RESULT (resultF);
-
- TEST_REDUC_MAXF
- TEST_REDUC_MAX
- SUM_FLOAT_RESULT (resultF);
-
- TEST_REDUC_MINF
- TEST_REDUC_MIN
- SUM_FLOAT_RESULT (resultF);
-
- if (resultF != double (2460))
- {
- fprintf (stderr, "resultF = %1.16lf\n", resultF);
- abort ();
- }
-
- return 0;
-}
-
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c
new file mode 100644
index 00000000000..4741e6acb14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c
@@ -0,0 +1,31 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_reduc_strict_2.c"
+
+#define NROWS 5
+
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NROWS][NUM_ELEMS (TYPE)]; \
+ TYPE r[NROWS]; \
+ TYPE expected[NROWS] = {}; \
+ for (int i = 0; i < NROWS; ++i) \
+ for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \
+ { \
+ a[i][j] = (i * 0.1 + j * 0.6) * (j & 1 ? 1 : -1); \
+ expected[i] += a[i][j]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ reduc_plus_##TYPE (a, r, NROWS); \
+ for (int i = 0; i < NROWS; ++i) \
+ if (r[i] != expected[i]) \
+ __builtin_abort (); \
+ }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c
index 338aa614b47..ebed8e697c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c
@@ -1,12 +1,13 @@
/* { dg-do compile } */
-/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */
double mat[100][4];
double mat2[100][8];
double mat3[100][12];
double mat4[100][3];
-double slp_reduc_plus (int n)
+double
+slp_reduc_plus (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -19,7 +20,8 @@ double slp_reduc_plus (int n)
return tmp;
}
-double slp_reduc_plus2 (int n)
+double
+slp_reduc_plus2 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -36,7 +38,8 @@ double slp_reduc_plus2 (int n)
return tmp;
}
-double slp_reduc_plus3 (int n)
+double
+slp_reduc_plus3 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
@@ -57,7 +60,8 @@ double slp_reduc_plus3 (int n)
return tmp;
}
-void slp_non_chained_reduc (int n, double * __restrict__ out)
+void
+slp_non_chained_reduc (int n, double * restrict out)
{
for (int i = 0; i < 3; i++)
out[i] = 0;
@@ -73,7 +77,8 @@ void slp_non_chained_reduc (int n, double * __restrict__ out)
/* Strict FP reductions shouldn't be used for the outer loops, only the
inner loops. */
-float double_reduc1 (float (*__restrict__ i)[16])
+float
+double_reduc1 (float (*restrict i)[16])
{
float l = 0;
@@ -83,7 +88,8 @@ float double_reduc1 (float (*__restrict__ i)[16])
return l;
}
-float double_reduc2 (float *__restrict__ i)
+float
+double_reduc2 (float *restrict i)
{
float l = 0;
@@ -98,7 +104,8 @@ float double_reduc2 (float *__restrict__ i)
return l;
}
-float double_reduc3 (float *__restrict__ i, float *__restrict__ j)
+float
+double_reduc3 (float *restrict i, float *restrict j)
{
float k = 0, l = 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c
new file mode 100644
index 00000000000..7c4290a2dc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(TYPE, NUNITS) \
+ TYPE permute_##TYPE (TYPE values1, TYPE values2) \
+ { \
+ return __builtin_shuffle \
+ (values1, values2, \
+ ((INDEX_##NUNITS) { MASK_##NUNITS (0, NUNITS) })); \
+ }
+
+#define TEST_ALL(T) \
+ T (vnx16qi, 32) \
+ T (vnx8hi, 16) \
+ T (vnx4si, 8) \
+ T (vnx2di, 4) \
+ T (vnx8hf, 16) \
+ T (vnx4sf, 8) \
+ T (vnx2df, 4)
+
+TEST_ALL (PERMUTE)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
index 9307200fb05..709fd3b37b4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c
@@ -3,7 +3,7 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -11,7 +11,7 @@ typedef int8_t v32qi __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_32 v32qi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -22,9 +22,9 @@ typedef int8_t v32qi __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v32qi, 32, 2) \
- T (v32qi, 32, 4) \
- T (v32qi, 32, 8)
+ T (vnx16qi, 32, 2) \
+ T (vnx16qi, 32, 4) \
+ T (vnx16qi, 32, 8)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
index fb238373c4e..fe3533cf6db 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c
@@ -3,8 +3,8 @@
#include <stdint.h>
-typedef uint16_t v16hi __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef uint16_t vnx8hi __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -12,7 +12,7 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_16 v16hi
+#define INDEX_16 vnx8hi
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -23,10 +23,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v16hi, 16, 2) \
- T (v16hi, 16, 4) \
- T (v16hf, 16, 2) \
- T (v16hf, 16, 4)
+ T (vnx8hi, 16, 2) \
+ T (vnx8hi, 16, 4) \
+ T (vnx8hf, 16, 2) \
+ T (vnx8hf, 16, 4)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
index 4834e2c2b01..a6b95f52880 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c
@@ -3,14 +3,14 @@
#include <stdint.h>
-typedef uint32_t v8si __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
+typedef uint32_t vnx4si __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
-#define INDEX_8 v8si
+#define INDEX_8 vnx4si
#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \
TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \
@@ -21,8 +21,8 @@ typedef float v8sf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v8si, 8, 2) \
- T (v8sf, 8, 2)
+ T (vnx4si, 8, 2) \
+ T (vnx4sf, 8, 2)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
index 2270be2bd29..43a7e831cae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c
@@ -1,109 +1,31 @@
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE)\
-void scatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i * STRIDE] = src[i];\
-}
-
-#define SCATTER_STORE2(OBJTYPE,STRIDETYPE)\
-void scatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- dst[i * stride] = src[i];\
-}
-
-#define SCATTER_STORE3(OBJTYPE,STRIDETYPE)\
-void scatter_store3s5##OBJTYPE##STRIDETYPE\
- (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
- OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
- STRIDETYPE count)\
-{\
- const STRIDETYPE STRIDE = 5;\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- dst[0 + (i * STRIDE)] = s1[i];\
- dst[4 + (i * STRIDE)] = s5[i];\
- dst[1 + (i * STRIDE)] = s2[i];\
- dst[2 + (i * STRIDE)] = s3[i];\
- dst[3 + (i * STRIDE)] = s4[i];\
- }\
-}
-
-#define SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE)\
-void scatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += STRIDE;\
- src += 1;\
- }\
-}
-
-#define SCATTER_STORE5(OBJTYPE,STRIDETYPE)\
-void scatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
- OBJTYPE * restrict src,\
- STRIDETYPE stride,\
- STRIDETYPE count)\
-{\
- for (STRIDETYPE i=0; i<count; i++)\
- {\
- *dst = *src;\
- dst += stride;\
- src += 1;\
- }\
-}
-
-SCATTER_STORE1 (double, long, 5)
-SCATTER_STORE1 (double, long, 8)
-SCATTER_STORE1 (double, long, 21)
-SCATTER_STORE1 (double, long, 1009)
-
-SCATTER_STORE1 (float, int, 5)
-SCATTER_STORE1 (float, int, 8)
-SCATTER_STORE1 (float, int, 21)
-SCATTER_STORE1 (float, int, 1009)
-
-SCATTER_STORE2 (double, long)
-SCATTER_STORE2 (float, int)
-
-SCATTER_STORE3 (double, long)
-SCATTER_STORE3 (float, int)
-
-SCATTER_STORE4 (double, long, 5)
-/* NOTE: We can't vectorize SCATTER_STORE4 (float, int, 5) because we can't
- prove that the offsets used for the gather load won't overflow. */
-
-SCATTER_STORE5 (double, long)
-SCATTER_STORE5 (float, int)
-
-/* Widened forms. */
-SCATTER_STORE1 (double, int, 5)
-SCATTER_STORE1 (double, int, 8)
-SCATTER_STORE1 (double, short, 5)
-SCATTER_STORE1 (double, short, 8)
-
-SCATTER_STORE1 (float, short, 5)
-SCATTER_STORE1 (float, short, 8)
-
-SCATTER_STORE2 (double, int)
-SCATTER_STORE2 (float, short)
-
-SCATTER_STORE4 (double, int, 5)
-SCATTER_STORE4 (float, short, 5)
-
-SCATTER_STORE5 (double, int)
-
-/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
-/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[indices[i]] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c
deleted file mode 100644
index 4d8cddc510f..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* { dg-do run { target { aarch64_sve_hw } } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
-
-#include <unistd.h>
-
-extern void abort (void);
-extern void *memset(void *, int, size_t);
-
-#include "sve_scatter_store_1.c"
-
-#define NUM_SRC_ELEMS 13
-#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE)
-
-#define TEST_SCATTER_STORE_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- src[i] = i;\
- FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\
-{\
- OBJTYPE real_src[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\
- __attribute__((aligned (32)));\
- memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\
- OBJTYPE *src = &real_src[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- src[i] = i;\
- FUN##OBJTYPE##STRIDETYPE (dst, src, STRIDE, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- if (dst[i * STRIDE] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON1 (scatter_store1, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE2(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON2 (scatter_store2, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE3(OBJTYPE,STRIDETYPE)\
-{\
- OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\
- __attribute__((aligned (32)));\
- OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\
- __attribute__((aligned (32)));\
- memset (real_src1, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src2, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src3, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src4, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_src5, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\
- memset (real_dst, 0, (1 + NUM_DST_ELEMS (5)) * sizeof (OBJTYPE));\
- OBJTYPE *src1 = &real_src1[1];\
- OBJTYPE *src2 = &real_src2[1];\
- OBJTYPE *src3 = &real_src3[1];\
- OBJTYPE *src4 = &real_src4[1];\
- OBJTYPE *src5 = &real_src5[1];\
- OBJTYPE *dst = &real_dst[1];\
- for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\
- {\
- STRIDETYPE base = i * 5;\
- src1[i] = base;\
- src2[i] = base + 1;\
- src3[i] = base + 2;\
- src4[i] = base + 3;\
- src5[i] = base + 4;\
- }\
- scatter_store3s5##OBJTYPE##STRIDETYPE \
- (dst, src1, src2, src3, src4, src5, NUM_SRC_ELEMS); \
- for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\
- if (dst[i] != i)\
- abort ();\
-}
-
-#define TEST_SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON1 (scatter_store4, OBJTYPE, STRIDETYPE, STRIDE)
-
-#define TEST_SCATTER_STORE5(OBJTYPE,STRIDETYPE,STRIDE) \
- TEST_SCATTER_STORE_COMMON2 (scatter_store5, OBJTYPE, STRIDETYPE, STRIDE)
-
-int __attribute__ ((optimize (1)))
-main ()
-{
- TEST_SCATTER_STORE1 (double, long, 5);
- TEST_SCATTER_STORE1 (double, long, 8);
- TEST_SCATTER_STORE1 (double, long, 21);
-
- TEST_SCATTER_STORE1 (float, int, 5);
- TEST_SCATTER_STORE1 (float, int, 8);
- TEST_SCATTER_STORE1 (float, int, 21);
-
- TEST_SCATTER_STORE2 (double, long, 5);
- TEST_SCATTER_STORE2 (double, long, 8);
- TEST_SCATTER_STORE2 (double, long, 21);
-
- TEST_SCATTER_STORE2 (float, int, 5);
- TEST_SCATTER_STORE2 (float, int, 8);
- TEST_SCATTER_STORE2 (float, int, 21);
-
- TEST_SCATTER_STORE3 (double, long);
- TEST_SCATTER_STORE3 (float, int);
-
- TEST_SCATTER_STORE4 (double, long, 5);
-
- TEST_SCATTER_STORE5 (double, long, 5);
- TEST_SCATTER_STORE5 (float, int, 5);
-
- /* Widened forms. */
- TEST_SCATTER_STORE1 (double, int, 5)
- TEST_SCATTER_STORE1 (double, int, 8)
- TEST_SCATTER_STORE1 (double, short, 5)
- TEST_SCATTER_STORE1 (double, short, 8)
-
- TEST_SCATTER_STORE1 (float, short, 5)
- TEST_SCATTER_STORE1 (float, short, 8)
-
- TEST_SCATTER_STORE2 (double, int, 5);
- TEST_SCATTER_STORE2 (double, int, 8);
- TEST_SCATTER_STORE2 (double, int, 21);
-
- TEST_SCATTER_STORE2 (float, short, 5);
- TEST_SCATTER_STORE2 (float, short, 8);
- TEST_SCATTER_STORE2 (float, short, 21);
-
- TEST_SCATTER_STORE4 (double, int, 5);
- TEST_SCATTER_STORE4 (float, short, 5);
-
- TEST_SCATTER_STORE5 (double, int, 5);
-
- return 0;
-}
-
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c
new file mode 100644
index 00000000000..dcc96f07fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_scatter_store_1.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c
new file mode 100644
index 00000000000..d09c4015aa0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 32) \
+ T (uint32_t, 32) \
+ T (float, 32) \
+ T (int64_t, 64) \
+ T (uint64_t, 64) \
+ T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c
new file mode 100644
index 00000000000..c4f2dae481b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_scatter_store_3.c"
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c
new file mode 100644
index 00000000000..7b117bc0b2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c
@@ -0,0 +1,23 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
+ int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ *dest[i] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c
new file mode 100644
index 00000000000..14e68267c9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c
@@ -0,0 +1,36 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size. */
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 9; i < n; ++i) \
+ dest[(INDEX##BITS) (indices[i] | mask)] = src[i] + 1; \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c
new file mode 100644
index 00000000000..89e2d305c29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "sve_scatter_store_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
index 460359e4be3..23327a7a152 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -23,15 +23,18 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
TEST_ALL (VEC_PERM)
-/* We should use one DUP for each of the 8-, 16- and 32-bit types.
- We should use two DUPs for each of the three 64-bit types. */
+/* We should use one DUP for each of the 8-, 16- and 32-bit types,
+ although we currently use LD1RW for _Float16. We should use two
+ DUPs for each of the three 64-bit types. */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
/* { dg-final { scan-assembler-not {\tzip2\t} } } */
@@ -39,17 +42,18 @@ TEST_ALL (VEC_PERM)
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-not {\tstr} } } */
+/* { dg-final { scan-assembler-times {\tstr} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
index 7dd3640966a..0c10d934259 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
index c1aeaf9b06e..08cad65ab63 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c
@@ -47,7 +47,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
index 3db5769deed..ce6060a52df 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE1, TYPE2) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \
TYPE2 *restrict b, int n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
index c302ef6fb76..aa49952b470 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c
@@ -38,7 +38,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
index 9afe7e59ef2..77bf7b72454 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c
@@ -6,7 +6,7 @@
#define N1 (19 * 2)
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b) \
{ \
for (int i = 0; i < N1; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
index 8c854d4207c..e926de602bd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c
@@ -46,7 +46,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
index f3ecbd7adbc..ff3046e127d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
@@ -5,7 +5,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
TYPE res = 0; \
@@ -26,6 +26,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -35,7 +36,7 @@ TEST_ALL (VEC_PERM)
/* ??? We don't treat the uint loops as SLP. */
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
@@ -43,7 +44,7 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
@@ -51,6 +52,7 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-not {\tfadd\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
index 282f1ae2310..2824073cf14 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include "sve_slp_13.c"
@@ -21,7 +21,7 @@
__builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
index 6c1b38277ec..3971acde999 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[2] = { 3, 11 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, b[0], b[1], N / 2); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
index 3e71596021f..ba3506ab4e4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -23,13 +23,14 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
@@ -39,14 +40,14 @@ TEST_ALL (VEC_PERM)
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
index 7d4d5e8ca3d..c0411459b94 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[2] = { 10, 17 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 2); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
index 3ac0eebf422..326630f421f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -25,6 +25,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -33,7 +34,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 8-bit type. */
/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
/* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double. */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 12 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 13 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
@@ -49,14 +50,14 @@ TEST_ALL (VEC_PERM)
and stores each. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 12 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
index 7306355b873..de33f41c2c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[4] = { 41, 25, 31, 62 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 4); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
index b0890fd934b..32c14ebe4bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, int n) \
{ \
for (int i = 0; i < n; ++i) \
@@ -29,6 +29,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -36,7 +37,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type
and 8 for double. */
-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 26 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 28 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
@@ -55,21 +56,21 @@ TEST_ALL (VEC_PERM)
ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away)
ZIP1 ZIP2 ZIP1 ZIP2
ZIP1 ZIP2 ZIP1 ZIP2. */
-/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 35 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */
/* The loop should be fully-masked. The 32-bit types need two loads
and stores each and the 64-bit types need four. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 6 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 6 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 12 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 12 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 12 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 24 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
index 2eb2a5ff07e..e0fe656859d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \
for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_slp_##TYPE (a, N / 8); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -20,7 +23,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
index 0f8cf624e20..e0bacb0cad8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -27,6 +27,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -37,9 +38,9 @@ TEST_ALL (VEC_PERM)
/* ??? At present we don't treat the int8_t and int16_t loops as
reductions. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tld2b\t} } } */
@@ -52,12 +53,14 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
-/* Should be 4, if we used reductions for int8_t and int16_t. */
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
index 476b40cb0e9..bb5421700da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c
@@ -5,25 +5,30 @@
#define N (141 * 2)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[2] = { 40, 22 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 2); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- for (unsigned int i = 0; i < N; i += 2) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[2] = { 40, 22 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 2); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ for (unsigned int i = 0; i < N; i += 2) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
index 8cdceb57dc6..b3bdb04e2ab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -30,6 +30,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
index a9ca327c907..e2ad116f91d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c
@@ -5,27 +5,32 @@
#define N (77 * 3)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[3] = { 40, 22, 75 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 3); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- TYPE x2 = 75; \
- for (unsigned int i = 0; i < N; i += 3) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- x2 += a[i + 2]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1] || x2 != b[2]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[3] = { 40, 22, 75 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 3); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ TYPE x2 = 75; \
+ for (unsigned int i = 0; i < N; i += 3) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ x2 += a[i + 2]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1] || x2 != b[2])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
index 4dc9fafcdde..372c7575cdb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
TYPE x0 = b[0]; \
@@ -33,6 +33,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
+ T (_Float16) \
T (float) \
T (double)
@@ -45,9 +46,9 @@ TEST_ALL (VEC_PERM)
/* ??? At present we don't treat the int8_t and int16_t loops as
reductions. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */
/* { dg-final { scan-assembler-not {\tld4b\t} } } */
@@ -60,12 +61,14 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
-/* Should be 4, if we used reductions for int8_t and int16_t. */
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
index 12446972fde..5a8bf99bc5b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c
@@ -5,29 +5,34 @@
#define N (54 * 4)
-#define HARNESS(TYPE) \
- { \
- TYPE a[N], b[4] = { 40, 22, 75, 19 }; \
- for (unsigned int i = 0; i < N; ++i) \
- a[i] = i * 2 + i % 5; \
- vec_slp_##TYPE (a, b, N / 4); \
- TYPE x0 = 40; \
- TYPE x1 = 22; \
- TYPE x2 = 75; \
- TYPE x3 = 19; \
- for (unsigned int i = 0; i < N; i += 4) \
- { \
- x0 += a[i]; \
- x1 += a[i + 1]; \
- x2 += a[i + 2]; \
- x3 += a[i + 3]; \
- asm volatile (""); \
- } \
- if (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3]) \
- __builtin_abort (); \
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[4] = { 40, 22, 75, 19 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b, N / 4); \
+ TYPE x0 = 40; \
+ TYPE x1 = 22; \
+ TYPE x2 = 75; \
+ TYPE x3 = 19; \
+ for (unsigned int i = 0; i < N; i += 4) \
+ { \
+ x0 += a[i]; \
+ x1 += a[i + 1]; \
+ x2 += a[i + 2]; \
+ x3 += a[i + 3]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ /* _Float16 isn't precise enough for this. */ \
+ if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \
+ && (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3])) \
+ __builtin_abort (); \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
index caae4528d82..d57457fbef0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
index 2717ca62de1..09a6d648c52 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c
@@ -37,7 +37,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
index af06270b6f2..65e1cb8f044 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE1, TYPE2) \
-void __attribute__ ((weak)) \
+void __attribute__ ((noinline, noclone)) \
vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \
TYPE2 *restrict b, int n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
index 0bde3b6ea03..3e69a48580b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c
@@ -32,7 +32,7 @@
} \
}
-int
+int __attribute__ ((noinline, noclone))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
index 25f3047444e..db35711a193 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c
@@ -21,6 +21,6 @@ FPTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit)\
SPEC_FP_LOOP (uint32_t, uint32_t, double)
-/* { dg-final { scan-tree-dump-times "Not vectorized: Multiple ncopies not supported" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "not vectorized: ncopies is greater than 1" 1 "vect" } } */
/* { dg-final { scan-assembler-not "brka\tp\[0-9\]*.b, p\[0-9\]*\/z, p\[0-9\]*.b" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
index 4765b22f014..1b71687a257 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c
@@ -41,4 +41,4 @@ SPEC_LOOP (uint64_t, uint16_t)
SPEC_LOOP (uint64_t, uint32_t)
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "Speculative loop mask load/stores not supported" "vect" } } */
+/* { dg-final { scan-tree-dump "speculative mask loads not supported" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
index 3e7367cd9fa..1a48f7b6080 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c
@@ -3,50 +3,50 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int8_t v32qi __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
void sve_store_64_z_lsl (uint64_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v4di *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
}
void sve_store_64_s_lsl (int64_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v4di *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
}
void sve_store_32_z_lsl (uint32_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v8si *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
}
void sve_store_32_s_lsl (int32_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v8si *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
}
void sve_store_16_z_lsl (uint16_t *a, unsigned long i)
{
- asm volatile ("" : "=w" (*(v16hi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
}
void sve_store_16_s_lsl (int16_t *a, signed long i)
{
- asm volatile ("" : "=w" (*(v16hi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
}
/* ??? The other argument order leads to a redundant move. */
void sve_store_8_z (unsigned long i, uint8_t *a)
{
- asm volatile ("" : "=w" (*(v32qi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
}
void sve_store_8_s (signed long i, int8_t *a)
{
- asm volatile ("" : "=w" (*(v32qi *) &a[i]));
+ asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
}
/* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c
new file mode 100644
index 00000000000..b940ba9d4de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ INDEX##BITS stride, INDEX##BITS n) \
+ { \
+ for (INDEX##BITS i = 0; i < n; ++i) \
+ dest[i] += src[i * stride]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 8) \
+ T (DATA_TYPE, 16) \
+ T (DATA_TYPE, 32) \
+ T (DATA_TYPE, 64)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c
new file mode 100644
index 00000000000..a834989091d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_strided_load_1.c"
+
+/* The 8-bit and 16-bit index types end up signed because the multiplication
+   promotes them to int.  Using uxtw for all 9 loads would also be OK. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+ so we vectorize the offset calculation. This means that the
+ 64-bit version needs two copies. */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
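
The sxtw/uxtw split expected above follows directly from C's integer
promotions: in src[i * stride], 8-bit and 16-bit index operands are promoted
to int before the multiplication, so the offset is a signed 32-bit value even
though INDEX8 and INDEX16 are unsigned here, while uint32_t arithmetic must
wrap modulo 2^32 and therefore stays unsigned.  A rough standalone sketch
(illustrative only, not part of the patch; the function names are made up and
the element-size scaling done by the real loads is ignored):

#include <stdint.h>

/* Both uint8_t operands are promoted to int, so the product is signed and
   the widening to 64 bits is a sign extension, matching sxtw addressing.  */
int64_t
offset_u8 (uint8_t i, uint8_t stride)
{
  return i * stride;
}

/* uint32_t * uint32_t stays unsigned and wraps modulo 2^32, so the widening
   to 64 bits is a zero extension, matching uxtw addressing.  */
int64_t
offset_u32 (uint32_t i, uint32_t stride)
{
  return i * stride;
}
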
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c
new file mode 100644
index 00000000000..8f0bfdd4bb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+  f_##DATA_TYPE##_##OTHER_TYPE (DATA_TYPE *restrict dest,	\
+ DATA_TYPE *restrict src, \
+ OTHER_TYPE *restrict other, \
+ OTHER_TYPE mask, \
+ int stride, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] = src[i * stride] + (OTHER_TYPE) (other[i] | mask); \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, int16_t) \
+ T (uint32_t, int16_t) \
+ T (float, int16_t) \
+ T (int64_t, int32_t) \
+ T (uint64_t, int32_t) \
+ T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c
new file mode 100644
index 00000000000..b7dc12fb3c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c
new file mode 100644
index 00000000000..6cbcc963595
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, long n) \
+ { \
+ for (long i = 0; i < n; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c
new file mode 100644
index 00000000000..aaf743b3d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */
+
+#include "sve_strided_load_5.c"
+
+/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c
new file mode 100644
index 00000000000..ddf6667e8c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src) \
+ { \
+ for (long i = 0; i < 1000; ++i) \
+ dest[i] += src[i * SCALE]; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c
new file mode 100644
index 00000000000..788aeb08df2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+void
+foo (double *x, int m)
+{
+ for (int i = 0; i < 256; ++i)
+ x[i * m] += x[i * m];
+}
+
+/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c
new file mode 100644
index 00000000000..4f84b3fdec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX8
+#define INDEX8 int8_t
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, \
+ INDEX##BITS stride, INDEX##BITS n) \
+ { \
+ for (INDEX##BITS i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 8) \
+ T (DATA_TYPE, 16) \
+ T (DATA_TYPE, 32) \
+ T (DATA_TYPE, 64)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c
new file mode 100644
index 00000000000..1a8df604ead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define INDEX8 uint8_t
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "sve_strided_store_1.c"
+
+/* The 8-bit and 16-bit index types end up signed because the multiplication
+   promotes them to int.  Using uxtw for all 9 stores would also be OK. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* The 32-bit loop needs to honor the defined overflow in uint32_t,
+ so we vectorize the offset calculation. This means that the
+ 64-bit version needs two copies. */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c
new file mode 100644
index 00000000000..19454565f97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+  f_##DATA_TYPE##_##OTHER_TYPE (DATA_TYPE *restrict dest,	\
+ DATA_TYPE *restrict src, \
+ OTHER_TYPE *restrict other, \
+ OTHER_TYPE mask, \
+ int stride, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * stride] = src[i] + (OTHER_TYPE) (other[i] | mask); \
+ }
+
+#define TEST_ALL(T) \
+ T (int32_t, int16_t) \
+ T (uint32_t, int16_t) \
+ T (float, int16_t) \
+ T (int64_t, int32_t) \
+ T (uint64_t, int32_t) \
+ T (double, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c
new file mode 100644
index 00000000000..23f1329c69b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c
new file mode 100644
index 00000000000..68f2a539c27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src, long n) \
+ { \
+ for (long i = 0; i < n; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c
new file mode 100644
index 00000000000..da124b7348b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */
+
+#include "sve_strided_store_5.c"
+
+/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c
new file mode 100644
index 00000000000..a76ac359f01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \
+ DATA_TYPE *restrict src) \
+ { \
+ for (long i = 0; i < 1000; ++i) \
+ dest[i * SCALE] = src[i] + 1; \
+ }
+
+#define TEST_TYPE(T, DATA_TYPE) \
+ T (DATA_TYPE, 5, 5) \
+ T (DATA_TYPE, 7, 7) \
+ T (DATA_TYPE, 11, 11) \
+ T (DATA_TYPE, 200, 200) \
+ T (DATA_TYPE, m100, -100)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
index bb23f9886c6..e9ac4790c7b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c
@@ -1,32 +1,35 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[2]; } v64qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[2]; } vnx32qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[2]; } v32hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[2]; } vnx16hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[2]; } v16si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[2]; } vnx8si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[2]; } v8di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[2]; } vnx4di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[2]; } v16sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[2]; } vnx16hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[2]; } v8df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[2]; } vnx8sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[2]; } vnx4df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f1_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
} \
@@ -54,63 +57,73 @@ typedef struct { v4df a[2]; } v8df;
asm volatile ("# %0" :: "w" (x)); \
}
-TEST_TYPE (v64qi, z0, z2)
-TEST_TYPE (v32hi, z5, z7)
-TEST_TYPE (v16si, z10, z12)
-TEST_TYPE (v8di, z15, z17)
-TEST_TYPE (v16sf, z20, z23)
-TEST_TYPE (v8df, z28, z30)
+TEST_TYPE (vnx32qi, z0, z2)
+TEST_TYPE (vnx16hi, z5, z7)
+TEST_TYPE (vnx8si, z10, z12)
+TEST_TYPE (vnx4di, z15, z17)
+TEST_TYPE (vnx16hf, z18, z20)
+TEST_TYPE (vnx8sf, z21, z23)
+TEST_TYPE (vnx4df, z28, z30)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v64qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */
-/* { dg-final { scan-assembler { test v64qi 2 z0, z0, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz5.h, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32hi 1 z5\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */
/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */
-/* { dg-final { scan-assembler { test v32hi 2 z5, z5, z7\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz5.h, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz10.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz11.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16si 1 z10\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */
/* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */
-/* { dg-final { scan-assembler { test v16si 2 z10, z10, z12\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz10.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz11.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz15.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v8di 1 z15\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */
-/* { dg-final { scan-assembler { test v8di 2 z15, z15, z17\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz15.d, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
-/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16sf 1 z20\n} } } */
-/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
-/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
-/* { dg-final { scan-assembler { test v16sf 2 z20, z20, z23\n} } } */
-/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz20.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz21.d, z19.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16hf 2 z18, z18, z20\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 1 z21\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z22.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 2 z21, z21, z23\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz29.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v8df 1 z28\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */
-/* { dg-final { scan-assembler { test v8df 2 z28, z28, z30\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz29.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
index d36aa75483a..faf503c35e1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c
@@ -1,51 +1,55 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[3]; } v96qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[3]; } vnx48qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[3]; } v48hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[3]; } vnx24hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[3]; } v24si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[3]; } vnx12si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[3]; } v12di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[3]; } vnx6di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[3]; } v24sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[3]; } vnx24hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[3]; } v12df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[3]; } vnx12sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[3]; } vnx6df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
}
-TEST_TYPE (v96qi, z0, z3)
-TEST_TYPE (v48hi, z6, z2)
-TEST_TYPE (v24si, z12, z15)
-TEST_TYPE (v12di, z16, z13)
-TEST_TYPE (v24sf, z20, z23)
-TEST_TYPE (v12df, z26, z29)
+TEST_TYPE (vnx48qi, z0, z3)
+TEST_TYPE (vnx24hi, z6, z2)
+TEST_TYPE (vnx12si, z12, z15)
+TEST_TYPE (vnx6di, z16, z13)
+TEST_TYPE (vnx24hf, z18, z1)
+TEST_TYPE (vnx12sf, z20, z23)
+TEST_TYPE (vnx6df, z26, z29)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v96qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */
-/* { dg-final { scan-assembler { test v96qi 2 z0, z0, z3\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -53,11 +57,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v48hi 1 z6\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
-/* { dg-final { scan-assembler { test v48hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -65,11 +69,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1w\tz12.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v24si 1 z12\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */
-/* { dg-final { scan-assembler { test v24si 2 z12, z12, z15\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -77,23 +81,35 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz17.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v12di 1 z16\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */
-/* { dg-final { scan-assembler { test v12di 2 z16, z16, z13\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx24hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */
+/* { dg-final { scan-assembler { test vnx24hf 2 z18, z18, z1\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
+
/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v24sf 1 z20\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */
/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */
-/* { dg-final { scan-assembler { test v24sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
@@ -101,11 +117,11 @@ TEST_TYPE (v12df, z26, z29)
/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v12df 1 z26\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */
/* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */
-/* { dg-final { scan-assembler { test v12df 2 z26, z26, z29\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz27.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
index d97d6973359..101a33701a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c
@@ -1,53 +1,57 @@
-/* { dg-do compile } */
-/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */
-typedef char v32qi __attribute__((vector_size(32)));
-typedef struct { v32qi a[4]; } v128qi;
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[4]; } vnx64qi;
-typedef short v16hi __attribute__((vector_size(32)));
-typedef struct { v16hi a[4]; } v64hi;
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[4]; } vnx32hi;
-typedef int v8si __attribute__((vector_size(32)));
-typedef struct { v8si a[4]; } v32si;
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[4]; } vnx16si;
-typedef long v4di __attribute__((vector_size(32)));
-typedef struct { v4di a[4]; } v16di;
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[4]; } vnx8di;
-typedef float v8sf __attribute__((vector_size(32)));
-typedef struct { v8sf a[4]; } v32sf;
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef struct { vnx8hf a[4]; } vnx32hf;
-typedef double v4df __attribute__((vector_size(32)));
-typedef struct { v4df a[4]; } v16df;
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[4]; } vnx16sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[4]; } vnx8df;
#define TEST_TYPE(TYPE, REG1, REG2) \
void \
f_##TYPE (TYPE *a) \
{ \
register TYPE x asm (#REG1) = a[0]; \
- asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
register TYPE y asm (#REG2) = x; \
- asm volatile ("# test " #TYPE " 2 %0, %1, %2" \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
: "=&w" (x) : "0" (x), "w" (y)); \
a[1] = x; \
}
-TEST_TYPE (v128qi, z0, z4)
-TEST_TYPE (v64hi, z6, z2)
-TEST_TYPE (v32si, z12, z16)
-TEST_TYPE (v16di, z17, z13)
-TEST_TYPE (v32sf, z20, z16)
-TEST_TYPE (v16df, z24, z28)
+TEST_TYPE (vnx64qi, z0, z4)
+TEST_TYPE (vnx32hi, z6, z2)
+TEST_TYPE (vnx16si, z12, z16)
+TEST_TYPE (vnx8di, z17, z13)
+TEST_TYPE (vnx32hf, z18, z1)
+TEST_TYPE (vnx16sf, z20, z16)
+TEST_TYPE (vnx8df, z24, z28)
/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1b\tz3.b, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v128qi 1 z0\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */
-/* { dg-final { scan-assembler { test v128qi 2 z0, z0, z4\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 2 z0, z0, z4\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -57,12 +61,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1h\tz9.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v64hi 1 z6\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */
/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */
-/* { dg-final { scan-assembler { test v64hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -72,12 +76,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz15.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32si 1 z12\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */
-/* { dg-final { scan-assembler { test v32si 2 z12, z12, z16\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -87,27 +91,42 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz19.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz20.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16di 1 z17\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */
/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
-/* { dg-final { scan-assembler { test v16di 2 z17, z17, z13\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz19.d, p[0-7], \[x0, #6, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz20.d, p[0-7], \[x0, #7, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1h\tz21.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32hf 1 z18\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z21.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32hf 2 z18, z18, z1\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1h\tz21.h, p[0-7], \[x0, #7, mul vl\]\n} } } */
+
/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\tz23.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v32sf 1 z20\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */
/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */
-/* { dg-final { scan-assembler { test v32sf 2 z20, z20, z16\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #6, mul vl\]\n} } } */
@@ -117,12 +136,12 @@ TEST_TYPE (v16df, z24, z28)
/* { dg-final { scan-assembler {\tld1d\tz25.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */
-/* { dg-final { scan-assembler { test v16df 1 z24\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */
/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */
/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */
-/* { dg-final { scan-assembler { test v16df 2 z24, z24, z28\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz24.d, p[0-7], \[x0, #4, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz25.d, p[0-7], \[x0, #5, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #6, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c
new file mode 100644
index 00000000000..40ec0481e84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c
@@ -0,0 +1,116 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[2]; } vnx32qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[2]; } vnx16hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[2]; } vnx8si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[2]; } vnx4di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[2]; } vnx8sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[2]; } vnx4df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f1_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ } \
+ /* This must compile, but we don't care how. */ \
+ void \
+ f2_##TYPE (TYPE *a) \
+ { \
+ TYPE x = a[0]; \
+ x.a[0][3] = 1; \
+ x.a[1][2] = 12; \
+ asm volatile ("# %0" :: "w" (x)); \
+ } \
+ void \
+ f3_##TYPE (TYPE *a, int i) \
+ { \
+ TYPE x = a[0]; \
+ x.a[0][i] = 1; \
+ asm volatile ("# %0" :: "w" (x)); \
+ } \
+ void \
+ f4_##TYPE (TYPE *a, int i, int j) \
+ { \
+ TYPE x = a[0]; \
+ x.a[i][j] = 44; \
+ asm volatile ("# %0" :: "w" (x)); \
+ }
+
+TEST_TYPE (vnx32qi, z0, z2)
+TEST_TYPE (vnx16hi, z5, z7)
+TEST_TYPE (vnx8si, z10, z12)
+TEST_TYPE (vnx4di, z15, z17)
+TEST_TYPE (vnx8sf, z20, z23)
+TEST_TYPE (vnx4df, z28, z30)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz5, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz5, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz10, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz11, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz10, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz11, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz15, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz16, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */
+/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #3, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz28, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz29, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */
+/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz29, \[x0, #3, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c
new file mode 100644
index 00000000000..ee04c3e0f23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c
@@ -0,0 +1,111 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[3]; } vnx48qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[3]; } vnx24hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[3]; } vnx12si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[3]; } vnx6di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[3]; } vnx12sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[3]; } vnx6df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ }
+
+TEST_TYPE (vnx48qi, z0, z3)
+TEST_TYPE (vnx24hi, z6, z2)
+TEST_TYPE (vnx12si, z12, z15)
+TEST_TYPE (vnx6di, z16, z13)
+TEST_TYPE (vnx12sf, z20, z23)
+TEST_TYPE (vnx6df, z26, z29)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */
+/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
+/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */
+/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz16, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz17, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */
+/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */
+/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #5, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz26, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz28, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */
+/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #5, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c
new file mode 100644
index 00000000000..8bfd9f6d1af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c
@@ -0,0 +1,129 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */
+
+typedef char vnx16qi __attribute__((vector_size(32)));
+typedef struct { vnx16qi a[4]; } vnx64qi;
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+typedef struct { vnx8hi a[4]; } vnx32hi;
+
+typedef int vnx4si __attribute__((vector_size(32)));
+typedef struct { vnx4si a[4]; } vnx16si;
+
+typedef long vnx2di __attribute__((vector_size(32)));
+typedef struct { vnx2di a[4]; } vnx8di;
+
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef struct { vnx4sf a[4]; } vnx16sf;
+
+typedef double vnx2df __attribute__((vector_size(32)));
+typedef struct { vnx2df a[4]; } vnx8df;
+
+#define TEST_TYPE(TYPE, REG1, REG2) \
+ void \
+ f_##TYPE (TYPE *a) \
+ { \
+ register TYPE x asm (#REG1) = a[0]; \
+ asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \
+ register TYPE y asm (#REG2) = x; \
+ asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \
+ : "=&w" (x) : "0" (x), "w" (y)); \
+ a[1] = x; \
+ }
+
+TEST_TYPE (vnx64qi, z0, z4)
+TEST_TYPE (vnx32hi, z6, z2)
+TEST_TYPE (vnx16si, z12, z16)
+TEST_TYPE (vnx8di, z17, z13)
+TEST_TYPE (vnx16sf, z20, z16)
+TEST_TYPE (vnx8df, z24, z28)
+
+/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz3, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */
+/* { dg-final { scan-assembler { test vnx64qi 2 z0, z0, z4\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz3, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz9, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */
+/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz9, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz15, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz17, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz19, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz19, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz23, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */
+/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz23, \[x0, #7, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler {\tldr\tz24, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz25, \[x0, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz26, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */
+/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz24, \[x0, #4, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz25, \[x0, #5, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #6, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #7, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
index 6d7b5fecbce..3405bd76eb1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#ifndef TYPE
#define TYPE unsigned char
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
index 7ae718ada2c..dff9e963e06 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#define ITYPE long
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
index 5ab3ff68bda..611cbbda078 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#define ITYPE long
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
index 6771938131b..80e69463e18 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c
@@ -1,13 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE float
-#define ITYPE int
+#define TYPE _Float16
+#define ITYPE short
#include "sve_struct_vect_7.c"
-/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
index f9c129801fc..bfab53d9b6b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c
@@ -1,6 +1,6 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE float
-#define ITYPE int
+#define TYPE _Float16
+#define ITYPE short
#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
index 37c11b3b29a..47279e0a80e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c
@@ -1,13 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE double
-#define ITYPE long
+#define TYPE float
+#define ITYPE int
#include "sve_struct_vect_7.c"
-/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
index c7ed3fe2806..74007a938b7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c
@@ -1,6 +1,6 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
-#define TYPE double
-#define ITYPE long
+#define TYPE float
+#define ITYPE int
#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
index 3e3b9d733e4..5ebf5d8ee38 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c
@@ -1,66 +1,13 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-
-#define TYPE unsigned char
-#define NAME(X) qi_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned short
-#define NAME(X) hi_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned int
-#define NAME(X) si_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE unsigned long
-#define NAME(X) di_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-#define TYPE float
-#define NAME(X) sf_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
-#define NAME(X) df_##X
-#include "sve_struct_vect_1.c"
-#undef NAME
-#undef TYPE
-
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-
-/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-
-/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+#define ITYPE long
+#include "sve_struct_vect_7.c"
+
+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c
new file mode 100644
index 00000000000..6fb5329913b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c
@@ -0,0 +1,6 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE double
+#define ITYPE long
+#include "sve_struct_vect_7_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
index c3e81f500e0..46126e841dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c
@@ -1,7 +1,47 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#define TYPE unsigned char
+#define NAME(X) qi_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned short
+#define NAME(X) hi_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned int
+#define NAME(X) si_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE unsigned long
+#define NAME(X) di_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE _Float16
+#define NAME(X) hf_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE float
+#define NAME(X) sf_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
+
+#define TYPE double
+#define NAME(X) df_##X
+#include "sve_struct_vect_1.c"
+#undef NAME
+#undef TYPE
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +50,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
index 635910e11a0..c1ccf7f09bb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c
@@ -1,7 +1,7 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#include "sve_struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +10,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
index 9afc0708fb1..61985f98974 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c
@@ -1,7 +1,7 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024 --save-temps" } */
-#include "sve_struct_vect_13.c"
+#include "sve_struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
@@ -10,12 +10,12 @@
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
index 80c99961791..6dd2878c552 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c
@@ -1,47 +1,32 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048 --save-temps" } */
-#define N 2000
+#include "sve_struct_vect_14.c"
-#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src) \
- { \
- for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 2]; \
- }
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-#define TEST(NAME) \
- TEST_LOOP (NAME##_i8, signed char) \
- TEST_LOOP (NAME##_i16, unsigned short) \
- TEST_LOOP (NAME##_f32, float) \
- TEST_LOOP (NAME##_f64, double)
+/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-TEST (test)
+/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */
-/* Check the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
-
-/* Check the scalar tail. */
-/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */
-/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */
-/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */
-/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */
-/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-
-/* The only branches should be in the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c
deleted file mode 100644
index 970c6de6f08..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-
-#include "sve_struct_vect_17.c"
-
-volatile int x;
-
-#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 2]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 2; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2 + in[i * 2]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
- }
-
-int
-main (void)
-{
- TEST (test);
- return 0;
-}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
index 90e0b53c7df..fd0ce83ffac 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c
@@ -4,11 +4,11 @@
#define N 2000
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 4]; \
+ dest[i] += src[i * 3]; \
}
#define TEST(NAME) \
@@ -21,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
index f7db5aea413..6467fa23b83 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c
@@ -3,28 +3,32 @@
#include "sve_struct_vect_18.c"
-volatile int x;
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 4]; \
+ TYPE in[N * 3]; \
for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 4; ++i) \
- in[i] = i * 9 / 2; \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
NAME (out, in); \
for (int i = 0; i < N; ++i) \
{ \
- TYPE expected = i * 7 / 2 + in[i * 4]; \
+ TYPE expected = i * 7 / 2 + in[i * 3]; \
if (out[i] != expected) \
__builtin_abort (); \
- x += 1; \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
index 3430459a2f3..2a099d05d65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c
@@ -2,11 +2,11 @@
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
+ void __attribute__ ((noinline, noclone)) \
NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
{ \
for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 2]; \
+ dest[i] += src[i * 3]; \
}
#define TEST(NAME) \
@@ -19,16 +19,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
index 94593cef684..f9bf095d3a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c
@@ -3,37 +3,41 @@
#include "sve_struct_vect_19.c"
-volatile int x;
-
#define N 1000
#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 2]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
- { \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 2; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 2]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
- } \
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 3]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 3]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
index 1f99c676586..a94142f2c9e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c
@@ -1,10 +1,8 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#include "sve_struct_vect_1.c"
-extern void abort() __attribute__((noreturn));
-
TYPE a[N], b[N], c[N], d[N], e[N * 4];
void __attribute__ ((noinline, noclone))
@@ -19,10 +17,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step)
{
for (int i = 0; i < n; ++i)
if (array[i] != (TYPE) (base + step * i))
- abort ();
+ __builtin_abort ();
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
init_array (e, 2 * N, 11, 5);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
index 8e5a96361f6..0d51808552e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
index aad0e104379..3a2907f4ad9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c
@@ -1,12 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+#define N 2000
+
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
- for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 4]; \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 2]; \
}
#define TEST(NAME) \
@@ -19,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -41,7 +43,5 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* Each function should have three branches: one directly to the exit
- (n <= 0), one to the single scalar epilogue iteration (n == 1),
- and one branch-back for the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
+/* The only branches should be in the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
index 3be63364455..de563c98c1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c
@@ -3,37 +3,32 @@
#include "sve_struct_vect_20.c"
-volatile int x;
-
-#define N 1000
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 4]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
+ TYPE in[N * 2]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
{ \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 4; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 4]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+ TYPE expected = i * 7 / 2 + in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
index ac3a7dd2383..bb29747b0c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c
@@ -1,14 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
-#define N 2000
-
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
{ \
- for (int i = 0; i < N; ++i) \
- dest[i] += src[i * 3]; \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 2]; \
}
#define TEST(NAME) \
@@ -21,16 +19,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -43,5 +41,7 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* The only branches should be in the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
+/* Each function should have three branches: one directly to the exit
+ (n <= 0), one to the single scalar epilogue iteration (n == 1),
+ and one branch-back for the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
index 94d72d1835a..6f9a4e3dc32 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c
@@ -3,28 +3,41 @@
#include "sve_struct_vect_21.c"
-volatile int x;
+#define N 1000
#undef TEST_LOOP
-#define TEST_LOOP(NAME, TYPE) \
- { \
- TYPE out[N]; \
- TYPE in[N * 3]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 3; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2 + in[i * 3]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 2]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
index c17766c7d23..8ee25a0e279 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c
@@ -1,12 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+#define N 2000
+
#define TEST_LOOP(NAME, TYPE) \
- void __attribute__((weak)) \
- NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
{ \
- for (int i = 0; i < n; ++i) \
- dest[i] += src[i * 3]; \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 4]; \
}
#define TEST(NAME) \
@@ -19,16 +21,16 @@ TEST (test)
/* Check the vectorized loop. */
/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
/* Check the scalar tail. */
@@ -41,7 +43,5 @@ TEST (test)
/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
-/* Each function should have three branches: one directly to the exit
- (n <= 0), one to the single scalar epilogue iteration (n == 1),
- and one branch-back for the vectorized loop. */
-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
+/* The only branches should be in the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
index 550364b16d1..1c3699292c0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c
@@ -3,37 +3,32 @@
#include "sve_struct_vect_22.c"
-volatile int x;
-
-#define N 1000
-
#undef TEST_LOOP
#define TEST_LOOP(NAME, TYPE) \
{ \
TYPE out[N]; \
- TYPE in[N * 3]; \
- int counts[] = { 0, 1, N - 1 }; \
- for (int j = 0; j < 3; ++j) \
+ TYPE in[N * 4]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
{ \
- int count = counts[j]; \
- for (int i = 0; i < N; ++i) \
- out[i] = i * 7 / 2; \
- for (int i = 0; i < N * 3; ++i) \
- in[i] = i * 9 / 2; \
- NAME (out, in, count); \
- for (int i = 0; i < N; ++i) \
- { \
- TYPE expected = i * 7 / 2; \
- if (i < count) \
- expected += in[i * 3]; \
- if (out[i] != expected) \
- __builtin_abort (); \
- x += 1; \
- } \
+ TYPE expected = i * 7 / 2 + in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST (test);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c
new file mode 100644
index 00000000000..7542e531624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 4]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, signed char) \
+ TEST_LOOP (NAME##_i16, unsigned short) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* Check the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
+
+/* Check the scalar tail. */
+/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */
+/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */
+/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */
+/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
+
+/* Each function should have three branches: one directly to the exit
+ (n <= 0), one to the single scalar epilogue iteration (n == 1),
+ and one branch-back for the vectorized loop. */
+/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c
new file mode 100644
index 00000000000..83f13dd46cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_struct_vect_23.c"
+
+#define N 1000
+
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 4]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
index 6229b78b72e..0da23e144af 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
index 3a29ae16701..b1e37e536e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
index 7703dc6c043..74a5bd3233b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
index 0c526365829..af20d763bdd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
index 4ea2cff9dd0..a8aedd188c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned long
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
index efc1c9d2e2c..4b1f8cd341a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE float
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
index f0d56e87dcc..22ba35ff702 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE float
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
index ff445c1fbb0..981c9d31950 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_struct_vect_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
index b0b685c0789..dbcbae8259f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE double
#include "sve_struct_vect_1_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
index 9712f89d171..8067d5ed169 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#ifndef TYPE
#define TYPE unsigned char
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
index 5cfb7559a5c..8cc1993e997 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c
@@ -1,12 +1,10 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#include "sve_struct_vect_7.c"
#define N 93
-extern void abort() __attribute__((noreturn));
-
TYPE a[N], b[N], c[N], d[N], e[N * 4];
void __attribute__ ((noinline, noclone))
@@ -21,10 +19,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step)
{
for (int i = 0; i < n; ++i)
if (array[i] != (TYPE) (base + step * i))
- abort ();
+ __builtin_abort ();
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
init_array (e, 2 * N, 11, 5);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
index 57cb93de5d9..e807179a6a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#define ITYPE short
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
index 59005a2f05b..954043fa874 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned short
#define ITYPE short
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
index d897d556d05..a167a7b2caf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#define ITYPE int
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
index ab694b4a971..4b94d383fec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c
@@ -1,5 +1,5 @@
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
#define TYPE unsigned int
#define ITYPE int
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
index 0c7b887d232..754b188a206 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c
@@ -7,13 +7,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) X, Y + X
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
@@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS) \
TYPE permute_##TYPE (TYPE values1, TYPE values2) \
@@ -35,13 +35,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4) \
- T (v8si, 8) \
- T (v16hi, 16) \
- T (v32qi, 32) \
- T (v4df, 4) \
- T (v8sf, 8) \
- T (v16hf, 16)
+ T (vnx2di, 4) \
+ T (vnx4si, 8) \
+ T (vnx8hi, 16) \
+ T (vnx16qi, 32) \
+ T (vnx2df, 4) \
+ T (vnx4sf, 8) \
+ T (vnx8hf, 16)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
index 4d345cf81e9..303276a64cf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c
@@ -3,12 +3,12 @@
#include <stdint.h>
-#define UNPACK(TYPED, TYPES) \
-void __attribute__ ((noinline, noclone)) \
-unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \
-{ \
- for (int i = 0; i < size; i++) \
- d[i] = s[i] + 1; \
+#define UNPACK(TYPED, TYPES) \
+void __attribute__ ((noinline, noclone)) \
+unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, TYPES mask, int size) \
+{ \
+ for (int i = 0; i < size; i++) \
+ d[i] = (TYPES) (s[i] | mask); \
}
#define TEST_ALL(T) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
index d183408d124..da29eda1434 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c
@@ -14,9 +14,9 @@
arrays[i] = (i - 10) * 3; \
asm volatile ("" ::: "memory"); \
} \
- unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \
+ unpack_##TYPED##_##TYPES (arrayd, arrays, 7, ARRAY_SIZE); \
for (int i = 0; i < ARRAY_SIZE; i++) \
- if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \
+ if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) | 7)) \
__builtin_abort (); \
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
index fa8de963264..8c927873340 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c
@@ -8,7 +8,7 @@ void __attribute__ ((noinline, noclone)) \
unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \
{ \
for (int i = 0; i < size; i++) \
- d[i] = s[i] + 1; \
+ d[i] = (TYPES) (s[i] + 1); \
}
#define TEST_ALL(T) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
index 3fa66220f17..d2df061e88d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c
@@ -16,7 +16,7 @@
} \
unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \
for (int i = 0; i < ARRAY_SIZE; i++) \
- if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \
+ if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) + 1)) \
__builtin_abort (); \
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
index aaa4fdccbf0..36048f03f99 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define UZP1(TYPE, MASK) \
TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \
@@ -18,18 +18,18 @@ TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \
}
-UZP1 (v4di, ((v4di) { 0, 2, 4, 6 }));
-UZP1 (v8si, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
-UZP1 (v16hi, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30 }));
-UZP1 (v32qi, ((v32qi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30,
- 32, 34, 36, 38, 40, 42, 44, 46,
- 48, 50, 52, 54, 56, 58, 60, 62 }));
-UZP1 (v4df, ((v4di) { 0, 2, 4, 6 }));
-UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
-UZP1 (v16hf, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30 }));
+UZP1 (vnx2di, ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4si, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hi, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30 }));
+UZP1 (vnx16qi, ((vnx16qi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62 }));
+UZP1 (vnx2df, ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4sf, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hf, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30 }));
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
index d35dad0ffca..622f0d10f5f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c
@@ -16,48 +16,48 @@
int main (void)
{
- TEST_UZP1 (v4di,
- ((v4di) { 4, 6, 12, 36 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_UZP1 (v8si,
- ((v8si) { 3, 5, 7, 9, 33, 35, 37, 39 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_UZP1 (v16hi,
- ((v16hi) { 3, 5, 7, 9, 11, 13, 15, 17,
- 33, 35, 37, 39, 41, 43, 45, 47 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_UZP1 (v32qi,
- ((v32qi) { 4, 6, 4, 6, 4, 6, 4, 6,
- 4, 6, 4, 6, 4, 6, 4, 6,
- 12, 36, 12, 36, 12, 36, 12, 36,
- 12, 36, 12, 36, 12, 36, 12, 36 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_UZP1 (v4df,
- ((v4df) { 4.0, 6.0, 12.0, 36.0 }),
- ((v4df) { 4.0, 5.0, 6.0, 7.0 }),
- ((v4df) { 12.0, 24.0, 36.0, 48.0 }));
- TEST_UZP1 (v8sf,
- ((v8sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }),
- ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
- ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
- TEST_UZP1 (v16hf,
- ((v16hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0,
- 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_UZP1 (vnx2di,
+ ((vnx2di) { 4, 6, 12, 36 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_UZP1 (vnx4si,
+ ((vnx4si) { 3, 5, 7, 9, 33, 35, 37, 39 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_UZP1 (vnx8hi,
+ ((vnx8hi) { 3, 5, 7, 9, 11, 13, 15, 17,
+ 33, 35, 37, 39, 41, 43, 45, 47 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_UZP1 (vnx16qi,
+ ((vnx16qi) { 4, 6, 4, 6, 4, 6, 4, 6,
+ 4, 6, 4, 6, 4, 6, 4, 6,
+ 12, 36, 12, 36, 12, 36, 12, 36,
+ 12, 36, 12, 36, 12, 36, 12, 36 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_UZP1 (vnx2df,
+ ((vnx2df) { 4.0, 6.0, 12.0, 36.0 }),
+ ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+ ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+ TEST_UZP1 (vnx4sf,
+ ((vnx4sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }),
+ ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+ ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+ TEST_UZP1 (vnx8hf,
+ ((vnx8hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0,
+ 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
index 1bb84d80eb0..a9e4a63fb4d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define UZP2(TYPE, MASK) \
TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \
@@ -17,18 +17,18 @@ TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-UZP2 (v4di, ((v4di) { 1, 3, 5, 7 }));
-UZP2 (v8si, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
-UZP2 (v16hi, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31 }));
-UZP2 (v32qi, ((v32qi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31,
- 33, 35, 37, 39, 41, 43, 45, 47,
- 49, 51, 53, 55, 57, 59, 61, 63 }));
-UZP2 (v4df, ((v4di) { 1, 3, 5, 7 }));
-UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
-UZP2 (v16hf, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31 }));
+UZP2 (vnx2di, ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4si, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hi, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31 }));
+UZP2 (vnx16qi, ((vnx16qi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31,
+ 33, 35, 37, 39, 41, 43, 45, 47,
+ 49, 51, 53, 55, 57, 59, 61, 63 }));
+UZP2 (vnx2df, ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4sf, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hf, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31 }));
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
index d7a241c1258..05d82fe08c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c
@@ -16,48 +16,48 @@
int main (void)
{
- TEST_UZP2 (v4di,
- ((v4di) { 5, 7, 24, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_UZP2 (v8si,
- ((v8si) { 4, 6, 8, 10, 34, 36, 38, 40 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_UZP2 (v16hi,
- ((v16hi) { 4, 6, 8, 10, 12, 14, 16, 18,
- 34, 36, 38, 40, 42, 44, 46, 48 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_UZP2 (v32qi,
- ((v32qi) { 5, 7, 5, 7, 5, 7, 5, 7,
- 5, 7, 5, 7, 5, 7, 5, 7,
- 24, 48, 24, 48, 24, 48, 24, 48,
- 24, 48, 24, 48, 24, 48, 24, 48 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_UZP2 (v4df,
- ((v4df) { 5.0, 7.0, 24.0, 48.0 }),
- ((v4df) { 4.0, 5.0, 6.0, 7.0 }),
- ((v4df) { 12.0, 24.0, 36.0, 48.0 }));
- TEST_UZP2 (v8sf,
- ((v8sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }),
- ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
- ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
- TEST_UZP2 (v16hf,
- ((v16hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0,
- 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_UZP2 (vnx2di,
+ ((vnx2di) { 5, 7, 24, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_UZP2 (vnx4si,
+ ((vnx4si) { 4, 6, 8, 10, 34, 36, 38, 40 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_UZP2 (vnx8hi,
+ ((vnx8hi) { 4, 6, 8, 10, 12, 14, 16, 18,
+ 34, 36, 38, 40, 42, 44, 46, 48 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_UZP2 (vnx16qi,
+ ((vnx16qi) { 5, 7, 5, 7, 5, 7, 5, 7,
+ 5, 7, 5, 7, 5, 7, 5, 7,
+ 24, 48, 24, 48, 24, 48, 24, 48,
+ 24, 48, 24, 48, 24, 48, 24, 48 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_UZP2 (vnx2df,
+ ((vnx2df) { 5.0, 7.0, 24.0, 48.0 }),
+ ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+ ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+ TEST_UZP2 (vnx4sf,
+ ((vnx4sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }),
+ ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+ ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+ TEST_UZP2 (vnx8hf,
+ ((vnx8hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0,
+ 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
index 958dce4262d..74acc7983b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c
@@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
-/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x[0-9]+, 10, 16} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 10\n} 2 } } */
/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
index 54d592d8ef1..f915e90b12e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c
@@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, int n, int m)
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
-/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x[0-9]+, 10, 32} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tx[0-9]+, x[0-9]+, 10\n} 2 } } */
/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
/* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */
/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
index 9be09546c80..d0febc69533 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C
@@ -3,10 +3,10 @@
#include <stdint.h>
-typedef int8_t v32qi __attribute__((vector_size(32)));
-typedef int16_t v16hi __attribute__((vector_size(32)));
-typedef int32_t v8si __attribute__((vector_size(32)));
-typedef int64_t v4di __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
typedef uint8_t v32qu __attribute__((vector_size(32)));
typedef uint16_t v16hu __attribute__((vector_size(32)));
@@ -30,10 +30,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \
}
#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
- T (v32qi, COND, SUFFIX) \
- T (v16hi, COND, SUFFIX) \
- T (v8si, COND, SUFFIX) \
- T (v4di, COND, SUFFIX)
+ T (vnx16qi, COND, SUFFIX) \
+ T (vnx8hi, COND, SUFFIX) \
+ T (vnx4si, COND, SUFFIX) \
+ T (vnx2di, COND, SUFFIX)
#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
T (v32qu, COND, SUFFIX) \
@@ -54,10 +54,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \
TEST_COND_VAR_ALL (T, !=, ne)
#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
- T (v32qi, COND, IMM, SUFFIX) \
- T (v16hi, COND, IMM, SUFFIX) \
- T (v8si, COND, IMM, SUFFIX) \
- T (v4di, COND, IMM, SUFFIX)
+ T (vnx16qi, COND, IMM, SUFFIX) \
+ T (vnx8hi, COND, IMM, SUFFIX) \
+ T (vnx4si, COND, IMM, SUFFIX) \
+ T (vnx2di, COND, IMM, SUFFIX)
#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \
T (v32qu, COND, IMM, SUFFIX) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
index 3b7c3e75775..d94cbb37b6a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
@@ -1,57 +1,41 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include <stdint.h>
#include <stdbool.h>
-#define VEC_BOOL_CMPNE(VARTYPE, INDUCTYPE) \
-void \
-vec_bool_cmpne##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
- INDUCTYPE start, INDUCTYPE n, \
- INDUCTYPE mask) \
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
+ INDUCTYPE start, \
+ INDUCTYPE n, \
+ INDUCTYPE mask) \
{ \
- INDUCTYPE i; \
- for (i = 0; i < n; i++) \
+ for (INDUCTYPE i = 0; i < n; i++) \
{ \
bool lhs = i >= start; \
bool rhs = (i & mask) != 0x3D; \
- if (lhs != rhs) \
+ if (lhs OP rhs) \
dst[i] = src[i]; \
} \
}
-#define VEC_BOOL_CMPEQ(VARTYPE, INDUCTYPE) \
-void \
-vec_bool_cmpeq##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \
- INDUCTYPE start, INDUCTYPE n, \
- INDUCTYPE mask) \
-{ \
- INDUCTYPE i; \
- for (i = 0; i < n; i++) \
- { \
- bool lhs = i >= start; \
- bool rhs = (i & mask) != 0x3D; \
- if (lhs == rhs) \
- dst[i] = src[i]; \
- } \
-}
+#define TEST_OP(T, NAME, OP) \
+ T (NAME, OP, uint8_t, uint8_t) \
+ T (NAME, OP, uint16_t, uint16_t) \
+ T (NAME, OP, uint32_t, uint32_t) \
+ T (NAME, OP, uint64_t, uint64_t) \
+ T (NAME, OP, float, uint32_t) \
+ T (NAME, OP, double, uint64_t)
-VEC_BOOL_CMPNE (uint8_t, uint8_t)
-VEC_BOOL_CMPNE (uint16_t, uint16_t)
-VEC_BOOL_CMPNE (uint32_t, uint32_t)
-VEC_BOOL_CMPNE (uint64_t, uint64_t)
-VEC_BOOL_CMPNE (float, uint32_t)
-VEC_BOOL_CMPNE (double, uint64_t)
+#define TEST_ALL(T) \
+ TEST_OP (T, cmpeq, ==) \
+ TEST_OP (T, cmpne, !=)
-VEC_BOOL_CMPEQ (uint8_t, uint8_t)
-VEC_BOOL_CMPEQ (uint16_t, uint16_t)
-VEC_BOOL_CMPEQ (uint32_t, uint32_t)
-VEC_BOOL_CMPEQ (uint64_t, uint64_t)
-VEC_BOOL_CMPEQ (float, uint32_t)
-VEC_BOOL_CMPEQ (double, uint64_t)
+TEST_ALL (VEC_BOOL)
-/* Both CMPNE and CMPEQ loops will contain an exclusive predicate or. */
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or. */
/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
-/* CMPEQ will also contain a masked predicate not operation, which gets
+/* cmpeq will also contain a masked predicate not operation, which gets
folded to BIC. */
/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-7]/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
index 8c341c0e932..092aa386c60 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
@@ -3,32 +3,9 @@
#include "sve_vec_bool_cmp_1.c"
-extern void abort (void);
-
#define N 103
-#define TEST_VEC_BOOL_CMPNE(VARTYPE,INDUCTYPE) \
-{ \
- INDUCTYPE i; \
- VARTYPE src[N]; \
- VARTYPE dst[N]; \
- for (i = 0; i < N; i++) \
- { \
- src[i] = i; \
- dst[i] = i * 2; \
- } \
- vec_bool_cmpne##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \
- for (i = 0; i < 13; i++) \
- if (dst[i] != i) \
- abort (); \
- for (i = 13; i < N; i++) \
- if (i != 0x3D && dst[i] != (i * 2)) \
- abort (); \
- else if (i == 0x3D && dst[i] != 0x3D) \
- abort (); \
-}
-
-#define TEST_VEC_BOOL_CMPEQ(VARTYPE,INDUCTYPE) \
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \
{ \
INDUCTYPE i; \
VARTYPE src[N]; \
@@ -37,36 +14,24 @@ extern void abort (void);
{ \
src[i] = i; \
dst[i] = i * 2; \
+ asm volatile ("" ::: "memory"); \
} \
- vec_bool_cmpeq##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \
+ vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13, \
+ 97, 0xFF); \
for (i = 0; i < 13; i++) \
- if (dst[i] != (i * 2)) \
- abort (); \
+ if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2)) \
+ __builtin_abort (); \
for (i = 13; i < 97; i++) \
- if (i != 0x3D && dst[i] != i) \
- abort (); \
- else if (i == 0x3D && dst[i] != (0x3D) * 2) \
- abort (); \
+ if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2)) \
+ __builtin_abort (); \
for (i = 97; i < N; i++) \
if (dst[i] != (i * 2)) \
- abort (); \
+ __builtin_abort (); \
}
-int main ()
+int __attribute__ ((optimize (1)))
+main ()
{
- TEST_VEC_BOOL_CMPNE (uint8_t, uint8_t);
- TEST_VEC_BOOL_CMPNE (uint16_t, uint16_t);
- TEST_VEC_BOOL_CMPNE (uint32_t, uint32_t);
- TEST_VEC_BOOL_CMPNE (uint64_t, uint64_t);
- TEST_VEC_BOOL_CMPNE (float, uint32_t);
- TEST_VEC_BOOL_CMPNE (double, uint64_t);
-
- TEST_VEC_BOOL_CMPEQ (uint8_t, uint8_t);
- TEST_VEC_BOOL_CMPEQ (uint16_t, uint16_t);
- TEST_VEC_BOOL_CMPEQ (uint32_t, uint32_t);
- TEST_VEC_BOOL_CMPEQ (uint64_t, uint64_t);
- TEST_VEC_BOOL_CMPEQ (float, uint32_t);
- TEST_VEC_BOOL_CMPEQ (double, uint64_t);
-
+ TEST_ALL (TEST_VEC_BOOL)
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
index 3d5b584e9e5..95b278e58f5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c
@@ -1,10 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */
-typedef unsigned int v8si __attribute__ ((vector_size(32)));
+typedef unsigned int vnx4si __attribute__ ((vector_size(32)));
void
-f (v8si *ptr, int x)
+f (vnx4si *ptr, int x)
{
- *ptr += (v8si) { x, x, 1, 2, 3, x, x, 4 };
+ *ptr += (vnx4si) { x, x, 1, 2, 3, x, x, 4 };
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
index ae8542f2c75..31283fcf424 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM(TYPE, MASKTYPE) \
TYPE __attribute__ ((noinline, noclone)) \
@@ -18,13 +18,13 @@ vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask) \
return __builtin_shuffle (values1, values2, mask); \
}
-VEC_PERM (v4di, v4di);
-VEC_PERM (v8si, v8si);
-VEC_PERM (v16hi, v16hi);
-VEC_PERM (v32qi, v32qi);
-VEC_PERM (v4df, v4di);
-VEC_PERM (v8sf, v8si);
-VEC_PERM (v16hf, v16hi);
+VEC_PERM (vnx2di, vnx2di);
+VEC_PERM (vnx4si, vnx4si);
+VEC_PERM (vnx8hi, vnx8hi);
+VEC_PERM (vnx16qi, vnx16qi);
+VEC_PERM (vnx2df, vnx2di);
+VEC_PERM (vnx4sf, vnx4si);
+VEC_PERM (vnx8hf, vnx8hi);
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
index 6ab82250d4c..1b98389d996 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c
@@ -19,93 +19,93 @@
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 36, 7, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }),
- ((v4di) { 1 + (8 * 1), 6 + (8 * 3),
- 3 + (8 * 1), 7 + (8 * 5) }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
- ((v8si) { 9 + (16 * 2), 13 + (16 * 5),
- 15 + (16 * 1), 7 + (16 * 0),
- 6 + (16 * 8), 5 + (16 * 2),
- 4 + (16 * 3), 10 + (16 * 2) }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34,
- 7, 48, 3, 35, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }),
- ((v16hi) { 9 + (32 * 2), 13 + (32 * 2),
- 15 + (32 * 8), 7 + (32 * 9),
- 25 + (32 * 4), 26 + (32 * 3),
- 27 + (32 * 1), 17 + (32 * 2),
- 4 + (32 * 6), 31 + (32 * 7),
- 0 + (32 * 8), 18 + (32 * 9),
- 6 + (32 * 6), 5 + (32 * 7),
- 4 + (32 * 2), 10 + (32 * 2) }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }),
- ((v32qi) { 5 + (64 * 3), 6 + (64 * 1),
- 7 + (64 * 2), 8 + (64 * 1),
- 9 + (64 * 3), 10 + (64 * 1),
- 28 + (64 * 3), 29 + (64 * 3),
- 30 + (64 * 1), 31 + (64 * 1),
- 32 + (64 * 3), 33 + (64 * 2),
- 54 + (64 * 2), 55 + (64 * 2),
- 56 + (64 * 1), 61 + (64 * 2),
- 5 + (64 * 2), 6 + (64 * 1),
- 7 + (64 * 2), 8 + (64 * 2),
- 9 + (64 * 2), 10 + (64 * 1),
- 28 + (64 * 3), 29 + (64 * 1),
- 30 + (64 * 3), 31 + (64 * 3),
- 32 + (64 * 1), 33 + (64 * 1),
- 54 + (64 * 2), 55 + (64 * 2),
- 56 + (64 * 2), 61 + (64 * 2) }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 36.1, 7.1, 48.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4df) { 12.1, 24.1, 36.1, 48.1 }),
- ((v4di) { 1 + (8 * 3), 6 + (8 * 10),
- 3 + (8 * 8), 7 + (8 * 2) }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8sf) { 33.2, 34.2, 35.2, 36.2,
- 37.2, 38.2, 39.2, 40.2 }),
- ((v8si) { 9 + (16 * 1), 13 + (16 * 5),
- 15 + (16 * 4), 7 + (16 * 4),
- 6 + (16 * 3), 5 + (16 * 2),
- 4 + (16 * 1), 10 + (16 * 0) }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
- 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
- ((v16hi) { 9 + (32 * 2), 13 + (32 * 2),
- 15 + (32 * 8), 7 + (32 * 9),
- 25 + (32 * 4), 26 + (32 * 3),
- 27 + (32 * 1), 17 + (32 * 2),
- 4 + (32 * 6), 31 + (32 * 7),
- 0 + (32 * 8), 18 + (32 * 9),
- 6 + (32 * 6), 5 + (32 * 7),
- 4 + (32 * 2), 10 + (32 * 2) }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 36, 7, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }),
+ ((vnx2di) { 1 + (8 * 1), 6 + (8 * 3),
+ 3 + (8 * 1), 7 + (8 * 5) }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+ ((vnx4si) { 9 + (16 * 2), 13 + (16 * 5),
+ 15 + (16 * 1), 7 + (16 * 0),
+ 6 + (16 * 8), 5 + (16 * 2),
+ 4 + (16 * 3), 10 + (16 * 2) }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+ 7, 48, 3, 35, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }),
+ ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+ 15 + (32 * 8), 7 + (32 * 9),
+ 25 + (32 * 4), 26 + (32 * 3),
+ 27 + (32 * 1), 17 + (32 * 2),
+ 4 + (32 * 6), 31 + (32 * 7),
+ 0 + (32 * 8), 18 + (32 * 9),
+ 6 + (32 * 6), 5 + (32 * 7),
+ 4 + (32 * 2), 10 + (32 * 2) }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }),
+ ((vnx16qi) { 5 + (64 * 3), 6 + (64 * 1),
+ 7 + (64 * 2), 8 + (64 * 1),
+ 9 + (64 * 3), 10 + (64 * 1),
+ 28 + (64 * 3), 29 + (64 * 3),
+ 30 + (64 * 1), 31 + (64 * 1),
+ 32 + (64 * 3), 33 + (64 * 2),
+ 54 + (64 * 2), 55 + (64 * 2),
+ 56 + (64 * 1), 61 + (64 * 2),
+ 5 + (64 * 2), 6 + (64 * 1),
+ 7 + (64 * 2), 8 + (64 * 2),
+ 9 + (64 * 2), 10 + (64 * 1),
+ 28 + (64 * 3), 29 + (64 * 1),
+ 30 + (64 * 3), 31 + (64 * 3),
+ 32 + (64 * 1), 33 + (64 * 1),
+ 54 + (64 * 2), 55 + (64 * 2),
+ 56 + (64 * 2), 61 + (64 * 2) }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+ ((vnx2di) { 1 + (8 * 3), 6 + (8 * 10),
+ 3 + (8 * 8), 7 + (8 * 2) }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+ 37.2, 38.2, 39.2, 40.2 }),
+ ((vnx4si) { 9 + (16 * 1), 13 + (16 * 5),
+ 15 + (16 * 4), 7 + (16 * 4),
+ 6 + (16 * 3), 5 + (16 * 2),
+ 4 + (16 * 1), 10 + (16 * 0) }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+ 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+ ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+ 15 + (32 * 8), 7 + (32 * 9),
+ 25 + (32 * 4), 26 + (32 * 3),
+ 27 + (32 * 1), 17 + (32 * 2),
+ 4 + (32 * 6), 31 + (32 * 7),
+ 0 + (32 * 8), 18 + (32 * 9),
+ 6 + (32 * 6), 5 + (32 * 7),
+ 4 + (32 * 2), 10 + (32 * 2) }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
index 4d46ff02192..a551ffa9b49 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c
@@ -19,61 +19,61 @@
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 36, 7, 48 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }),
- ((v4di) { 1, 6, 3, 7 }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34,
- 7, 48, 3, 35, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 12, 24, 36, 48, 12, 24 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }),
- ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61,
- 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61 }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 36.1, 7.1, 48.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4df) { 12.1, 24.1, 36.1, 48.1 }),
- ((v4di) { 1, 6, 3, 7 }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8sf) { 33.2, 34.2, 35.2, 36.2,
- 37.2, 38.2, 39.2, 40.2 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
- 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 36, 7, 48 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }),
+ ((vnx2di) { 1, 6, 3, 7 }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+ 7, 48, 3, 35, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 12, 24, 36, 48, 12, 24 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }),
+ ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61,
+ 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61 }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+ ((vnx2di) { 1, 6, 3, 7 }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+ 37.2, 38.2, 39.2, 40.2 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+ 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
index 31cff7ab113..4c3df975bab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_reverse_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
index 342b1ddb44d..9a9300509ab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[N]; \
for (unsigned int i = 0; i < N; ++i) \
- b[i] = i * 2 + i % 5; \
+ { \
+ b[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_reverse_##TYPE (a, b, N); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -19,7 +22,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
index 4f70abd35e5..8b4901b1014 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_zip_##TYPE (TYPE *restrict a, TYPE *restrict b, \
TYPE *restrict c, long n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
index 14d66f99383..c47b4050ae2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c
@@ -12,6 +12,7 @@
{ \
b[i] = i * 2 + i % 5; \
c[i] = i * 3; \
+ asm volatile ("" ::: "memory"); \
} \
vec_zip_##TYPE (a, b, c, N / 8); \
for (unsigned int i = 0; i < N / 2; ++i) \
@@ -23,7 +24,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
index 5fbd59f08bd..c08ad23868c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c
@@ -4,7 +4,7 @@
#include <stdint.h>
#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
+TYPE __attribute__ ((noinline, noclone)) \
vec_uzp_##TYPE (TYPE *restrict a, TYPE *restrict b, \
TYPE *restrict c, long n) \
{ \
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
index 404429208a0..a096b6c5353 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c
@@ -9,7 +9,10 @@
{ \
TYPE a[N], b[N], c[N]; \
for (unsigned int i = 0; i < N; ++i) \
- c[i] = i * 2 + i % 5; \
+ { \
+ c[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
vec_uzp_##TYPE (a, b, c, N / 8); \
for (unsigned int i = 0; i < N; ++i) \
{ \
@@ -19,7 +22,7 @@
} \
}
-int
+int __attribute__ ((optimize (1)))
main (void)
{
TEST_ALL (HARNESS)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
index e76b3bc5abb..7b470cb04e2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_CONST(TYPE, MASK) \
TYPE __attribute__ ((noinline, noclone)) \
@@ -18,18 +18,18 @@ vec_perm_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_CONST (v4di, ((v4di) { 4, 3, 6, 1 }));
-VEC_PERM_CONST (v8si, ((v8si) { 3, 9, 11, 12, 2, 4, 4, 2 }));
-VEC_PERM_CONST (v16hi, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0,
- 22, 1, 8, 9, 3, 24, 15, 1 }));
-VEC_PERM_CONST (v32qi, ((v32qi) { 13, 31, 11, 2, 48, 28, 3, 4,
- 54, 11, 30, 1, 0, 61, 2, 3,
- 4, 5, 11, 63, 24, 11, 42, 39,
- 2, 57, 22, 11, 6, 16, 18, 21 }));
-VEC_PERM_CONST (v4df, ((v4di) { 7, 3, 2, 1 }));
-VEC_PERM_CONST (v8sf, ((v8si) { 1, 9, 13, 11, 2, 5, 4, 2 }));
-VEC_PERM_CONST (v16hf, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0,
- 22, 1, 8, 9, 3, 24, 15, 1 }));
+VEC_PERM_CONST (vnx2di, ((vnx2di) { 4, 3, 6, 1 }));
+VEC_PERM_CONST (vnx4si, ((vnx4si) { 3, 9, 11, 12, 2, 4, 4, 2 }));
+VEC_PERM_CONST (vnx8hi, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0,
+ 22, 1, 8, 9, 3, 24, 15, 1 }));
+VEC_PERM_CONST (vnx16qi, ((vnx16qi) { 13, 31, 11, 2, 48, 28, 3, 4,
+ 54, 11, 30, 1, 0, 61, 2, 3,
+ 4, 5, 11, 63, 24, 11, 42, 39,
+ 2, 57, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_CONST (vnx2df, ((vnx2di) { 7, 3, 2, 1 }));
+VEC_PERM_CONST (vnx4sf, ((vnx4si) { 1, 9, 13, 11, 2, 5, 4, 2 }));
+VEC_PERM_CONST (vnx8hf, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0,
+ 22, 1, 8, 9, 3, 24, 15, 1 }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
index b4f82091f7c..d397c3d6670 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_CONST_OVERRUN(TYPE, MASK) \
TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \
@@ -17,50 +17,50 @@ TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_CONST_OVERRUN (v4di, ((v4di) { 4 + (8 * 1), 3 + (8 * 1),
- 6 + (8 * 2), 1 + (8 * 3) }));
-VEC_PERM_CONST_OVERRUN (v8si, ((v8si) { 3 + (16 * 3), 9 + (16 * 4),
- 11 + (16 * 5), 12 + (16 * 3),
- 2 + (16 * 2), 4 + (16 * 1),
- 4 + (16 * 2), 2 + (16 * 1) }));
-VEC_PERM_CONST_OVERRUN (v16hi, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1),
- 5 + (32 * 3), 4 + (32 * 3),
- 21 + (32 * 1), 12 + (32 * 3),
- 13 + (32 * 3), 0 + (32 * 1),
- 22 + (32 * 2), 1 + (32 * 2),
- 8 + (32 * 2), 9 + (32 * 1),
- 3 + (32 * 2), 24 + (32 * 2),
- 15 + (32 * 1), 1 + (32 * 1) }));
-VEC_PERM_CONST_OVERRUN (v32qi, ((v32qi) { 13 + (64 * 2), 31 + (64 * 2),
- 11 + (64 * 2), 2 + (64 * 1),
- 48 + (64 * 1), 28 + (64 * 2),
- 3 + (64 * 2), 4 + (64 * 3),
- 54 + (64 * 1), 11 + (64 * 2),
- 30 + (64 * 2), 1 + (64 * 1),
- 0 + (64 * 1), 61 + (64 * 2),
- 2 + (64 * 3), 3 + (64 * 2),
- 4 + (64 * 3), 5 + (64 * 3),
- 11 + (64 * 3), 63 + (64 * 1),
- 24 + (64 * 1), 11 + (64 * 3),
- 42 + (64 * 3), 39 + (64 * 2),
- 2 + (64 * 2), 57 + (64 * 3),
- 22 + (64 * 3), 11 + (64 * 2),
- 6 + (64 * 2), 16 + (64 * 2),
- 18 + (64 * 2), 21 + (64 * 3) }));
-VEC_PERM_CONST_OVERRUN (v4df, ((v4di) { 7 + (8 * 1), 3 + (8 * 3),
- 2 + (8 * 5), 1 + (8 * 3) }));
-VEC_PERM_CONST_OVERRUN (v8sf, ((v8si) { 1 + (16 * 1), 9 + (16 * 2),
- 13 + (16 * 2), 11 + (16 * 3),
- 2 + (16 * 2), 5 + (16 * 2),
- 4 + (16 * 4), 2 + (16 * 3) }));
-VEC_PERM_CONST_OVERRUN (v16hf, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1),
- 5 + (32 * 3), 4 + (32 * 3),
- 21 + (32 * 1), 12 + (32 * 3),
- 13 + (32 * 3), 0 + (32 * 1),
- 22 + (32 * 2), 1 + (32 * 2),
- 8 + (32 * 2), 9 + (32 * 1),
- 3 + (32 * 2), 24 + (32 * 2),
- 15 + (32 * 1), 1 + (32 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx2di, ((vnx2di) { 4 + (8 * 1), 3 + (8 * 1),
+ 6 + (8 * 2), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4si, ((vnx4si) { 3 + (16 * 3), 9 + (16 * 4),
+ 11 + (16 * 5), 12 + (16 * 3),
+ 2 + (16 * 2), 4 + (16 * 1),
+ 4 + (16 * 2), 2 + (16 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx8hi, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1),
+ 5 + (32 * 3), 4 + (32 * 3),
+ 21 + (32 * 1), 12 + (32 * 3),
+ 13 + (32 * 3), 0 + (32 * 1),
+ 22 + (32 * 2), 1 + (32 * 2),
+ 8 + (32 * 2), 9 + (32 * 1),
+ 3 + (32 * 2), 24 + (32 * 2),
+ 15 + (32 * 1), 1 + (32 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx16qi, ((vnx16qi) { 13 + (64 * 2), 31 + (64 * 2),
+ 11 + (64 * 2), 2 + (64 * 1),
+ 48 + (64 * 1), 28 + (64 * 2),
+ 3 + (64 * 2), 4 + (64 * 3),
+ 54 + (64 * 1), 11 + (64 * 2),
+ 30 + (64 * 2), 1 + (64 * 1),
+ 0 + (64 * 1), 61 + (64 * 2),
+ 2 + (64 * 3), 3 + (64 * 2),
+ 4 + (64 * 3), 5 + (64 * 3),
+ 11 + (64 * 3), 63 + (64 * 1),
+ 24 + (64 * 1), 11 + (64 * 3),
+ 42 + (64 * 3), 39 + (64 * 2),
+ 2 + (64 * 2), 57 + (64 * 3),
+ 22 + (64 * 3), 11 + (64 * 2),
+ 6 + (64 * 2), 16 + (64 * 2),
+ 18 + (64 * 2), 21 + (64 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx2df, ((vnx2di) { 7 + (8 * 1), 3 + (8 * 3),
+ 2 + (8 * 5), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4sf, ((vnx4si) { 1 + (16 * 1), 9 + (16 * 2),
+ 13 + (16 * 2), 11 + (16 * 3),
+ 2 + (16 * 2), 5 + (16 * 2),
+ 4 + (16 * 4), 2 + (16 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx8hf, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1),
+ 5 + (32 * 3), 4 + (32 * 3),
+ 21 + (32 * 1), 12 + (32 * 3),
+ 13 + (32 * 3), 0 + (32 * 1),
+ 22 + (32 * 2), 1 + (32 * 2),
+ 8 + (32 * 2), 9 + (32 * 1),
+ 3 + (32 * 2), 24 + (32 * 2),
+ 15 + (32 * 1), 1 + (32 * 1) }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
index 7324c1da0a4..a0214880dbe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c
@@ -22,49 +22,49 @@
int main (void)
{
- TEST_VEC_PERM (v4di,
- ((v4di) { 12, 7, 36, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_VEC_PERM (v8si,
- ((v8si) { 6, 34, 36, 37, 5, 7, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_VEC_PERM (v16hi,
- ((v16hi) { 11, 44, 8, 7, 38, 15, 16, 3,
- 39, 4, 11, 12, 6, 41, 18, 4 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_VEC_PERM (v32qi,
- ((v32qi) { 5, 7, 7, 6, 12, 4, 7, 4,
- 36, 7, 6, 5, 4, 24, 6, 7,
- 4, 5, 7, 48, 4, 7, 36, 48,
- 6, 24, 6, 7, 6, 4, 6, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_VEC_PERM (v4df,
- ((v4df) { 48.5, 7.5, 6.5, 5.5 }),
- ((v4df) { 4.5, 5.5, 6.5, 7.5 }),
- ((v4df) { 12.5, 24.5, 36.5, 48.5 }));
- TEST_VEC_PERM (v8sf,
- ((v8sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }),
- ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
- ((v8sf) { 33.5, 34.5, 35.5, 36.5,
- 37.5, 38.5, 39.5, 40.5 }));
- TEST_VEC_PERM (v16hf,
- ((v16hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0,
- 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
- 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_VEC_PERM (vnx2di,
+ ((vnx2di) { 12, 7, 36, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx4si,
+ ((vnx4si) { 6, 34, 36, 37, 5, 7, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_VEC_PERM (vnx8hi,
+ ((vnx8hi) { 11, 44, 8, 7, 38, 15, 16, 3,
+ 39, 4, 11, 12, 6, 41, 18, 4 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_VEC_PERM (vnx16qi,
+ ((vnx16qi) { 5, 7, 7, 6, 12, 4, 7, 4,
+ 36, 7, 6, 5, 4, 24, 6, 7,
+ 4, 5, 7, 48, 4, 7, 36, 48,
+ 6, 24, 6, 7, 6, 4, 6, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx2df,
+ ((vnx2df) { 48.5, 7.5, 6.5, 5.5 }),
+ ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+ ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+ TEST_VEC_PERM (vnx4sf,
+ ((vnx4sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }),
+ ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+ ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+ 37.5, 38.5, 39.5, 40.5 }));
+ TEST_VEC_PERM (vnx8hf,
+ ((vnx8hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0,
+ 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+ 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
index a4efb4fea79..beabf272f11 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM_SINGLE(TYPE, MASK) \
TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \
@@ -17,18 +17,18 @@ TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \
return __builtin_shuffle (values1, values2, MASK); \
}
-VEC_PERM_SINGLE (v4di, ((v4di) { 0, 3, 2, 1 }));
-VEC_PERM_SINGLE (v8si, ((v8si) { 3, 7, 1, 0, 2, 4, 4, 2 }));
-VEC_PERM_SINGLE (v16hi, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0,
- 1, 1, 8, 9, 3, 14, 15, 1 }));
-VEC_PERM_SINGLE (v32qi, ((v32qi) { 13, 21, 11, 2, 8, 28, 3, 4,
- 14, 11, 30, 1, 0, 31, 2, 3,
- 4, 5, 11, 23, 24, 11, 12, 9,
- 2, 7, 22, 11, 6, 16, 18, 21 }));
-VEC_PERM_SINGLE (v4df, ((v4di) { 3, 3, 1, 1 }));
-VEC_PERM_SINGLE (v8sf, ((v8si) { 4, 5, 6, 0, 2, 7, 4, 2 }));
-VEC_PERM_SINGLE (v16hf, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0,
- 1, 1, 8, 9, 3, 14, 15, 1 }));
+VEC_PERM_SINGLE (vnx2di, ((vnx2di) { 0, 3, 2, 1 }));
+VEC_PERM_SINGLE (vnx4si, ((vnx4si) { 3, 7, 1, 0, 2, 4, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hi, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0,
+ 1, 1, 8, 9, 3, 14, 15, 1 }));
+VEC_PERM_SINGLE (vnx16qi, ((vnx16qi) { 13, 21, 11, 2, 8, 28, 3, 4,
+ 14, 11, 30, 1, 0, 31, 2, 3,
+ 4, 5, 11, 23, 24, 11, 12, 9,
+ 2, 7, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_SINGLE (vnx2df, ((vnx2di) { 3, 3, 1, 1 }));
+VEC_PERM_SINGLE (vnx4sf, ((vnx4si) { 4, 5, 6, 0, 2, 7, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hf, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0,
+ 1, 1, 8, 9, 3, 14, 15, 1 }));
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
index fbae30c8d1c..aa443563182 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c
@@ -17,49 +17,49 @@
int main (void)
{
- TEST_VEC_PERM (v4di,
- ((v4di) { 4, 7, 6, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 12, 24, 36, 48 }));
- TEST_VEC_PERM (v8si,
- ((v8si) { 6, 10, 4, 3, 5, 7, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
- TEST_VEC_PERM (v16hi,
- ((v16hi) { 11, 10, 8, 7, 14, 15, 16, 3,
- 4, 4, 11, 12, 6, 17, 18, 4 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48 }));
- TEST_VEC_PERM (v32qi,
- ((v32qi) { 5, 5, 7, 6, 4, 4, 7, 4,
- 6, 7, 6, 5, 4, 7, 6, 7,
- 4, 5, 7, 7, 4, 7, 4, 5,
- 6, 7, 6, 7, 6, 4, 6, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48,
- 12, 24, 36, 48, 12, 24, 36, 48 }));
- TEST_VEC_PERM (v4df,
- ((v4df) { 7.5, 7.5, 5.5, 5.5 }),
- ((v4df) { 4.5, 5.5, 6.5, 7.5 }),
- ((v4df) { 12.5, 24.5, 36.5, 48.5 }));
- TEST_VEC_PERM (v8sf,
- ((v8sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }),
- ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
- ((v8sf) { 33.5, 34.5, 35.5, 36.5,
- 37.5, 38.5, 39.5, 40.5 }));
- TEST_VEC_PERM (v16hf,
- ((v16hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0,
- 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
- 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+ TEST_VEC_PERM (vnx2di,
+ ((vnx2di) { 4, 7, 6, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx4si,
+ ((vnx4si) { 6, 10, 4, 3, 5, 7, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+ TEST_VEC_PERM (vnx8hi,
+ ((vnx8hi) { 11, 10, 8, 7, 14, 15, 16, 3,
+ 4, 4, 11, 12, 6, 17, 18, 4 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48 }));
+ TEST_VEC_PERM (vnx16qi,
+ ((vnx16qi) { 5, 5, 7, 6, 4, 4, 7, 4,
+ 6, 7, 6, 5, 4, 7, 6, 7,
+ 4, 5, 7, 7, 4, 7, 4, 5,
+ 6, 7, 6, 7, 6, 4, 6, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48,
+ 12, 24, 36, 48, 12, 24, 36, 48 }));
+ TEST_VEC_PERM (vnx2df,
+ ((vnx2df) { 7.5, 7.5, 5.5, 5.5 }),
+ ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+ ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+ TEST_VEC_PERM (vnx4sf,
+ ((vnx4sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }),
+ ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+ ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+ 37.5, 38.5, 39.5, 40.5 }));
+ TEST_VEC_PERM (vnx8hf,
+ ((vnx8hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0,
+ 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+ 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
index a82b57dc378..c4abc2de551 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define VEC_PERM(TYPE, MASKTYPE) \
TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \
@@ -17,13 +17,13 @@ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \
return __builtin_shuffle (values, mask); \
}
-VEC_PERM (v4di, v4di)
-VEC_PERM (v8si, v8si)
-VEC_PERM (v16hi, v16hi)
-VEC_PERM (v32qi, v32qi)
-VEC_PERM (v4df, v4di)
-VEC_PERM (v8sf, v8si)
-VEC_PERM (v16hf, v16hi)
+VEC_PERM (vnx2di, vnx2di)
+VEC_PERM (vnx4si, vnx4si)
+VEC_PERM (vnx8hi, vnx8hi)
+VEC_PERM (vnx16qi, vnx16qi)
+VEC_PERM (vnx2df, vnx2di)
+VEC_PERM (vnx4sf, vnx4si)
+VEC_PERM (vnx8hf, vnx8hi)
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
index 539c99d4f61..fd73bc9652f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c
@@ -18,48 +18,48 @@ extern void abort (void);
int main (void)
{
- TEST_VEC_PERM (v4di, v4di,
- ((v4di) { 5, 6, 7, 5 }),
- ((v4di) { 4, 5, 6, 7 }),
- ((v4di) { 1, 6, 3, 5 }));
- TEST_VEC_PERM (v8si, v8si,
- ((v8si) { 4, 8, 10, 10, 9, 8, 7, 5 }),
- ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hi, v16hi,
- ((v16hi) { 12, 16, 18, 10, 12, 13, 14, 4,
- 7, 18, 3, 5, 9, 8, 7, 13 }),
- ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v32qi, v32qi,
- ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 4, 5, 6, 7, 4, 5,
- 5, 6, 7, 4, 5, 6, 4, 5,
- 6, 7, 4, 5, 6, 7, 4, 5 }),
- ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 }),
- ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61,
- 5, 6, 7, 8, 9, 10, 28, 29,
- 30, 31, 32, 33, 54, 55, 56, 61 }));
- TEST_VEC_PERM (v4df, v4di,
- ((v4df) { 5.1, 6.1, 7.1, 5.1 }),
- ((v4df) { 4.1, 5.1, 6.1, 7.1 }),
- ((v4di) { 1, 6, 3, 5 }));
- TEST_VEC_PERM (v8sf, v8si,
- ((v8sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }),
- ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
- ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
- TEST_VEC_PERM (v16hf, v16hi,
- ((v16hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0,
- 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }),
- ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
- 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
- ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17,
- 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx2di, vnx2di,
+ ((vnx2di) { 5, 6, 7, 5 }),
+ ((vnx2di) { 4, 5, 6, 7 }),
+ ((vnx2di) { 1, 6, 3, 5 }));
+ TEST_VEC_PERM (vnx4si, vnx4si,
+ ((vnx4si) { 4, 8, 10, 10, 9, 8, 7, 5 }),
+ ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hi, vnx8hi,
+ ((vnx8hi) { 12, 16, 18, 10, 12, 13, 14, 4,
+ 7, 18, 3, 5, 9, 8, 7, 13 }),
+ ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx16qi, vnx16qi,
+ ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 4, 5, 6, 7, 4, 5,
+ 5, 6, 7, 4, 5, 6, 4, 5,
+ 6, 7, 4, 5, 6, 7, 4, 5 }),
+ ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7,
+ 4, 5, 6, 7, 4, 5, 6, 7 }),
+ ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61,
+ 5, 6, 7, 8, 9, 10, 28, 29,
+ 30, 31, 32, 33, 54, 55, 56, 61 }));
+ TEST_VEC_PERM (vnx2df, vnx2di,
+ ((vnx2df) { 5.1, 6.1, 7.1, 5.1 }),
+ ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+ ((vnx2di) { 1, 6, 3, 5 }));
+ TEST_VEC_PERM (vnx4sf, vnx4si,
+ ((vnx4sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }),
+ ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+ ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+ TEST_VEC_PERM (vnx8hf, vnx8hi,
+ ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0,
+ 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }),
+ ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+ 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+ ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+ 4, 31, 0, 18, 6, 5, 4, 10 }));
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
index c54db87fa21..2a268a447e3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, int n) \
-{ \
- for (int i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, int n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
index 62f82cc43f4..2f0f0f49e12 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
-{ \
- for (unsigned int i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
+ { \
+ for (unsigned int i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned int n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
index ace7ebc5a0f..026a8195238 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, long n) \
-{ \
- for (long i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, int64_t n) \
+ { \
+ for (int64_t i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, long n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-not {\tuqdec} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
index 0717eac1ff6..d71b141b431 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c
@@ -3,13 +3,13 @@
#include <stdint.h>
-#define VEC_PERM(TYPE) \
-TYPE __attribute__ ((weak)) \
-vec_while_##TYPE (TYPE *restrict a, unsigned long n) \
-{ \
- for (unsigned long i = 0; i < n; ++i) \
- a[i] += 1; \
-}
+#define ADD_LOOP(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ vec_while_##TYPE (TYPE *restrict a, uint64_t n) \
+ { \
+ for (uint64_t i = 0; i < n; ++i) \
+ a[i] += 1; \
+ }
#define TEST_ALL(T) \
T (int8_t) \
@@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned long n) \
T (float) \
T (double)
-TEST_ALL (VEC_PERM)
+TEST_ALL (ADD_LOOP)
/* { dg-final { scan-assembler-times {\tuqdec} 2 } } */
/* { dg-final { scan-assembler-times {\tuqdecb\tx[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c
deleted file mode 100644
index ead821b43ca..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b)
- goto l1;
- return c;
-}
-
-/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c
deleted file mode 100644
index 1a3502a0f94..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b < 32767)
- goto l1;
-return c;
-}
-
-/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c
deleted file mode 100644
index 125fc31a464..00000000000
--- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/* { dg-do assemble } */
-/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */
-
-int
-loop (short b)
-{
- int c = 0;
-l1:
- b++;
- c |= b;
- if (b < 32766)
- goto l1;
-return c;
-}
-
-/* { dg-final { scan-assembler-not {\tmov\tx[0-9], 65536\n} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9], 0\n} } } */
-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9], x[0-9], x[0-9], ne\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
index 918313f62bd..c84b88a2e70 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c
@@ -7,13 +7,13 @@
#include <stdint.h>
-typedef int64_t v4di __attribute__((vector_size (32)));
-typedef int32_t v8si __attribute__((vector_size (32)));
-typedef int16_t v16hi __attribute__((vector_size (32)));
-typedef int8_t v32qi __attribute__((vector_size (32)));
-typedef double v4df __attribute__((vector_size (32)));
-typedef float v8sf __attribute__((vector_size (32)));
-typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) X, Y + X
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y)
@@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 4, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 8, Y)
-#define INDEX_4 v4di
-#define INDEX_8 v8si
-#define INDEX_16 v16hi
-#define INDEX_32 v32qi
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
#define PERMUTE(TYPE, NUNITS) \
TYPE permute_##TYPE (TYPE values1, TYPE values2) \
@@ -36,13 +36,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32)));
}
#define TEST_ALL(T) \
- T (v4di, 4) \
- T (v8si, 8) \
- T (v16hi, 16) \
- T (v32qi, 32) \
- T (v4df, 4) \
- T (v8sf, 8) \
- T (v16hf, 16)
+ T (vnx2di, 4) \
+ T (vnx4si, 8) \
+ T (vnx8hi, 16) \
+ T (vnx16qi, 32) \
+ T (vnx2df, 4) \
+ T (vnx4sf, 8) \
+ T (vnx8hf, 16)
TEST_ALL (PERMUTE)
diff --git a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
index c7c15ee5c4a..aecf8262706 100644
--- a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
+++ b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
@@ -49,4 +49,6 @@ f12 (void)
return sum;
}
-/* { dg-final { scan-assembler-not "sp" } } */
+/* Fails for fixed-length SVE because we lack a vec_init pattern.
+ A later patch fixes this in generic code. */
+/* { dg-final { scan-assembler-not "sp" { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
index a6c1386c06e..2911da3a72d 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
@@ -1,46 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int b:5;
- unsigned int c:11, :0, d:8;
- struct { unsigned int ee:2; } e;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-extern void foo (test_st st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
- r.values.v4 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-4.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
index d51ce2d42c0..376e92b23fa 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
@@ -1,42 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned short b :5;
- unsigned char c;
- unsigned short d :11;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-5.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
index 77e9104b546..9845b6054c1 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
@@ -1,51 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int b : 3;
- unsigned int c : 14;
- unsigned int d : 1;
- struct {
- unsigned int ee : 2;
- unsigned short ff : 15;
- } e;
- unsigned char g : 1;
- unsigned char : 4;
- unsigned char h : 3;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
- r.values.v4 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-6.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
index 3d8941bbfee..2ea52dfe655 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
@@ -1,43 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned short b :5;
- unsigned char c;
- unsigned short d :11;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
-
+#include "../bitfield-7.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
index 9ffbb718d34..9bc32b83d74 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
@@ -1,45 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int :0;
- unsigned int b :1;
- unsigned short :0;
- unsigned short c;
- unsigned int :0;
- unsigned int d :21;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-8.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movs\tr4, #255" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
index 8a614182923..f6c15338d00 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
@@ -1,48 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- char a:3;
-} test_st3;
-
-typedef struct
-{
- char a:3;
-} test_st2;
-
-typedef struct
-{
- test_st2 st2;
- test_st3 st3;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-9.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #1799" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c
deleted file mode 100644
index 642f4e0346b..00000000000
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-mcmse" } */
-
-typedef struct
-{
- unsigned short a :11;
-} test_st_4;
-
-typedef union
-{
- char a;
- test_st_4 st4;
-}test_un_2;
-
-typedef struct
-{
- unsigned char a;
- unsigned int :0;
- unsigned int b :1;
- unsigned short :0;
- unsigned short c;
- unsigned int :0;
- unsigned int d :21;
-} test_st_3;
-
-typedef struct
-{
- unsigned char a :3;
- unsigned int b :13;
- test_un_2 un2;
-} test_st_2;
-
-typedef union
-{
- test_st_2 st2;
- test_st_3 st3;
-}test_un_1;
-
-typedef struct
-{
- unsigned char a :2;
- unsigned char :0;
- unsigned short b :5;
- unsigned char :0;
- unsigned char c :4;
- test_un_1 un1;
-} test_st_1;
-
-typedef union
-{
- test_st_1 st1;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st_1;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st_1);
-
-int
-main (void)
-{
- read_st_1 r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
- r.values.v4 = 0xFFFFFFFF;
-
- f (r.st1);
- return 0;
-}
-
-/* { dg-final { scan-assembler "mov\tip, r4" } } */
-/* { dg-final { scan-assembler "movw\tr4, #7939" } } */
-/* { dg-final { scan-assembler "movt\tr4, 15" } } */
-/* { dg-final { scan-assembler "ands\tr0, r4" } } */
-/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
-/* { dg-final { scan-assembler "movt\tr4, 2047" } } */
-/* { dg-final { scan-assembler "ands\tr1, r4" } } */
-/* { dg-final { scan-assembler "movs\tr4, #1" } } */
-/* { dg-final { scan-assembler "movt\tr4, 65535" } } */
-/* { dg-final { scan-assembler "ands\tr2, r4" } } */
-/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
-/* { dg-final { scan-assembler "movt\tr4, 31" } } */
-/* { dg-final { scan-assembler "ands\tr3, r4" } } */
-/* { dg-final { scan-assembler "mov\tr4, ip" } } */
-/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
-/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
-/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union.c
new file mode 100644
index 00000000000..31249489e89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+#include "../bitfield-and-union.x"
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #7939" } } */
+/* { dg-final { scan-assembler "movt\tr4, 15" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 2047" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #1" } } */
+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 31" } } */
+/* { dg-final { scan-assembler "ands\tr3, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
index 3007409ad88..795544fe11d 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
/* { dg-require-effective-target arm_arch_v8m_base_ok } */
/* { dg-add-options arm_arch_v8m_base } */
-/* { dg-options "-mcmse" } */
int __attribute__ ((cmse_nonsecure_call)) (*bar) (int);
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
index f2b931be591..7208a2cedd2 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
@@ -1,15 +1,9 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
/* { dg-require-effective-target arm_arch_v8m_base_ok } */
/* { dg-add-options arm_arch_v8m_base } */
-/* { dg-options "-mcmse" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
-int
-foo (int a)
-{
- return bar (1.0f, 2.0) + a + 1;
-}
+#include "../cmse-13.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
index 814502d4e5d..fec7dc10484 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
/* { dg-require-effective-target arm_arch_v8m_base_ok } */
/* { dg-add-options arm_arch_v8m_base } */
-/* { dg-options "-mcmse" } */
extern float bar (void);
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
index 95da045690a..43d45e7a63e 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
/* { dg-require-effective-target arm_arch_v8m_base_ok } */
/* { dg-add-options arm_arch_v8m_base } */
-/* { dg-options "-mcmse" } */
int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
index 0069fcdaebf..ca76e12cd92 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp" } */
/* { dg-require-effective-target arm_arch_v8m_base_ok } */
/* { dg-add-options arm_arch_v8m_base } */
-/* { dg-options "-mcmse -mfloat-abi=softfp" } */
double __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
index ff18e839b02..afd5b98509c 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
@@ -1,60 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a :2;
- unsigned char :0;
- unsigned short b :5;
- unsigned char :0;
- unsigned short c :3;
- unsigned char :0;
- unsigned int d :9;
-} test_st_1;
-
-typedef struct
-{
- unsigned short a :7;
- unsigned char :0;
- unsigned char b :1;
- unsigned char :0;
- unsigned short c :6;
-} test_st_2;
-
-typedef union
-{
- test_st_1 st_1;
- test_st_2 st_2;
-}test_un;
-
-typedef union
-{
- test_un un;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_un;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
-
-int
-main (void)
-{
- read_un r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.un);
- return 0;
-}
+#include "../union-1.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #8063" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
index b2e024b7f07..6e60f2a7628 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
@@ -1,73 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a :2;
- unsigned char :0;
- unsigned short b :5;
- unsigned char :0;
- unsigned short c :3;
- unsigned char :0;
- unsigned int d :9;
-} test_st_1;
-
-typedef struct
-{
- unsigned short a :7;
- unsigned char :0;
- unsigned char b :1;
- unsigned char :0;
- unsigned short c :6;
-} test_st_2;
-
-typedef struct
-{
- unsigned char a;
- unsigned int :0;
- unsigned int b :1;
- unsigned short :0;
- unsigned short c;
- unsigned int :0;
- unsigned int d :21;
-} test_st_3;
-
-typedef union
-{
- test_st_1 st_1;
- test_st_2 st_2;
- test_st_3 st_3;
-}test_un;
-
-typedef union
-{
- test_un un;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_un;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
-
-int
-main (void)
-{
- read_un r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
-
- f (r.un);
- return 0;
-}
+#include "../union-2.x"
/* { dg-final { scan-assembler "mov\tip, r4" } } */
/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-4.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-4.x
new file mode 100644
index 00000000000..62e35cc3cb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-4.x
@@ -0,0 +1,40 @@
+typedef struct
+{
+ unsigned char a;
+ unsigned int b:5;
+ unsigned int c:11, :0, d:8;
+ struct { unsigned int ee:2; } e;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+extern void foo (test_st st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+ r.values.v3 = 0xFFFFFFFF;
+ r.values.v4 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-5.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-5.x
new file mode 100644
index 00000000000..de5649dda6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-5.x
@@ -0,0 +1,36 @@
+typedef struct
+{
+ unsigned char a;
+ unsigned short b :5;
+ unsigned char c;
+ unsigned short d :11;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-6.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-6.x
new file mode 100644
index 00000000000..693a8ae0abb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-6.x
@@ -0,0 +1,45 @@
+typedef struct
+{
+ unsigned char a;
+ unsigned int b : 3;
+ unsigned int c : 14;
+ unsigned int d : 1;
+ struct {
+ unsigned int ee : 2;
+ unsigned short ff : 15;
+ } e;
+ unsigned char g : 1;
+ unsigned char : 4;
+ unsigned char h : 3;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+ r.values.v3 = 0xFFFFFFFF;
+ r.values.v4 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-7.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-7.x
new file mode 100644
index 00000000000..de5649dda6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-7.x
@@ -0,0 +1,36 @@
+typedef struct
+{
+ unsigned char a;
+ unsigned short b :5;
+ unsigned char c;
+ unsigned short d :11;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-8.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-8.x
new file mode 100644
index 00000000000..654b21e94b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-8.x
@@ -0,0 +1,39 @@
+typedef struct
+{
+ unsigned char a;
+ unsigned int :0;
+ unsigned int b :1;
+ unsigned short :0;
+ unsigned short c;
+ unsigned int :0;
+ unsigned int d :21;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+ r.values.v3 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-9.x b/gcc/testsuite/gcc.target/arm/cmse/bitfield-9.x
new file mode 100644
index 00000000000..7543ac52696
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-9.x
@@ -0,0 +1,42 @@
+typedef struct
+{
+ char a:3;
+} test_st3;
+
+typedef struct
+{
+ char a:3;
+} test_st2;
+
+typedef struct
+{
+ test_st2 st2;
+ test_st3 st3;
+} test_st;
+
+typedef union
+{
+ test_st st;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+ read_st r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+
+ f (r.st);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-and-union.x
index e139ba61af5..0a6eb3dd816 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-and-union.x
@@ -1,6 +1,3 @@
-/* { dg-do compile } */
-/* { dg-options "-mcmse" } */
-
typedef struct
{
unsigned short a :11;
@@ -76,19 +73,3 @@ main (void)
f (r.st1);
return 0;
}
-
-/* { dg-final { scan-assembler "movw\tip, #7939" } } */
-/* { dg-final { scan-assembler "movt\tip, 15" } } */
-/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
-/* { dg-final { scan-assembler "movw\tip, #65535" } } */
-/* { dg-final { scan-assembler "movt\tip, 2047" } } */
-/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
-/* { dg-final { scan-assembler "mov\tip, #1" } } */
-/* { dg-final { scan-assembler "movt\tip, 65535" } } */
-/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
-/* { dg-final { scan-assembler "movw\tip, #65535" } } */
-/* { dg-final { scan-assembler "movt\tip, 31" } } */
-/* { dg-final { scan-assembler "and\tr3, r3, ip" } } */
-/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
-/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
-/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-13.x b/gcc/testsuite/gcc.target/arm/cmse/cmse-13.x
new file mode 100644
index 00000000000..cdcd5ba6cf6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-13.x
@@ -0,0 +1,7 @@
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+ return bar (3.0f, 2.0) + a + 1;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-5.x b/gcc/testsuite/gcc.target/arm/cmse/cmse-5.x
new file mode 100644
index 00000000000..7b03819d6b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-5.x
@@ -0,0 +1,7 @@
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+ return bar ();
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-7.x b/gcc/testsuite/gcc.target/arm/cmse/cmse-7.x
new file mode 100644
index 00000000000..3fa372af237
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-7.x
@@ -0,0 +1,7 @@
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+ return bar () + a + 1;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-8.x b/gcc/testsuite/gcc.target/arm/cmse/cmse-8.x
new file mode 100644
index 00000000000..7e1479542ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-8.x
@@ -0,0 +1,7 @@
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+ return bar (2.0) + a + 1;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
index c3b1396d52e..55da2a0c622 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
@@ -1,46 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int b:5;
- unsigned int c:11, :0, d:8;
- struct { unsigned int ee:2; } e;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-extern void foo (test_st st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
- r.values.v4 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-4.x"
/* { dg-final { scan-assembler "movw\tip, #65535" } } */
/* { dg-final { scan-assembler "movt\tip, 255" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
index 0d029044aa9..383363233e6 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
@@ -1,42 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned short b :5;
- unsigned char c;
- unsigned short d :11;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-5.x"
/* { dg-final { scan-assembler "movw\tip, #8191" } } */
/* { dg-final { scan-assembler "movt\tip, 255" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
index 005515ab9cb..03c294ea323 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
@@ -1,51 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int b : 3;
- unsigned int c : 14;
- unsigned int d : 1;
- struct {
- unsigned int ee : 2;
- unsigned short ff : 15;
- } e;
- unsigned char g : 1;
- unsigned char : 4;
- unsigned char h : 3;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
- r.values.v4 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-6.x"
/* { dg-final { scan-assembler "movw\tip, #65535" } } */
/* { dg-final { scan-assembler "movt\tip, 1023" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
index 6dd218e62fd..7692a69b159 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
@@ -1,43 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned short b :5;
- unsigned char c;
- unsigned short d :11;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
-
+#include "../bitfield-7.x"
/* { dg-final { scan-assembler "movw\tip, #8191" } } */
/* { dg-final { scan-assembler "movt\tip, 255" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
index c833bcb0ae9..a0a488775fe 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
@@ -1,45 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a;
- unsigned int :0;
- unsigned int b :1;
- unsigned short :0;
- unsigned short c;
- unsigned int :0;
- unsigned int d :21;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-8.x"
/* { dg-final { scan-assembler "mov\tip, #255" } } */
/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
index d6e4cdb8c44..8bfeeb0bbf6 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
@@ -1,48 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- char a:3;
-} test_st3;
-
-typedef struct
-{
- char a:3;
-} test_st2;
-
-typedef struct
-{
- test_st2 st2;
- test_st3 st3;
-} test_st;
-
-typedef union
-{
- test_st st;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_st;
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
-
-int
-main (void)
-{
- read_st r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
-
- f (r.st);
- return 0;
-}
+#include "../bitfield-9.x"
/* { dg-final { scan-assembler "movw\tip, #1799" } } */
/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union.c
new file mode 100644
index 00000000000..aac5ae1a052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+#include "../bitfield-and-union.x"
+
+/* { dg-final { scan-assembler "movw\tip, #7939" } } */
+/* { dg-final { scan-assembler "movt\tip, 15" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 2047" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #1" } } */
+/* { dg-final { scan-assembler "movt\tip, 65535" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 31" } } */
+/* { dg-final { scan-assembler "and\tr3, r3, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
index d90ad811fc1..6f4d6b4b755 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
@@ -1,18 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
-
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
-int
-foo (int a)
-{
- return bar (3.0f, 2.0) + a + 1;
-}
+#include "../../cmse-13.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
index 88dec276281..0ae2a51990b 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
@@ -1,17 +1,12 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
-extern float bar (void);
+#include "../../cmse-5.x"
-float __attribute__ ((cmse_nonsecure_entry))
-foo (void)
-{
- return bar ();
-}
/* { dg-final { scan-assembler "mov\tr0, lr" } } */
/* { dg-final { scan-assembler "mov\tr1, lr" } } */
/* { dg-final { scan-assembler "mov\tr2, lr" } } */
@@ -32,8 +27,8 @@ foo (void)
/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { ! arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target arm_dsp } } } */
/* { dg-final { scan-assembler "push\t{r4}" } } */
/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
index c047cd51c94..141ba73484c 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
-int
-foo (int a)
-{
- return bar () + a + 1;
-}
+#include "../../cmse-7.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
index 20d2d4a8fb1..6c5e688f220 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
-int
-foo (int a)
-{
- return bar (2.0) + a + 1;
-}
+#include "../../cmse-8.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
index 0af586a7fd1..d35321bfda8 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
@@ -1,18 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
-
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
-int
-foo (int a)
-{
- return bar (3.0f, 2.0) + a + 1;
-}
+#include "../../cmse-13.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
index 29f60baf521..955f749cb72 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
@@ -1,17 +1,12 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
-extern float bar (void);
+#include "../../cmse-5.x"
-float __attribute__ ((cmse_nonsecure_entry))
-foo (void)
-{
- return bar ();
-}
/* { dg-final { scan-assembler "mov\tr0, lr" } } */
/* { dg-final { scan-assembler "mov\tr1, lr" } } */
/* { dg-final { scan-assembler "mov\tr2, lr" } } */
@@ -25,8 +20,8 @@ foo (void)
/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { ! arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target arm_dsp } } } */
/* { dg-final { scan-assembler "push\t{r4}" } } */
/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
index a5c64fb06ed..858555b8d89 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
-int
-foo (int a)
-{
- return bar () + a + 1;
-}
+#include "../../cmse-7.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
index 5e041b17b0e..f85d68a3eff 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
-int
-foo (int a)
-{
- return bar (2.0) + a + 1;
-}
+#include "../../cmse-8.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
index dbbd262c890..11d44550de9 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
@@ -1,16 +1,10 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
-/* { dg-options "-mcmse -mfloat-abi=soft" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
-int
-foo (int a)
-{
- return bar (1.0f, 2.0) + a + 1;
-}
+#include "../../cmse-13.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
index a7229ea8eb2..dfd2fe6323a 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
@@ -1,16 +1,10 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
-/* { dg-options "-mcmse -mfloat-abi=soft" } */
-
-extern float bar (void);
-float __attribute__ ((cmse_nonsecure_entry))
-foo (void)
-{
- return bar ();
-}
+#include "../../cmse-5.x"
/* { dg-final { scan-assembler "mov\tr1, lr" } } */
/* { dg-final { scan-assembler "mov\tr2, lr" } } */
@@ -18,7 +12,7 @@ foo (void)
/* { dg-final { scan-assembler "mov\tip, lr" } } */
/* { dg-final { scan-assembler-not "vmov" } } */
/* { dg-final { scan-assembler-not "vmsr" } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { ! arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target arm_dsp } } } */
/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
index e33568400ef..76ca271278e 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
@@ -1,16 +1,10 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
-/* { dg-options "-mcmse -mfloat-abi=soft" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
-int
-foo (int a)
-{
- return bar () + a + 1;
-}
+#include "../../cmse-7.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
index 024a12e0a41..a917aa7778a 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
@@ -1,16 +1,10 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
-/* { dg-options "-mcmse -mfloat-abi=soft" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
-int
-foo (int a)
-{
- return bar (2.0) + a + 1;
-}
+#include "../../cmse-8.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
index 7734d77dc38..01e5d659fe2 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
@@ -1,17 +1,12 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
-extern float bar (void);
+#include "../../cmse-5.x"
-float __attribute__ ((cmse_nonsecure_entry))
-foo (void)
-{
- return bar ();
-}
/* { dg-final { scan-assembler "__acle_se_foo:" } } */
/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
/* { dg-final { scan-assembler "mov\tr1, lr" } } */
@@ -33,8 +28,8 @@ foo (void)
/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { ! arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target arm_dsp } } } */
/* { dg-final { scan-assembler "push\t{r4}" } } */
/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
index fb195eb58d5..5d904786e41 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
-int
-foo (int a)
-{
- return bar () + a + 1;
-}
+#include "../../cmse-7.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
index 22ed3f8af88..3feee43c423 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
-int
-foo (int a)
-{
- return bar (2.0) + a + 1;
-}
+#include "../../cmse-8.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
index 9634065e7cb..4eb984f4479 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
-int
-foo (int a)
-{
- return bar (1.0f, 2.0) + a + 1;
-}
+#include "../../cmse-13.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
index 6addaa1a4ed..4815a480f66 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
@@ -1,17 +1,12 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
-extern float bar (void);
+#include "../../cmse-5.x"
-float __attribute__ ((cmse_nonsecure_entry))
-foo (void)
-{
- return bar ();
-}
/* { dg-final { scan-assembler "__acle_se_foo:" } } */
/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
/* { dg-final { scan-assembler "mov\tr1, lr" } } */
@@ -25,8 +20,8 @@ foo (void)
/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
-/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { ! arm_dsp } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target arm_dsp } } } */
/* { dg-final { scan-assembler "push\t{r4}" } } */
/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
index 04f8466cc11..5535c5514b1 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
-int
-foo (int a)
-{
- return bar () + a + 1;
-}
+#include "../../cmse-7.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
index ffe94de8541..6663fc43f5f 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
@@ -1,17 +1,11 @@
/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
/* { dg-require-effective-target arm_arch_v8m_main_ok } */
/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
-/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */
-
-int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
-int
-foo (int a)
-{
- return bar (2.0) + a + 1;
-}
+#include "../../cmse-8.x"
/* Checks for saving and clearing prior to function call. */
/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
index 1fc846cd7a5..071955f206c 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
@@ -1,60 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a :2;
- unsigned char :0;
- unsigned short b :5;
- unsigned char :0;
- unsigned short c :3;
- unsigned char :0;
- unsigned int d :9;
-} test_st_1;
-
-typedef struct
-{
- unsigned short a :7;
- unsigned char :0;
- unsigned char b :1;
- unsigned char :0;
- unsigned short c :6;
-} test_st_2;
-
-typedef union
-{
- test_st_1 st_1;
- test_st_2 st_2;
-}test_un;
-
-typedef union
-{
- test_un un;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_un;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
-
-int
-main (void)
-{
- read_un r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
-
- f (r.un);
- return 0;
-}
+#include "../union-1.x"
/* { dg-final { scan-assembler "movw\tip, #8063" } } */
/* { dg-final { scan-assembler "movt\tip, 63" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
index 420d0f136ef..c7431930ff9 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
@@ -1,73 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mcmse" } */
-typedef struct
-{
- unsigned char a :2;
- unsigned char :0;
- unsigned short b :5;
- unsigned char :0;
- unsigned short c :3;
- unsigned char :0;
- unsigned int d :9;
-} test_st_1;
-
-typedef struct
-{
- unsigned short a :7;
- unsigned char :0;
- unsigned char b :1;
- unsigned char :0;
- unsigned short c :6;
-} test_st_2;
-
-typedef struct
-{
- unsigned char a;
- unsigned int :0;
- unsigned int b :1;
- unsigned short :0;
- unsigned short c;
- unsigned int :0;
- unsigned int d :21;
-} test_st_3;
-
-typedef union
-{
- test_st_1 st_1;
- test_st_2 st_2;
- test_st_3 st_3;
-}test_un;
-
-typedef union
-{
- test_un un;
- struct
- {
- unsigned int v1;
- unsigned int v2;
- unsigned int v3;
- unsigned int v4;
- }values;
-} read_un;
-
-
-typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
-
-int
-main (void)
-{
- read_un r;
- foo_ns f;
-
- f = (foo_ns) 0x200000;
- r.values.v1 = 0xFFFFFFFF;
- r.values.v2 = 0xFFFFFFFF;
- r.values.v3 = 0xFFFFFFFF;
-
- f (r.un);
- return 0;
-}
+#include "../union-2.x"
/* { dg-final { scan-assembler "movw\tip, #8191" } } */
/* { dg-final { scan-assembler "movt\tip, 63" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/union-1.x b/gcc/testsuite/gcc.target/arm/cmse/union-1.x
new file mode 100644
index 00000000000..8fe95351495
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/union-1.x
@@ -0,0 +1,54 @@
+typedef struct
+{
+ unsigned char a :2;
+ unsigned char :0;
+ unsigned short b :5;
+ unsigned char :0;
+ unsigned short c :3;
+ unsigned char :0;
+ unsigned int d :9;
+} test_st_1;
+
+typedef struct
+{
+ unsigned short a :7;
+ unsigned char :0;
+ unsigned char b :1;
+ unsigned char :0;
+ unsigned short c :6;
+} test_st_2;
+
+typedef union
+{
+ test_st_1 st_1;
+ test_st_2 st_2;
+}test_un;
+
+typedef union
+{
+ test_un un;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+ read_un r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+
+ f (r.un);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/union-2.x b/gcc/testsuite/gcc.target/arm/cmse/union-2.x
new file mode 100644
index 00000000000..8a880e7cb5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/union-2.x
@@ -0,0 +1,67 @@
+typedef struct
+{
+ unsigned char a :2;
+ unsigned char :0;
+ unsigned short b :5;
+ unsigned char :0;
+ unsigned short c :3;
+ unsigned char :0;
+ unsigned int d :9;
+} test_st_1;
+
+typedef struct
+{
+ unsigned short a :7;
+ unsigned char :0;
+ unsigned char b :1;
+ unsigned char :0;
+ unsigned short c :6;
+} test_st_2;
+
+typedef struct
+{
+ unsigned char a;
+ unsigned int :0;
+ unsigned int b :1;
+ unsigned short :0;
+ unsigned short c;
+ unsigned int :0;
+ unsigned int d :21;
+} test_st_3;
+
+typedef union
+{
+ test_st_1 st_1;
+ test_st_2 st_2;
+ test_st_3 st_3;
+}test_un;
+
+typedef union
+{
+ test_un un;
+ struct
+ {
+ unsigned int v1;
+ unsigned int v2;
+ unsigned int v3;
+ unsigned int v4;
+ }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+ read_un r;
+ foo_ns f;
+
+ f = (foo_ns) 0x200000;
+ r.values.v1 = 0xFFFFFFFF;
+ r.values.v2 = 0xFFFFFFFF;
+ r.values.v3 = 0xFFFFFFFF;
+
+ f (r.un);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c b/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
index 1260a6f8eeb..d79d014e27c 100644
--- a/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
+++ b/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_soft_ok } */
/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=softfp" "-mfloat-abi=hard" } { "" } } */
/* { dg-options "-O2 -mfloat-abi=soft --save-temps" } */
extern void abort (void);
diff --git a/gcc/testsuite/gcc.target/arm/lp1189445.c b/gcc/testsuite/gcc.target/arm/lp1189445.c
index 766748e5509..4866953558a 100644
--- a/gcc/testsuite/gcc.target/arm/lp1189445.c
+++ b/gcc/testsuite/gcc.target/arm/lp1189445.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
+/* { dg-options "-O3" } */
/* { dg-require-effective-target arm_neon } */
/* { dg-add-options arm_neon } */
-/* { dg-options "-O3" } */
int id;
int
diff --git a/gcc/testsuite/gcc.target/arm/pr54300.C b/gcc/testsuite/gcc.target/arm/pr54300.C
index eb1a74e36cf..9105e279b33 100644
--- a/gcc/testsuite/gcc.target/arm/pr54300.C
+++ b/gcc/testsuite/gcc.target/arm/pr54300.C
@@ -51,6 +51,7 @@ test(unsigned short *_Inp, int32_t *_Out,
vst1q_s32( _Out, c );
}
+int
main()
{
unsigned short a[4] = {1, 2, 3, 4};
@@ -58,4 +59,5 @@ main()
test(a, b, 1, 1, ~0);
if (b[0] != 1 || b[1] != 2 || b[2] != 3 || b[3] != 4)
abort();
+ return 0;
}
diff --git a/gcc/testsuite/gcc.target/arm/pr67989.C b/gcc/testsuite/gcc.target/arm/pr67989.C
index 0006924e24f..89d2530f3a6 100644
--- a/gcc/testsuite/gcc.target/arm/pr67989.C
+++ b/gcc/testsuite/gcc.target/arm/pr67989.C
@@ -2,7 +2,8 @@
/* { dg-options "-std=c++11 -O2" } */
/* { dg-require-effective-target arm_arch_v4t_ok } */
/* { dg-add-options arm_arch_v4t } */
-/* { dg-additional-options "-marm" } */
+/* { dg-additional-options "-marm -Wno-return-type" } */
+
/* Duplicate version of the test in g++.dg to be able to run this test only if
ARMv4t in ARM execution state can be targetted. Newer architecture don't
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 46238265ae6..97a899cfb5c 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -610,8 +610,12 @@
#define __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, 1, D, E)
-
-
+#define __builtin_ia32_vgf2p8affineqb_v16qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v16qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v32qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v32qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v64qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v64qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E)
#include <wmmintrin.h>
#include <immintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx-2.c b/gcc/testsuite/gcc.target/i386/avx-2.c
index 0061d9cdd22..986fbd819e4 100644
--- a/gcc/testsuite/gcc.target/i386/avx-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr82855.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr82855.c
new file mode 100644
index 00000000000..563454c3578
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr82855.c
@@ -0,0 +1,14 @@
+/* PR target/82855 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq" } */
+/* { dg-final { scan-assembler {\mktestb\M} } } */
+
+#include <immintrin.h>
+
+int
+foo (const __m256i *ptr)
+{
+ __m256i v = _mm256_loadu_si256 (ptr);
+ __mmask8 m = _mm256_cmpeq_epi32_mask (v, _mm256_setzero_si256 ());
+ return 0 == m;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gf2p8affineqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gf2p8affineqb-2.c
new file mode 100644
index 00000000000..807da2c972f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gf2p8affineqb-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mgfni -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target gfni } */
+
+#define AVX512F
+
+#define GFNI
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+#include <x86intrin.h>
+
+static void
+CALC (unsigned char *r, unsigned char *s1, unsigned char *s2, unsigned char imm)
+{
+ for (int a = 0; a < SIZE/8; a++)
+ {
+ for (int val = 0; val < 8; val++)
+ {
+ unsigned char result = 0;
+ for (int bit = 0; bit < 8; bit++)
+ {
+ unsigned char temp = s1[a*8 + val] & s2[a*8 + bit];
+ unsigned char parity = __popcntd(temp);
+ if (parity % 2)
+ result |= (1 << (8 - bit - 1));
+ }
+ r[a*8 + val] = result ^ imm;
+ }
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+ unsigned char imm = 0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1 + i;
+ src2.a[i] = 1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, src1.a, src2.a, imm);
+
+ res1.x = INTRINSIC (_gf2p8affine_epi64_epi8) (src1.x, src2.x, imm);
+ res2.x = INTRINSIC (_mask_gf2p8affine_epi64_epi8) (res2.x, mask, src1.x, src2.x, imm);
+ res3.x = INTRINSIC (_maskz_gf2p8affine_epi64_epi8) (mask, src1.x, src2.x, imm);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gf2p8mulb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gf2p8mulb-2.c
new file mode 100644
index 00000000000..08fc5b7b7b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gf2p8mulb-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mgfni -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target gfni } */
+
+#define AVX512F
+
+#define GFNI
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned char *r, unsigned char *s1, unsigned char *s2)
+{
+ for (int i = 0; i < SIZE; i++)
+ {
+ unsigned short result = 0;
+ for (int bit = 0; bit < 8; bit++)
+ {
+ if ((s1[i] >> bit) & 1)
+ {
+ result ^= s2[i] << bit;
+ }
+ }
+ // Reduce result by x^8 + x^4 + x^3 + x + 1
+ for (int bit = 14; bit > 7; bit--)
+ {
+ unsigned short p = 0x11B << (bit - 8);
+ if ((result >> bit) & 1)
+ result ^= p;
+ }
+ r[i] = result;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1 + i;
+ src2.a[i] = 2 + 2*i;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, src1.a, src2.a);
+
+ res1.x = INTRINSIC (_gf2p8mul_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_gf2p8mul_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_gf2p8mul_epi8) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8affineqb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8affineqb-2.c
new file mode 100644
index 00000000000..1b650d07539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8affineqb-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -mgfni" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target gfni } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-gf2p8affineqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-gf2p8affineqb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8mulb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8mulb-2.c
new file mode 100644
index 00000000000..8215247a714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-gf2p8mulb-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -mgfni" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target gfni } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-gf2p8mulb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-gf2p8mulb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-1.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-1.c
new file mode 100644
index 00000000000..6ecf598708a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call" } */
+/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
+
+int x;
+int y;
+
+void __attribute__((noinline)) f1(void)
+{
+ x++;
+}
+
+static __attribute__((noinline)) void f3(void)
+{
+ y++;
+}
+
+void f2()
+{
+ f1();
+ f3();
+ f1();
+}
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
new file mode 100644
index 00000000000..2f702363041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call -fPIC" } */
+/* { dg-require-effective-target fpic } */
+/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
+
+#include "force-indirect-call-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-3.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-3.c
new file mode 100644
index 00000000000..37bc01bf5c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call -mcmodel=medium" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
+
+#include "force-indirect-call-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/gfni-1.c b/gcc/testsuite/gcc.target/i386/gfni-1.c
index 5e22c9eae92..bf72ad041a2 100644
--- a/gcc/testsuite/gcc.target/i386/gfni-1.c
+++ b/gcc/testsuite/gcc.target/i386/gfni-1.c
@@ -3,6 +3,12 @@
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
@@ -15,4 +21,10 @@ avx512vl_test (void)
x1 = _mm512_gf2p8affineinv_epi64_epi8(x1, x2, 3);
x1 = _mm512_mask_gf2p8affineinv_epi64_epi8(x1, m64, x2, x1, 3);
x1 = _mm512_maskz_gf2p8affineinv_epi64_epi8(m64, x1, x2, 3);
+ x1 = _mm512_gf2p8affine_epi64_epi8(x1, x2, 3);
+ x1 = _mm512_mask_gf2p8affine_epi64_epi8(x1, m64, x2, x1, 3);
+ x1 = _mm512_maskz_gf2p8affine_epi64_epi8(m64, x1, x2, 3);
+ x1 = _mm512_gf2p8mul_epi8(x1, x2);
+ x1 = _mm512_mask_gf2p8mul_epi8(x1, m64, x2, x1);
+ x1 = _mm512_maskz_gf2p8mul_epi8(m64, x1, x2);
}
diff --git a/gcc/testsuite/gcc.target/i386/gfni-2.c b/gcc/testsuite/gcc.target/i386/gfni-2.c
index 4d1f151aa40..413cb64c6b2 100644
--- a/gcc/testsuite/gcc.target/i386/gfni-2.c
+++ b/gcc/testsuite/gcc.target/i386/gfni-2.c
@@ -6,6 +6,18 @@
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
@@ -24,4 +36,16 @@ avx512vl_test (void)
x5 = _mm_gf2p8affineinv_epi64_epi8(x5, x6, 3);
x5 = _mm_mask_gf2p8affineinv_epi64_epi8(x5, m16, x6, x5, 3);
x5 = _mm_maskz_gf2p8affineinv_epi64_epi8(m16, x5, x6, 3);
+ x3 = _mm256_gf2p8affine_epi64_epi8(x3, x4, 3);
+ x3 = _mm256_mask_gf2p8affine_epi64_epi8(x3, m32, x4, x3, 3);
+ x3 = _mm256_maskz_gf2p8affine_epi64_epi8(m32, x3, x4, 3);
+ x5 = _mm_gf2p8affine_epi64_epi8(x5, x6, 3);
+ x5 = _mm_mask_gf2p8affine_epi64_epi8(x5, m16, x6, x5, 3);
+ x5 = _mm_maskz_gf2p8affine_epi64_epi8(m16, x5, x6, 3);
+ x3 = _mm256_gf2p8mul_epi8(x3, x4);
+ x3 = _mm256_mask_gf2p8mul_epi8(x3, m32, x4, x3);
+ x3 = _mm256_maskz_gf2p8mul_epi8(m32, x3, x4);
+ x5 = _mm_gf2p8mul_epi8(x5, x6);
+ x5 = _mm_mask_gf2p8mul_epi8(x5, m16, x6, x5);
+ x5 = _mm_maskz_gf2p8mul_epi8(m16, x5, x6);
}
diff --git a/gcc/testsuite/gcc.target/i386/gfni-3.c b/gcc/testsuite/gcc.target/i386/gfni-3.c
index de5f80b1124..2beedc8abb3 100644
--- a/gcc/testsuite/gcc.target/i386/gfni-3.c
+++ b/gcc/testsuite/gcc.target/i386/gfni-3.c
@@ -2,6 +2,10 @@
/* { dg-options "-mgfni -mavx -O2" } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgf2p8mulb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
@@ -14,4 +18,8 @@ avx512vl_test (void)
{
x3 = _mm256_gf2p8affineinv_epi64_epi8(x3, x4, 3);
x5 = _mm_gf2p8affineinv_epi64_epi8(x5, x6, 3);
+ x3 = _mm256_gf2p8affine_epi64_epi8(x3, x4, 3);
+ x5 = _mm_gf2p8affine_epi64_epi8(x5, x6, 3);
+ x3 = _mm256_gf2p8mul_epi8(x3, x4);
+ x5 = _mm_gf2p8mul_epi8(x5, x6);
}
diff --git a/gcc/testsuite/gcc.target/i386/gfni-4.c b/gcc/testsuite/gcc.target/i386/gfni-4.c
index 1532716191e..e0750054b82 100644
--- a/gcc/testsuite/gcc.target/i386/gfni-4.c
+++ b/gcc/testsuite/gcc.target/i386/gfni-4.c
@@ -1,6 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-mgfni -O2" } */
+/* { dg-options "-mgfni -O2 -msse" } */
/* { dg-final { scan-assembler-times "gf2p8affineinvqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "gf2p8mulb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
@@ -11,4 +13,6 @@ void extern
avx512vl_test (void)
{
x5 = _mm_gf2p8affineinv_epi64_epi8(x5, x6, 3);
+ x5 = _mm_gf2p8affine_epi64_epi8(x5, x6, 3);
+ x5 = _mm_gf2p8mul_epi8(x5, x6);
}
diff --git a/gcc/testsuite/gcc.target/i386/pr80425-3.c b/gcc/testsuite/gcc.target/i386/pr80425-3.c
new file mode 100644
index 00000000000..1bf80b17b1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr80425-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include <x86intrin.h>
+
+extern int a;
+
+__m512i
+f1 (__m512i x)
+{
+ return _mm512_srai_epi32 (x, a);
+}
+
+/* { dg-final { scan-assembler-times "movd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81706.c b/gcc/testsuite/gcc.target/i386/pr81706.c
index 333fd159770..b4b7c781b70 100644
--- a/gcc/testsuite/gcc.target/i386/pr81706.c
+++ b/gcc/testsuite/gcc.target/i386/pr81706.c
@@ -1,8 +1,8 @@
/* PR libstdc++/81706 */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
-/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } */
-/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } */
+/* { dg-final { scan-assembler "call\[^\n\r]__?ZGVdN4v_cos" } } */
+/* { dg-final { scan-assembler "call\[^\n\r]__?ZGVdN4v_sin" } } */
#ifdef __cplusplus
extern "C" {
diff --git a/gcc/testsuite/gcc.target/i386/pr82002-2a.c b/gcc/testsuite/gcc.target/i386/pr82002-2a.c
index bc85080ba8e..c31440debe2 100644
--- a/gcc/testsuite/gcc.target/i386/pr82002-2a.c
+++ b/gcc/testsuite/gcc.target/i386/pr82002-2a.c
@@ -1,7 +1,5 @@
/* { dg-do compile { target lp64 } } */
/* { dg-options "-Ofast -mstackrealign -mabi=ms" } */
-/* { dg-xfail-if "" { *-*-* } } */
-/* { dg-xfail-run-if "" { *-*-* } } */
void __attribute__((sysv_abi)) a (char *);
void
diff --git a/gcc/testsuite/gcc.target/i386/pr82002-2b.c b/gcc/testsuite/gcc.target/i386/pr82002-2b.c
index 10e44cd7b1d..939e069517d 100644
--- a/gcc/testsuite/gcc.target/i386/pr82002-2b.c
+++ b/gcc/testsuite/gcc.target/i386/pr82002-2b.c
@@ -1,7 +1,5 @@
/* { dg-do compile { target lp64 } } */
/* { dg-options "-Ofast -mstackrealign -mabi=ms -mcall-ms2sysv-xlogues" } */
-/* { dg-xfail-if "" { *-*-* } } */
-/* { dg-xfail-run-if "" { *-*-* } } */
void __attribute__((sysv_abi)) a (char *);
void
diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
new file mode 100644
index 00000000000..d7e530d5116
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+ z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82941-2.c b/gcc/testsuite/gcc.target/i386/pr82941-2.c
new file mode 100644
index 00000000000..db2f8589ab6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82941-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=knl" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
new file mode 100644
index 00000000000..9cdf81a9d60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512er -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c
new file mode 100644
index 00000000000..ddb4e689659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
new file mode 100644
index 00000000000..ff1d6d40eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=knl -mvzeroupper" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+ z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c
new file mode 100644
index 00000000000..0d3cb2333dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
new file mode 100644
index 00000000000..201fa98d8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c
new file mode 100644
index 00000000000..09f161c7291
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
new file mode 100644
index 00000000000..9932bdc5375
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+ z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c
new file mode 100644
index 00000000000..063a61c111d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c
new file mode 100644
index 00000000000..dedde8b854b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-7.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index c35ec9a47cb..9bdc73f0c57 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -627,5 +627,12 @@
#define __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v16qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v16qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v32qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v32qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v64qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v64qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E)
+
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 388026f927a..fb2c35ab909 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -689,3 +689,6 @@ test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)
test_2 (_mm_gf2p8affineinv_epi64_epi8, __m128i, __m128i, __m128i, 1)
test_2 (_mm256_gf2p8affineinv_epi64_epi8, __m256i, __m256i, __m256i, 1)
test_2 (_mm512_gf2p8affineinv_epi64_epi8, __m512i, __m512i, __m512i, 1)
+test_2 (_mm_gf2p8affine_epi64_epi8, __m128i, __m128i, __m128i, 1)
+test_2 (_mm256_gf2p8affine_epi64_epi8, __m256i, __m256i, __m256i, 1)
+test_2 (_mm512_gf2p8affine_epi64_epi8, __m512i, __m512i, __m512i, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 911258fa042..66c25c74add 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -626,6 +626,12 @@
#define __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, 1, D, E)
#define __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v16qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v16qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v32qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v32qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v64qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v64qi(A, B, 1)
+#define __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni")
diff --git a/gcc/testsuite/gcc.target/i386/stack-check-12.c b/gcc/testsuite/gcc.target/i386/stack-check-12.c
index cb69bb08086..980416946df 100644
--- a/gcc/testsuite/gcc.target/i386/stack-check-12.c
+++ b/gcc/testsuite/gcc.target/i386/stack-check-12.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fstack-clash-protection -mtune=generic" } */
+/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fomit-frame-pointer" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
__attribute__ ((noreturn)) void exit (int);
diff --git a/gcc/testsuite/gcc.target/mips/pr82981.c b/gcc/testsuite/gcc.target/mips/pr82981.c
new file mode 100644
index 00000000000..677e4cc01e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/pr82981.c
@@ -0,0 +1,13 @@
+/* PR target/82981 */
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+/* { dg-options "-march=mips64r6 -mabi=64 -mexplicit-relocs" } */
+
+unsigned long
+func (unsigned long a, unsigned long b)
+{
+ return a > (~0UL) / b;
+}
+
+/* { dg-final { scan-assembler-not "__multi3" } } */
+/* { dg-final { scan-assembler "\tdmuhu" } } */
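For context (illustrative only, not part of the test): PR target/82981 is about expanding the comparison above without a __multi3 libcall. For nonzero b, a > (~0UL) / b holds exactly when the 64x64->128-bit product a*b does not fit in 64 bits, which only needs the high doubleword that dmuhu computes. A minimal sketch of that equivalence, assuming __int128 support:

/* Illustrative sketch only, not part of the test: for nonzero b,
   a > (~0UL) / b  is true exactly when the high doubleword of the
   128-bit product (what dmuhu returns on MIPS64r6) is nonzero.  */
unsigned long
func_equivalent (unsigned long a, unsigned long b)
{
  unsigned __int128 prod = (unsigned __int128) a * b;
  return (unsigned long) (prod >> 64) != 0;
}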
diff --git a/gcc/testsuite/gcc.target/powerpc/builtin-vec-sums-be-int.c b/gcc/testsuite/gcc.target/powerpc/builtin-vec-sums-be-int.c
new file mode 100644
index 00000000000..b4dfd0637e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtin-vec-sums-be-int.c
@@ -0,0 +1,16 @@
+/* Test the __builtin_altivec_vsumsws_be() builtin.
+   It produces just the vsumsws instruction in both LE and BE modes. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-maltivec -O2" } */
+
+#include <altivec.h>
+
+vector signed int
+test_vec_sums (vector signed int vsi2, vector signed int vsi3)
+{
+ return __builtin_altivec_vsumsws_be (vsi2, vsi3);
+}
+
+/* { dg-final { scan-assembler-times "vsumsws" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
index 46a31aeecf5..9dc53da58ad 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
@@ -53,19 +53,20 @@ test_vull_bperm_vull_vuc (vector unsigned long long x,
test_ne_short 1 vcmpneh
test_ne_int 1 vcmpnew
test_ne_long 1 vcmpequd, 1 xxlnor inst
- test_nabs_long_long 1 xxspltib, 1 vsubudm, 1 vminsd
test_neg_long_long 1 vnegd
test_vull_bperm_vull_vuc 1 vbpermd
-
+ test_nabs_long_long (-O0) 1 xxspltib, 1 vsubudm, 1 vminsd
+   test_nabs_long_long (-O1) 1 vnegd, 1 vminsd
+*/
/* { dg-final { scan-assembler-times "vcmpneb" 1 } } */
/* { dg-final { scan-assembler-times "vcmpneh" 1 } } */
/* { dg-final { scan-assembler-times "vcmpnew" 1 } } */
/* { dg-final { scan-assembler-times "vcmpequd" 1 } } */
/* { dg-final { scan-assembler-times "xxlnor" 1 } } */
-/* { dg-final { scan-assembler-times "xxspltib" 1 } } */
-/* { dg-final { scan-assembler-times "vsubudm" 1 } } */
+/* { dg-final { scan-assembler-times "xxspltib" 0 } } */
+/* { dg-final { scan-assembler-times "vsubudm" 0 } } */
/* { dg-final { scan-assembler-times "vminsd" 1 } } */
-/* { dg-final { scan-assembler-times "vnegd" 1 } } */
+/* { dg-final { scan-assembler-times "vnegd" 2 } } */
/* { dg-final { scan-assembler-times "vbpermd" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-6-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-6-p9-runnable.c
new file mode 100644
index 00000000000..9319a372748
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-6-p9-runnable.c
@@ -0,0 +1,1046 @@
+/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <altivec.h> // vector
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int main() {
+ vector signed char char_src1, char_src2;
+ vector unsigned char uchar_src1, uchar_src2;
+ vector signed short int short_src1, short_src2;
+ vector unsigned short int ushort_src1, ushort_src2;
+ vector signed int int_src1, int_src2;
+ vector unsigned int uint_src1, uint_src2;
+ unsigned int result, expected_result;
+
+ /* Tests for: vec_first_match_index() */
+ /* char */
+ char_src1 = (vector signed char) {-1, 2, 3, 4, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, 2, 3, 20, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 0;
+
+ result = vec_first_match_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16};
+ expected_result = 16;
+
+ result = vec_first_match_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {0, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 2;
+
+ result = vec_first_match_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17};
+ uchar_src2 = (vector unsigned char) {3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18};
+ expected_result = 16;
+
+ result = vec_first_match_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* short int */
+ short_src1 = (vector short int) {10, -20, -30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {-10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 3;
+
+ result = vec_first_match_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+    abort();
+#endif
+
+ short_src1 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {0, 0, 0, 0, 0, 0, 0, 0};
+
+ expected_result = 8;
+
+ result = vec_first_match_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {0, 0, 0, 0, 0, 60, 70, 0};
+ ushort_src2 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 5;
+
+ result = vec_first_match_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {-20, 30, -40, 50,
+ 60, -70, 80, -90};
+ ushort_src2 = (vector short unsigned int) {20, -30, 40, -50,
+ -60, 70, -80, 90};
+
+ expected_result = 8;
+
+ result = vec_first_match_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* int */
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {10, 20, 30, 4};
+
+ expected_result = 3;
+
+ result = vec_first_match_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {4, 3, 2, 1};
+
+ expected_result = 4;
+
+ result = vec_first_match_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {11, 2, 33, 4};
+
+ expected_result = 1;
+
+ result = vec_first_match_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {2, 3, 4, 5};
+
+ expected_result = 4;
+
+ result = vec_first_match_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* Tests for: vec_first_mismatch_index() */
+ /* char */
+ char_src1 = (vector signed char) {-1, 2, 3, 4, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, 2, 3, 20, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 3;
+
+ result = vec_first_mismatch_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 16;
+
+ result = vec_first_mismatch_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 1;
+
+ result = vec_first_mismatch_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 11, 12, 13, 14, 15, 16};
+ expected_result = 8;
+
+ result = vec_first_mismatch_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 16;
+
+ result = vec_first_mismatch_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* short int */
+ short_src1 = (vector short int) {-10, -20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {-10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 8;
+
+ result = vec_first_mismatch_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 0};
+ ushort_src2 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 7;
+
+ result = vec_first_mismatch_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {20, 30, 40, 50, 60, 70, 80, 90};
+ ushort_src2 = (vector short unsigned int) {20, 30, 40, 50, 60, 70, 80, 90};
+
+ expected_result = 8;
+
+ result = vec_first_mismatch_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* int */
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {1, 20, 3, 4};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {1, 2, 3, 4};
+
+ expected_result = 4;
+
+ result = vec_first_mismatch_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 0, 3, 4};
+ int_src2 = (vector int) {1, 2, 3, 4};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {11, 2, 33, 4};
+
+ expected_result = 0;
+
+ result = vec_first_mismatch_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {1, 2, 3, 4};
+
+ expected_result = 4;
+
+ result = vec_first_mismatch_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* Tests for: vec_first_match_or_eos_index() */
+ /* char */
+ char_src1 = (vector signed char) {-1, 2, 3, 4, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, 2, 3, 20, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 0;
+
+ result = vec_first_match_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first match result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {-1, 2, 3, 0, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {2, 3, 20, 0, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 3;
+
+ result = vec_first_match_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16};
+ expected_result = 16;
+
+ result = vec_first_match_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {-1, 0, -3, -4, -5, -6, -7, -8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 1;
+
+ result = vec_first_match_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17};
+ uchar_src2 = (vector unsigned char) {3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18};
+ expected_result = 16;
+
+ result = vec_first_match_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* short int */
+ short_src1 = (vector short int) {10, -20, -30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {-10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 3;
+
+ result = vec_first_match_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {1, 20, 30, 40, 50, 60, 70, 80};
+
+ short_src2 = (vector short int) {10, 0, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 1;
+
+ result = vec_first_match_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {-10, -20, -30, -40, -50, -60, -70, -80};
+
+ short_src2 = (vector short int) {10, 20, 30, 40, 50, 0, 70, 80};
+
+ expected_result = 5;
+
+ result = vec_first_match_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {0, 0, 0, 0, 0, 0, 0, 0};
+
+ expected_result = 0;
+
+ result = vec_first_match_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {1, 2, 0, 0, 60, 70, 0};
+ ushort_src2 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 2;
+
+ result = vec_first_match_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {-20, 30, -40, 50,
+ 60, -70, 80, -90};
+ ushort_src2 = (vector short unsigned int) {20, -30, 40, -50,
+ -60, 70, -80, 90};
+
+ expected_result = 8;
+
+ result = vec_first_match_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+ ushort_src1 = (vector short unsigned int) {-20, 30, -40, 50,
+ 60, -70, 80, 0};
+
+ ushort_src2 = (vector short unsigned int) {20, -30, 40, -50,
+ -60, 70, -80, 90};
+
+ expected_result = 7;
+
+ result = vec_first_match_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* int */
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {10, 20, 30, 4};
+
+ expected_result = 3;
+
+ result = vec_first_match_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {0, 2, 3, 4};
+ int_src2 = (vector int) {4, 3, 2, 1};
+
+ expected_result = 0;
+
+ result = vec_first_match_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {4, 3, 2, 1};
+
+ expected_result = 4;
+
+ result = vec_first_match_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {11, 2, 33, 4};
+
+ expected_result = 1;
+
+ result = vec_first_match_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 0, 4};
+ uint_src2 = (vector unsigned int) {2, 3, 4, 5};
+
+ expected_result = 2;
+
+ result = vec_first_match_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {2, 3, 4, 5};
+
+ expected_result = 4;
+
+ result = vec_first_match_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first match or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* Tests for: vec_first_mismatch_or_eos_index() */
+ /* char */
+ char_src1 = (vector signed char) {-1, 2, 3, 4, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {-1, 2, 3, 20, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 3;
+
+ result = vec_first_mismatch_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {1, 2, 0, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {1, 2, 0, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 2;
+
+ result = vec_first_mismatch_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ char_src1 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 16;
+
+ result = vec_first_mismatch_or_eos_index (char_src1, char_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: char first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 0, 11, 12, 13, 14, 15, 16};
+ expected_result = 8;
+
+ result = vec_first_mismatch_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17};
+ uchar_src2 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 0, 16, 17};
+ expected_result = 13;
+
+ result = vec_first_mismatch_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17};
+ uchar_src2 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17};
+ expected_result = 16;
+
+ result = vec_first_mismatch_or_eos_index (uchar_src1, uchar_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uchar first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* short int */
+ short_src1 = (vector short int) {-10, -20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {-10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {0, 20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {0, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 0;
+
+ result = vec_first_mismatch_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 8;
+
+ result = vec_first_mismatch_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ short_src1 = (vector short int) {10, 0, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector short int) {10, 0, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (short_src1, short_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: short int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 0};
+ ushort_src2 = (vector short unsigned int) {10, 20, 30, 40, 50, 60, 70, 80};
+
+ expected_result = 7;
+
+ result = vec_first_mismatch_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {20, 0, 40, 50, 60, 70, 80, 90};
+ ushort_src2 = (vector short unsigned int) {20, 0, 40, 50, 60, 70, 80, 90};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ ushort_src1 = (vector short unsigned int) {20, 30, 40, 50, 60, 70, 80, 90};
+ ushort_src2 = (vector short unsigned int) {20, 30, 40, 50, 60, 70, 80, 90};
+
+ expected_result = 8;
+
+ result = vec_first_mismatch_or_eos_index (ushort_src1, ushort_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: ushort int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ /* int */
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {1, 20, 3, 4};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch or EOS result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 2, 3, 4};
+ int_src2 = (vector int) {1, 2, 3, 4};
+
+ expected_result = 4;
+
+ result = vec_first_mismatch_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 2, 0, 4};
+ int_src2 = (vector int) {1, 2, 0, 4};
+
+ expected_result = 2;
+
+ result = vec_first_mismatch_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ int_src1 = (vector int) {1, 0, 3, 4};
+ int_src2 = (vector int) {1, 2, 3, 4};
+
+ expected_result = 1;
+
+ result = vec_first_mismatch_or_eos_index (int_src1, int_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: int first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {11, 2, 33, 4};
+
+ expected_result = 0;
+
+ result = vec_first_mismatch_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 0};
+ uint_src2 = (vector unsigned int) {1, 2, 3, 0};
+
+ expected_result = 3;
+
+ result = vec_first_mismatch_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+ uint_src1 = (vector unsigned int) {1, 2, 3, 4};
+ uint_src2 = (vector unsigned int) {1, 2, 3, 4};
+
+ expected_result = 4;
+
+ result = vec_first_mismatch_or_eos_index (uint_src1, uint_src2);
+
+ if (result != expected_result)
+#ifdef DEBUG
+ printf("Error: uint first mismatch result (%d) does not match expected result (%d)\n",
+ result, expected_result);
+#else
+ abort();
+#endif
+
+}
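The four builtins exercised above can be summarized by small scalar reference loops (illustrative only, with arrays standing in for vector elements): the plain forms return the index of the first position where the inputs match or mismatch, returning the element count when no such position exists, and the _or_eos forms additionally stop at the first zero ("end of string") element in either input. The two models below cover one builtin of each flavor; the other two are the obvious variations on the loop condition.

/* Illustrative reference models, not part of the test.  N is the number
   of elements per vector (16, 8 or 4 above).  */
static unsigned int
first_match_index_model (const int *a, const int *b, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    if (a[i] == b[i])
      return i;
  return n;                            /* no match: element count */
}

static unsigned int
first_mismatch_or_eos_index_model (const int *a, const int *b, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    if (a[i] != b[i] || a[i] == 0 || b[i] == 0)
      return i;                        /* mismatch or end-of-string */
  return n;
}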
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c
new file mode 100644
index 00000000000..b6ffa238221
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c
@@ -0,0 +1,342 @@
+/* { dg-do run { target { powerpc*-*-* && { lp64 && p8vector_hw } } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+/* Verify vec_revb builtins */
+
+int
+main()
+{
+ int i;
+ vector bool char arg_bc, result_bc, expected_bc;
+ vector unsigned char arg_uc, result_uc, expected_uc;
+ vector signed char arg_sc, result_sc, expected_sc;
+
+ vector bool short int arg_bsi, result_bsi, expected_bsi;
+ vector unsigned short int arg_usi, result_usi, expected_usi;
+ vector short int arg_si, result_si, expected_si;
+
+ vector bool int arg_bi, result_bi, expected_bi;
+ vector unsigned int arg_ui, result_ui, expected_ui;
+ vector int arg_int, result_int, expected_int;
+
+ vector bool long long int arg_blli, result_blli, expected_blli;
+ vector unsigned long long int arg_ulli, result_ulli, expected_ulli;
+ vector long long int arg_lli, result_lli, expected_lli;
+
+ vector __uint128_t arg_uint128, result_uint128, expected_uint128;
+ vector __int128_t arg_int128, result_int128, expected_int128;
+
+ vector float arg_f, result_f, expected_f;
+ vector double arg_d, result_d, expected_d;
+
+ /* 8-bit ints */
+  /* Each element is a single byte, so reversing the bytes within each
+     element leaves the value unchanged. */
+ arg_bc = (vector bool char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_bc = arg_bc;
+
+ result_bc = vec_revb (arg_bc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_bc[i] != expected_bc[i])
+#ifdef DEBUG
+ printf("arg_bc[%d] = 0x%x, result_bc[%d] = 0x%x, expected_bc[%d] = 0x%x\n",
+ i, arg_bc[i], i, result_bc[i], i, expected_bc[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_uc = (vector unsigned char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_uc = arg_uc;
+
+ result_uc = vec_revb (arg_uc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_uc[i] != expected_uc[i])
+#ifdef DEBUG
+ printf("arg_uc[%d] = 0x%x, result_uc[%d] = 0x%x, expected_uc[%d] = 0x%x\n",
+ i, arg_uc[i], i, result_uc[i], i, expected_uc[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_sc = (vector signed char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_sc = arg_sc;
+
+ result_sc = vec_revb (arg_sc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_sc[i] != expected_sc[i])
+#ifdef DEBUG
+ printf("arg_sc[%d] = 0x%x, result_sc[%d] = 0x%x, expected_sc[%d] = 0x%x\n",
+ i, arg_sc[i], i, result_sc[i], i, expected_sc[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 16-bit ints */
+ arg_bsi = (vector bool short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246,
+ 0x8ACE, 0x1357, 0x9BDF};
+ expected_bsi = (vector bool short int) {0x2301, 0x6745, 0xDCFE, 0x98BA,
+ 0x4602, 0xCE8A, 0x5713, 0xDF9B};
+
+ result_bsi = vec_revb (arg_bsi);
+
+ for (i = 0; i < 8; i++) {
+ if (result_bsi[i] != expected_bsi[i])
+#ifdef DEBUG
+ printf("arg_bsi[%d] = 0x%x, result_bsi[%d] = 0x%x, expected_bsi[%d] = 0x%x\n",
+ i, arg_bsi[i], i, result_bsi[i], i, expected_bsi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_usi = (vector unsigned short int) {0x0123, 0x4567, 0xFEDC, 0xBA98,
+ 0x0246, 0x8ACE, 0x1357, 0x9BDF};
+ expected_usi = (vector unsigned short int) {0x2301, 0x6745, 0xDCFE, 0x98BA,
+ 0x4602, 0xCE8A, 0x5713, 0xDF9B};
+
+ result_usi = vec_revb (arg_usi);
+
+ for (i = 0; i < 8; i++) {
+ if (result_usi[i] != expected_usi[i])
+#ifdef DEBUG
+ printf("arg_usi[%d] = 0x%x, result_usi[%d] = 0x%x, expected_usi[%d] = 0x%x\n",
+ i, arg_usi[i], i, result_usi[i], i, expected_usi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_si = (vector short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246, 0x8ACE,
+ 0x1357, 0x9BDF};
+ expected_si = (vector short int) {0x2301, 0x6745, 0xDCFE, 0x98BA, 0x4602,
+ 0xCE8A, 0x5713, 0xDF9B};
+
+ result_si = vec_revb (arg_si);
+
+ for (i = 0; i < 8; i++) {
+ if (result_si[i] != expected_si[i])
+#ifdef DEBUG
+ printf("arg_si[%d] = 0x%x, result_si[%d] = 0x%x, expected_si[%d] = 0x%x\n",
+ i, arg_si[i], i, result_si[i], i, expected_si[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 32-bit ints */
+ arg_bi = (vector bool int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF};
+ expected_bi = (vector bool int) {0x67452301, 0x98BADCFE, 0xCE8A4602,
+ 0xDF9B5713};
+
+ result_bi = vec_revb (arg_bi);
+
+ for (i = 0; i < 4; i++) {
+ if (result_bi[i] != expected_bi[i])
+#ifdef DEBUG
+ printf("arg_bi[%d] = 0x%x, result_bi[%d] = 0x%x, expected_bi[%d] = 0x%x\n",
+ i, arg_bi[i], i, result_bi[i], i, expected_bi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_ui = (vector unsigned int) {0x01234567, 0xFEDCBA98, 0x02468ACE,
+ 0x13579BDF};
+ expected_ui = (vector unsigned int) {0x67452301, 0x98BADCFE, 0xCE8A4602,
+ 0xDF9B5713};
+
+ result_ui = vec_revb (arg_ui);
+
+ for (i = 0; i < 4; i++) {
+ if (result_ui[i] != expected_ui[i])
+#ifdef DEBUG
+ printf("arg_ui[%d] = 0x%x, result_ui[%d] = 0x%x, expected_ui[%d] = 0x%x\n",
+ i, arg_ui[i], i, result_ui[i], i, expected_ui[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_int = (vector int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF};
+ expected_int = (vector int) {0x67452301, 0x98BADCFE, 0xCE8A4602, 0xDF9B5713};
+
+ result_int = vec_revb (arg_int);
+
+ for (i = 0; i < 4; i++) {
+ if (result_int[i] != expected_int[i])
+#ifdef DEBUG
+ printf("arg_int[%d] = 0x%x, result_int[%d] = 0x%x, expected_int[%d] = 0x%x\n",
+ i, arg_int[i], i, result_int[i], i, expected_int[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 64-bit ints */
+ arg_blli = (vector bool long long int) {0x01234567FEDCBA98,
+ 0x02468ACE13579BDF};
+ expected_blli = (vector bool long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_blli = vec_revb (arg_blli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_blli[i] != expected_blli[i])
+#ifdef DEBUG
+ printf("arg_blli[%d] = 0x%x, result_blli[%d] = 0x%llx, expected_blli[%d] = 0x%llx\n",
+ i, arg_blli[i], i, result_blli[i], i, expected_blli[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_ulli = (vector unsigned long long int) {0x01234567FEDCBA98,
+ 0x02468ACE13579BDF};
+ expected_ulli = (vector unsigned long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_ulli = vec_revb (arg_ulli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_ulli[i] != expected_ulli[i])
+#ifdef DEBUG
+ printf("arg_ulli[%d] = 0x%x, result_ulli[%d] = 0x%llx, expected_ulli[%d] = 0x%llx\n",
+ i, arg_ulli[i], i, result_ulli[i], i, expected_ulli[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_lli = (vector long long int) {0x01234567FEDCBA98, 0x02468ACE13579BDF};
+ expected_lli = (vector long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_lli = vec_revb (arg_lli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_lli[i] != expected_lli[i])
+#ifdef DEBUG
+ printf("arg_lli[%d] = 0x%x, result_lli[%d] = 0x%llx, expected_lli[%d] = 0x%llx\n",
+ i, arg_lli[i], i, result_lli[i], i, expected_lli[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 128-bit ints */
+ arg_uint128[0] = 0x1627384950617243;
+ arg_uint128[0] = arg_uint128[0] << 64;
+ arg_uint128[0] |= 0x9405182930415263;
+ expected_uint128[0] = 0x6352413029180594;
+ expected_uint128[0] = expected_uint128[0] << 64;
+ expected_uint128[0] |= 0x4372615049382716;
+
+ result_uint128 = vec_revb (arg_uint128);
+
+ if (result_uint128[0] != expected_uint128[0])
+ {
+#ifdef DEBUG
+ printf("result_uint128[0] doesn't match expected_u128[0]\n");
+ printf("arg_uint128[0] = %llx ", arg_uint128[0] >> 64);
+ printf(" %llx\n", arg_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("result_uint128[0] = %llx ", result_uint128[0] >> 64);
+ printf(" %llx\n", result_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("expected_uint128[0] = %llx ", expected_uint128[0] >> 64);
+ printf(" %llx\n", expected_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+#else
+ abort();
+#endif
+ }
+
+ arg_int128[0] = 0x1627384950617283;
+ arg_int128[0] = arg_int128[0] << 64;
+ arg_int128[0] |= 0x9405182930415263;
+ expected_int128[0] = 0x6352413029180594;
+ expected_int128[0] = expected_int128[0] << 64;
+  expected_int128[0] |= 0x8372615049382716;
+
+ result_int128 = vec_revb (arg_int128);
+
+ if (result_int128[0] != expected_int128[0])
+ {
+#ifdef DEBUG
+ printf("result_int128[0] doesn't match expected128[0]\n");
+ printf("arg_int128[0] = %llx ", arg_int128[0] >> 64);
+ printf(" %llx\n", arg_int128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("result_int128[0] = %llx ", result_int128[0] >> 64);
+ printf(" %llx\n", result_int128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("expected_int128[0] = %llx ", expected_int128[0] >> 64);
+ printf(" %llx\n", expected_int128[0] & 0xFFFFFFFFFFFFFFFF);
+#else
+ abort();
+#endif
+ }
+
+ /* 32-bit floats */
+ /* 0x42f7224e, 0x43e471ec, 0x49712062, 0x4a0f2b38 */
+ arg_f = (vector float) {123.567, 456.89, 987654.123456, 2345678.0};
+ /* 0x4e22F742, 0xec71e443, 0x62207149, 0x382b0f4a */
+ expected_f = (vector float) {683528320.0,
+ -1169716232068291395011477504.0,
+ 739910526898278498304.0,
+ 0.0000407838160754181444644927978515625};
+
+ result_f = vec_revb (arg_f);
+
+ for (i = 0; i < 4; i++) {
+ if (result_f[i] != expected_f[i])
+#ifdef DEBUG
+ printf(" arg_f[%d] = %f, result_f[%d] = %f, expected_f[%d] = %f\n",
+ i, arg_f[i], i, result_f[i], i, expected_f[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 64-bit floats */
+ /* 0x419D6F34547E6B75 0x4194E5FEC781948B */
+ arg_d = (vector double) {123456789.123456789, 87654321.87654321};
+ /* 0x756B7E54346F9D41 0x8B9481C7FEE59441 */
+ expected_d = (vector double) {4.12815412905659550518671402044E257,
+ -6.99269992046390236552018719554E-253};
+
+ result_d = vec_revb (arg_d);
+
+ for (i = 0; i < 2; i++) {
+ if (result_d[i] != expected_d[i])
+#ifdef DEBUG
+ printf("arg_d[%d] = %f, result_d[%d] = %f, expected_d[%d] = %f\n",
+ i, arg_d[i], i, result_d[i], i, expected_d[i]);
+#else
+ abort();
+#endif
+ }
+}
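A compact way to state what this test checks (illustrative only): vec_revb reverses the byte order inside each element and leaves the element order itself alone, so for 1-byte elements it is an identity, which is exactly what the char cases above expect.

/* Illustrative per-element model, not part of the test.  */
#include <stddef.h>

static void
revb_element_model (unsigned char *elem, size_t elem_size)
{
  for (size_t i = 0; i < elem_size / 2; i++)
    {
      unsigned char tmp = elem[i];
      elem[i] = elem[elem_size - 1 - i];
      elem[elem_size - 1 - i] = tmp;
    }
}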
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-hw4.c b/gcc/testsuite/gcc.target/powerpc/float128-hw4.c
new file mode 100644
index 00000000000..be5d0d6eef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-hw4.c
@@ -0,0 +1,135 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -mabi=ieeelongdouble -Wno-psabi" } */
+
+/* Ensure that the ISA 3.0 IEEE 128-bit floating point built-in functions can
+ be used with long double when the default is IEEE 128-bit. */
+
+#ifndef TYPE
+#define TYPE long double
+#endif
+
+unsigned int
+get_double_exponent (double a)
+{
+ return __builtin_vec_scalar_extract_exp (a);
+}
+
+unsigned int
+get_float128_exponent (TYPE a)
+{
+ return __builtin_vec_scalar_extract_exp (a);
+}
+
+unsigned long
+get_double_mantissa (double a)
+{
+ return __builtin_vec_scalar_extract_sig (a);
+}
+
+__uint128_t
+get_float128_mantissa (TYPE a)
+{
+ return __builtin_vec_scalar_extract_sig (a);
+}
+
+double
+set_double_exponent_ulong (unsigned long a, unsigned long e)
+{
+ return __builtin_vec_scalar_insert_exp (a, e);
+}
+
+TYPE
+set_float128_exponent_uint128 (__uint128_t a, unsigned long e)
+{
+ return __builtin_vec_scalar_insert_exp (a, e);
+}
+
+double
+set_double_exponent_double (double a, unsigned long e)
+{
+ return __builtin_vec_scalar_insert_exp (a, e);
+}
+
+TYPE
+set_float128_exponent_float128 (TYPE a, __uint128_t e)
+{
+ return __builtin_vec_scalar_insert_exp (a, e);
+}
+
+TYPE
+sqrt_odd (TYPE a)
+{
+ return __builtin_sqrtf128_round_to_odd (a);
+}
+
+double
+trunc_odd (TYPE a)
+{
+ return __builtin_truncf128_round_to_odd (a);
+}
+
+TYPE
+add_odd (TYPE a, TYPE b)
+{
+ return __builtin_addf128_round_to_odd (a, b);
+}
+
+TYPE
+sub_odd (TYPE a, TYPE b)
+{
+ return __builtin_subf128_round_to_odd (a, b);
+}
+
+TYPE
+mul_odd (TYPE a, TYPE b)
+{
+ return __builtin_mulf128_round_to_odd (a, b);
+}
+
+TYPE
+div_odd (TYPE a, TYPE b)
+{
+ return __builtin_divf128_round_to_odd (a, b);
+}
+
+TYPE
+fma_odd (TYPE a, TYPE b, TYPE c)
+{
+ return __builtin_fmaf128_round_to_odd (a, b, c);
+}
+
+TYPE
+fms_odd (TYPE a, TYPE b, TYPE c)
+{
+ return __builtin_fmaf128_round_to_odd (a, b, -c);
+}
+
+TYPE
+nfma_odd (TYPE a, TYPE b, TYPE c)
+{
+ return -__builtin_fmaf128_round_to_odd (a, b, c);
+}
+
+TYPE
+nfms_odd (TYPE a, TYPE b, TYPE c)
+{
+ return -__builtin_fmaf128_round_to_odd (a, b, -c);
+}
+
+/* { dg-final { scan-assembler {\mxsiexpdp\M} } } */
+/* { dg-final { scan-assembler {\mxsiexpqp\M} } } */
+/* { dg-final { scan-assembler {\mxsxexpdp\M} } } */
+/* { dg-final { scan-assembler {\mxsxexpqp\M} } } */
+/* { dg-final { scan-assembler {\mxsxsigdp\M} } } */
+/* { dg-final { scan-assembler {\mxsxsigqp\M} } } */
+/* { dg-final { scan-assembler {\mxsaddqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsdivqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsmaddqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsmsubqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsmulqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsnmaddqpo\M} } } */
+/* { dg-final { scan-assembler {\mxsnmsubqpo\M} } } */
+/* { dg-final { scan-assembler {\mxssqrtqpo\M} } } */
+/* { dg-final { scan-assembler {\mxssubqpo\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
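For background (not something the test itself verifies), the *_round_to_odd builtins are typically used when a _Float128 intermediate will later be narrowed, since rounding to odd preserves enough information to avoid double rounding in the final conversion. A hedged usage sketch:

/* Illustrative usage only; the test above just checks that the builtins
   map to the xs*qpo instructions rather than library calls.  */
static double
fma_then_narrow (_Float128 a, _Float128 b, _Float128 c)
{
  _Float128 wide = __builtin_fmaf128_round_to_odd (a, b, c);
  return (double) wide;        /* the final rounding happens once, here */
}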
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax.c b/gcc/testsuite/gcc.target/powerpc/float128-minmax.c
new file mode 100644
index 00000000000..f8b025d66fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -ffast-math" } */
+
+#ifndef TYPE
+#define TYPE _Float128
+#endif
+
+/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a
+ call. */
+TYPE f128_min (TYPE a, TYPE b) { return __builtin_fminf128 (a, b); }
+TYPE f128_max (TYPE a, TYPE b) { return __builtin_fmaxf128 (a, b); }
+
+/* { dg-final { scan-assembler {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
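The shape this test expects is roughly a compare feeding a select: with -ffast-math the NaN and signed-zero corner cases of fmin/fmax may be ignored, so an expansion along the following lines (illustrative only) satisfies the scans above.

/* Illustrative only: the expected no-call expansion of f128_min.  */
static _Float128
f128_min_expected_shape (_Float128 a, _Float128 b)
{
  return a < b ? a : b;        /* xscmpuqp plus a select, no bl */
}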
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-xxbr-1.c b/gcc/testsuite/gcc.target/powerpc/p9-xxbr-1.c
index 164f11f6ea3..7a07d0f8f2a 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-xxbr-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-xxbr-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-do compile { target { powerpc*-*-* && { lp64 && p9vector_hw } } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O3" } */
@@ -10,25 +10,25 @@
vector char
rev_char (vector char a)
{
- return vec_revb (a); /* XXBRQ. */
+  return vec_revb (a); /* This is a no-op; it maps to a move instruction.  */
}
vector bool char
rev_bool_char (vector bool char a)
{
- return vec_revb (a); /* XXBRQ. */
+  return vec_revb (a); /* This is a no-op; it maps to a move instruction.  */
}
vector signed char
rev_schar (vector signed char a)
{
- return vec_revb (a); /* XXBRQ. */
+  return vec_revb (a); /* This is a no-op; it maps to a move instruction.  */
}
vector unsigned char
rev_uchar (vector unsigned char a)
{
- return vec_revb (a); /* XXBRQ. */
+  return vec_revb (a); /* This is a no-op; it maps to a move instruction.  */
}
vector short
@@ -81,5 +81,4 @@ rev_double (vector double a)
/* { dg-final { scan-assembler-times "xxbrd" 1 } } */
/* { dg-final { scan-assembler-times "xxbrh" 3 } } */
-/* { dg-final { scan-assembler-times "xxbrq" 4 } } */
/* { dg-final { scan-assembler-times "xxbrw" 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-xxbr-3.c b/gcc/testsuite/gcc.target/powerpc/p9-xxbr-3.c
new file mode 100644
index 00000000000..98ad7ebfd87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p9-xxbr-3.c
@@ -0,0 +1,99 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+/* Verify that the XXBRH and XXBRW instructions are generated when the value
+   is forced into a vector register, and that XXBRD is always generated for
+   bswap64 of values held in registers. */
+
+unsigned short
+do_bswap16_mem (unsigned short *p)
+{
+ return __builtin_bswap16 (*p); /* LHBRX. */
+}
+
+unsigned short
+do_bswap16_reg (unsigned short a)
+{
+ return __builtin_bswap16 (a); /* gpr sequences. */
+}
+
+void
+do_bswap16_store (unsigned short *p, unsigned short a)
+{
+ *p = __builtin_bswap16 (a); /* STHBRX. */
+}
+
+unsigned short
+do_bswap16_vect (unsigned short a)
+{
+ __asm__ (" # %x0" : "+v" (a));
+ return __builtin_bswap16 (a); /* XXBRW. */
+}
+
+unsigned int
+do_bswap32_mem (unsigned int *p)
+{
+ return __builtin_bswap32 (*p); /* LWBRX. */
+}
+
+unsigned int
+do_bswap32_reg (unsigned int a)
+{
+ return __builtin_bswap32 (a); /* gpr sequences. */
+}
+
+void
+do_bswap32_store (unsigned int *p, unsigned int a)
+{
+ *p = __builtin_bswap32 (a); /* STWBRX. */
+}
+
+unsigned int
+do_bswap32_vect (unsigned int a)
+{
+ __asm__ (" # %x0" : "+v" (a));
+ return __builtin_bswap32 (a); /* XXBRW. */
+}
+
+unsigned long
+do_bswap64_mem (unsigned long *p)
+{
+ return __builtin_bswap64 (*p); /* LDBRX. */
+}
+
+unsigned long
+do_bswap64_reg (unsigned long a)
+{
+ return __builtin_bswap64 (a); /* gpr sequences. */
+}
+
+void
+do_bswap64_store (unsigned long *p, unsigned int a)
+{
+ *p = __builtin_bswap64 (a); /* STDBRX. */
+}
+
+double
+do_bswap64_double (unsigned long a)
+{
+ return (double) __builtin_bswap64 (a); /* XXBRD. */
+}
+
+unsigned long
+do_bswap64_vect (unsigned long a)
+{
+ __asm__ (" # %x0" : "+v" (a)); /* XXBRD. */
+ return __builtin_bswap64 (a);
+}
+
+/* XXBRH/XXBRW should appear only for the vector-register variants; XXBRD covers all register forms of bswap64. */
+/* { dg-final { scan-assembler-times "xxbrd" 3 } } */
+/* { dg-final { scan-assembler-times "xxbrh" 1 } } */
+/* { dg-final { scan-assembler-times "xxbrw" 1 } } */
+/* { dg-final { scan-assembler-times "ldbrx" 1 } } */
+/* { dg-final { scan-assembler-times "lhbrx" 1 } } */
+/* { dg-final { scan-assembler-times "lwbrx" 1 } } */
+/* { dg-final { scan-assembler-times "stdbrx" 1 } } */
+/* { dg-final { scan-assembler-times "sthbrx" 1 } } */
+/* { dg-final { scan-assembler-times "stwbrx" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr82748-1.c b/gcc/testsuite/gcc.target/powerpc/pr82748-1.c
new file mode 100644
index 00000000000..15a746bcf63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr82748-1.c
@@ -0,0 +1,82 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -mabi=ieeelongdouble -Wno-psabi" } */
+
+/* Make sure the old 'q' builtin functions work correctly when the long double
+ default has been changed to be IEEE 128-bit floating point. */
+
+_Float128
+do_fabs_f (_Float128 a)
+{
+ return __builtin_fabsq (a);
+}
+
+_Float128
+do_copysign_f (_Float128 a, _Float128 b)
+{
+ return __builtin_copysignq (a, b);
+}
+
+_Float128
+do_inf_f (void)
+{
+ return __builtin_infq ();
+}
+
+_Float128
+do_nan_f (void)
+{
+ return __builtin_nanq ("");
+}
+
+_Float128
+do_nans_f (void)
+{
+ return __builtin_nansq ("");
+}
+
+_Float128
+do_huge_val_f (void)
+{
+ return __builtin_huge_valq ();
+}
+
+long double
+do_fabs_ld (long double a)
+{
+ return __builtin_fabsq (a);
+}
+
+long double
+do_copysign_ld (long double a, long double b)
+{
+ return __builtin_copysignq (a, b);
+}
+
+long double
+do_inf_ld (void)
+{
+ return __builtin_infq ();
+}
+
+long double
+do_nan_ld (void)
+{
+ return __builtin_nanq ("");
+}
+
+long double
+do_nans_ld (void)
+{
+ return __builtin_nansq ("");
+}
+
+long double
+do_huge_val_ld (void)
+{
+ return __builtin_huge_valq ();
+}
+
+/* { dg-final { scan-assembler {\mxsabsqp\M} } } */
+/* { dg-final { scan-assembler {\mxscpsgnqp\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr82748-2.c b/gcc/testsuite/gcc.target/powerpc/pr82748-2.c
new file mode 100644
index 00000000000..0079394b101
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr82748-2.c
@@ -0,0 +1,46 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -mabi=ibmlongdouble -Wno-psabi" } */
+
+/* Make sure the old 'q' builtin functions work correctly when the long double
+ default uses the IBM double-double format. */
+
+_Float128
+do_fabs (_Float128 a)
+{
+ return __builtin_fabsq (a);
+}
+
+_Float128
+do_copysign (_Float128 a, _Float128 b)
+{
+ return __builtin_copysignq (a, b);
+}
+
+_Float128
+do_inf (void)
+{
+ return __builtin_infq ();
+}
+
+_Float128
+do_nan (void)
+{
+ return __builtin_nanq ("");
+}
+
+_Float128
+do_nans (void)
+{
+ return __builtin_nansq ("");
+}
+
+_Float128
+do_huge_val (void)
+{
+ return __builtin_huge_valq ();
+}
+
+/* { dg-final { scan-assembler {\mxsabsqp\M} } } */
+/* { dg-final { scan-assembler {\mxscpsgnqp\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c
new file mode 100644
index 00000000000..b122bf5ce3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "" { powerpc*-*-aix* } } */
+/* { dg-options "-O3 -mcpu=power9" } */
+
+/* Verify that we vectorize this SAD loop using vabsdub. */
+
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+
+static int
+foo (unsigned char *w, int i, unsigned char *x, int j)
+{
+ int tot = 0;
+ for (int a = 0; a < 16; a++)
+ {
+ for (int b = 0; b < 16; b++)
+ tot += abs (w[b] - x[b]);
+ w += i;
+ x += j;
+ }
+ return tot;
+}
+
+void
+bar (unsigned char *w, unsigned char *x, int i, int *result)
+{
+ *result = foo (w, 16, x, i);
+}
+
+/* { dg-final { scan-assembler-times "vabsdub" 16 } } */
+/* { dg-final { scan-assembler-times "vsum4ubs" 16 } } */
+/* { dg-final { scan-assembler-times "vadduwm" 17 } } */
+
+/* Note: One of the 16 adds is optimized out (add with zero),
+ leaving 15. The extra two adds are for the final reduction. */
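The expected counts follow from the loop shape, assuming (an assumption of this tally, not something the test asserts) that the 16-iteration outer loop is fully unrolled at -O3:

  vabsdub  : 16 unrolled iterations x 1 absolute difference   = 16
  vsum4ubs : 16 unrolled iterations x 1 partial sum           = 16
  vadduwm  : 16 accumulations - 1 folded add-with-zero
             + 2 adds in the final horizontal reduction       = 17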
diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c
new file mode 100644
index 00000000000..b1b6de9ddea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "" { powerpc*-*-aix* } } */
+/* { dg-options "-O3 -mcpu=power9" } */
+
+/* Verify that we vectorize this SAD loop using vabsduh. */
+
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+
+static int
+foo (unsigned short *w, int i, unsigned short *x, int j)
+{
+ int tot = 0;
+ for (int a = 0; a < 16; a++)
+ {
+ for (int b = 0; b < 8; b++)
+ tot += abs (w[b] - x[b]);
+ w += i;
+ x += j;
+ }
+ return tot;
+}
+
+void
+bar (unsigned short *w, unsigned short *x, int i, int *result)
+{
+ *result = foo (w, 8, x, i);
+}
+
+/* { dg-final { scan-assembler-times "vabsduh" 16 } } */
+/* { dg-final { scan-assembler-times "vsum4shs" 16 } } */
+/* { dg-final { scan-assembler-times "vadduwm" 17 } } */
+
+/* Note: One of the 16 adds is optimized out (add with zero),
+ leaving 15. The extra two adds are for the final reduction. */
diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c
new file mode 100644
index 00000000000..0513a507484
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O3 -mcpu=power9" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+
+/* Verify that we get correct code when we vectorize this SAD loop using
+ vabsdub. */
+
+extern void abort ();
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+
+static int
+foo (unsigned char *w, int i, unsigned char *x, int j)
+{
+ int tot = 0;
+ for (int a = 0; a < 16; a++)
+ {
+ for (int b = 0; b < 16; b++)
+ tot += abs (w[b] - x[b]);
+ w += i;
+ x += j;
+ }
+ return tot;
+}
+
+void
+bar (unsigned char *w, unsigned char *x, int i, int *result)
+{
+ *result = foo (w, 16, x, i);
+}
+
+int
+main ()
+{
+ unsigned char m[256];
+ unsigned char n[256];
+ int sum, i;
+
+ for (i = 0; i < 256; ++i)
+ if (i % 2 == 0)
+ {
+ m[i] = (i % 8) * 2 + 1;
+ n[i] = -(i % 8);
+ }
+ else
+ {
+ m[i] = -((i % 8) * 2 + 2);
+ n[i] = -((i % 8) >> 1);
+ }
+
+ bar (m, n, 16, &sum);
+
+ if (sum != 32384)
+ abort ();
+
+ return 0;
+}
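As a cross-check on the expected result, a hypothetical scalar rewrite (not part of the patch) that fills the arrays with the same pattern and accumulates the absolute differences in index order; since foo is called with i == j == 16, the vectorized loop above visits each of the 256 elements exactly once, so the two computations agree.

/* Scalar reference for the 32384 constant in sad-vectorize-3.c.  */
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  unsigned char m[256], n[256];
  int sum = 0;

  for (int i = 0; i < 256; ++i)
    if (i % 2 == 0)
      {
        m[i] = (i % 8) * 2 + 1;
        n[i] = -(i % 8);
      }
    else
      {
        m[i] = -((i % 8) * 2 + 2);
        n[i] = -((i % 8) >> 1);
      }

  for (int i = 0; i < 256; ++i)
    sum += abs (m[i] - n[i]);   /* unsigned char operands promote to int */

  printf ("%d\n", sum);         /* expected to print 32384 */
  return 0;
}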
diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c
new file mode 100644
index 00000000000..2db016563a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O3 -mcpu=power9" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+
+/* Verify that we get correct code when we vectorize this SAD loop using
+ vabsduh. */
+
+extern void abort ();
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+
+static int
+foo (unsigned short *w, int i, unsigned short *x, int j)
+{
+ int tot = 0;
+ for (int a = 0; a < 16; a++)
+ {
+ for (int b = 0; b < 8; b++)
+ tot += abs (w[b] - x[b]);
+ w += i;
+ x += j;
+ }
+ return tot;
+}
+
+void
+bar (unsigned short *w, unsigned short *x, int i, int *result)
+{
+ *result = foo (w, 8, x, i);
+}
+
+int
+main ()
+{
+ unsigned short m[128];
+ unsigned short n[128];
+ int sum, i;
+
+ for (i = 0; i < 128; ++i)
+ if (i % 2 == 0)
+ {
+ m[i] = (i % 8) * 2 + 1;
+ n[i] = i % 8;
+ }
+ else
+ {
+ m[i] = (i % 8) * 4 - 3;
+ n[i] = (i % 8) >> 1;
+ }
+
+ bar (m, n, 8, &sum);
+
+ if (sum != 992)
+ abort ();
+
+ return 0;
+}
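The expected value 992 can also be checked by hand: with i == j == 8 every element is visited once, and the inputs repeat with period 16. Within one 16-element block the even slots (i % 8 pattern 0,2,4,6 twice) contribute |1-0| + |5-2| + |9-4| + |13-6| = 16 twice, and the odd slots (pattern 1,3,5,7 twice) contribute |1-0| + |9-1| + |17-2| + |25-3| = 46 twice, so each block adds 2*16 + 2*46 = 124; eight blocks of 16 over the 128 elements give 124 * 8 = 992.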
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
index d01d86b94eb..28ce1cd39e4 100644
--- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c
@@ -1,11 +1,11 @@
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */
-/* { dg-final { scan-assembler "stxvw4x" } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* Verify that swap optimization does not interfere with element-reversing
+/* Verify that swap optimization does not interfere with unaligned
loads and stores. */
/* Test case to resolve PR79044. */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
index 6f3c0937ba4..f74a117ace4 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c
@@ -12,9 +12,10 @@
#include <altivec.h>
+volatile vector signed long long x = { 25399, -12900 };
+volatile vector signed long long y = { 12178, -9987 };
+
vector signed long long foo () {
- vector signed long long x = { 25399, -12900 };
- vector signed long long y = { 12178, -9987 };
vector bool long long b = vec_cmpge (x, y);
vector signed long long z = vec_sel (y, x, b);
return z;
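For reference, a minimal scalar sketch (the function name is illustrative, not part of the test) of the per-element operation the vec_cmpge/vec_sel pair performs: lanes where the compare is true take the element of x, the others take the element of y, i.e. an element-wise maximum.

#include <stdint.h>

/* Scalar view of vec_sel (y, x, vec_cmpge (x, y)) on two signed
   64-bit lanes; for the constants in the test this yields
   { 25399, -9987 }.  */
static void
cmpge_sel_scalar (const int64_t x[2], const int64_t y[2], int64_t z[2])
{
  for (int i = 0; i < 2; i++)
    z[i] = (x[i] >= y[i]) ? x[i] : y[i];
}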
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
index 8e036e3e2c9..5c09c70ae28 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
index e510a448a81..a74f7398543 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
index 0ea5aa79dc6..f7f1e0d7fb2 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
index 6bb5ebe24e4..8ec94bd4a50 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
index a8d3f175378..2f47697d384 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
index dae3e2291e2..11670859996 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
index 550a3531afd..031a48f1ca3 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c
@@ -1,7 +1,7 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
-/* { dg-options "-mcpu=power9" } */
+/* { dg-options "-mcpu=power9 -O1" } */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-2.c b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-2.c
index 969107a24f7..cd4bb9dc9f7 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-2.c
@@ -10,5 +10,5 @@ count_trailing_zero_byte_bits (vector unsigned char *arg1_p)
{
vector unsigned char arg_1 = *arg1_p;
- return __builtin_vec_vctzlsbb (arg_1); /* { dg-error "builtin function '__builtin_altivec_vctzlsbb' requires the '-mcpu=power9' option" } */
+ return __builtin_vec_vctzlsbb (arg_1); /* { dg-error "builtin function '__builtin_altivec_vctzlsbb_v16qi' requires the '-mcpu=power9' option" } */
}