Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c             |  12
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def              |   4
-rw-r--r--  gcc/config/aarch64/aarch64-fusion-pairs.def       |   1
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def  |   4
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h               |  18
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                |  54
-rw-r--r--  gcc/config/aarch64/aarch64.c                      | 555
-rw-r--r--  gcc/config/aarch64/aarch64.h                      |  34
-rw-r--r--  gcc/config/aarch64/aarch64.md                     | 256
-rw-r--r--  gcc/config/aarch64/arm_neon.h                     |   2
-rw-r--r--  gcc/config/aarch64/constraints.md                 |  18
-rw-r--r--  gcc/config/aarch64/cortex-a57-fma-steering.c      |  17
-rw-r--r--  gcc/config/aarch64/iterators.md                   |   3
-rw-r--r--  gcc/config/aarch64/predicates.md                  |  13
-rw-r--r--  gcc/config/aarch64/rtems.h                        |  17
15 files changed, 718 insertions(+), 290 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index f09399f4c15..d30009ba441 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -530,7 +530,7 @@ aarch64_mangle_builtin_type (const_tree type) } static tree -aarch64_simd_builtin_std_type (enum machine_mode mode, +aarch64_simd_builtin_std_type (machine_mode mode, enum aarch64_type_qualifiers q) { #define QUAL_TYPE(M) \ @@ -566,7 +566,7 @@ aarch64_simd_builtin_std_type (enum machine_mode mode, } static tree -aarch64_lookup_simd_builtin_type (enum machine_mode mode, +aarch64_lookup_simd_builtin_type (machine_mode mode, enum aarch64_type_qualifiers q) { int i; @@ -585,7 +585,7 @@ aarch64_lookup_simd_builtin_type (enum machine_mode mode, } static tree -aarch64_simd_builtin_type (enum machine_mode mode, +aarch64_simd_builtin_type (machine_mode mode, bool unsigned_p, bool poly_p) { if (poly_p) @@ -649,7 +649,7 @@ aarch64_init_simd_builtin_types (void) for (i = 0; i < nelts; i++) { tree eltype = aarch64_simd_types[i].eltype; - enum machine_mode mode = aarch64_simd_types[i].mode; + machine_mode mode = aarch64_simd_types[i].mode; if (aarch64_simd_types[i].itype == NULL) { @@ -1015,7 +1015,7 @@ typedef enum static rtx aarch64_simd_expand_args (rtx target, int icode, int have_retval, tree exp, builtin_simd_arg *args, - enum machine_mode builtin_mode) + machine_mode builtin_mode) { rtx pat; rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */ @@ -1040,7 +1040,7 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, else { tree arg = CALL_EXPR_ARG (exp, opc - have_retval); - enum machine_mode mode = insn_data[icode].operand[opc].mode; + machine_mode mode = insn_data[icode].operand[opc].mode; op[opc] = expand_normal (arg); switch (thisarg) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index f8342ca722d..b8d0ba6b69e 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) /* Qualcomm ('Q') cores. */ -AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1) -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1) +AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) /* Samsung ('S') cores. 
*/ AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index f0e6dbcdd81..300cd00e4bf 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -34,5 +34,6 @@ AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK) AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) +AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) #undef AARCH64_FUSION_PAIR diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index c0752ce3470..c4f059ab7c5 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -63,4 +63,8 @@ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp") /* Enabling or disabling "rcpc" only changes "rcpc". */ AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc") +/* Enabling "rdma" also enables "fp", "simd". + Disabling "rdma" just disables "rdma". */ +AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma") + #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index bfe44a75e12..beff28e2272 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -319,6 +319,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); int aarch64_branch_cost (bool, bool); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); +bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_constant_address_p (rtx); bool aarch64_emit_approx_div (rtx, rtx, rtx); @@ -326,6 +327,7 @@ bool aarch64_emit_approx_sqrt (rtx, rtx, bool); void aarch64_expand_call (rtx, rtx, bool); bool aarch64_expand_movmem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); +bool aarch64_float_const_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs); bool aarch64_gen_movmemqi (rtx *); @@ -342,8 +344,8 @@ bool aarch64_modes_tieable_p (machine_mode mode1, bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); bool aarch64_mov_operand_p (rtx, machine_mode); -int aarch64_simd_attr_length_rglist (enum machine_mode); -rtx aarch64_reverse_mask (enum machine_mode); +int aarch64_simd_attr_length_rglist (machine_mode); +rtx aarch64_reverse_mask (machine_mode); bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT); char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode); char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned); @@ -351,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree); bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); bool aarch64_regno_ok_for_index_p (int, bool); +bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail); bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high); -bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, 
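Note on the new "rdma" entry above: in AARCH64_OPT_EXTENSION the third field lists flags switched on together with the extension, and the fourth field lists flags switched off with it (0 here, so +nordma removes only rdma). A minimal runnable sketch of that resolution, using toy flag values — FL_FP and friends are stand-ins, not the real AARCH64_FL_* masks:

    #include <stdint.h>
    #include <stdio.h>

    #define FL_FP   (1u << 0)
    #define FL_SIMD (1u << 1)
    #define FL_RDMA (1u << 2)

    /* +ext: enabling an extension also enables what it implies
       (third AARCH64_OPT_EXTENSION field).  */
    static uint32_t
    enable_ext (uint32_t isa, uint32_t ext, uint32_t on_implies)
    {
      return isa | ext | on_implies;
    }

    /* +noext: disabling removes only the extension itself plus the
       fourth field -- which is 0 for "rdma".  */
    static uint32_t
    disable_ext (uint32_t isa, uint32_t ext, uint32_t off_implies)
    {
      return isa & ~(ext | off_implies);
    }

    int
    main (void)
    {
      uint32_t isa = enable_ext (0, FL_RDMA, FL_FP | FL_SIMD);
      printf ("+rdma   -> %#x (fp, simd, rdma)\n", (unsigned) isa);
      isa = disable_ext (isa, FL_RDMA, 0);
      printf ("+nordma -> %#x (fp and simd survive)\n", (unsigned) isa);
      return 0;
    }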
machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); @@ -411,7 +413,7 @@ void aarch64_save_restore_target_globals (tree); /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); -void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int); +void aarch64_simd_emit_reg_reg_move (rtx *, machine_mode, unsigned int); /* Expand builtins for SIMD intrinsics. */ rtx aarch64_simd_expand_builtin (int, tree, rtx); @@ -444,7 +446,7 @@ bool aarch64_atomic_ldop_supported_p (enum rtx_code); void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); -bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE); +bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE); #endif /* RTX_CODE */ void aarch64_init_builtins (void); @@ -468,11 +470,11 @@ extern void aarch64_final_prescan_insn (rtx_insn *); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); -int aarch64_ccmp_mode_to_code (enum machine_mode mode); +int aarch64_ccmp_mode_to_code (machine_mode mode); bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset); -bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode); -bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode); +bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode); +bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode); extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, tree, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 264a9c047ce..011fcec0795 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1033,6 +1033,18 @@ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) +(define_insn "*aarch64_mla_elt_merge<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand:<VEL> 1 "register_operand" "w")) + (match_operand:VDQHS 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]" + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + (define_insn "aarch64_mls<mode>" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0") @@ -1080,6 +1092,18 @@ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) +(define_insn "*aarch64_mls_elt_merge<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 1 "register_operand" "0") + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand:<VEL> 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + ;; Max/Min operations. 
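Note on the *aarch64_mla_elt_merge<mode> / *aarch64_mls_elt_merge<mode> patterns above: they match a multiply whose operand is a vec_duplicate of a scalar register, folding the dup into the indexed MLA/MLS form. A loop of this shape is the kind of source that produces that RTL once auto-vectorized (the exact instruction chosen is the vectorizer's call, so the asm in the comment is illustrative):

    #include <stdint.h>

    /* With -O3 on AArch64 this accumulate loop can vectorize so the
       broadcast of B is merged into the multiply-accumulate, e.g.
         mla v0.4s, v1.4s, v2.s[0]
       rather than a separate dup followed by a vector mla.  */
    void
    mla_scalar (int32_t *restrict acc, const int32_t *restrict a,
                int32_t b, int n)
    {
      for (int i = 0; i < n; i++)
        acc[i] += a[i] * b;
    }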
(define_insn "<su><maxmin><mode>3" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") @@ -2809,38 +2833,10 @@ (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { - rtx op1, op2; - if (BYTES_BIG_ENDIAN) - { - op1 = operands[2]; - op2 = operands[1]; - } - else - { - op1 = operands[1]; - op2 = operands[2]; - } - emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2)); - DONE; -} -) + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); -(define_insn_and_split "aarch64_combine_internal<mode>" - [(set (match_operand:<VDBL> 0 "register_operand" "=&w") - (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] - "TARGET_SIMD" - "#" - "&& reload_completed" - [(const_int 0)] -{ - if (BYTES_BIG_ENDIAN) - aarch64_split_simd_combine (operands[0], operands[2], operands[1]); - else - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } -[(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine<mode>" diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 95592f9fa17..055ebafb830 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed); +static machine_mode +aarch64_simd_container_mode (machine_mode mode, unsigned width); /* Major revision number of the ARM Architecture implemented by the target. */ unsigned aarch64_architecture_version; @@ -206,22 +208,6 @@ static const struct cpu_addrcost_table generic_addrcost_table = 0 /* imm_offset */ }; -static const struct cpu_addrcost_table cortexa57_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0, /* imm_offset */ -}; - static const struct cpu_addrcost_table exynosm1_addrcost_table = { { @@ -254,22 +240,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = 0, /* imm_offset */ }; -static const struct cpu_addrcost_table qdf24xx_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0 /* imm_offset */ -}; - static const struct cpu_addrcost_table thunderx2t99_addrcost_table = { { @@ -390,13 +360,13 @@ static const struct cpu_vector_cost thunderx_vector_cost = 3, /* scalar_load_cost */ 1, /* scalar_store_cost */ 4, /* vec_int_stmt_cost */ - 4, /* vec_fp_stmt_cost */ + 1, /* vec_fp_stmt_cost */ 4, /* vec_permute_cost */ 2, /* vec_to_scalar_cost */ 2, /* scalar_to_vec_cost */ 3, /* vec_align_load_cost */ - 10, /* vec_unalign_load_cost */ - 10, /* vec_unalign_store_cost */ + 5, /* vec_unalign_load_cost */ + 5, /* vec_unalign_store_cost */ 1, /* vec_store_cost */ 3, /* cond_taken_branch_cost */ 3 /* cond_not_taken_branch_cost */ @@ -488,20 +458,6 @@ static const struct cpu_branch_cost generic_branch_cost = 3 /* Unpredictable. */ }; -/* Branch costs for Cortex-A57. */ -static const struct cpu_branch_cost cortexa57_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - -/* Branch costs for Vulcan. */ -static const struct cpu_branch_cost thunderx2t99_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - /* Generic approximation modes. 
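Note on the aarch64_combine<mode> change above: with aarch64_combine_internal<mode> gone, the expander now defers straight to aarch64_split_simd_combine, which asserts register operands and picks the per-mode generator itself. From the intrinsics side nothing changes; vcombine_* remains the entry point. A small AArch64-only compile check:

    #include <arm_neon.h>

    /* vcombine_s32 expands through aarch64_combine<mode>, which now
       emits the combine sequence via aarch64_split_simd_combine.  */
    int32x4_t
    combine_halves (int32x2_t lo, int32x2_t hi)
    {
      return vcombine_s32 (lo, hi);
    }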
*/ static const cpu_approx_modes generic_approx_modes = { @@ -612,7 +568,7 @@ static const struct tune_params cortexa35_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 1, /* issue_rate */ @@ -638,7 +594,7 @@ static const struct tune_params cortexa53_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 2, /* issue_rate */ @@ -661,10 +617,10 @@ static const struct tune_params cortexa53_tunings = static const struct tune_params cortexa57_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -687,10 +643,10 @@ static const struct tune_params cortexa57_tunings = static const struct tune_params cortexa72_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -713,10 +669,10 @@ static const struct tune_params cortexa72_tunings = static const struct tune_params cortexa73_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 2, /* issue_rate. */ @@ -842,7 +798,7 @@ static const struct tune_params xgene1_tunings = static const struct tune_params qdf24xx_tunings = { &qdf24xx_extra_costs, - &qdf24xx_addrcost_table, + &generic_addrcost_table, &qdf24xx_regmove_cost, &generic_vector_cost, &generic_branch_cost, @@ -871,11 +827,12 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_addrcost_table, &thunderx2t99_regmove_cost, &thunderx2t99_vector_cost, - &thunderx2t99_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 4, /* issue_rate. */ - (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */ + (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. */ 16, /* loop_align. 
*/ @@ -1031,7 +988,7 @@ static reg_class_t aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, reg_class_t best_class) { - enum machine_mode mode; + machine_mode mode; if (allocno_class != ALL_REGS) return allocno_class; @@ -1044,7 +1001,7 @@ aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, } static unsigned int -aarch64_min_divisions_for_recip_mul (enum machine_mode mode) +aarch64_min_divisions_for_recip_mul (machine_mode mode) { if (GET_MODE_UNIT_SIZE (mode) == 4) return aarch64_tune_params.min_div_recip_mul_sf; @@ -1053,7 +1010,7 @@ aarch64_min_divisions_for_recip_mul (enum machine_mode mode) static int aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, - enum machine_mode mode) + machine_mode mode) { if (VECTOR_MODE_P (mode)) return aarch64_tune_params.vec_reassoc_width; @@ -1732,41 +1689,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) machine_mode dst_mode = GET_MODE (dst); gcc_assert (VECTOR_MODE_P (dst_mode)); + gcc_assert (register_operand (dst, dst_mode) + && register_operand (src1, src_mode) + && register_operand (src2, src_mode)); - if (REG_P (dst) && REG_P (src1) && REG_P (src2)) - { - rtx (*gen) (rtx, rtx, rtx); - - switch (src_mode) - { - case V8QImode: - gen = gen_aarch64_simd_combinev8qi; - break; - case V4HImode: - gen = gen_aarch64_simd_combinev4hi; - break; - case V2SImode: - gen = gen_aarch64_simd_combinev2si; - break; - case V4HFmode: - gen = gen_aarch64_simd_combinev4hf; - break; - case V2SFmode: - gen = gen_aarch64_simd_combinev2sf; - break; - case DImode: - gen = gen_aarch64_simd_combinedi; - break; - case DFmode: - gen = gen_aarch64_simd_combinedf; - break; - default: - gcc_unreachable (); - } + rtx (*gen) (rtx, rtx, rtx); - emit_insn (gen (dst, src1, src2)); - return; + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V4HFmode: + gen = gen_aarch64_simd_combinev4hf; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); } + + emit_insn (gen (dst, src1, src2)); + return; } /* Split a complex SIMD move. */ @@ -1875,6 +1832,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, return 1; } + /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff + (with XXXX non-zero). In that case check to see if the move can be done in + a smaller mode. */ + val2 = val & 0xffffffff; + if (mode == DImode + && aarch64_move_imm (val2, SImode) + && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) + { + if (generate) + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + + /* Check if we have to emit a second instruction by checking to see + if any of the upper 32 bits of the original DI mode value is set. */ + if (val == val2) + return 1; + + i = (val >> 48) ? 
48 : 32; + + if (generate) + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + + return 2; + } + if ((val >> 32) == 0 || mode == SImode) { if (generate) @@ -2002,6 +1984,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) gcc_assert (can_create_pseudo_p ()); base = gen_reg_rtx (ptr_mode); aarch64_expand_mov_immediate (base, XEXP (mem, 0)); + if (ptr_mode != Pmode) + base = convert_memory_address (Pmode, base); mem = gen_rtx_MEM (ptr_mode, base); } @@ -4720,6 +4704,69 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) return true; } +/* Return the binary representation of floating point constant VALUE in INTVAL. + If the value cannot be converted, return false without setting INTVAL. + The conversion is done in the given MODE. */ +bool +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) +{ + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (value)) + { + *intval = 0; + return true; + } + + machine_mode mode = GET_MODE (value); + if (GET_CODE (value) != CONST_DOUBLE + || !SCALAR_FLOAT_MODE_P (mode) + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT + /* Only support up to DF mode. */ + || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode)) + return false; + + unsigned HOST_WIDE_INT ival = 0; + + long res[2]; + real_to_target (res, + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + + ival = zext_hwi (res[0], 32); + if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (DFmode)) + ival |= (zext_hwi (res[1], 32) << 32); + + *intval = ival; + return true; +} + +/* Return TRUE if rtx X is an immediate constant that can be moved using a + single MOV(+MOVK) followed by an FMOV. */ +bool +aarch64_float_const_rtx_p (rtx x) +{ + machine_mode mode = GET_MODE (x); + if (mode == VOIDmode) + return false; + + /* Determine whether it's cheaper to write float constants as + mov/movk pairs over ldr/adrp pairs. */ + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode) + && aarch64_reinterpret_float_as_int (x, &ival)) + { + machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode); + int num_instr = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + return num_instr < 3; + } + + return false; +} + /* Return TRUE if rtx X is immediate constant 0.0 */ bool aarch64_float_const_zero_rtx_p (rtx x) @@ -4732,6 +4779,49 @@ aarch64_float_const_zero_rtx_p (rtx x) return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0); } +/* Return TRUE if rtx X is immediate constant that fits in a single + MOVI immediate operation. */ +bool +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) +{ + if (!TARGET_SIMD) + return false; + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (x)) + return true; + + machine_mode vmode, imode; + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode)) + { + if (!aarch64_reinterpret_float_as_int (x, &ival)) + return false; + + imode = int_mode_for_mode (mode); + } + else if (GET_CODE (x) == CONST_INT + && SCALAR_INT_MODE_P (mode)) + { + imode = mode; + ival = INTVAL (x); + } + else + return false; + + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 
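Note on aarch64_reinterpret_float_as_int and aarch64_float_const_rtx_p above: the idea is to view a scalar FP constant's bits as an integer and ask how many MOV/MOVK steps that integer needs; fewer than 3 beats an ADRP+LDR literal load. A runnable sketch of the test — count_mov_insns is a simplified stand-in for aarch64_internal_mov_immediate (it only counts non-zero 16-bit chunks and ignores MOVN/ORR tricks):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Simplified stand-in: one MOV for the first non-zero 16-bit
       chunk, one MOVK per further chunk.  */
    static int
    count_mov_insns (uint64_t bits)
    {
      int n = 0;
      for (int i = 0; i < 64; i += 16)
        if ((bits >> i) & 0xffff)
          n++;
      return n ? n : 1;
    }

    int
    main (void)
    {
      /* 65536.0 is not FMOV-encodable; pi needs all four chunks.  */
      double vals[] = { 65536.0, 3.141592653589793 };
      for (int i = 0; i < 2; i++)
        {
          uint64_t bits;
          memcpy (&bits, &vals[i], sizeof bits);   /* real_to_target's job */
          int n = count_mov_insns (bits);
          printf ("%.17g: bits=%#018llx insns=%d -> %s\n", vals[i],
                  (unsigned long long) bits, n,
                  n < 3 ? "mov/movk + fmov" : "keep literal load");
        }
      return 0;
    }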
128 : 64; + + vmode = aarch64_simd_container_mode (imode, width); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival); + + return aarch64_simd_valid_immediate (v_op, vmode, false, NULL); +} + + /* Return the fixed registers used for condition codes. */ static bool @@ -4884,7 +4974,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) } static int -aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code); +aarch64_get_condition_code_1 (machine_mode, enum rtx_code); int aarch64_get_condition_code (rtx x) @@ -4898,7 +4988,7 @@ aarch64_get_condition_code (rtx x) } static int -aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) +aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) { switch (mode) { @@ -5050,12 +5140,43 @@ static const int aarch64_nzcv_codes[] = 0 /* NV, Any. */ }; +/* Print operand X to file F in a target specific manner according to CODE. + The acceptable formatting commands given by CODE are: + 'c': An integer or symbol address without a preceding # + sign. + 'e': Print the sign/zero-extend size as a character 8->b, + 16->h, 32->w. + 'p': Prints N such that 2^N == X (X must be power of 2 and + const int). + 'P': Print the number of non-zero bits in X (a const_int). + 'H': Print the higher numbered register of a pair (TImode) + of regs. + 'm': Print a condition (eq, ne, etc). + 'M': Same as 'm', but invert condition. + 'b/h/s/d/q': Print a scalar FP/SIMD register name. + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. + 'R': Print a scalar FP/SIMD register name + 1. + 'X': Print bottom 16 bits of integer constant in hex. + 'w/x': Print a general register name or the zero register + (32-bit or 64-bit). + '0': Print a normal operand, if it's a general register, + then we assume DImode. + 'k': Print NZCV for conditional compare instructions. + 'A': Output address constant representing the first + argument of X, specifying a relocation offset + if appropriate. + 'L': Output constant address specified by X + with a relocation offset if appropriate. + 'G': Prints address of X, specifying a PC relative + relocation mode if appropriate. */ + static void aarch64_print_operand (FILE *f, rtx x, int code) { switch (code) { - /* An integer or symbol address without a preceding # sign. */ case 'c': switch (GET_CODE (x)) { @@ -5082,7 +5203,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'e': - /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */ { int n; @@ -5115,7 +5235,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) { int n; - /* Print N such that 2^N == X. */ if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5127,7 +5246,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'P': - /* Print the number of non-zero bits in X (a const_int). */ if (!CONST_INT_P (x)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5138,7 +5256,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'H': - /* Print the higher numbered register of a pair (TImode) of regs. */ if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5152,8 +5269,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'm': { int cond_code; - /* Print a condition (eq, ne, etc) or its inverse. 
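Note on aarch64_can_const_movi_rtx_p above: it dups the scalar into a 64- or 128-bit vector and defers to aarch64_simd_valid_immediate. For DImode this subsumes the old aarch64_simd_imm_scalar_p rule (deleted further down): the 64-bit MOVI form accepts exactly those values whose bytes are all 0x00 or 0xff. A sketch of just that byte test, using the same loop the removed function used:

    #include <stdint.h>
    #include <stdio.h>

    /* 64-bit MOVI immediate: every byte must be 0x00 or 0xff.  */
    static int
    movi_d_immediate_p (uint64_t v)
    {
      for (int i = 0; i < 8; i++, v >>= 8)
        {
          unsigned byte = v & 0xff;
          if (byte != 0 && byte != 0xff)
            return 0;
        }
      return 1;
    }

    int
    main (void)
    {
      printf ("%d %d\n",
              movi_d_immediate_p (0xffff00000000ffffULL),  /* 1: movi d0, ... */
              movi_d_immediate_p (0x0123456789abcdefULL)); /* 0: not MOVI-able */
      return 0;
    }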
*/ - /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */ if (x == const_true_rtx) { @@ -5181,7 +5296,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 's': case 'd': case 'q': - /* Print a scalar FP/SIMD register name. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5194,7 +5308,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'T': case 'U': case 'V': - /* Print the first FP/SIMD register name in a list. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5204,7 +5317,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'R': - /* Print a scalar FP/SIMD register name + 1. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5214,7 +5326,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'X': - /* Print bottom 16 bits of integer constant in hex. */ if (!CONST_INT_P (x)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5225,8 +5336,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'w': case 'x': - /* Print a general register name or the zero register (32-bit or - 64-bit). */ if (x == const0_rtx || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) { @@ -5249,8 +5358,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) /* Fall through */ case 0: - /* Print a normal operand, if it's a general register, then we - assume DImode. */ if (x == NULL) { output_operand_lossage ("missing operand"); @@ -5265,6 +5372,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) case MEM: output_address (GET_MODE (x), XEXP (x, 0)); + /* Check all memory references are Pmode - even with ILP32. */ + gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode); break; case CONST: @@ -5401,7 +5510,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'G': - switch (aarch64_classify_symbolic_expression (x)) { case SYMBOL_TLSLE24: @@ -5416,7 +5524,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'k': { HOST_WIDE_INT cond_code; - /* Print nzcv. */ if (!CONST_INT_P (x)) { @@ -5909,12 +6016,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass) return NO_REGS; } - /* If it's an integer immediate that MOVI can't handle, then - FP_REGS is not an option, so we return NO_REGS instead. */ - if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) - && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) - return NO_REGS; - /* Register eliminiation can result in a request for SP+constant->FP_REGS. We cannot support such operations which use SP as source and an FP_REG as destination, so reject out @@ -6864,6 +6965,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, return true; case CONST_DOUBLE: + + /* First determine number of instructions to do the move + as an integer constant. */ + if (!aarch64_float_const_representable_p (x) + && !aarch64_can_const_movi_rtx_p (x, mode) + && aarch64_float_const_rtx_p (x)) + { + unsigned HOST_WIDE_INT ival; + bool succeed = aarch64_reinterpret_float_as_int (x, &ival); + gcc_assert (succeed); + + machine_mode imode = mode == HFmode ? 
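Note on the consolidated aarch64_print_operand comment block above: several of the documented modifiers are also reachable from inline asm. A small AArch64-only example of 'w' and 'x' (32- vs 64-bit GP register names), per the 'w/x' entry in that table:

    #include <stdio.h>

    /* %wN prints the 32-bit name of operand N, %xN the 64-bit name.  */
    static unsigned long
    add_zext (unsigned long a, unsigned int b)
    {
      unsigned long r;
      __asm__ ("add %x0, %x1, %w2, uxtw" : "=r" (r) : "r" (a), "r" (b));
      return r;
    }

    int
    main (void)
    {
      printf ("%lu\n", add_zext (40UL, 2U));
      return 0;
    }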
SImode + : int_mode_for_mode (mode); + int ncost = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + *cost += COSTS_N_INSNS (ncost); + return true; + } + if (speed) { /* mov[df,sf]_aarch64. */ @@ -7537,17 +7657,26 @@ cost_plus: } else { - if (speed) + if (VECTOR_MODE_P (mode)) { - if (VECTOR_MODE_P (mode)) - { - /* Vector shift (register). */ - *cost += extra_cost->vect.alu; - } - else + if (speed) + /* Vector shift (register). */ + *cost += extra_cost->vect.alu; + } + else + { + if (speed) + /* LSLV. */ + *cost += extra_cost->alu.shift_reg; + + if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) + && CONST_INT_P (XEXP (op1, 1)) + && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) { - /* LSLV. */ - *cost += extra_cost->alu.shift_reg; + *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); + /* We already demanded XEXP (op1, 0) to be REG_P, so + don't recurse into it. */ + return true; } } return false; /* All arguments need to be in registers. */ @@ -7576,14 +7705,27 @@ cost_plus: } else { - - /* ASR (register) and friends. */ - if (speed) + if (VECTOR_MODE_P (mode)) { - if (VECTOR_MODE_P (mode)) + if (speed) + /* Vector shift (register). */ *cost += extra_cost->vect.alu; - else + } + else + { + if (speed) + /* ASR (register) and friends. */ *cost += extra_cost->alu.shift_reg; + + if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) + && CONST_INT_P (XEXP (op1, 1)) + && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) + { + *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); + /* We already demanded XEXP (op1, 0) to be REG_P, so + don't recurse into it. */ + return true; + } } return false; /* All arguments need to be in registers. */ } @@ -10151,7 +10293,7 @@ aarch64_classify_symbol (rtx x, rtx offset) /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_SMALL_ABSOLUTE; else return SYMBOL_FORCE_TO_MEM; @@ -10186,18 +10328,16 @@ aarch64_legitimate_pic_operand_p (rtx x) /* Return true if X holds either a quarter-precision or floating-point +0.0 constant. */ static bool -aarch64_valid_floating_const (machine_mode mode, rtx x) +aarch64_valid_floating_const (rtx x) { if (!CONST_DOUBLE_P (x)) return false; - if (aarch64_float_const_zero_rtx_p (x)) + /* This call determines which constants can be used in mov<mode> + as integer moves instead of constant loads. */ + if (aarch64_float_const_rtx_p (x)) return true; - /* We only handle moving 0.0 to a TFmode register. */ - if (!(mode == SFmode || mode == DFmode)) - return false; - return aarch64_float_const_representable_p (x); } @@ -10209,11 +10349,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) return false; - /* This could probably go away because - we now decompose CONST_INTs according to expand_mov_immediate. */ + /* For these cases we never want to use a literal load. + As such we have to prevent the compiler from forcing these + to memory. 
*/ if ((GET_CODE (x) == CONST_VECTOR && aarch64_simd_valid_immediate (x, mode, false, NULL)) - || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) + || CONST_INT_P (x) + || aarch64_valid_floating_const (x) + || aarch64_can_const_movi_rtx_p (x, mode) + || aarch64_float_const_rtx_p (x)) return !targetm.cannot_force_const_mem (mode, x); if (GET_CODE (x) == HIGH @@ -11496,23 +11640,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos) } bool -aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED) -{ - HOST_WIDE_INT imm = INTVAL (x); - int i; - - for (i = 0; i < 8; i++) - { - unsigned int byte = imm & 0xff; - if (byte != 0xff && byte != 0) - return false; - imm >>= 8; - } - - return true; -} - -bool aarch64_mov_operand_p (rtx x, machine_mode mode) { if (GET_CODE (x) == HIGH @@ -11666,7 +11793,7 @@ aarch64_simd_mem_operand_p (rtx op) COUNT is the number of components into which the copy needs to be decomposed. */ void -aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, +aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode, unsigned int count) { unsigned int i; @@ -11687,7 +11814,7 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is one of VSTRUCT modes: OI, CI, or XI. */ int -aarch64_simd_attr_length_rglist (enum machine_mode mode) +aarch64_simd_attr_length_rglist (machine_mode mode) { return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4; } @@ -12119,10 +12246,8 @@ aarch64_emit_store_exclusive (machine_mode mode, rtx bval, static void aarch64_emit_unlikely_jump (rtx insn) { - int very_unlikely = REG_BR_PROB_BASE / 100 - 1; - rtx_insn *jump = emit_jump_insn (insn); - add_int_reg_note (jump, REG_BR_PROB, very_unlikely); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); } /* Expand a compare and swap pattern. */ @@ -12905,15 +13030,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector, } char* -aarch64_output_scalar_simd_mov_immediate (rtx immediate, - machine_mode mode) +aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode) { + + /* If a floating point number was passed and we desire to use it in an + integer mode do the conversion to integer. */ + if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT) + { + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (immediate, &ival)) + gcc_unreachable (); + immediate = gen_int_mode (ival, mode); + } + machine_mode vmode; + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; gcc_assert (!VECTOR_MODE_P (mode)); - vmode = aarch64_simd_container_mode (mode, 64); + vmode = aarch64_simd_container_mode (mode, width); rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); - return aarch64_output_simd_mov_immediate (v_op, vmode, 64); + return aarch64_output_simd_mov_immediate (v_op, vmode, width); } /* Split operands into moves from op[1] + op[2] into op[0]. */ @@ -13678,7 +13816,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, } rtx -aarch64_reverse_mask (enum machine_mode mode) +aarch64_reverse_mask (machine_mode mode) { /* We have to reverse each vector because we dont have a permuted load that can reverse-load according to ABI rules. 
*/ @@ -14278,13 +14416,68 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) { enum attr_type prev_type = get_attr_type (prev); - /* FIXME: this misses some which is considered simple arthematic - instructions for ThunderX. Simple shifts are missed here. */ - if (prev_type == TYPE_ALUS_SREG - || prev_type == TYPE_ALUS_IMM - || prev_type == TYPE_LOGICS_REG - || prev_type == TYPE_LOGICS_IMM) - return true; + unsigned int condreg1, condreg2; + rtx cc_reg_1; + aarch64_fixed_condition_code_regs (&condreg1, &condreg2); + cc_reg_1 = gen_rtx_REG (CCmode, condreg1); + + if (reg_referenced_p (cc_reg_1, PATTERN (curr)) + && prev + && modified_in_p (cc_reg_1, prev)) + { + /* FIXME: this misses some which is considered simple arthematic + instructions for ThunderX. Simple shifts are missed here. */ + if (prev_type == TYPE_ALUS_SREG + || prev_type == TYPE_ALUS_IMM + || prev_type == TYPE_LOGICS_REG + || prev_type == TYPE_LOGICS_IMM) + return true; + } + } + + if (prev_set + && curr_set + && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) + && any_condjump_p (curr)) + { + /* We're trying to match: + prev (alu_insn) == (set (r0) plus ((r0) (r1/imm))) + curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0) + (const_int 0)) + (label_ref ("SYM")) + (pc)) */ + if (SET_DEST (curr_set) == (pc_rtx) + && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE + && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (SET_DEST (prev_set)) + == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0))) + { + /* Fuse ALU operations followed by conditional branch instruction. */ + switch (get_attr_type (prev)) + { + case TYPE_ALU_IMM: + case TYPE_ALU_SREG: + case TYPE_ADC_REG: + case TYPE_ADC_IMM: + case TYPE_ADCS_REG: + case TYPE_ADCS_IMM: + case TYPE_LOGIC_REG: + case TYPE_LOGIC_IMM: + case TYPE_CSEL: + case TYPE_ADR: + case TYPE_MOV_IMM: + case TYPE_SHIFT_REG: + case TYPE_SHIFT_IMM: + case TYPE_BFM: + case TYPE_RBIT: + case TYPE_REV: + case TYPE_EXTEND: + return true; + + default:; + } + } } return false; @@ -14468,7 +14661,7 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority) bool aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - enum machine_mode mode) + machine_mode mode) { HOST_WIDE_INT offval_1, offval_2, msize; enum reg_class rclass_1, rclass_2; @@ -14575,7 +14768,7 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, bool aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, - enum machine_mode mode) + machine_mode mode) { enum reg_class rclass_1, rclass_2, rclass_3, rclass_4; HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize; @@ -14709,7 +14902,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, bool aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, - enum machine_mode mode, RTX_CODE code) + machine_mode mode, RTX_CODE code) { rtx base, offset, t1, t2; rtx mem_1, mem_2, mem_3, mem_4; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 3b3f27e2f95..7f91edb5713 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -98,14 +98,24 @@ && (ALIGN) < BITS_PER_WORD) \ ? BITS_PER_WORD : ALIGN) -#define DATA_ALIGNMENT(EXP, ALIGN) \ - ((((ALIGN) < BITS_PER_WORD) \ - && (TREE_CODE (EXP) == ARRAY_TYPE \ - || TREE_CODE (EXP) == UNION_TYPE \ - || TREE_CODE (EXP) == RECORD_TYPE)) \ - ? 
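Note on the AARCH64_FUSE_ALU_BRANCH matching above: the scheduler keeps a flag-free ALU instruction adjacent to a CBZ/CBNZ that tests the register it just wrote. A source shape that usually lowers to such a pair (instruction selection is the compiler's choice, so the asm in the comment is illustrative):

    /* The decrement writes n and the loop branch tests n against zero,
       typically giving
         sub  x0, x0, #1
         cbnz x0, .Lloop
       -- an ALU + branch pair the new fusion keeps together.  */
    unsigned long
    count_steps (unsigned long n)
    {
      unsigned long steps = 0;
      while (n != 0)
        {
          n -= 1;
          steps++;
        }
      return steps;
    }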
BITS_PER_WORD : (ALIGN)) - -#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN) +/* Align definitions of arrays, unions and structures so that + initializations and copies can be made more efficient. This is not + ABI-changing, so it only affects places where we can see the + definition. Increasing the alignment tends to introduce padding, + so don't do this when optimizing for size/conserving stack space. */ +#define AARCH64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Align global data. */ +#define DATA_ALIGNMENT(EXP, ALIGN) \ + AARCH64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + AARCH64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN) #define STRUCTURE_SIZE_BOUNDARY 8 @@ -134,7 +144,8 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ /* ARMv8.1-A architecture extensions. */ #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ -#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */ +#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ /* ARMv8.2-A architecture extensions. */ #define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ @@ -151,7 +162,8 @@ extern unsigned aarch64_architecture_version; /* Architecture flags that effect instruction selection. */ #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) #define AARCH64_FL_FOR_ARCH8_1 \ - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1) + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ + | AARCH64_FL_RDMA | AARCH64_FL_V8_1) #define AARCH64_FL_FOR_ARCH8_2 \ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) #define AARCH64_FL_FOR_ARCH8_3 \ @@ -164,7 +176,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) -#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1) +#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 6bdbf650d92..fc799479c81 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -181,6 +181,11 @@ ;; will be disabled when !TARGET_FLOAT. (define_attr "fp" "no,yes" (const_string "no")) +;; Attribute that specifies whether or not the instruction touches half +;; precision fp registers. When this is set to yes for an alternative, +;; that alternative will be disabled when !TARGET_FP_F16INST. +(define_attr "fp16" "no,yes" (const_string "no")) + ;; Attribute that specifies whether or not the instruction touches simd ;; registers. When this is set to yes for an alternative, that alternative ;; will be disabled when !TARGET_SIMD. @@ -194,11 +199,14 @@ ;; registers when -mgeneral-regs-only is specified. 
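Note on AARCH64_EXPAND_ALIGNMENT above: the effect is easiest to see on a small aggregate definition; the bump applies only while the guard holds (not under -Os for globals, nor with -fconserve-stack for locals). Illustrative only — the exact directives depend on target defaults:

    /* At -O2 this array is padded up to BITS_PER_WORD (8-byte)
       alignment so initializations and copies can use wide accesses:
           .align 3        // with -O2
       Under -Os the !optimize_size guard fails and the natural
       alignment of the type is kept instead.  */
    char tag[5] = "abcd";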
(define_attr "enabled" "no,yes" (cond [(ior - (and (eq_attr "fp" "yes") - (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) - (and (eq_attr "simd" "yes") - (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) - (const_string "no") + (ior + (and (eq_attr "fp" "yes") + (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) + (and (eq_attr "simd" "yes") + (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) + (and (eq_attr "fp16" "yes") + (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0)))) + (const_string "no") ] (const_string "yes"))) ;; Attribute that specifies whether we are dealing with a branch to a @@ -920,8 +928,8 @@ ) (define_insn_and_split "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w") - (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w,w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))] "(register_operand (operands[0], SImode) || aarch64_reg_or_zero (operands[1], SImode))" "@ @@ -938,8 +946,9 @@ adrp\\t%x0, %A1 fmov\\t%s0, %w1 fmov\\t%w0, %s1 - fmov\\t%s0, %s1" - "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) + fmov\\t%s0, %s1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" + "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] "{ @@ -947,13 +956,14 @@ DONE; }" [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ - adr,adr,f_mcr,f_mrc,fmov") - (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] + adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn_and_split "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m, m,r,r, *w,r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -961,6 +971,7 @@ mov\\t%0, %x1 mov\\t%x0, %1 mov\\t%x0, %1 + mov\\t%w0, %1 # ldr\\t%x0, %1 ldr\\t%d0, %1 @@ -971,7 +982,7 @@ fmov\\t%d0, %x1 fmov\\t%x0, %d1 fmov\\t%d0, %d1 - movi\\t%d0, %1" + * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);" "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)) && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] @@ -979,10 +990,10 @@ aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; }" - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ - adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\ + load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm<mode>" @@ -1062,65 +1073,94 @@ ) (define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m 
,r") - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))] + [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))] "TARGET_FLOAT && (register_operand (operands[0], HFmode) - || aarch64_reg_or_fp_zero (operands[1], HFmode))" + || aarch64_reg_or_fp_float (operands[1], HFmode))" "@ movi\\t%0.4h, #0 - mov\\t%0.h[0], %w1 + fmov\\t%h0, %w1 umov\\t%w0, %1.h[0] mov\\t%0.h[0], %1.h[0] + fmov\\t%h0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); ldr\\t%h0, %1 str\\t%h1, %0 ldrh\\t%w0, %1 strh\\t%w1, %0 mov\\t%w0, %w1" - [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\ - f_loads,f_stores,load1,store1,mov_reg") - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")] + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \ + neon_move,f_loads,f_stores,load1,store1,mov_reg") + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*") + (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movsf_aarch64" - [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") + (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))] "TARGET_FLOAT && (register_operand (operands[0], SFmode) - || aarch64_reg_or_fp_zero (operands[1], SFmode))" + || aarch64_reg_or_fp_float (operands[1], SFmode))" "@ movi\\t%0.2s, #0 fmov\\t%s0, %w1 fmov\\t%w0, %s1 fmov\\t%s0, %s1 fmov\\t%s0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); ldr\\t%s0, %1 str\\t%s1, %0 ldr\\t%w0, %1 str\\t%w1, %0 - mov\\t%w0, %w1" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\ - f_loads,f_stores,load1,store1,mov_reg") - (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] + mov\\t%w0, %w1 + mov\\t%w0, %1" + [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\ + f_loads,f_stores,load1,store1,mov_reg,\ + fconsts") + (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movdf_aarch64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") + (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] "TARGET_FLOAT && (register_operand (operands[0], DFmode) - || aarch64_reg_or_fp_zero (operands[1], DFmode))" + || aarch64_reg_or_fp_float (operands[1], DFmode))" "@ movi\\t%d0, #0 fmov\\t%d0, %x1 fmov\\t%x0, %d1 fmov\\t%d0, %d1 fmov\\t%d0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); ldr\\t%d0, %1 str\\t%d1, %0 ldr\\t%x0, %1 str\\t%x1, %0 - mov\\t%x0, %x1" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\ - f_loadd,f_stored,load1,store1,mov_reg") - (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] + mov\\t%x0, %x1 + mov\\t%x0, %1" + [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ + f_loadd,f_stored,load1,store1,mov_reg,\ + fconstd") + (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] +) + +(define_split + [(set (match_operand:GPF_HF 0 "nonimmediate_operand") + (match_operand:GPF_HF 1 "general_operand"))] + "can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode) + && !aarch64_float_const_representable_p (operands[1]) + && aarch64_float_const_rtx_p (operands[1])" + [(const_int 0)] + 
{ + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (operands[1], &ival)) + FAIL; + + rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode); + emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode)); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); + DONE; + } ) (define_insn "*movtf_aarch64" @@ -1905,6 +1945,17 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) +(define_insn "aarch64_sub<mode>_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "aarch64_plus_operand" "r")) + (const_int 0)))] + "" + "cmp\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*compare_neg<mode>" [(set (reg:CC_Z CC_REGNUM) (compare:CC_Z @@ -3824,6 +3875,22 @@ [(set_attr "type" "logics_reg,logics_imm")] ) +(define_split + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_mov_imm_operand")) + (const_int 0))) + (clobber (match_operand:SI 2 "register_operand"))] + "" + [(set (match_dup 2) (match_dup 1)) + (set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_dup 0) + (match_dup 2)) + (const_int 0)))] +) + (define_insn "*and<mode>3nr_compare0_zextract" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -3859,6 +3926,26 @@ [(set_attr "type" "logics_shift_imm")] ) +(define_split + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 0 "register_operand") + (match_operand:QI 1 "aarch64_shift_imm_<mode>")) + (match_operand:GPI 2 "aarch64_mov_imm_operand")) + (const_int 0))) + (clobber (match_operand:SI 3 "register_operand"))] + "" + [(set (match_dup 3) (match_dup 2)) + (set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_dup 0) + (match_dup 1)) + (match_dup 3)) + (const_int 0)))] +) + ;; ------------------------------------------------------------------- ;; Shifts ;; ------------------------------------------------------------------- @@ -3942,6 +4029,97 @@ } ) +;; When the LSL, LSR, ASR, ROR instructions operate on all register arguments +;; they truncate the shift/rotate amount by the size of the registers they +;; operate on: 32 for W-regs, 64 for X-regs. This allows us to optimise away +;; such redundant masking instructions. GCC can do that automatically when +;; SHIFT_COUNT_TRUNCATED is true, but we can't enable it for TARGET_SIMD +;; because some of the SISD shift alternatives don't perform this truncations. +;; So this pattern exists to catch such cases. + +(define_insn "*aarch64_<optab>_reg_<mode>3_mask1" + [(set (match_operand:GPI 0 "register_operand" "=r") + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operator 4 "subreg_lowpart_operator" + [(and:GPI (match_operand:GPI 2 "register_operand" "r") + (match_operand 3 "const_int_operand" "n"))])))] + "(~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0" + "<shift>\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "shift_reg")] +) + +(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2" + [(set (match_operand:GPI 0 "register_operand" "=&r") + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operator 4 "subreg_lowpart_operator" + [(neg:SI (and:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")))])))] + "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)" + "#" + "&& true" + [(const_int 0)] + { + rtx tmp = (can_create_pseudo_p () ? 
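Note on the aarch64_sub<mode>_compare0 insn and the CC_NZ splits above: the splits cover compares of an AND whose mask is a valid MOV immediate but not an encodable bitmask immediate — the mask is first moved into the clobbered scratch, then tested. A sketch of a source-level trigger (whether combine forms this exact RTL depends on surrounding code):

    /* 0xfff1 is a single MOVZ immediate but not a bitmask immediate
       (its set bits are not one contiguous rotated run), so the test
       becomes roughly
         mov  w1, #0xfff1
         tst  x0, x1
         cset w0, ne  */
    int
    any_masked_bits (unsigned long x)
    {
      return (x & 0xfff1) != 0;
    }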
gen_reg_rtx (SImode) + : operands[0]); + emit_insn (gen_negsi2 (tmp, operands[2])); + + rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]); + rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op, + SUBREG_BYTE (operands[4])); + emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp)); + DONE; + } +) + +(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask" + [(set (match_operand:GPI 0 "register_operand" "=&r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" "r") + (minus:QI (match_operand 2 "const_int_operand" "n") + (match_operator 5 "subreg_lowpart_operator" + [(and:SI (match_operand:SI 3 "register_operand" "r") + (match_operand 4 "const_int_operand" "n"))]))))] + "((~INTVAL (operands[4]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0) + && INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)" + "#" + "&& true" + [(const_int 0)] + { + rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode) + : operands[0]); + + emit_insn (gen_negsi2 (tmp, operands[3])); + + rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]); + rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op, + SUBREG_BYTE (operands[5])); + + emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp)); + DONE; + } +) + +(define_insn "*aarch64_<optab>_reg_di3_mask2" + [(set (match_operand:DI 0 "register_operand" "=r") + (SHIFT:DI + (match_operand:DI 1 "register_operand" "r") + (match_operator 4 "subreg_lowpart_operator" + [(and:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))] + "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)" +{ + rtx xop[3]; + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = gen_lowpart (GET_MODE (operands[4]), operands[2]); + output_asm_insn ("<shift>\t%x0, %x1, %x2", xop); + return ""; +} + [(set_attr "type" "shift_reg")] +) + ;; Logical left shift using SISD or Integer instruction (define_insn "*aarch64_ashl_sisd_or_int_<mode>3" [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0753da32f59..d7b30b0e5ee 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) /* ARMv8.1-A instrinsics. */ #pragma GCC push_options -#pragma GCC target ("arch=armv8.1-a") +#pragma GCC target ("+nothing+rdma") __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 88e840f2898..9ce3d4efaf3 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -176,6 +176,12 @@ (and (match_code "const_double") (match_test "aarch64_float_const_representable_p (op)"))) +(define_constraint "Uvi" + "A floating point constant which can be used with a\ + MOVI immediate operation." + (and (match_code "const_double") + (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))"))) + (define_constraint "Dn" "@internal A constraint that matches vector of immediates." @@ -220,9 +226,17 @@ (define_constraint "Dd" "@internal - A constraint that matches an immediate operand valid for AdvSIMD scalar." + A constraint that matches an integer immediate operand valid\ + for AdvSIMD scalar operations in DImode." 
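Note on the shift-mask patterns above: they exist because SHIFT_COUNT_TRUNCATED cannot be enabled wholesale while TARGET_SIMD provides SISD shift alternatives that do not truncate; the patterns instead drop the explicit AND where the register-shift instructions already truncate the amount. Minimal trigger:

    #include <stdint.h>

    /* LSL on X registers truncates the shift amount to 6 bits, so the
       & 63 is redundant and this can compile to a single
         lsl x0, x0, x1
       with no separate AND.  */
    uint64_t
    shift_left_masked (uint64_t x, unsigned int n)
    {
      return x << (n & 63);
    }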
+ (and (match_code "const_int") + (match_test "aarch64_can_const_movi_rtx_p (op, DImode)"))) + +(define_constraint "Ds" + "@internal + A constraint that matches an integer immediate operand valid\ + for AdvSIMD scalar operations in SImode." (and (match_code "const_int") - (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))"))) + (match_test "aarch64_can_const_movi_rtx_p (op, SImode)"))) (define_address_constraint "Dp" "@internal diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c index 94d7f9c5869..fa8c56aab02 100644 --- a/gcc/config/aarch64/cortex-a57-fma-steering.c +++ b/gcc/config/aarch64/cortex-a57-fma-steering.c @@ -603,7 +603,7 @@ fma_node::rename (fma_forest *forest) { rtx_insn *insn = this->m_insn; HARD_REG_SET unavailable; - enum machine_mode mode; + machine_mode mode; int reg; if (dump_file) @@ -973,10 +973,17 @@ func_fma_steering::analyze () break; } - /* We didn't find a chain with a def for this instruction. */ - gcc_assert (i < dest_op_info->n_chains); - - this->analyze_fma_fmul_insn (forest, chain, head); + /* Due to implementation of regrename, dest register can slip away + from regrename's analysis. As a result, there is no chain for + the destination register of insn. We simply skip the insn even + it is a fmul/fmac instruction. This can happen when the dest + register is also a source register of insn and one of the below + conditions is satisfied: + 1) the source reg is setup in larger mode than this insn; + 2) the source reg is uninitialized; + 3) the source reg is passed in as parameter. */ + if (i < dest_op_info->n_chains) + this->analyze_fma_fmul_insn (forest, chain, head); } } free (bb_dfs_preorder); diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 43be7fd3611..067cef78533 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -44,6 +44,9 @@ ;; Iterator for all scalar floating point modes (HF, SF, DF) (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF]) +;; Iterator for all scalar floating point modes (HF, SF, DF) +(define_mode_iterator GPF_HF [HF SF DF]) + ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index cd7ded98663..95d28cfa33c 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -35,6 +35,10 @@ (and (match_code "const_int") (match_test "op == CONST0_RTX (mode)"))) +(define_special_predicate "subreg_lowpart_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)"))) + (define_predicate "aarch64_ccmp_immediate" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), -31, 31)"))) @@ -53,6 +57,11 @@ (ior (match_operand 0 "register_operand") (match_test "op == const0_rtx")))) +(define_predicate "aarch64_reg_or_fp_float" + (ior (match_operand 0 "register_operand") + (and (match_code "const_double") + (match_test "aarch64_float_const_rtx_p (op)")))) + (define_predicate "aarch64_reg_or_fp_zero" (ior (match_operand 0 "register_operand") (and (match_code "const_double") @@ -110,6 +119,10 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_logical_immediate"))) +(define_predicate "aarch64_mov_imm_operand" + (and (match_code "const_int") + (match_test "aarch64_move_imm (INTVAL (op), mode)"))) + (define_predicate "aarch64_logical_and_immediate" (and (match_code "const_int") (match_test 
"aarch64_and_bitmask_imm (INTVAL (op), mode)"))) diff --git a/gcc/config/aarch64/rtems.h b/gcc/config/aarch64/rtems.h index b48e28afda0..07c5679d5c1 100644 --- a/gcc/config/aarch64/rtems.h +++ b/gcc/config/aarch64/rtems.h @@ -1,20 +1,25 @@ /* Definitions for RTEMS based AARCH64 system. Copyright (C) 2016-2017 Free Software Foundation, Inc. - + This file is part of GCC. - + GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. - + GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #define HAS_INIT_SECTION |