Diffstat (limited to 'gcc/config/aarch64')
 gcc/config/aarch64/aarch64-builtins.c            |  12
 gcc/config/aarch64/aarch64-cores.def             |   4
 gcc/config/aarch64/aarch64-fusion-pairs.def      |   1
 gcc/config/aarch64/aarch64-option-extensions.def |   4
 gcc/config/aarch64/aarch64-protos.h              |  18
 gcc/config/aarch64/aarch64-simd.md               |  54
 gcc/config/aarch64/aarch64.c                     | 555
 gcc/config/aarch64/aarch64.h                     |  34
 gcc/config/aarch64/aarch64.md                    | 256
 gcc/config/aarch64/arm_neon.h                    |   2
 gcc/config/aarch64/constraints.md                |  18
 gcc/config/aarch64/cortex-a57-fma-steering.c     |  17
 gcc/config/aarch64/iterators.md                  |   3
 gcc/config/aarch64/predicates.md                 |  13
 gcc/config/aarch64/rtems.h                       |  17
 15 files changed, 718 insertions(+), 290 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index f09399f4c15..d30009ba441 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -530,7 +530,7 @@ aarch64_mangle_builtin_type (const_tree type)
}
static tree
-aarch64_simd_builtin_std_type (enum machine_mode mode,
+aarch64_simd_builtin_std_type (machine_mode mode,
enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M) \
@@ -566,7 +566,7 @@ aarch64_simd_builtin_std_type (enum machine_mode mode,
}
static tree
-aarch64_lookup_simd_builtin_type (enum machine_mode mode,
+aarch64_lookup_simd_builtin_type (machine_mode mode,
enum aarch64_type_qualifiers q)
{
int i;
@@ -585,7 +585,7 @@ aarch64_lookup_simd_builtin_type (enum machine_mode mode,
}
static tree
-aarch64_simd_builtin_type (enum machine_mode mode,
+aarch64_simd_builtin_type (machine_mode mode,
bool unsigned_p, bool poly_p)
{
if (poly_p)
@@ -649,7 +649,7 @@ aarch64_init_simd_builtin_types (void)
for (i = 0; i < nelts; i++)
{
tree eltype = aarch64_simd_types[i].eltype;
- enum machine_mode mode = aarch64_simd_types[i].mode;
+ machine_mode mode = aarch64_simd_types[i].mode;
if (aarch64_simd_types[i].itype == NULL)
{
@@ -1015,7 +1015,7 @@ typedef enum
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
tree exp, builtin_simd_arg *args,
- enum machine_mode builtin_mode)
+ machine_mode builtin_mode)
{
rtx pat;
rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */
@@ -1040,7 +1040,7 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
else
{
tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
- enum machine_mode mode = insn_data[icode].operand[opc].mode;
+ machine_mode mode = insn_data[icode].operand[opc].mode;
op[opc] = expand_normal (arg);
switch (thisarg)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f8342ca722d..b8d0ba6b69e 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH
AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
/* Samsung ('S') cores. */
AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index f0e6dbcdd81..300cd00e4bf 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -34,5 +34,6 @@ AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
+AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
#undef AARCH64_FUSION_PAIR
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index c0752ce3470..c4f059ab7c5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -63,4 +63,8 @@ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
/* Enabling or disabling "rcpc" only changes "rcpc". */
AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
+/* Enabling "rdma" also enables "fp", "simd".
+ Disabling "rdma" just disables "rdma". */
+AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma")
+
#undef AARCH64_OPT_EXTENSION
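Illustrative usage (not part of the patch; function name is mine): with the new "rdma" entry, the RDMA instructions can be requested independently of -march=armv8.1-a, e.g. with -march=armv8-a+rdma. vqrdmlah_s16 is an existing ARMv8.1-A intrinsic from arm_neon.h, guarded by the pragma changed later in this patch:

    /* Sketch: assumes a compiler built with this patch, compiled with
       -march=armv8-a+rdma (or any -march that implies RDMA).  */
    #include <arm_neon.h>

    int16x4_t
    rdma_example (int16x4_t acc, int16x4_t a, int16x4_t b)
    {
      /* Saturating rounding doubling multiply-accumulate; maps to sqrdmlah.  */
      return vqrdmlah_s16 (acc, a, b);
    }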
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bfe44a75e12..beff28e2272 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -319,6 +319,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
+bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_constant_address_p (rtx);
bool aarch64_emit_approx_div (rtx, rtx, rtx);
@@ -326,6 +327,7 @@ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
void aarch64_expand_call (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
+bool aarch64_float_const_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
bool aarch64_gen_movmemqi (rtx *);
@@ -342,8 +344,8 @@ bool aarch64_modes_tieable_p (machine_mode mode1,
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
bool aarch64_mov_operand_p (rtx, machine_mode);
-int aarch64_simd_attr_length_rglist (enum machine_mode);
-rtx aarch64_reverse_mask (enum machine_mode);
+int aarch64_simd_attr_length_rglist (machine_mode);
+rtx aarch64_reverse_mask (machine_mode);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
@@ -351,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
bool aarch64_regno_ok_for_index_p (int, bool);
+bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval);
bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
bool high);
-bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode);
bool aarch64_simd_imm_zero_p (rtx, machine_mode);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
@@ -411,7 +413,7 @@ void aarch64_save_restore_target_globals (tree);
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
-void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int);
+void aarch64_simd_emit_reg_reg_move (rtx *, machine_mode, unsigned int);
/* Expand builtins for SIMD intrinsics. */
rtx aarch64_simd_expand_builtin (int, tree, rtx);
@@ -444,7 +446,7 @@ bool aarch64_atomic_ldop_supported_p (enum rtx_code);
void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
-bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
+bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE);
#endif /* RTX_CODE */
void aarch64_init_builtins (void);
@@ -468,11 +470,11 @@ extern void aarch64_final_prescan_insn (rtx_insn *);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
-int aarch64_ccmp_mode_to_code (enum machine_mode mode);
+int aarch64_ccmp_mode_to_code (machine_mode mode);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
-bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
-bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
+bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
+bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
tree, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 264a9c047ce..011fcec0795 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1033,6 +1033,18 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mla_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (plus:VDQHS
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 1 "register_operand" "w"))
+ (match_operand:VDQHS 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
(define_insn "aarch64_mls<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
@@ -1080,6 +1092,18 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mls_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (minus:VDQHS
+ (match_operand:VDQHS 1 "register_operand" "0")
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "w"))))]
+ "TARGET_SIMD"
+ "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
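For illustration only (my example, not from the patch): the two *aarch64_ml[as]_elt_merge<mode> patterns match a multiply by a vec_duplicate of a scalar register directly, which is the form combine produces for multiply-accumulate loops with a loop-invariant multiplier:

    /* Sketch: with the patterns above, the inner multiply-accumulate is
       expected to become "mla v0.4s, v1.4s, v2.s[0]" without a separate dup
       fixup.  Function name is hypothetical.  */
    void
    scale_accumulate (int *restrict a, const int *restrict b, int c, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] += b[i] * c;
    }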
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -2809,38 +2833,10 @@
(match_operand:VDC 2 "register_operand")]
"TARGET_SIMD"
{
- rtx op1, op2;
- if (BYTES_BIG_ENDIAN)
- {
- op1 = operands[2];
- op2 = operands[1];
- }
- else
- {
- op1 = operands[1];
- op2 = operands[2];
- }
- emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
- DONE;
-}
-)
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
-(define_insn_and_split "aarch64_combine_internal<mode>"
- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
- (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
- (match_operand:VDC 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- if (BYTES_BIG_ENDIAN)
- aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
- else
- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
DONE;
}
-[(set_attr "type" "multiple")]
)
(define_expand "aarch64_simd_combine<mode>"
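Context sketch (my assumption, not stated in the patch): the aarch64_combine<mode> expander backs the vcombine_* NEON intrinsics, so after this change a vector concatenation goes straight through aarch64_split_simd_combine with no intermediate internal pattern:

    #include <arm_neon.h>

    /* Sketch: concatenate two 64-bit halves into one 128-bit vector.  */
    int32x4_t
    combine_example (int32x2_t lo, int32x2_t hi)
    {
      return vcombine_s32 (lo, hi);
    }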
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 95592f9fa17..055ebafb830 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
bool is_packed);
+static machine_mode
+aarch64_simd_container_mode (machine_mode mode, unsigned width);
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
@@ -206,22 +208,6 @@ static const struct cpu_addrcost_table generic_addrcost_table =
0 /* imm_offset */
};
-static const struct cpu_addrcost_table cortexa57_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0, /* imm_offset */
-};
-
static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
{
@@ -254,22 +240,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
0, /* imm_offset */
};
-static const struct cpu_addrcost_table qdf24xx_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0 /* imm_offset */
-};
-
static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
{
@@ -390,13 +360,13 @@ static const struct cpu_vector_cost thunderx_vector_cost =
3, /* scalar_load_cost */
1, /* scalar_store_cost */
4, /* vec_int_stmt_cost */
- 4, /* vec_fp_stmt_cost */
+ 1, /* vec_fp_stmt_cost */
4, /* vec_permute_cost */
2, /* vec_to_scalar_cost */
2, /* scalar_to_vec_cost */
3, /* vec_align_load_cost */
- 10, /* vec_unalign_load_cost */
- 10, /* vec_unalign_store_cost */
+ 5, /* vec_unalign_load_cost */
+ 5, /* vec_unalign_store_cost */
1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
3 /* cond_not_taken_branch_cost */
@@ -488,20 +458,6 @@ static const struct cpu_branch_cost generic_branch_cost =
3 /* Unpredictable. */
};
-/* Branch costs for Cortex-A57. */
-static const struct cpu_branch_cost cortexa57_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
-/* Branch costs for Vulcan. */
-static const struct cpu_branch_cost thunderx2t99_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
/* Generic approximation modes. */
static const cpu_approx_modes generic_approx_modes =
{
@@ -612,7 +568,7 @@ static const struct tune_params cortexa35_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
1, /* issue_rate */
@@ -638,7 +594,7 @@ static const struct tune_params cortexa53_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
2, /* issue_rate */
@@ -661,10 +617,10 @@ static const struct tune_params cortexa53_tunings =
static const struct tune_params cortexa57_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -687,10 +643,10 @@ static const struct tune_params cortexa57_tunings =
static const struct tune_params cortexa72_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -713,10 +669,10 @@ static const struct tune_params cortexa72_tunings =
static const struct tune_params cortexa73_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
2, /* issue_rate. */
@@ -842,7 +798,7 @@ static const struct tune_params xgene1_tunings =
static const struct tune_params qdf24xx_tunings =
{
&qdf24xx_extra_costs,
- &qdf24xx_addrcost_table,
+ &generic_addrcost_table,
&qdf24xx_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
@@ -871,11 +827,12 @@ static const struct tune_params thunderx2t99_tunings =
&thunderx2t99_addrcost_table,
&thunderx2t99_regmove_cost,
&thunderx2t99_vector_cost,
- &thunderx2t99_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
4, /* issue_rate. */
- (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */
+ (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
+ | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
16, /* function_align. */
8, /* jump_align. */
16, /* loop_align. */
@@ -1031,7 +988,7 @@ static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
reg_class_t best_class)
{
- enum machine_mode mode;
+ machine_mode mode;
if (allocno_class != ALL_REGS)
return allocno_class;
@@ -1044,7 +1001,7 @@ aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
}
static unsigned int
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
+aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
if (GET_MODE_UNIT_SIZE (mode) == 4)
return aarch64_tune_params.min_div_recip_mul_sf;
@@ -1053,7 +1010,7 @@ aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
- enum machine_mode mode)
+ machine_mode mode)
{
if (VECTOR_MODE_P (mode))
return aarch64_tune_params.vec_reassoc_width;
@@ -1732,41 +1689,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
machine_mode dst_mode = GET_MODE (dst);
gcc_assert (VECTOR_MODE_P (dst_mode));
+ gcc_assert (register_operand (dst, dst_mode)
+ && register_operand (src1, src_mode)
+ && register_operand (src2, src_mode));
- if (REG_P (dst) && REG_P (src1) && REG_P (src2))
- {
- rtx (*gen) (rtx, rtx, rtx);
-
- switch (src_mode)
- {
- case V8QImode:
- gen = gen_aarch64_simd_combinev8qi;
- break;
- case V4HImode:
- gen = gen_aarch64_simd_combinev4hi;
- break;
- case V2SImode:
- gen = gen_aarch64_simd_combinev2si;
- break;
- case V4HFmode:
- gen = gen_aarch64_simd_combinev4hf;
- break;
- case V2SFmode:
- gen = gen_aarch64_simd_combinev2sf;
- break;
- case DImode:
- gen = gen_aarch64_simd_combinedi;
- break;
- case DFmode:
- gen = gen_aarch64_simd_combinedf;
- break;
- default:
- gcc_unreachable ();
- }
+ rtx (*gen) (rtx, rtx, rtx);
- emit_insn (gen (dst, src1, src2));
- return;
+ switch (src_mode)
+ {
+ case V8QImode:
+ gen = gen_aarch64_simd_combinev8qi;
+ break;
+ case V4HImode:
+ gen = gen_aarch64_simd_combinev4hi;
+ break;
+ case V2SImode:
+ gen = gen_aarch64_simd_combinev2si;
+ break;
+ case V4HFmode:
+ gen = gen_aarch64_simd_combinev4hf;
+ break;
+ case V2SFmode:
+ gen = gen_aarch64_simd_combinev2sf;
+ break;
+ case DImode:
+ gen = gen_aarch64_simd_combinedi;
+ break;
+ case DFmode:
+ gen = gen_aarch64_simd_combinedf;
+ break;
+ default:
+ gcc_unreachable ();
}
+
+ emit_insn (gen (dst, src1, src2));
+ return;
}
/* Split a complex SIMD move. */
@@ -1875,6 +1832,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
+ /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
+ (with XXXX non-zero). In that case check to see if the move can be done in
+ a smaller mode. */
+ val2 = val & 0xffffffff;
+ if (mode == DImode
+ && aarch64_move_imm (val2, SImode)
+ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
+ {
+ if (generate)
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+
+ /* Check if we have to emit a second instruction by checking to see
+ if any of the upper 32 bits of the original DI mode value is set. */
+ if (val == val2)
+ return 1;
+
+ i = (val >> 48) ? 48 : 32;
+
+ if (generate)
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+
+ return 2;
+ }
+
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
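A worked example of the new case (constant chosen for illustration): for val = 0x0000abcdffff1234, the low 32 bits 0xffff1234 are a single SImode MOVN immediate and bits 48-63 are zero, so i is 32 and two instructions suffice where the generic DImode path would need three (a MOVN plus two MOVKs):

    /* Sketch of the expected sequence (assumes this patch):
         mov  w0, #0xffff1234        // SImode MOVN; upper 32 bits cleared
         movk x0, #0xabcd, lsl #32   // insert the remaining halfword  */
    unsigned long long
    imm_example (void)
    {
      return 0x0000abcdffff1234ULL;
    }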
@@ -2002,6 +1984,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
gcc_assert (can_create_pseudo_p ());
base = gen_reg_rtx (ptr_mode);
aarch64_expand_mov_immediate (base, XEXP (mem, 0));
+ if (ptr_mode != Pmode)
+ base = convert_memory_address (Pmode, base);
mem = gen_rtx_MEM (ptr_mode, base);
}
@@ -4720,6 +4704,69 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
return true;
}
+/* Return the binary representation of floating point constant VALUE in INTVAL.
+ If the value cannot be converted, return false without setting INTVAL.
+ The conversion is done in the mode of VALUE. */
+bool
+aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
+{
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (value))
+ {
+ *intval = 0;
+ return true;
+ }
+
+ machine_mode mode = GET_MODE (value);
+ if (GET_CODE (value) != CONST_DOUBLE
+ || !SCALAR_FLOAT_MODE_P (mode)
+ || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
+ /* Only support up to DF mode. */
+ || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
+ return false;
+
+ unsigned HOST_WIDE_INT ival = 0;
+
+ long res[2];
+ real_to_target (res,
+ CONST_DOUBLE_REAL_VALUE (value),
+ REAL_MODE_FORMAT (mode));
+
+ ival = zext_hwi (res[0], 32);
+ if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (DFmode))
+ ival |= (zext_hwi (res[1], 32) << 32);
+
+ *intval = ival;
+ return true;
+}
+
+/* Return TRUE if rtx X is an immediate constant that can be moved using a
+ single MOV(+MOVK) followed by an FMOV. */
+bool
+aarch64_float_const_rtx_p (rtx x)
+{
+ machine_mode mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ return false;
+
+ /* Determine whether it's cheaper to write float constants as
+ mov/movk pairs over ldr/adrp pairs. */
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode)
+ && aarch64_reinterpret_float_as_int (x, &ival))
+ {
+ machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
+ int num_instr = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ return num_instr < 3;
+ }
+
+ return false;
+}
+
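A hedged worked example (my constant choice): 1.0e9 is not FMOV-representable, but its bit pattern 0x41cdcd6500000000 has only two non-zero halfwords, so aarch64_internal_mov_immediate reports two instructions and this function returns true, keeping the constant out of the literal pool:

    /* Sketch: expected code with this patch
         movz x0, #0xcd65, lsl #32
         movk x0, #0x41cd, lsl #48
         fmov d0, x0
       instead of an adrp/ldr literal-pool load.  */
    double
    fp_const_example (void)
    {
      return 1.0e9;
    }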
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
@@ -4732,6 +4779,49 @@ aarch64_float_const_zero_rtx_p (rtx x)
return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
}
+/* Return TRUE if rtx X is an immediate constant that fits in a single
+ MOVI immediate operation. */
+bool
+aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
+{
+ if (!TARGET_SIMD)
+ return false;
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (x))
+ return true;
+
+ machine_mode vmode, imode;
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (!aarch64_reinterpret_float_as_int (x, &ival))
+ return false;
+
+ imode = int_mode_for_mode (mode);
+ }
+ else if (GET_CODE (x) == CONST_INT
+ && SCALAR_INT_MODE_P (mode))
+ {
+ imode = mode;
+ ival = INTVAL (x);
+ }
+ else
+ return false;
+
+ /* Use a 64-bit mode for everything except DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+
+ vmode = aarch64_simd_container_mode (imode, width);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
+
+ return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
+}
+
+
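Illustrative sketch (constant chosen by me): the duplicated-vector test accepts, among others, any 64-bit value whose bytes are all 0x00 or 0xff - the classic MOVI byte mask - for both CONST_INT operands and reinterpreted CONST_DOUBLEs:

    #include <arm_neon.h>

    /* Sketch: 0xff00ff00ff00ff00 duplicates to a valid MOVI immediate, so a
       SIMD-register copy of it can be "movi d0, #0xff00ff00ff00ff00".  */
    uint64x1_t
    movi_example (void)
    {
      return vdup_n_u64 (0xff00ff00ff00ff00ULL);
    }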
/* Return the fixed registers used for condition codes. */
static bool
@@ -4884,7 +4974,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
}
static int
-aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
+aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
int
aarch64_get_condition_code (rtx x)
@@ -4898,7 +4988,7 @@ aarch64_get_condition_code (rtx x)
}
static int
-aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
+aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
{
switch (mode)
{
@@ -5050,12 +5140,43 @@ static const int aarch64_nzcv_codes[] =
0 /* NV, Any. */
};
+/* Print operand X to file F in a target specific manner according to CODE.
+ The acceptable formatting commands given by CODE are:
+ 'c': An integer or symbol address without a preceding #
+ sign.
+ 'e': Print the sign/zero-extend size as a character 8->b,
+ 16->h, 32->w.
+ 'p': Prints N such that 2^N == X (X must be power of 2 and
+ const int).
+ 'P': Print the number of non-zero bits in X (a const_int).
+ 'H': Print the higher numbered register of a pair (TImode)
+ of regs.
+ 'm': Print a condition (eq, ne, etc).
+ 'M': Same as 'm', but invert condition.
+ 'b/h/s/d/q': Print a scalar FP/SIMD register name.
+ 'S/T/U/V': Print a FP/SIMD register name for a register list.
+ The register printed is the FP/SIMD register name
+ of X + 0/1/2/3 for S/T/U/V.
+ 'R': Print a scalar FP/SIMD register name + 1.
+ 'X': Print bottom 16 bits of integer constant in hex.
+ 'w/x': Print a general register name or the zero register
+ (32-bit or 64-bit).
+ '0': Print a normal operand, if it's a general register,
+ then we assume DImode.
+ 'k': Print NZCV for conditional compare instructions.
+ 'A': Output address constant representing the first
+ argument of X, specifying a relocation offset
+ if appropriate.
+ 'L': Output constant address specified by X
+ with a relocation offset if appropriate.
+ 'G': Prints address of X, specifying a PC relative
+ relocation mode if appropriate. */
+
static void
aarch64_print_operand (FILE *f, rtx x, int code)
{
switch (code)
{
- /* An integer or symbol address without a preceding # sign. */
case 'c':
switch (GET_CODE (x))
{
@@ -5082,7 +5203,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'e':
- /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
{
int n;
@@ -5115,7 +5235,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
{
int n;
- /* Print N such that 2^N == X. */
if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5127,7 +5246,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'P':
- /* Print the number of non-zero bits in X (a const_int). */
if (!CONST_INT_P (x))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5138,7 +5256,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'H':
- /* Print the higher numbered register of a pair (TImode) of regs. */
if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5152,8 +5269,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'm':
{
int cond_code;
- /* Print a condition (eq, ne, etc) or its inverse. */
-
/* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
if (x == const_true_rtx)
{
@@ -5181,7 +5296,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 's':
case 'd':
case 'q':
- /* Print a scalar FP/SIMD register name. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5194,7 +5308,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'T':
case 'U':
case 'V':
- /* Print the first FP/SIMD register name in a list. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5204,7 +5317,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'R':
- /* Print a scalar FP/SIMD register name + 1. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5214,7 +5326,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'X':
- /* Print bottom 16 bits of integer constant in hex. */
if (!CONST_INT_P (x))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5225,8 +5336,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'w':
case 'x':
- /* Print a general register name or the zero register (32-bit or
- 64-bit). */
if (x == const0_rtx
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
{
@@ -5249,8 +5358,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
/* Fall through */
case 0:
- /* Print a normal operand, if it's a general register, then we
- assume DImode. */
if (x == NULL)
{
output_operand_lossage ("missing operand");
@@ -5265,6 +5372,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case MEM:
output_address (GET_MODE (x), XEXP (x, 0));
+ /* Check all memory references are Pmode - even with ILP32. */
+ gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
break;
case CONST:
@@ -5401,7 +5510,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'G':
-
switch (aarch64_classify_symbolic_expression (x))
{
case SYMBOL_TLSLE24:
@@ -5416,7 +5524,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'k':
{
HOST_WIDE_INT cond_code;
- /* Print nzcv. */
if (!CONST_INT_P (x))
{
@@ -5909,12 +6016,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
return NO_REGS;
}
- /* If it's an integer immediate that MOVI can't handle, then
- FP_REGS is not an option, so we return NO_REGS instead. */
- if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
- && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
- return NO_REGS;
-
/* Register eliminiation can result in a request for
SP+constant->FP_REGS. We cannot support such operations which
use SP as source and an FP_REG as destination, so reject out
@@ -6864,6 +6965,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
return true;
case CONST_DOUBLE:
+
+ /* First determine number of instructions to do the move
+ as an integer constant. */
+ if (!aarch64_float_const_representable_p (x)
+ && !aarch64_can_const_movi_rtx_p (x, mode)
+ && aarch64_float_const_rtx_p (x))
+ {
+ unsigned HOST_WIDE_INT ival;
+ bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
+ gcc_assert (succeed);
+
+ machine_mode imode = mode == HFmode ? SImode
+ : int_mode_for_mode (mode);
+ int ncost = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ *cost += COSTS_N_INSNS (ncost);
+ return true;
+ }
+
if (speed)
{
/* mov[df,sf]_aarch64. */
@@ -7537,17 +7657,26 @@ cost_plus:
}
else
{
- if (speed)
+ if (VECTOR_MODE_P (mode))
{
- if (VECTOR_MODE_P (mode))
- {
- /* Vector shift (register). */
- *cost += extra_cost->vect.alu;
- }
- else
+ if (speed)
+ /* Vector shift (register). */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ if (speed)
+ /* LSLV. */
+ *cost += extra_cost->alu.shift_reg;
+
+ if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+ && CONST_INT_P (XEXP (op1, 1))
+ && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
{
- /* LSLV. */
- *cost += extra_cost->alu.shift_reg;
+ *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+ /* We already demanded XEXP (op1, 0) to be REG_P, so
+ don't recurse into it. */
+ return true;
}
}
return false; /* All arguments need to be in registers. */
@@ -7576,14 +7705,27 @@ cost_plus:
}
else
{
-
- /* ASR (register) and friends. */
- if (speed)
+ if (VECTOR_MODE_P (mode))
{
- if (VECTOR_MODE_P (mode))
+ if (speed)
+ /* Vector shift (register). */
*cost += extra_cost->vect.alu;
- else
+ }
+ else
+ {
+ if (speed)
+ /* ASR (register) and friends. */
*cost += extra_cost->alu.shift_reg;
+
+ if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+ && CONST_INT_P (XEXP (op1, 1))
+ && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ {
+ *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+ /* We already demanded XEXP (op1, 0) to be REG_P, so
+ don't recurse into it. */
+ return true;
+ }
}
return false; /* All arguments need to be in registers. */
}
@@ -10151,7 +10293,7 @@ aarch64_classify_symbol (rtx x, rtx offset)
/* This is alright even in PIC code as the constant
pool reference is always PC relative and within
the same translation unit. */
- if (CONSTANT_POOL_ADDRESS_P (x))
+ if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
return SYMBOL_SMALL_ABSOLUTE;
else
return SYMBOL_FORCE_TO_MEM;
@@ -10186,18 +10328,16 @@ aarch64_legitimate_pic_operand_p (rtx x)
/* Return true if X holds either a quarter-precision or
floating-point +0.0 constant. */
static bool
-aarch64_valid_floating_const (machine_mode mode, rtx x)
+aarch64_valid_floating_const (rtx x)
{
if (!CONST_DOUBLE_P (x))
return false;
- if (aarch64_float_const_zero_rtx_p (x))
+ /* This call determines which constants can be used in mov<mode>
+ as integer moves instead of constant loads. */
+ if (aarch64_float_const_rtx_p (x))
return true;
- /* We only handle moving 0.0 to a TFmode register. */
- if (!(mode == SFmode || mode == DFmode))
- return false;
-
return aarch64_float_const_representable_p (x);
}
@@ -10209,11 +10349,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
return false;
- /* This could probably go away because
- we now decompose CONST_INTs according to expand_mov_immediate. */
+ /* For these cases we never want to use a literal load.
+ As such we have to prevent the compiler from forcing these
+ to memory. */
if ((GET_CODE (x) == CONST_VECTOR
&& aarch64_simd_valid_immediate (x, mode, false, NULL))
- || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
+ || CONST_INT_P (x)
+ || aarch64_valid_floating_const (x)
+ || aarch64_can_const_movi_rtx_p (x, mode)
+ || aarch64_float_const_rtx_p (x))
return !targetm.cannot_force_const_mem (mode, x);
if (GET_CODE (x) == HIGH
@@ -11496,23 +11640,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos)
}
bool
-aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
-{
- HOST_WIDE_INT imm = INTVAL (x);
- int i;
-
- for (i = 0; i < 8; i++)
- {
- unsigned int byte = imm & 0xff;
- if (byte != 0xff && byte != 0)
- return false;
- imm >>= 8;
- }
-
- return true;
-}
-
-bool
aarch64_mov_operand_p (rtx x, machine_mode mode)
{
if (GET_CODE (x) == HIGH
@@ -11666,7 +11793,7 @@ aarch64_simd_mem_operand_p (rtx op)
COUNT is the number of components into which the copy needs to be
decomposed. */
void
-aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
+aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
unsigned int count)
{
unsigned int i;
@@ -11687,7 +11814,7 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
one of VSTRUCT modes: OI, CI, or XI. */
int
-aarch64_simd_attr_length_rglist (enum machine_mode mode)
+aarch64_simd_attr_length_rglist (machine_mode mode)
{
return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}
@@ -12119,10 +12246,8 @@ aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
static void
aarch64_emit_unlikely_jump (rtx insn)
{
- int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
-
rtx_insn *jump = emit_jump_insn (insn);
- add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
+ add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern. */
@@ -12905,15 +13030,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
}
char*
-aarch64_output_scalar_simd_mov_immediate (rtx immediate,
- machine_mode mode)
+aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode)
{
+
+ /* If a floating-point constant was passed and we want to use it in an
+ integer mode, convert it to an integer first. */
+ if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (immediate, &ival))
+ gcc_unreachable ();
+ immediate = gen_int_mode (ival, mode);
+ }
+
machine_mode vmode;
+ /* Use a 64-bit mode for everything except DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
gcc_assert (!VECTOR_MODE_P (mode));
- vmode = aarch64_simd_container_mode (mode, 64);
+ vmode = aarch64_simd_container_mode (mode, width);
rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
- return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+ return aarch64_output_simd_mov_immediate (v_op, vmode, width);
}
/* Split operands into moves from op[1] + op[2] into op[0]. */
@@ -13678,7 +13816,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
}
rtx
-aarch64_reverse_mask (enum machine_mode mode)
+aarch64_reverse_mask (machine_mode mode)
{
/* We have to reverse each vector because we dont have
a permuted load that can reverse-load according to ABI rules. */
@@ -14278,13 +14416,68 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
enum attr_type prev_type = get_attr_type (prev);
- /* FIXME: this misses some which is considered simple arthematic
- instructions for ThunderX. Simple shifts are missed here. */
- if (prev_type == TYPE_ALUS_SREG
- || prev_type == TYPE_ALUS_IMM
- || prev_type == TYPE_LOGICS_REG
- || prev_type == TYPE_LOGICS_IMM)
- return true;
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+
+ if (reg_referenced_p (cc_reg_1, PATTERN (curr))
+ && prev
+ && modified_in_p (cc_reg_1, prev))
+ {
+ /* FIXME: this misses some cases which are considered simple arithmetic
+ instructions for ThunderX. Simple shifts are missed here. */
+ if (prev_type == TYPE_ALUS_SREG
+ || prev_type == TYPE_ALUS_IMM
+ || prev_type == TYPE_LOGICS_REG
+ || prev_type == TYPE_LOGICS_IMM)
+ return true;
+ }
+ }
+
+ if (prev_set
+ && curr_set
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
+ && any_condjump_p (curr))
+ {
+ /* We're trying to match:
+ prev (alu_insn) == (set (r0) (plus (r0) (r1/imm)))
+ curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
+ (const_int 0))
+ (label_ref ("SYM"))
+ (pc)) */
+ if (SET_DEST (curr_set) == (pc_rtx)
+ && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
+ && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+ && REG_P (SET_DEST (prev_set))
+ && REGNO (SET_DEST (prev_set))
+ == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
+ {
+ /* Fuse ALU operations followed by conditional branch instruction. */
+ switch (get_attr_type (prev))
+ {
+ case TYPE_ALU_IMM:
+ case TYPE_ALU_SREG:
+ case TYPE_ADC_REG:
+ case TYPE_ADC_IMM:
+ case TYPE_ADCS_REG:
+ case TYPE_ADCS_IMM:
+ case TYPE_LOGIC_REG:
+ case TYPE_LOGIC_IMM:
+ case TYPE_CSEL:
+ case TYPE_ADR:
+ case TYPE_MOV_IMM:
+ case TYPE_SHIFT_REG:
+ case TYPE_SHIFT_IMM:
+ case TYPE_BFM:
+ case TYPE_RBIT:
+ case TYPE_REV:
+ case TYPE_EXTEND:
+ return true;
+
+ default:;
+ }
+ }
}
return false;
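An illustrative pair this enables (my example, matching the RTL shape in the comment above): an ALU instruction writing a register that is immediately consumed by a compare-and-branch-on-zero:

    /* Sketch: the loop below is expected to compile to
         add  w0, w0, w1
         cbnz w0, .Lloop
       which AARCH64_FUSE_ALU_BRANCH now marks as a fusion candidate.  */
    int
    fusion_example (int x, int y)
    {
      do
        x += y;
      while (x != 0);
      return x;
    }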
@@ -14468,7 +14661,7 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
- enum machine_mode mode)
+ machine_mode mode)
{
HOST_WIDE_INT offval_1, offval_2, msize;
enum reg_class rclass_1, rclass_2;
@@ -14575,7 +14768,7 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
- enum machine_mode mode)
+ machine_mode mode)
{
enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
@@ -14709,7 +14902,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
- enum machine_mode mode, RTX_CODE code)
+ machine_mode mode, RTX_CODE code)
{
rtx base, offset, t1, t2;
rtx mem_1, mem_2, mem_3, mem_4;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 3b3f27e2f95..7f91edb5713 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -98,14 +98,24 @@
&& (ALIGN) < BITS_PER_WORD) \
? BITS_PER_WORD : ALIGN)
-#define DATA_ALIGNMENT(EXP, ALIGN) \
- ((((ALIGN) < BITS_PER_WORD) \
- && (TREE_CODE (EXP) == ARRAY_TYPE \
- || TREE_CODE (EXP) == UNION_TYPE \
- || TREE_CODE (EXP) == RECORD_TYPE)) \
- ? BITS_PER_WORD : (ALIGN))
-
-#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN)
+/* Align definitions of arrays, unions and structures so that
+ initializations and copies can be made more efficient. This is not
+ ABI-changing, so it only affects places where we can see the
+ definition. Increasing the alignment tends to introduce padding,
+ so don't do this when optimizing for size/conserving stack space. */
+#define AARCH64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \
+ (((COND) && ((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (EXP) == ARRAY_TYPE \
+ || TREE_CODE (EXP) == UNION_TYPE \
+ || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* Align global data. */
+#define DATA_ALIGNMENT(EXP, ALIGN) \
+ AARCH64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN)
+
+/* Similarly, make sure that objects on the stack are sensibly aligned. */
+#define LOCAL_ALIGNMENT(EXP, ALIGN) \
+ AARCH64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN)
#define STRUCTURE_SIZE_BOUNDARY 8
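Hedged example of the behavioural change: definitions keep word alignment when compiling for speed, but are no longer padded when optimizing for size or conserving stack:

    /* Sketch: at -O2 this definition is expected to stay word-aligned (as
       before); at -Os the new !optimize_size condition leaves it byte-aligned,
       avoiding padding.  */
    char small_buf[6] = "abcde";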
@@ -134,7 +144,8 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
/* ARMv8.1-A architecture extensions. */
#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
-#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */
+#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */
+#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
/* ARMv8.2-A architecture extensions. */
#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
@@ -151,7 +162,8 @@ extern unsigned aarch64_architecture_version;
/* Architecture flags that effect instruction selection. */
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
#define AARCH64_FL_FOR_ARCH8_1 \
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
+ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
+ | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
#define AARCH64_FL_FOR_ARCH8_2 \
(AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
#define AARCH64_FL_FOR_ARCH8_3 \
@@ -164,7 +176,7 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
-#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
+#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6bdbf650d92..fc799479c81 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -181,6 +181,11 @@
;; will be disabled when !TARGET_FLOAT.
(define_attr "fp" "no,yes" (const_string "no"))
+;; Attribute that specifies whether or not the instruction touches half
+;; precision fp registers. When this is set to yes for an alternative,
+;; that alternative will be disabled when !TARGET_FP_F16INST.
+(define_attr "fp16" "no,yes" (const_string "no"))
+
;; Attribute that specifies whether or not the instruction touches simd
;; registers. When this is set to yes for an alternative, that alternative
;; will be disabled when !TARGET_SIMD.
@@ -194,11 +199,14 @@
;; registers when -mgeneral-regs-only is specified.
(define_attr "enabled" "no,yes"
(cond [(ior
- (and (eq_attr "fp" "yes")
- (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
- (and (eq_attr "simd" "yes")
- (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
- (const_string "no")
+ (ior
+ (and (eq_attr "fp" "yes")
+ (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
+ (and (eq_attr "simd" "yes")
+ (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
+ (and (eq_attr "fp16" "yes")
+ (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))))
+ (const_string "no")
] (const_string "yes")))
;; Attribute that specifies whether we are dealing with a branch to a
@@ -920,8 +928,8 @@
)
(define_insn_and_split "*movsi_aarch64"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w")
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w,w")
+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
@@ -938,8 +946,9 @@
adrp\\t%x0, %A1
fmov\\t%s0, %w1
fmov\\t%w0, %s1
- fmov\\t%s0, %s1"
- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+ fmov\\t%s0, %s1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
"{
@@ -947,13 +956,14 @@
DONE;
}"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
- adr,adr,f_mcr,f_mrc,fmov")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+ adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@@ -961,6 +971,7 @@
mov\\t%0, %x1
mov\\t%x0, %1
mov\\t%x0, %1
+ mov\\t%w0, %1
#
ldr\\t%x0, %1
ldr\\t%d0, %1
@@ -971,7 +982,7 @@
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
- movi\\t%d0, %1"
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
"(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
@@ -979,10 +990,10 @@
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
- adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\
+ load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
@@ -1062,65 +1073,94 @@
)
(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
- || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ || aarch64_reg_or_fp_float (operands[1], HFmode))"
"@
movi\\t%0.4h, #0
- mov\\t%0.h[0], %w1
+ fmov\\t%h0, %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%h0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+ neon_move,f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
+ (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ || aarch64_reg_or_fp_float (operands[1], SFmode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%w0, %w1
+ mov\\t%w0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg,\
+ fconsts")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ || aarch64_reg_or_fp_float (operands[1], DFmode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
- mov\\t%x0, %x1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%x0, %x1
+ mov\\t%x0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+ f_loadd,f_stored,load1,store1,mov_reg,\
+ fconstd")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
+)
+
+(define_split
+ [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
+ (match_operand:GPF_HF 1 "general_operand"))]
+ "can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
+ emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
)
(define_insn "*movtf_aarch64"
@@ -1905,6 +1945,17 @@
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
+(define_insn "aarch64_sub<mode>_compare0"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (minus:GPI (match_operand:GPI 0 "register_operand" "r")
+ (match_operand:GPI 1 "aarch64_plus_operand" "r"))
+ (const_int 0)))]
+ ""
+ "cmp\\t%<w>0, %<w>1"
+ [(set_attr "type" "alus_sreg")]
+)
+
(define_insn "*compare_neg<mode>"
[(set (reg:CC_Z CC_REGNUM)
(compare:CC_Z
@@ -3824,6 +3875,22 @@
[(set_attr "type" "logics_reg,logics_imm")]
)
+(define_split
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "aarch64_mov_imm_operand"))
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "register_operand"))]
+ ""
+ [(set (match_dup 2) (match_dup 1))
+ (set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (match_dup 0)
+ (match_dup 2))
+ (const_int 0)))]
+)
+
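A hedged illustration (constant chosen by me): 0x5678 fits a single MOVZ but is not a valid logical immediate, so the split materializes it in a scratch register and keeps the flag-setting AND as a register tst:

    /* Sketch: expected code with this split
         mov  w1, #0x5678
         tst  w0, w1  */
    int
    tst_example (unsigned int x)
    {
      return (x & 0x5678u) != 0;
    }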
(define_insn "*and<mode>3nr_compare0_zextract"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
@@ -3859,6 +3926,26 @@
[(set_attr "type" "logics_shift_imm")]
)
+(define_split
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (SHIFT:GPI
+ (match_operand:GPI 0 "register_operand")
+ (match_operand:QI 1 "aarch64_shift_imm_<mode>"))
+ (match_operand:GPI 2 "aarch64_mov_imm_operand"))
+ (const_int 0)))
+ (clobber (match_operand:SI 3 "register_operand"))]
+ ""
+ [(set (match_dup 3) (match_dup 2))
+ (set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (SHIFT:GPI
+ (match_dup 0)
+ (match_dup 1))
+ (match_dup 3))
+ (const_int 0)))]
+)
+
;; -------------------------------------------------------------------
;; Shifts
;; -------------------------------------------------------------------
@@ -3942,6 +4029,97 @@
}
)
+;; When the LSL, LSR, ASR, ROR instructions operate on all register arguments
+;; they truncate the shift/rotate amount by the size of the registers they
+;; operate on: 32 for W-regs, 64 for X-regs. This allows us to optimise away
+;; such redundant masking instructions. GCC can do that automatically when
+;; SHIFT_COUNT_TRUNCATED is true, but we can't enable it for TARGET_SIMD
+;; because some of the SISD shift alternatives don't perform this truncation.
+;; So this pattern exists to catch such cases.
+
+(define_insn "*aarch64_<optab>_reg_<mode>3_mask1"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (SHIFT:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operator 4 "subreg_lowpart_operator"
+ [(and:GPI (match_operand:GPI 2 "register_operand" "r")
+ (match_operand 3 "const_int_operand" "n"))])))]
+ "(~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0"
+ "<shift>\t%<w>0, %<w>1, %<w>2"
+ [(set_attr "type" "shift_reg")]
+)
+
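A hedged example of the redundancy these patterns catch:

    /* Sketch: the "& 63" below is redundant because a variable LSL already
       truncates the shift amount mod 64, so only "lsl x0, x0, x1" is
       expected, with no separate AND.  */
    unsigned long long
    shift_example (unsigned long long x, unsigned int n)
    {
      return x << (n & 63);
    }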
+(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2"
+ [(set (match_operand:GPI 0 "register_operand" "=&r")
+ (SHIFT:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operator 4 "subreg_lowpart_operator"
+ [(neg:SI (and:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "const_int_operand" "n")))])))]
+ "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)"
+ "#"
+ "&& true"
+ [(const_int 0)]
+ {
+ rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+ : operands[0]);
+ emit_insn (gen_negsi2 (tmp, operands[2]));
+
+ rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]);
+ rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op,
+ SUBREG_BYTE (operands[4]));
+ emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp));
+ DONE;
+ }
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask"
+ [(set (match_operand:GPI 0 "register_operand" "=&r")
+ (ashift:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (minus:QI (match_operand 2 "const_int_operand" "n")
+ (match_operator 5 "subreg_lowpart_operator"
+ [(and:SI (match_operand:SI 3 "register_operand" "r")
+ (match_operand 4 "const_int_operand" "n"))]))))]
+ "((~INTVAL (operands[4]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)
+ && INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)"
+ "#"
+ "&& true"
+ [(const_int 0)]
+ {
+ rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+ : operands[0]);
+
+ emit_insn (gen_negsi2 (tmp, operands[3]));
+
+ rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]);
+ rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op,
+ SUBREG_BYTE (operands[5]));
+
+ emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp));
+ DONE;
+ }
+)
+
+(define_insn "*aarch64_<optab>_reg_di3_mask2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (SHIFT:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operator 4 "subreg_lowpart_operator"
+ [(and:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))]
+ "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)"
+{
+ rtx xop[3];
+ xop[0] = operands[0];
+ xop[1] = operands[1];
+ xop[2] = gen_lowpart (GET_MODE (operands[4]), operands[2]);
+ output_asm_insn ("<shift>\t%x0, %x1, %x2", xop);
+ return "";
+}
+ [(set_attr "type" "shift_reg")]
+)
+
;; Logical left shift using SISD or Integer instruction
(define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
[(set (match_operand:GPI 0 "register_operand" "=r,r,w,w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0753da32f59..d7b30b0e5ee 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
/* ARMv8.1-A instrinsics. */
#pragma GCC push_options
-#pragma GCC target ("arch=armv8.1-a")
+#pragma GCC target ("+nothing+rdma")
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 88e840f2898..9ce3d4efaf3 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -176,6 +176,12 @@
(and (match_code "const_double")
(match_test "aarch64_float_const_representable_p (op)")))
+(define_constraint "Uvi"
+ "A floating point constant which can be used with a\
+ MOVI immediate operation."
+ (and (match_code "const_double")
+ (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
+
(define_constraint "Dn"
"@internal
A constraint that matches vector of immediates."
@@ -220,9 +226,17 @@
(define_constraint "Dd"
"@internal
- A constraint that matches an immediate operand valid for AdvSIMD scalar."
+ A constraint that matches an integer immediate operand valid\
+ for AdvSIMD scalar operations in DImode."
+ (and (match_code "const_int")
+ (match_test "aarch64_can_const_movi_rtx_p (op, DImode)")))
+
+(define_constraint "Ds"
+ "@internal
+ A constraint that matches an integer immediate operand valid\
+ for AdvSIMD scalar operations in SImode."
(and (match_code "const_int")
- (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
+ (match_test "aarch64_can_const_movi_rtx_p (op, SImode)")))
(define_address_constraint "Dp"
"@internal
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c
index 94d7f9c5869..fa8c56aab02 100644
--- a/gcc/config/aarch64/cortex-a57-fma-steering.c
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.c
@@ -603,7 +603,7 @@ fma_node::rename (fma_forest *forest)
{
rtx_insn *insn = this->m_insn;
HARD_REG_SET unavailable;
- enum machine_mode mode;
+ machine_mode mode;
int reg;
if (dump_file)
@@ -973,10 +973,17 @@ func_fma_steering::analyze ()
break;
}
- /* We didn't find a chain with a def for this instruction. */
- gcc_assert (i < dest_op_info->n_chains);
-
- this->analyze_fma_fmul_insn (forest, chain, head);
+ /* Due to the implementation of regrename, the destination register can
+ slip away from regrename's analysis. As a result, there is no chain
+ for the destination register of the insn. We simply skip the insn,
+ even if it is a fmul/fmac instruction. This can happen when the
+ destination register is also a source register of the insn and one
+ of the conditions below is satisfied:
+ 1) the source reg is set up in a larger mode than this insn;
+ 2) the source reg is uninitialized;
+ 3) the source reg is passed in as a parameter. */
+ if (i < dest_op_info->n_chains)
+ this->analyze_fma_fmul_insn (forest, chain, head);
}
}
free (bb_dfs_preorder);
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd3611..067cef78533 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -44,6 +44,9 @@
;; Iterator for all scalar floating point modes (HF, SF, DF)
(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF), with HF
+;; included unconditionally
+(define_mode_iterator GPF_HF [HF SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index cd7ded98663..95d28cfa33c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -35,6 +35,10 @@
(and (match_code "const_int")
(match_test "op == CONST0_RTX (mode)")))
+(define_special_predicate "subreg_lowpart_operator"
+ (and (match_code "subreg")
+ (match_test "subreg_lowpart_p (op)")))
+
(define_predicate "aarch64_ccmp_immediate"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), -31, 31)")))
@@ -53,6 +57,11 @@
(ior (match_operand 0 "register_operand")
(match_test "op == const0_rtx"))))
+(define_predicate "aarch64_reg_or_fp_float"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_double")
+ (match_test "aarch64_float_const_rtx_p (op)"))))
+
(define_predicate "aarch64_reg_or_fp_zero"
(ior (match_operand 0 "register_operand")
(and (match_code "const_double")
@@ -110,6 +119,10 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_logical_immediate")))
+(define_predicate "aarch64_mov_imm_operand"
+ (and (match_code "const_int")
+ (match_test "aarch64_move_imm (INTVAL (op), mode)")))
+
(define_predicate "aarch64_logical_and_immediate"
(and (match_code "const_int")
(match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
diff --git a/gcc/config/aarch64/rtems.h b/gcc/config/aarch64/rtems.h
index b48e28afda0..07c5679d5c1 100644
--- a/gcc/config/aarch64/rtems.h
+++ b/gcc/config/aarch64/rtems.h
@@ -1,20 +1,25 @@
/* Definitions for RTEMS based AARCH64 system.
Copyright (C) 2016-2017 Free Software Foundation, Inc.
-
+
This file is part of GCC.
-
+
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
-
+
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#define HAS_INIT_SECTION