author    jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4>  2017-07-31 14:52:19 +0000
committer jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4>  2017-07-31 14:52:19 +0000
commit    b31856d3ac23cf3dab1e95cb96230dc81564c84a (patch)
tree      49524df297e69390449c3ef5037b2360d14c7b1a /gcc/config/aarch64/aarch64.c
parent    1ade4d1864f2cf61eb5c045f57c0bcac80943c04 (diff)
parent    a168a775e93ec31ae743ad282d8e60fa1c116891 (diff)
download  gcc-b31856d3ac23cf3dab1e95cb96230dc81564c84a.tar.gz
Merged trunk revision 250739 into the hsa branch
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/hsa@250744 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/aarch64/aarch64.c')
-rw-r--r--  gcc/config/aarch64/aarch64.c  |  555
1 file changed, 374 insertions, 181 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 95592f9fa17..055ebafb830 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
bool is_packed);
+static machine_mode
+aarch64_simd_container_mode (machine_mode mode, unsigned width);
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
@@ -206,22 +208,6 @@ static const struct cpu_addrcost_table generic_addrcost_table =
0 /* imm_offset */
};
-static const struct cpu_addrcost_table cortexa57_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0, /* imm_offset */
-};
-
static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
{
@@ -254,22 +240,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
0, /* imm_offset */
};
-static const struct cpu_addrcost_table qdf24xx_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0 /* imm_offset */
-};
-
static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
{
@@ -390,13 +360,13 @@ static const struct cpu_vector_cost thunderx_vector_cost =
3, /* scalar_load_cost */
1, /* scalar_store_cost */
4, /* vec_int_stmt_cost */
- 4, /* vec_fp_stmt_cost */
+ 1, /* vec_fp_stmt_cost */
4, /* vec_permute_cost */
2, /* vec_to_scalar_cost */
2, /* scalar_to_vec_cost */
3, /* vec_align_load_cost */
- 10, /* vec_unalign_load_cost */
- 10, /* vec_unalign_store_cost */
+ 5, /* vec_unalign_load_cost */
+ 5, /* vec_unalign_store_cost */
1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
3 /* cond_not_taken_branch_cost */
@@ -488,20 +458,6 @@ static const struct cpu_branch_cost generic_branch_cost =
3 /* Unpredictable. */
};
-/* Branch costs for Cortex-A57. */
-static const struct cpu_branch_cost cortexa57_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
-/* Branch costs for Vulcan. */
-static const struct cpu_branch_cost thunderx2t99_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
/* Generic approximation modes. */
static const cpu_approx_modes generic_approx_modes =
{
@@ -612,7 +568,7 @@ static const struct tune_params cortexa35_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
1, /* issue_rate */
@@ -638,7 +594,7 @@ static const struct tune_params cortexa53_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
2, /* issue_rate */
@@ -661,10 +617,10 @@ static const struct tune_params cortexa53_tunings =
static const struct tune_params cortexa57_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -687,10 +643,10 @@ static const struct tune_params cortexa57_tunings =
static const struct tune_params cortexa72_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -713,10 +669,10 @@ static const struct tune_params cortexa72_tunings =
static const struct tune_params cortexa73_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
2, /* issue_rate. */
@@ -842,7 +798,7 @@ static const struct tune_params xgene1_tunings =
static const struct tune_params qdf24xx_tunings =
{
&qdf24xx_extra_costs,
- &qdf24xx_addrcost_table,
+ &generic_addrcost_table,
&qdf24xx_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
@@ -871,11 +827,12 @@ static const struct tune_params thunderx2t99_tunings =
&thunderx2t99_addrcost_table,
&thunderx2t99_regmove_cost,
&thunderx2t99_vector_cost,
- &thunderx2t99_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
4, /* issue_rate. */
- (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */
+ (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
+ | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
16, /* function_align. */
8, /* jump_align. */
16, /* loop_align. */
@@ -1031,7 +988,7 @@ static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
reg_class_t best_class)
{
- enum machine_mode mode;
+ machine_mode mode;
if (allocno_class != ALL_REGS)
return allocno_class;
@@ -1044,7 +1001,7 @@ aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
}
static unsigned int
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
+aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
if (GET_MODE_UNIT_SIZE (mode) == 4)
return aarch64_tune_params.min_div_recip_mul_sf;
@@ -1053,7 +1010,7 @@ aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
- enum machine_mode mode)
+ machine_mode mode)
{
if (VECTOR_MODE_P (mode))
return aarch64_tune_params.vec_reassoc_width;
@@ -1732,41 +1689,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
machine_mode dst_mode = GET_MODE (dst);
gcc_assert (VECTOR_MODE_P (dst_mode));
+ gcc_assert (register_operand (dst, dst_mode)
+ && register_operand (src1, src_mode)
+ && register_operand (src2, src_mode));
- if (REG_P (dst) && REG_P (src1) && REG_P (src2))
- {
- rtx (*gen) (rtx, rtx, rtx);
-
- switch (src_mode)
- {
- case V8QImode:
- gen = gen_aarch64_simd_combinev8qi;
- break;
- case V4HImode:
- gen = gen_aarch64_simd_combinev4hi;
- break;
- case V2SImode:
- gen = gen_aarch64_simd_combinev2si;
- break;
- case V4HFmode:
- gen = gen_aarch64_simd_combinev4hf;
- break;
- case V2SFmode:
- gen = gen_aarch64_simd_combinev2sf;
- break;
- case DImode:
- gen = gen_aarch64_simd_combinedi;
- break;
- case DFmode:
- gen = gen_aarch64_simd_combinedf;
- break;
- default:
- gcc_unreachable ();
- }
+ rtx (*gen) (rtx, rtx, rtx);
- emit_insn (gen (dst, src1, src2));
- return;
+ switch (src_mode)
+ {
+ case V8QImode:
+ gen = gen_aarch64_simd_combinev8qi;
+ break;
+ case V4HImode:
+ gen = gen_aarch64_simd_combinev4hi;
+ break;
+ case V2SImode:
+ gen = gen_aarch64_simd_combinev2si;
+ break;
+ case V4HFmode:
+ gen = gen_aarch64_simd_combinev4hf;
+ break;
+ case V2SFmode:
+ gen = gen_aarch64_simd_combinev2sf;
+ break;
+ case DImode:
+ gen = gen_aarch64_simd_combinedi;
+ break;
+ case DFmode:
+ gen = gen_aarch64_simd_combinedf;
+ break;
+ default:
+ gcc_unreachable ();
}
+
+ emit_insn (gen (dst, src1, src2));
+ return;
}
/* Split a complex SIMD move. */
@@ -1875,6 +1832,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
+ /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
+ (with XXXX non-zero). In that case check to see if the move can be done in
+ a smaller mode. */
+ val2 = val & 0xffffffff;
+ if (mode == DImode
+ && aarch64_move_imm (val2, SImode)
+ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
+ {
+ if (generate)
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+
+ /* Check if we have to emit a second instruction by checking to see
+ if any of the upper 32 bits of the original DI mode value is set. */
+ if (val == val2)
+ return 1;
+
+ i = (val >> 48) ? 48 : 32;
+
+ if (generate)
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+
+ return 2;
+ }
+
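
Not part of the patch: a minimal standalone sketch of the DImode shortcut added above. si_move_imm_p is an assumed, simplified stand-in for aarch64_move_imm (the real predicate also accepts shifted and bitmask forms). Writing W0 zero-extends into X0, so MOV plus a single MOVK rebuilds the constant in two instructions:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for aarch64_move_imm (val2, SImode).  */
static int
si_move_imm_p (uint64_t v)
{
  uint32_t w = (uint32_t) v, nw = ~(uint32_t) v;
  return w == (w & 0xffff) || w == (w & 0xffff0000u)         /* MOVZ */
         || nw == (nw & 0xffff) || nw == (nw & 0xffff0000u); /* MOVN */
}

int
main (void)
{
  uint64_t val = 0x0000abcdffffffffULL;   /* low half is MOVN-able */
  uint64_t val2 = val & 0xffffffffu;

  if (si_move_imm_p (val2)
      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
    {
      printf ("mov\tw0, #0x%llx\n", (unsigned long long) val2);
      if (val != val2)
        {
          int i = (val >> 48) ? 48 : 32;
          printf ("movk\tx0, #0x%llx, lsl %d\n",
                  (unsigned long long) ((val >> i) & 0xffff), i);
        }
    }
  return 0;
}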
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -2002,6 +1984,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
gcc_assert (can_create_pseudo_p ());
base = gen_reg_rtx (ptr_mode);
aarch64_expand_mov_immediate (base, XEXP (mem, 0));
+ if (ptr_mode != Pmode)
+ base = convert_memory_address (Pmode, base);
mem = gen_rtx_MEM (ptr_mode, base);
}
@@ -4720,6 +4704,69 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
return true;
}
+/* Return the binary representation of floating point constant VALUE in INTVAL.
+ If the value cannot be converted, return false without setting INTVAL.
+ The conversion is done in the given MODE. */
+bool
+aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
+{
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (value))
+ {
+ *intval = 0;
+ return true;
+ }
+
+ machine_mode mode = GET_MODE (value);
+ if (GET_CODE (value) != CONST_DOUBLE
+ || !SCALAR_FLOAT_MODE_P (mode)
+ || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
+ /* Only support up to DF mode. */
+ || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
+ return false;
+
+ unsigned HOST_WIDE_INT ival = 0;
+
+ long res[2];
+ real_to_target (res,
+ CONST_DOUBLE_REAL_VALUE (value),
+ REAL_MODE_FORMAT (mode));
+
+ ival = zext_hwi (res[0], 32);
+ if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (DFmode))
+ ival |= (zext_hwi (res[1], 32) << 32);
+
+ *intval = ival;
+ return true;
+}
+
+/* Return TRUE if rtx X is an immediate constant that can be moved using a
+ single MOV(+MOVK) followed by an FMOV. */
+bool
+aarch64_float_const_rtx_p (rtx x)
+{
+ machine_mode mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ return false;
+
+ /* Determine whether it's cheaper to write float constants as
+ mov/movk pairs over ldr/adrp pairs. */
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode)
+ && aarch64_reinterpret_float_as_int (x, &ival))
+ {
+ machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
+ int num_instr = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ return num_instr < 3;
+ }
+
+ return false;
+}
+
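
As an aside, what aarch64_reinterpret_float_as_int produces is simply the raw bit pattern of the constant; a hedged sketch in plain C (the patch itself goes through real_to_target, not memcpy):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  double d = 1.0;
  uint64_t bits;
  memcpy (&bits, &d, sizeof bits);   /* reinterpret, do not convert */
  printf ("%g -> 0x%016llx\n", d, (unsigned long long) bits);
  /* Prints 1 -> 0x3ff0000000000000: a single MOVZ (#0x3ff0, lsl 48),
     so num_instr < 3 and aarch64_float_const_rtx_p returns true.
     (For 1.0 itself the FMOV-immediate form would win earlier; the
     integer route matters for constants that
     aarch64_float_const_representable_p rejects.)  */
  return 0;
}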
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
@@ -4732,6 +4779,49 @@ aarch64_float_const_zero_rtx_p (rtx x)
return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
}
+/* Return TRUE if rtx X is an immediate constant that fits in a single
+ MOVI immediate operation. */
+bool
+aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
+{
+ if (!TARGET_SIMD)
+ return false;
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (x))
+ return true;
+
+ machine_mode vmode, imode;
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (!aarch64_reinterpret_float_as_int (x, &ival))
+ return false;
+
+ imode = int_mode_for_mode (mode);
+ }
+ else if (GET_CODE (x) == CONST_INT
+ && SCALAR_INT_MODE_P (mode))
+ {
+ imode = mode;
+ ival = INTVAL (x);
+ }
+ else
+ return false;
+
+ /* Use a 64-bit mode for everything except for DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+
+ vmode = aarch64_simd_container_mode (imode, width);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
+
+ return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
+}
+
+
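
An assumed illustration (not from the patch) of what the new predicate accepts: a 64-bit scalar whose bytes are each all-zero or all-one matches the 64-bit MOVI encoding once dup'd into a vector, and per the width rule above DImode/DFmode values are tested in a 128-bit container:

#include <stdint.h>

/* A dup of this value passes aarch64_simd_valid_immediate, so
   aarch64_can_const_movi_rtx_p returns true and GCC may emit
   e.g.  movi d0, 0xff00ff00ff00ff  instead of a literal load.  */
uint64_t
movi_friendly (void)
{
  return 0x00ff00ff00ff00ffULL;
}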
/* Return the fixed registers used for condition codes. */
static bool
@@ -4884,7 +4974,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
}
static int
-aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
+aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
int
aarch64_get_condition_code (rtx x)
@@ -4898,7 +4988,7 @@ aarch64_get_condition_code (rtx x)
}
static int
-aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
+aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
{
switch (mode)
{
@@ -5050,12 +5140,43 @@ static const int aarch64_nzcv_codes[] =
0 /* NV, Any. */
};
+/* Print operand X to file F in a target specific manner according to CODE.
+ The acceptable formatting commands given by CODE are:
+ 'c': An integer or symbol address without a preceding #
+ sign.
+ 'e': Print the sign/zero-extend size as a character 8->b,
+ 16->h, 32->w.
+ 'p': Prints N such that 2^N == X (X must be power of 2 and
+ const int).
+ 'P': Print the number of non-zero bits in X (a const_int).
+ 'H': Print the higher numbered register of a pair (TImode)
+ of regs.
+ 'm': Print a condition (eq, ne, etc).
+ 'M': Same as 'm', but invert condition.
+ 'b/h/s/d/q': Print a scalar FP/SIMD register name.
+ 'S/T/U/V': Print a FP/SIMD register name for a register list.
+ The register printed is the FP/SIMD register name
+ of X + 0/1/2/3 for S/T/U/V.
+ 'R': Print a scalar FP/SIMD register name + 1.
+ 'X': Print bottom 16 bits of integer constant in hex.
+ 'w/x': Print a general register name or the zero register
+ (32-bit or 64-bit).
+ '0': Print a normal operand, if it's a general register,
+ then we assume DImode.
+ 'k': Print NZCV for conditional compare instructions.
+ 'A': Output address constant representing the first
+ argument of X, specifying a relocation offset
+ if appropriate.
+ 'L': Output constant address specified by X
+ with a relocation offset if appropriate.
+ 'G': Prints address of X, specifying a PC relative
+ relocation mode if appropriate. */
+
static void
aarch64_print_operand (FILE *f, rtx x, int code)
{
switch (code)
{
- /* An integer or symbol address without a preceding # sign. */
case 'c':
switch (GET_CODE (x))
{
@@ -5082,7 +5203,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'e':
- /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
{
int n;
@@ -5115,7 +5235,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
{
int n;
- /* Print N such that 2^N == X. */
if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5127,7 +5246,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'P':
- /* Print the number of non-zero bits in X (a const_int). */
if (!CONST_INT_P (x))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5138,7 +5256,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'H':
- /* Print the higher numbered register of a pair (TImode) of regs. */
if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5152,8 +5269,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'm':
{
int cond_code;
- /* Print a condition (eq, ne, etc) or its inverse. */
-
/* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
if (x == const_true_rtx)
{
@@ -5181,7 +5296,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 's':
case 'd':
case 'q':
- /* Print a scalar FP/SIMD register name. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5194,7 +5308,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'T':
case 'U':
case 'V':
- /* Print the first FP/SIMD register name in a list. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5204,7 +5317,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'R':
- /* Print a scalar FP/SIMD register name + 1. */
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
@@ -5214,7 +5326,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'X':
- /* Print bottom 16 bits of integer constant in hex. */
if (!CONST_INT_P (x))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -5225,8 +5336,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'w':
case 'x':
- /* Print a general register name or the zero register (32-bit or
- 64-bit). */
if (x == const0_rtx
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
{
@@ -5249,8 +5358,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
/* Fall through */
case 0:
- /* Print a normal operand, if it's a general register, then we
- assume DImode. */
if (x == NULL)
{
output_operand_lossage ("missing operand");
@@ -5265,6 +5372,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case MEM:
output_address (GET_MODE (x), XEXP (x, 0));
+ /* Check all memory references are Pmode - even with ILP32. */
+ gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
break;
case CONST:
@@ -5401,7 +5510,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'G':
-
switch (aarch64_classify_symbolic_expression (x))
{
case SYMBOL_TLSLE24:
@@ -5416,7 +5524,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 'k':
{
HOST_WIDE_INT cond_code;
- /* Print nzcv. */
if (!CONST_INT_P (x))
{
@@ -5909,12 +6016,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
return NO_REGS;
}
- /* If it's an integer immediate that MOVI can't handle, then
- FP_REGS is not an option, so we return NO_REGS instead. */
- if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
- && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
- return NO_REGS;
-
/* Register elimination can result in a request for
SP+constant->FP_REGS. We cannot support such operations which
use SP as source and an FP_REG as destination, so reject out
@@ -6864,6 +6965,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
return true;
case CONST_DOUBLE:
+
+ /* First determine the number of instructions to do the move
+ as an integer constant. */
+ if (!aarch64_float_const_representable_p (x)
+ && !aarch64_can_const_movi_rtx_p (x, mode)
+ && aarch64_float_const_rtx_p (x))
+ {
+ unsigned HOST_WIDE_INT ival;
+ bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
+ gcc_assert (succeed);
+
+ machine_mode imode = mode == HFmode ? SImode
+ : int_mode_for_mode (mode);
+ int ncost = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ *cost += COSTS_N_INSNS (ncost);
+ return true;
+ }
+
if (speed)
{
/* mov[df,sf]_aarch64. */
@@ -7537,17 +7657,26 @@ cost_plus:
}
else
{
- if (speed)
+ if (VECTOR_MODE_P (mode))
{
- if (VECTOR_MODE_P (mode))
- {
- /* Vector shift (register). */
- *cost += extra_cost->vect.alu;
- }
- else
+ if (speed)
+ /* Vector shift (register). */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ if (speed)
+ /* LSLV. */
+ *cost += extra_cost->alu.shift_reg;
+
+ if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+ && CONST_INT_P (XEXP (op1, 1))
+ && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
{
- /* LSLV. */
- *cost += extra_cost->alu.shift_reg;
+ *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+ /* We already demanded XEXP (op1, 0) to be REG_P, so
+ don't recurse into it. */
+ return true;
}
}
return false; /* All arguments need to be in registers. */
@@ -7576,14 +7705,27 @@ cost_plus:
}
else
{
-
- /* ASR (register) and friends. */
- if (speed)
+ if (VECTOR_MODE_P (mode))
{
- if (VECTOR_MODE_P (mode))
+ if (speed)
+ /* Vector shift (register). */
*cost += extra_cost->vect.alu;
- else
+ }
+ else
+ {
+ if (speed)
+ /* ASR (register) and friends. */
*cost += extra_cost->alu.shift_reg;
+
+ if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+ && CONST_INT_P (XEXP (op1, 1))
+ && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+ {
+ *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+ /* We already demanded XEXP (op1, 0) to be REG_P, so
+ don't recurse into it. */
+ return true;
+ }
}
return false; /* All arguments need to be in registers. */
}
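
Both shift-cost hunks above recognize the same source idiom: because the variable-shift instructions (LSLV, ASRV and friends) truncate the shift amount modulo the register width, an AND of the count with GET_MODE_BITSIZE (mode) - 1 is free, so the cost walk now charges only op0 and does not recurse into the mask. A sketch of the pattern:

/* Each of these compiles to a single variable-shift instruction on
   AArch64; the  & 31  (bitsize - 1 for 32-bit operands) is folded
   into the shift and should not be costed separately.  */
unsigned
lshift_masked (unsigned x, unsigned n)
{
  return x << (n & 31);   /* lslv */
}

int
rshift_masked (int x, unsigned n)
{
  return x >> (n & 31);   /* asrv */
}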
@@ -10151,7 +10293,7 @@ aarch64_classify_symbol (rtx x, rtx offset)
/* This is alright even in PIC code as the constant
pool reference is always PC relative and within
the same translation unit. */
- if (CONSTANT_POOL_ADDRESS_P (x))
+ if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
return SYMBOL_SMALL_ABSOLUTE;
else
return SYMBOL_FORCE_TO_MEM;
@@ -10186,18 +10328,16 @@ aarch64_legitimate_pic_operand_p (rtx x)
/* Return true if X holds either a quarter-precision or
floating-point +0.0 constant. */
static bool
-aarch64_valid_floating_const (machine_mode mode, rtx x)
+aarch64_valid_floating_const (rtx x)
{
if (!CONST_DOUBLE_P (x))
return false;
- if (aarch64_float_const_zero_rtx_p (x))
+ /* This call determines which constants can be used in mov<mode>
+ as integer moves instead of constant loads. */
+ if (aarch64_float_const_rtx_p (x))
return true;
- /* We only handle moving 0.0 to a TFmode register. */
- if (!(mode == SFmode || mode == DFmode))
- return false;
-
return aarch64_float_const_representable_p (x);
}
@@ -10209,11 +10349,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
return false;
- /* This could probably go away because
- we now decompose CONST_INTs according to expand_mov_immediate. */
+ /* For these cases we never want to use a literal load.
+ As such we have to prevent the compiler from forcing these
+ to memory. */
if ((GET_CODE (x) == CONST_VECTOR
&& aarch64_simd_valid_immediate (x, mode, false, NULL))
- || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
+ || CONST_INT_P (x)
+ || aarch64_valid_floating_const (x)
+ || aarch64_can_const_movi_rtx_p (x, mode)
+ || aarch64_float_const_rtx_p (x))
return !targetm.cannot_force_const_mem (mode, x);
if (GET_CODE (x) == HIGH
@@ -11496,23 +11640,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos)
}
bool
-aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
-{
- HOST_WIDE_INT imm = INTVAL (x);
- int i;
-
- for (i = 0; i < 8; i++)
- {
- unsigned int byte = imm & 0xff;
- if (byte != 0xff && byte != 0)
- return false;
- imm >>= 8;
- }
-
- return true;
-}
-
-bool
aarch64_mov_operand_p (rtx x, machine_mode mode)
{
if (GET_CODE (x) == HIGH
@@ -11666,7 +11793,7 @@ aarch64_simd_mem_operand_p (rtx op)
COUNT is the number of components into which the copy needs to be
decomposed. */
void
-aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
+aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
unsigned int count)
{
unsigned int i;
@@ -11687,7 +11814,7 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
one of VSTRUCT modes: OI, CI, or XI. */
int
-aarch64_simd_attr_length_rglist (enum machine_mode mode)
+aarch64_simd_attr_length_rglist (machine_mode mode)
{
return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}
@@ -12119,10 +12246,8 @@ aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
static void
aarch64_emit_unlikely_jump (rtx insn)
{
- int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
-
rtx_insn *jump = emit_jump_insn (insn);
- add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
+ add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern. */
@@ -12905,15 +13030,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
}
char*
-aarch64_output_scalar_simd_mov_immediate (rtx immediate,
- machine_mode mode)
+aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode)
{
+
+ /* If a floating-point number was passed and we want to use it in an
+ integer mode, do the conversion to integer. */
+ if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (immediate, &ival))
+ gcc_unreachable ();
+ immediate = gen_int_mode (ival, mode);
+ }
+
machine_mode vmode;
+ /* Use a 64-bit mode for everything except for DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
gcc_assert (!VECTOR_MODE_P (mode));
- vmode = aarch64_simd_container_mode (mode, 64);
+ vmode = aarch64_simd_container_mode (mode, width);
rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
- return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+ return aarch64_output_simd_mov_immediate (v_op, vmode, width);
}
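
A hypothetical walk-through (names as above, nothing new assumed) of the reworked function for a 64-bit operand: a CONST_DOUBLE is first flattened to its bit pattern with aarch64_reinterpret_float_as_int, then the container width becomes 128 bits, so the value is dup'd across two lanes before the MOVI template is built:

#include <stdio.h>

int
main (void)
{
  int mode_bits = 64;                       /* DImode or DFmode */
  int width = mode_bits == 64 ? 128 : 64;   /* container width rule */
  int lanes = width / mode_bits;            /* 2 lanes, e.g. V2DI */
  printf ("%d-bit scalar -> V%d container, dup across %d lane(s)\n",
          mode_bits, width, lanes);
  return 0;
}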
/* Split operands into moves from op[1] + op[2] into op[0]. */
@@ -13678,7 +13816,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
}
rtx
-aarch64_reverse_mask (enum machine_mode mode)
+aarch64_reverse_mask (machine_mode mode)
{
/* We have to reverse each vector because we don't have
a permuted load that can reverse-load according to ABI rules. */
@@ -14278,13 +14416,68 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
enum attr_type prev_type = get_attr_type (prev);
- /* FIXME: this misses some which is considered simple arthematic
- instructions for ThunderX. Simple shifts are missed here. */
- if (prev_type == TYPE_ALUS_SREG
- || prev_type == TYPE_ALUS_IMM
- || prev_type == TYPE_LOGICS_REG
- || prev_type == TYPE_LOGICS_IMM)
- return true;
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+
+ if (reg_referenced_p (cc_reg_1, PATTERN (curr))
+ && prev
+ && modified_in_p (cc_reg_1, prev))
+ {
+ /* FIXME: this misses some instructions which are considered simple
+ arithmetic for ThunderX. Simple shifts are missed here. */
+ if (prev_type == TYPE_ALUS_SREG
+ || prev_type == TYPE_ALUS_IMM
+ || prev_type == TYPE_LOGICS_REG
+ || prev_type == TYPE_LOGICS_IMM)
+ return true;
+ }
+ }
+
+ if (prev_set
+ && curr_set
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
+ && any_condjump_p (curr))
+ {
+ /* We're trying to match:
+ prev (alu_insn) == (set (r0) (plus (r0) (r1/imm)))
+ curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
+ (const_int 0))
+ (label_ref ("SYM"))
+ (pc)) */
+ if (SET_DEST (curr_set) == (pc_rtx)
+ && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
+ && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+ && REG_P (SET_DEST (prev_set))
+ && REGNO (SET_DEST (prev_set))
+ == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
+ {
+ /* Fuse ALU operations followed by conditional branch instruction. */
+ switch (get_attr_type (prev))
+ {
+ case TYPE_ALU_IMM:
+ case TYPE_ALU_SREG:
+ case TYPE_ADC_REG:
+ case TYPE_ADC_IMM:
+ case TYPE_ADCS_REG:
+ case TYPE_ADCS_IMM:
+ case TYPE_LOGIC_REG:
+ case TYPE_LOGIC_IMM:
+ case TYPE_CSEL:
+ case TYPE_ADR:
+ case TYPE_MOV_IMM:
+ case TYPE_SHIFT_REG:
+ case TYPE_SHIFT_IMM:
+ case TYPE_BFM:
+ case TYPE_RBIT:
+ case TYPE_REV:
+ case TYPE_EXTEND:
+ return true;
+
+ default:;
+ }
+ }
}
return false;
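
For reference, an illustrative C function (assuming a tuning that sets AARCH64_FUSE_ALU_BRANCH, such as thunderx2t99 above) whose code matches the new pattern: an ALU instruction writes a register and the adjacent conditional branch tests exactly that register:

long
fuse_candidate (long a, long b)
{
  long r = a + b;   /* add x0, x0, x1   (TYPE_ALU_SREG) */
  if (r == 0)       /* cbz x0, .Lx  -> fusible with the add */
    return 1;
  return r;
}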
@@ -14468,7 +14661,7 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
- enum machine_mode mode)
+ machine_mode mode)
{
HOST_WIDE_INT offval_1, offval_2, msize;
enum reg_class rclass_1, rclass_2;
@@ -14575,7 +14768,7 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
- enum machine_mode mode)
+ machine_mode mode)
{
enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
@@ -14709,7 +14902,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
- enum machine_mode mode, RTX_CODE code)
+ machine_mode mode, RTX_CODE code)
{
rtx base, offset, t1, t2;
rtx mem_1, mem_2, mem_3, mem_4;