diff options
author | jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-07-31 14:52:19 +0000 |
---|---|---|
committer | jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-07-31 14:52:19 +0000 |
commit | b31856d3ac23cf3dab1e95cb96230dc81564c84a (patch) | |
tree | 49524df297e69390449c3ef5037b2360d14c7b1a /gcc/config/aarch64/aarch64.c | |
parent | 1ade4d1864f2cf61eb5c045f57c0bcac80943c04 (diff) | |
parent | a168a775e93ec31ae743ad282d8e60fa1c116891 (diff) | |
download | gcc-b31856d3ac23cf3dab1e95cb96230dc81564c84a.tar.gz |
Merged trunk revision 250739 into the hsa branch
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/hsa@250744 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/aarch64/aarch64.c')
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 555 |
1 files changed, 374 insertions, 181 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 95592f9fa17..055ebafb830 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed); +static machine_mode +aarch64_simd_container_mode (machine_mode mode, unsigned width); /* Major revision number of the ARM Architecture implemented by the target. */ unsigned aarch64_architecture_version; @@ -206,22 +208,6 @@ static const struct cpu_addrcost_table generic_addrcost_table = 0 /* imm_offset */ }; -static const struct cpu_addrcost_table cortexa57_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0, /* imm_offset */ -}; - static const struct cpu_addrcost_table exynosm1_addrcost_table = { { @@ -254,22 +240,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = 0, /* imm_offset */ }; -static const struct cpu_addrcost_table qdf24xx_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0 /* imm_offset */ -}; - static const struct cpu_addrcost_table thunderx2t99_addrcost_table = { { @@ -390,13 +360,13 @@ static const struct cpu_vector_cost thunderx_vector_cost = 3, /* scalar_load_cost */ 1, /* scalar_store_cost */ 4, /* vec_int_stmt_cost */ - 4, /* vec_fp_stmt_cost */ + 1, /* vec_fp_stmt_cost */ 4, /* vec_permute_cost */ 2, /* vec_to_scalar_cost */ 2, /* scalar_to_vec_cost */ 3, /* vec_align_load_cost */ - 10, /* vec_unalign_load_cost */ - 10, /* vec_unalign_store_cost */ + 5, /* vec_unalign_load_cost */ + 5, /* vec_unalign_store_cost */ 1, /* vec_store_cost */ 3, /* cond_taken_branch_cost */ 3 /* cond_not_taken_branch_cost */ @@ -488,20 +458,6 @@ static const struct cpu_branch_cost generic_branch_cost = 3 /* Unpredictable. */ }; -/* Branch costs for Cortex-A57. */ -static const struct cpu_branch_cost cortexa57_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - -/* Branch costs for Vulcan. */ -static const struct cpu_branch_cost thunderx2t99_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - /* Generic approximation modes. */ static const cpu_approx_modes generic_approx_modes = { @@ -612,7 +568,7 @@ static const struct tune_params cortexa35_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 1, /* issue_rate */ @@ -638,7 +594,7 @@ static const struct tune_params cortexa53_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 2, /* issue_rate */ @@ -661,10 +617,10 @@ static const struct tune_params cortexa53_tunings = static const struct tune_params cortexa57_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -687,10 +643,10 @@ static const struct tune_params cortexa57_tunings = static const struct tune_params cortexa72_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -713,10 +669,10 @@ static const struct tune_params cortexa72_tunings = static const struct tune_params cortexa73_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 2, /* issue_rate. */ @@ -842,7 +798,7 @@ static const struct tune_params xgene1_tunings = static const struct tune_params qdf24xx_tunings = { &qdf24xx_extra_costs, - &qdf24xx_addrcost_table, + &generic_addrcost_table, &qdf24xx_regmove_cost, &generic_vector_cost, &generic_branch_cost, @@ -871,11 +827,12 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_addrcost_table, &thunderx2t99_regmove_cost, &thunderx2t99_vector_cost, - &thunderx2t99_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 4, /* issue_rate. */ - (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */ + (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. */ 16, /* loop_align. */ @@ -1031,7 +988,7 @@ static reg_class_t aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, reg_class_t best_class) { - enum machine_mode mode; + machine_mode mode; if (allocno_class != ALL_REGS) return allocno_class; @@ -1044,7 +1001,7 @@ aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, } static unsigned int -aarch64_min_divisions_for_recip_mul (enum machine_mode mode) +aarch64_min_divisions_for_recip_mul (machine_mode mode) { if (GET_MODE_UNIT_SIZE (mode) == 4) return aarch64_tune_params.min_div_recip_mul_sf; @@ -1053,7 +1010,7 @@ aarch64_min_divisions_for_recip_mul (enum machine_mode mode) static int aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, - enum machine_mode mode) + machine_mode mode) { if (VECTOR_MODE_P (mode)) return aarch64_tune_params.vec_reassoc_width; @@ -1732,41 +1689,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) machine_mode dst_mode = GET_MODE (dst); gcc_assert (VECTOR_MODE_P (dst_mode)); + gcc_assert (register_operand (dst, dst_mode) + && register_operand (src1, src_mode) + && register_operand (src2, src_mode)); - if (REG_P (dst) && REG_P (src1) && REG_P (src2)) - { - rtx (*gen) (rtx, rtx, rtx); - - switch (src_mode) - { - case V8QImode: - gen = gen_aarch64_simd_combinev8qi; - break; - case V4HImode: - gen = gen_aarch64_simd_combinev4hi; - break; - case V2SImode: - gen = gen_aarch64_simd_combinev2si; - break; - case V4HFmode: - gen = gen_aarch64_simd_combinev4hf; - break; - case V2SFmode: - gen = gen_aarch64_simd_combinev2sf; - break; - case DImode: - gen = gen_aarch64_simd_combinedi; - break; - case DFmode: - gen = gen_aarch64_simd_combinedf; - break; - default: - gcc_unreachable (); - } + rtx (*gen) (rtx, rtx, rtx); - emit_insn (gen (dst, src1, src2)); - return; + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V4HFmode: + gen = gen_aarch64_simd_combinev4hf; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); } + + emit_insn (gen (dst, src1, src2)); + return; } /* Split a complex SIMD move. */ @@ -1875,6 +1832,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, return 1; } + /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff + (with XXXX non-zero). In that case check to see if the move can be done in + a smaller mode. */ + val2 = val & 0xffffffff; + if (mode == DImode + && aarch64_move_imm (val2, SImode) + && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) + { + if (generate) + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + + /* Check if we have to emit a second instruction by checking to see + if any of the upper 32 bits of the original DI mode value is set. */ + if (val == val2) + return 1; + + i = (val >> 48) ? 48 : 32; + + if (generate) + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + + return 2; + } + if ((val >> 32) == 0 || mode == SImode) { if (generate) @@ -2002,6 +1984,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) gcc_assert (can_create_pseudo_p ()); base = gen_reg_rtx (ptr_mode); aarch64_expand_mov_immediate (base, XEXP (mem, 0)); + if (ptr_mode != Pmode) + base = convert_memory_address (Pmode, base); mem = gen_rtx_MEM (ptr_mode, base); } @@ -4720,6 +4704,69 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) return true; } +/* Return the binary representation of floating point constant VALUE in INTVAL. + If the value cannot be converted, return false without setting INTVAL. + The conversion is done in the given MODE. */ +bool +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) +{ + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (value)) + { + *intval = 0; + return true; + } + + machine_mode mode = GET_MODE (value); + if (GET_CODE (value) != CONST_DOUBLE + || !SCALAR_FLOAT_MODE_P (mode) + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT + /* Only support up to DF mode. */ + || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode)) + return false; + + unsigned HOST_WIDE_INT ival = 0; + + long res[2]; + real_to_target (res, + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + + ival = zext_hwi (res[0], 32); + if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (DFmode)) + ival |= (zext_hwi (res[1], 32) << 32); + + *intval = ival; + return true; +} + +/* Return TRUE if rtx X is an immediate constant that can be moved using a + single MOV(+MOVK) followed by an FMOV. */ +bool +aarch64_float_const_rtx_p (rtx x) +{ + machine_mode mode = GET_MODE (x); + if (mode == VOIDmode) + return false; + + /* Determine whether it's cheaper to write float constants as + mov/movk pairs over ldr/adrp pairs. */ + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode) + && aarch64_reinterpret_float_as_int (x, &ival)) + { + machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode); + int num_instr = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + return num_instr < 3; + } + + return false; +} + /* Return TRUE if rtx X is immediate constant 0.0 */ bool aarch64_float_const_zero_rtx_p (rtx x) @@ -4732,6 +4779,49 @@ aarch64_float_const_zero_rtx_p (rtx x) return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0); } +/* Return TRUE if rtx X is immediate constant that fits in a single + MOVI immediate operation. */ +bool +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) +{ + if (!TARGET_SIMD) + return false; + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (x)) + return true; + + machine_mode vmode, imode; + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode)) + { + if (!aarch64_reinterpret_float_as_int (x, &ival)) + return false; + + imode = int_mode_for_mode (mode); + } + else if (GET_CODE (x) == CONST_INT + && SCALAR_INT_MODE_P (mode)) + { + imode = mode; + ival = INTVAL (x); + } + else + return false; + + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; + + vmode = aarch64_simd_container_mode (imode, width); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival); + + return aarch64_simd_valid_immediate (v_op, vmode, false, NULL); +} + + /* Return the fixed registers used for condition codes. */ static bool @@ -4884,7 +4974,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) } static int -aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code); +aarch64_get_condition_code_1 (machine_mode, enum rtx_code); int aarch64_get_condition_code (rtx x) @@ -4898,7 +4988,7 @@ aarch64_get_condition_code (rtx x) } static int -aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) +aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) { switch (mode) { @@ -5050,12 +5140,43 @@ static const int aarch64_nzcv_codes[] = 0 /* NV, Any. */ }; +/* Print operand X to file F in a target specific manner according to CODE. + The acceptable formatting commands given by CODE are: + 'c': An integer or symbol address without a preceding # + sign. + 'e': Print the sign/zero-extend size as a character 8->b, + 16->h, 32->w. + 'p': Prints N such that 2^N == X (X must be power of 2 and + const int). + 'P': Print the number of non-zero bits in X (a const_int). + 'H': Print the higher numbered register of a pair (TImode) + of regs. + 'm': Print a condition (eq, ne, etc). + 'M': Same as 'm', but invert condition. + 'b/h/s/d/q': Print a scalar FP/SIMD register name. + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. + 'R': Print a scalar FP/SIMD register name + 1. + 'X': Print bottom 16 bits of integer constant in hex. + 'w/x': Print a general register name or the zero register + (32-bit or 64-bit). + '0': Print a normal operand, if it's a general register, + then we assume DImode. + 'k': Print NZCV for conditional compare instructions. + 'A': Output address constant representing the first + argument of X, specifying a relocation offset + if appropriate. + 'L': Output constant address specified by X + with a relocation offset if appropriate. + 'G': Prints address of X, specifying a PC relative + relocation mode if appropriate. */ + static void aarch64_print_operand (FILE *f, rtx x, int code) { switch (code) { - /* An integer or symbol address without a preceding # sign. */ case 'c': switch (GET_CODE (x)) { @@ -5082,7 +5203,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'e': - /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */ { int n; @@ -5115,7 +5235,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) { int n; - /* Print N such that 2^N == X. */ if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5127,7 +5246,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'P': - /* Print the number of non-zero bits in X (a const_int). */ if (!CONST_INT_P (x)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5138,7 +5256,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'H': - /* Print the higher numbered register of a pair (TImode) of regs. */ if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5152,8 +5269,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'm': { int cond_code; - /* Print a condition (eq, ne, etc) or its inverse. */ - /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */ if (x == const_true_rtx) { @@ -5181,7 +5296,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 's': case 'd': case 'q': - /* Print a scalar FP/SIMD register name. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5194,7 +5308,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'T': case 'U': case 'V': - /* Print the first FP/SIMD register name in a list. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5204,7 +5317,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'R': - /* Print a scalar FP/SIMD register name + 1. */ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) { output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); @@ -5214,7 +5326,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'X': - /* Print bottom 16 bits of integer constant in hex. */ if (!CONST_INT_P (x)) { output_operand_lossage ("invalid operand for '%%%c'", code); @@ -5225,8 +5336,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'w': case 'x': - /* Print a general register name or the zero register (32-bit or - 64-bit). */ if (x == const0_rtx || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) { @@ -5249,8 +5358,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) /* Fall through */ case 0: - /* Print a normal operand, if it's a general register, then we - assume DImode. */ if (x == NULL) { output_operand_lossage ("missing operand"); @@ -5265,6 +5372,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) case MEM: output_address (GET_MODE (x), XEXP (x, 0)); + /* Check all memory references are Pmode - even with ILP32. */ + gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode); break; case CONST: @@ -5401,7 +5510,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'G': - switch (aarch64_classify_symbolic_expression (x)) { case SYMBOL_TLSLE24: @@ -5416,7 +5524,6 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'k': { HOST_WIDE_INT cond_code; - /* Print nzcv. */ if (!CONST_INT_P (x)) { @@ -5909,12 +6016,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass) return NO_REGS; } - /* If it's an integer immediate that MOVI can't handle, then - FP_REGS is not an option, so we return NO_REGS instead. */ - if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) - && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) - return NO_REGS; - /* Register eliminiation can result in a request for SP+constant->FP_REGS. We cannot support such operations which use SP as source and an FP_REG as destination, so reject out @@ -6864,6 +6965,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, return true; case CONST_DOUBLE: + + /* First determine number of instructions to do the move + as an integer constant. */ + if (!aarch64_float_const_representable_p (x) + && !aarch64_can_const_movi_rtx_p (x, mode) + && aarch64_float_const_rtx_p (x)) + { + unsigned HOST_WIDE_INT ival; + bool succeed = aarch64_reinterpret_float_as_int (x, &ival); + gcc_assert (succeed); + + machine_mode imode = mode == HFmode ? SImode + : int_mode_for_mode (mode); + int ncost = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + *cost += COSTS_N_INSNS (ncost); + return true; + } + if (speed) { /* mov[df,sf]_aarch64. */ @@ -7537,17 +7657,26 @@ cost_plus: } else { - if (speed) + if (VECTOR_MODE_P (mode)) { - if (VECTOR_MODE_P (mode)) - { - /* Vector shift (register). */ - *cost += extra_cost->vect.alu; - } - else + if (speed) + /* Vector shift (register). */ + *cost += extra_cost->vect.alu; + } + else + { + if (speed) + /* LSLV. */ + *cost += extra_cost->alu.shift_reg; + + if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) + && CONST_INT_P (XEXP (op1, 1)) + && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) { - /* LSLV. */ - *cost += extra_cost->alu.shift_reg; + *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); + /* We already demanded XEXP (op1, 0) to be REG_P, so + don't recurse into it. */ + return true; } } return false; /* All arguments need to be in registers. */ @@ -7576,14 +7705,27 @@ cost_plus: } else { - - /* ASR (register) and friends. */ - if (speed) + if (VECTOR_MODE_P (mode)) { - if (VECTOR_MODE_P (mode)) + if (speed) + /* Vector shift (register). */ *cost += extra_cost->vect.alu; - else + } + else + { + if (speed) + /* ASR (register) and friends. */ *cost += extra_cost->alu.shift_reg; + + if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) + && CONST_INT_P (XEXP (op1, 1)) + && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) + { + *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); + /* We already demanded XEXP (op1, 0) to be REG_P, so + don't recurse into it. */ + return true; + } } return false; /* All arguments need to be in registers. */ } @@ -10151,7 +10293,7 @@ aarch64_classify_symbol (rtx x, rtx offset) /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_SMALL_ABSOLUTE; else return SYMBOL_FORCE_TO_MEM; @@ -10186,18 +10328,16 @@ aarch64_legitimate_pic_operand_p (rtx x) /* Return true if X holds either a quarter-precision or floating-point +0.0 constant. */ static bool -aarch64_valid_floating_const (machine_mode mode, rtx x) +aarch64_valid_floating_const (rtx x) { if (!CONST_DOUBLE_P (x)) return false; - if (aarch64_float_const_zero_rtx_p (x)) + /* This call determines which constants can be used in mov<mode> + as integer moves instead of constant loads. */ + if (aarch64_float_const_rtx_p (x)) return true; - /* We only handle moving 0.0 to a TFmode register. */ - if (!(mode == SFmode || mode == DFmode)) - return false; - return aarch64_float_const_representable_p (x); } @@ -10209,11 +10349,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) return false; - /* This could probably go away because - we now decompose CONST_INTs according to expand_mov_immediate. */ + /* For these cases we never want to use a literal load. + As such we have to prevent the compiler from forcing these + to memory. */ if ((GET_CODE (x) == CONST_VECTOR && aarch64_simd_valid_immediate (x, mode, false, NULL)) - || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) + || CONST_INT_P (x) + || aarch64_valid_floating_const (x) + || aarch64_can_const_movi_rtx_p (x, mode) + || aarch64_float_const_rtx_p (x)) return !targetm.cannot_force_const_mem (mode, x); if (GET_CODE (x) == HIGH @@ -11496,23 +11640,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos) } bool -aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED) -{ - HOST_WIDE_INT imm = INTVAL (x); - int i; - - for (i = 0; i < 8; i++) - { - unsigned int byte = imm & 0xff; - if (byte != 0xff && byte != 0) - return false; - imm >>= 8; - } - - return true; -} - -bool aarch64_mov_operand_p (rtx x, machine_mode mode) { if (GET_CODE (x) == HIGH @@ -11666,7 +11793,7 @@ aarch64_simd_mem_operand_p (rtx op) COUNT is the number of components into which the copy needs to be decomposed. */ void -aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, +aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode, unsigned int count) { unsigned int i; @@ -11687,7 +11814,7 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is one of VSTRUCT modes: OI, CI, or XI. */ int -aarch64_simd_attr_length_rglist (enum machine_mode mode) +aarch64_simd_attr_length_rglist (machine_mode mode) { return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4; } @@ -12119,10 +12246,8 @@ aarch64_emit_store_exclusive (machine_mode mode, rtx bval, static void aarch64_emit_unlikely_jump (rtx insn) { - int very_unlikely = REG_BR_PROB_BASE / 100 - 1; - rtx_insn *jump = emit_jump_insn (insn); - add_int_reg_note (jump, REG_BR_PROB, very_unlikely); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); } /* Expand a compare and swap pattern. */ @@ -12905,15 +13030,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector, } char* -aarch64_output_scalar_simd_mov_immediate (rtx immediate, - machine_mode mode) +aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode) { + + /* If a floating point number was passed and we desire to use it in an + integer mode do the conversion to integer. */ + if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT) + { + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (immediate, &ival)) + gcc_unreachable (); + immediate = gen_int_mode (ival, mode); + } + machine_mode vmode; + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; gcc_assert (!VECTOR_MODE_P (mode)); - vmode = aarch64_simd_container_mode (mode, 64); + vmode = aarch64_simd_container_mode (mode, width); rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); - return aarch64_output_simd_mov_immediate (v_op, vmode, 64); + return aarch64_output_simd_mov_immediate (v_op, vmode, width); } /* Split operands into moves from op[1] + op[2] into op[0]. */ @@ -13678,7 +13816,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, } rtx -aarch64_reverse_mask (enum machine_mode mode) +aarch64_reverse_mask (machine_mode mode) { /* We have to reverse each vector because we dont have a permuted load that can reverse-load according to ABI rules. */ @@ -14278,13 +14416,68 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) { enum attr_type prev_type = get_attr_type (prev); - /* FIXME: this misses some which is considered simple arthematic - instructions for ThunderX. Simple shifts are missed here. */ - if (prev_type == TYPE_ALUS_SREG - || prev_type == TYPE_ALUS_IMM - || prev_type == TYPE_LOGICS_REG - || prev_type == TYPE_LOGICS_IMM) - return true; + unsigned int condreg1, condreg2; + rtx cc_reg_1; + aarch64_fixed_condition_code_regs (&condreg1, &condreg2); + cc_reg_1 = gen_rtx_REG (CCmode, condreg1); + + if (reg_referenced_p (cc_reg_1, PATTERN (curr)) + && prev + && modified_in_p (cc_reg_1, prev)) + { + /* FIXME: this misses some which is considered simple arthematic + instructions for ThunderX. Simple shifts are missed here. */ + if (prev_type == TYPE_ALUS_SREG + || prev_type == TYPE_ALUS_IMM + || prev_type == TYPE_LOGICS_REG + || prev_type == TYPE_LOGICS_IMM) + return true; + } + } + + if (prev_set + && curr_set + && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) + && any_condjump_p (curr)) + { + /* We're trying to match: + prev (alu_insn) == (set (r0) plus ((r0) (r1/imm))) + curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0) + (const_int 0)) + (label_ref ("SYM")) + (pc)) */ + if (SET_DEST (curr_set) == (pc_rtx) + && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE + && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (SET_DEST (prev_set)) + == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0))) + { + /* Fuse ALU operations followed by conditional branch instruction. */ + switch (get_attr_type (prev)) + { + case TYPE_ALU_IMM: + case TYPE_ALU_SREG: + case TYPE_ADC_REG: + case TYPE_ADC_IMM: + case TYPE_ADCS_REG: + case TYPE_ADCS_IMM: + case TYPE_LOGIC_REG: + case TYPE_LOGIC_IMM: + case TYPE_CSEL: + case TYPE_ADR: + case TYPE_MOV_IMM: + case TYPE_SHIFT_REG: + case TYPE_SHIFT_IMM: + case TYPE_BFM: + case TYPE_RBIT: + case TYPE_REV: + case TYPE_EXTEND: + return true; + + default:; + } + } } return false; @@ -14468,7 +14661,7 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority) bool aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - enum machine_mode mode) + machine_mode mode) { HOST_WIDE_INT offval_1, offval_2, msize; enum reg_class rclass_1, rclass_2; @@ -14575,7 +14768,7 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, bool aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, - enum machine_mode mode) + machine_mode mode) { enum reg_class rclass_1, rclass_2, rclass_3, rclass_4; HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize; @@ -14709,7 +14902,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, bool aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, - enum machine_mode mode, RTX_CODE code) + machine_mode mode, RTX_CODE code) { rtx base, offset, t1, t2; rtx mem_1, mem_2, mem_3, mem_4; |