diff options
author | Caroline Tice <cmtice@chromium.org> | 2013-06-21 12:05:49 -0700 |
---|---|---|
committer | Caroline Tice <cmtice@chromium.org> | 2013-06-21 12:05:49 -0700 |
commit | 9c1705845241cdea5cf61574218edef282901fbd (patch) | |
tree | 021da661f8a821bc7eea277521fa13f2043879a0 /gcc/config | |
parent | 9bdedf4df4166718fa0a44811c643214c6880471 (diff) | |
parent | 8a46ed36b0449c870d0a938c1010ad610ebcf4f7 (diff) | |
download | gcc-9c1705845241cdea5cf61574218edef282901fbd.tar.gz |
Merge branch 'master' into vtv
Conflicts:
gcc/ChangeLog
gcc/cp/ChangeLog
gcc/cp/decl2.c
gcc/varasm.c
libstdc++-v3/ChangeLog
Diffstat (limited to 'gcc/config')
55 files changed, 4047 insertions, 1287 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index bdb6b040578..12f3c3a6fe6 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -149,6 +149,8 @@ bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); bool aarch64_pad_arg_upward (enum machine_mode, const_tree); bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); @@ -157,6 +159,8 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); bool aarch64_symbolic_address_p (rtx); bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context, enum aarch64_symbol_type *); @@ -222,6 +226,8 @@ void aarch64_split_128bit_move (rtx, rtx); bool aarch64_split_128bit_move_p (rtx, rtx); +void aarch64_split_simd_combine (rtx, rtx, rtx); + void aarch64_split_simd_move (rtx, rtx); /* Check for a legitimate floating point constant for FMOV. 
*/ @@ -257,6 +263,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); - -char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 04fbdbd5837..02037f3f2cb 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -409,7 +409,7 @@ case 4: return "ins\t%0.d[0], %1"; case 5: return "mov\t%0, %1"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 64); default: gcc_unreachable (); } @@ -440,7 +440,7 @@ case 5: return "#"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128); + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); default: gcc_unreachable (); } @@ -1058,9 +1058,9 @@ (vec_duplicate:<VHALF> (const_int 0))))] "TARGET_SIMD" "@ - mov\\t%d0, %d1 - fmov\t%d0, %1 - dup\t%d0, %1" + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" [(set_attr "v8type" "*,fmov,*") (set_attr "simd_type" "simd_dup,*,simd_dup") (set_attr "simd_mode" "<MODE>") @@ -1190,6 +1190,104 @@ ;; Widening arithmetic. 
+(define_insn "*aarch64_<su>mlal_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + 
(match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand:<VWIDE> 3 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> @@ -2218,15 +2316,29 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_combine<mode>" +(define_insn_and_split "aarch64_combine<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=&w") (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] "TARGET_SIMD" - "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]" - [(set_attr "simd_type" "simd_ins") - (set_attr "simd_mode" "<MODE>")] -) + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine 
(operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "aarch64_simd_combine<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); + DONE; + }) ;; <su><addsub>l<q>. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9c77888157d..527b00dbcaa 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -87,6 +87,14 @@ struct aarch64_address_info { enum aarch64_symbol_type symbol_type; }; +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; +}; + /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -103,8 +111,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); -static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *, - int *, unsigned char *, int *, int *); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); static bool aarch64_const_vec_all_same_int_p (rtx, @@ -532,9 +538,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, { rtx tmp_reg = dest; if (can_create_pseudo_p ()) - { - tmp_reg = gen_reg_rtx (Pmode); - } + tmp_reg = gen_reg_rtx (Pmode); emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm)); emit_insn (gen_ldr_got_small (dest, tmp_reg, imm)); return; @@ -696,6 +700,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src) || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); } +/* Split a complex SIMD combine. 
*/ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + /* Split a complex SIMD move. */ void @@ -5068,6 +5115,10 @@ aarch64_classify_symbol (rtx x, return SYMBOL_SMALL_ABSOLUTE; case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: if (!aarch64_symbol_binds_local_p (x)) return SYMBOL_SMALL_GOT; @@ -5150,8 +5201,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) /* This could probably go away because we now decompose CONST_INTs according to expand_mov_immediate. */ if ((GET_CODE (x) == CONST_VECTOR - && aarch64_simd_valid_immediate (x, mode, false, - NULL, NULL, NULL, NULL, NULL) != -1) + && aarch64_simd_valid_immediate (x, mode, false, NULL)) || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) return !targetm.cannot_force_const_mem (mode, x); @@ -5982,32 +6032,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) return false; } -/* Return quad mode as the preferred SIMD mode. */ +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. 
*/ static enum machine_mode -aarch64_preferred_simd_mode (enum machine_mode mode) +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) { + gcc_assert (width == 64 || width == 128); if (TARGET_SIMD) - switch (mode) - { - case DFmode: - return V2DFmode; - case SFmode: - return V4SFmode; - case SImode: - return V4SImode; - case HImode: - return V8HImode; - case QImode: - return V16QImode; - case DImode: - return V2DImode; - break; - - default:; - } + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } return word_mode; } +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */ static unsigned int @@ -6095,7 +6170,7 @@ aarch64_mangle_type (const_tree type) } /* Return the equivalent letter for size. */ -static unsigned char +static char sizetochar (int size) { switch (size) @@ -6142,15 +6217,10 @@ aarch64_vect_float_const_representable_p (rtx x) return aarch64_float_const_representable_p (x0); } -/* TODO: This function returns values similar to those - returned by neon_valid_immediate in gcc/config/arm/arm.c - but the API here is different enough that these magic numbers - are not used. It should be sufficient to return true or false. 
*/ -static int -aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) +/* Return true for valid and false for invalid. */ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -6161,7 +6231,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, { \ immtype = (CLASS); \ elsize = (ELSIZE); \ - elchar = sizetochar (elsize); \ eshift = (SHIFT); \ emvn = (NEG); \ break; \ @@ -6170,36 +6239,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned char bytes[16]; - unsigned char elchar = 0; int immtype = -1, matches; unsigned int invmask = inverse ? 0xff : 0; int eshift, emvn; if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); - int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); - - if (!(simd_imm_zero - || aarch64_vect_float_const_representable_p (op))) - return -1; - - if (modconst) - *modconst = CONST_VECTOR_ELT (op, 0); - - if (elementwidth) - *elementwidth = elem_width; - - if (elementchar) - *elementchar = sizetochar (elem_width); + if (! (aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; - if (shift) - *shift = 0; + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } - if (simd_imm_zero) - return 19; - else - return 18; + return true; } /* Splat vector constant out into a byte vector. 
*/ @@ -6297,23 +6355,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, if (immtype == -1 || (immtype >= 12 && immtype <= 15) || immtype == 18) - return -1; - - - if (elementwidth) - *elementwidth = elsize; - - if (elementchar) - *elementchar = elchar; - - if (mvn) - *mvn = emvn; - - if (shift) - *shift = eshift; + return false; - if (modconst) + if (info) { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + unsigned HOST_WIDE_INT imm = 0; /* Un-invert bytes of recognized vector, if necessary. */ @@ -6330,68 +6379,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); - *modconst = GEN_INT (imm); - } - else - { - unsigned HOST_WIDE_INT imm = 0; - for (i = 0; i < elsize / BITS_PER_UNIT; i++) - imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); /* Construct 'abcdefgh' because the assembler cannot handle - generic constants. */ - gcc_assert (shift != NULL && mvn != NULL); - if (*mvn) + generic constants. */ + if (info->mvn) imm = ~imm; - imm = (imm >> *shift) & 0xff; - *modconst = GEN_INT (imm); - } + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } } - return immtype; + return true; #undef CHECK } -/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction - (or, implicitly, MVNI) immediate. Write back width per element - to *ELEMENTWIDTH, and a modified constant (whatever should be output - for a MOVI instruction) in *MODCONST. 
*/ -int -aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) -{ - rtx tmpconst; - int tmpwidth; - unsigned char tmpwidthc; - int tmpmvn = 0, tmpshift = 0; - int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, - &tmpwidth, &tmpwidthc, - &tmpmvn, &tmpshift); - - if (retval == -1) - return 0; - - if (modconst) - *modconst = tmpconst; - - if (elementwidth) - *elementwidth = tmpwidth; - - if (elementchar) - *elementchar = tmpwidthc; - - if (mvn) - *mvn = tmpmvn; - - if (shift) - *shift = tmpshift; - - return 1; -} - static bool aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT minval, @@ -6496,9 +6504,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) gcc_assert (!VECTOR_MODE_P (mode)); vmode = aarch64_preferred_simd_mode (mode); rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); - int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0, - NULL, NULL, NULL, NULL); - return retval; + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); } /* Construct and return a PARALLEL RTX vector. */ @@ -6726,8 +6732,7 @@ aarch64_simd_make_constant (rtx vals) gcc_unreachable (); if (const_vec != NULL_RTX - && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, - NULL, NULL, NULL)) + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) /* Load using MOVI/MVNI. 
*/ return const_vec; else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) @@ -7285,49 +7290,78 @@ aarch64_float_const_representable_p (rtx x) } char* -aarch64_output_simd_mov_immediate (rtx *const_vector, +aarch64_output_simd_mov_immediate (rtx const_vector, enum machine_mode mode, unsigned width) { - int is_valid; - unsigned char widthc; - int lane_width_bits; + bool is_valid; static char templ[40]; - int shift = 0, mvn = 0; const char *mnemonic; unsigned int lane_count = 0; + char element_char; - is_valid = - aarch64_simd_immediate_valid_for_move (*const_vector, mode, - const_vector, &lane_width_bits, - &widthc, &mvn, &shift); + struct simd_immediate_info info; + + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); gcc_assert (is_valid); + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + mode = GET_MODE_INNER (mode); if (mode == SFmode || mode == DFmode) { - bool zero_p = - aarch64_float_const_zero_rtx_p (*const_vector); - gcc_assert (shift == 0); - mnemonic = zero_p ? "movi" : "fmov"; + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } } - else - mnemonic = mvn ? 
"mvni" : "movi"; - gcc_assert (lane_width_bits != 0); - lane_count = width / lane_width_bits; + mnemonic = info.mvn ? "mvni" : "movi"; if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); - else if (shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", - mnemonic, lane_count, widthc, shift); + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", lsl %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), info.shift); else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", - mnemonic, lane_count, widthc); + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); return templ; } +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + /* Split operands into moves from op[1] + op[2] into op[0]. 
*/ void diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 89db09254eb..e88e5be894e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -774,17 +774,34 @@ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))] "(register_operand (operands[0], <MODE>mode) || aarch64_reg_or_zero (operands[1], <MODE>mode))" - "@ - mov\\t%w0, %w1 - mov\\t%w0, %1 - movi\\t%0.<Vallxd>, %1 - ldr<size>\\t%w0, %1 - ldr\\t%<size>0, %1 - str<size>\\t%w1, %0 - str\\t%<size>1, %0 - umov\\t%w0, %1.<v>[0] - dup\\t%0.<Vallxd>, %w1 - dup\\t%0, %1.<v>[0]" +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + <MODE>mode); + case 3: + return "ldr<size>\t%w0, %1"; + case 4: + return "ldr\t%<size>0, %1"; + case 5: + return "str<size>\t%w1, %0"; + case 6: + return "str\t%<size>1, %0"; + case 7: + return "umov\t%w0, %1.<v>[0]"; + case 8: + return "dup\t%0.<Vallxd>, %w1"; + case 9: + return "dup\t%0, %1.<v>[0]"; + default: + gcc_unreachable (); + } +} [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") @@ -850,7 +867,8 @@ movi\\t%d0, %1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") - (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")] + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm<mode>" diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index d9c18e692ea..7cafc08fdd9 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -143,9 +143,8 @@ "@internal A constraint that matches vector of immediates." 
(and (match_code "const_vector") - (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op), - NULL, NULL, NULL, - NULL, NULL) != 0"))) + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) (define_constraint "Dh" "@internal diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 860d4d9a187..8e40c5de5d4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -385,7 +385,8 @@ ;; Double modes of vector modes (lower case). (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") (V2SI "v4si") (V2SF "v4sf") - (SI "v2si") (DI "v2di")]) + (SI "v2si") (DI "v2di") + (DF "v2df")]) ;; Narrowed modes for VDN. (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 096ef3b1f56..5f5b33e347b 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) break; case GE: case GT: case GEU: case GTU: - /* These must be swapped. */ - if (op1 != CONST0_RTX (cmp_mode)) - { - code = swap_condition (code); - tem = op0, op0 = op1, op1 = tem; - } + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; break; default: @@ -3068,7 +3068,8 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) out = gen_reg_rtx (DImode); /* What's actually returned is -1,0,1, not a proper boolean value. 
*/ - note = gen_rtx_UNSPEC (DImode, gen_rtvec (2, op0, op1), UNSPEC_XFLT_COMPARE); + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); alpha_emit_xfloating_libcall (func, out, operands, 2, note); return out; diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md index 10da396ab66..12bbbaf9083 100644 --- a/gcc/config/arm/arm-fixed.md +++ b/gcc/config/arm/arm-fixed.md @@ -19,12 +19,13 @@ ;; This file contains ARM instructions that support fixed-point operations. (define_insn "add<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "add%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "add<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -32,7 +33,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "usadd<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -40,7 +42,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ssadd<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -48,15 +51,17 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr 
"predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sub<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "sub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "sub<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -64,7 +69,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "ssub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ussub<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -73,7 +79,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sssub<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -81,7 +88,8 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Fractional multiplies. 
@@ -374,6 +382,7 @@ "TARGET_32BIT && arm_arch6" "ssat%?\\t%0, #16, %2%S1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat") (set_attr "shift" "1") (set_attr "type" "alu_shift")]) @@ -384,4 +393,5 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat")]) diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml index 2bc9702bee2..e615437b125 100644 --- a/gcc/config/arm/arm-ldmstm.ml +++ b/gcc/config/arm/arm-ldmstm.ml @@ -146,12 +146,15 @@ let can_thumb addrmode update is_store = | IA, true, true -> true | _ -> false +exception InvalidAddrMode of string;; + let target addrmode thumb = match addrmode, thumb with IA, true -> "TARGET_THUMB1" | IA, false -> "TARGET_32BIT" | DB, false -> "TARGET_32BIT" | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = let astr = string_of_addrmode addrmode in @@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = done; Printf.printf "}\"\n"; Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; - begin if not thumb then + if not thumb then begin Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; end; Printf.printf "])\n\n" diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 55a512349cc..6fc307e7709 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -662,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_ASAN_SHADOW_OFFSET #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. 
*/ @@ -1871,6 +1875,11 @@ arm_option_override (void) arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + + if (!TARGET_THUMB2) + arm_restrict_it = 0; /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ @@ -2677,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) switch (code) { case AND: + case IOR: + case XOR: return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); case PLUS: @@ -16168,25 +16179,34 @@ arm_compute_save_reg0_reg12_mask (void) return save_reg_mask; } +/* Return true if r3 is live at the start of the function. */ + +static bool +arm_r3_live_at_start_p (void) +{ + /* Just look at cfg info, which is still close enough to correct at this + point. This gives false positives for broken functions that might use + uninitialized data that happens to be allocated in r3, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3); +} /* Compute the number of bytes used to store the static chain register on the - stack, above the stack frame. We need to know this accurately to get the - alignment of the rest of the stack frame correct. */ + stack, above the stack frame. We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ -static int arm_compute_static_chain_stack_bytes (void) +static int +arm_compute_static_chain_stack_bytes (void) { - unsigned long func_type = arm_current_func_type (); - int static_chain_stack_bytes = 0; - - if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM && - IS_NESTED (func_type) && - df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0) - static_chain_stack_bytes = 4; + /* See the defining assertion in arm_expand_prologue. 
*/ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM + && IS_NESTED (arm_current_func_type ()) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) + return 4; - return static_chain_stack_bytes; + return 0; } - /* Compute a bit mask of which registers need to be saved on the stack for the current function. This is used by arm_get_frame_offsets, which may add extra registers. */ @@ -18141,16 +18161,16 @@ arm_expand_prologue (void) } else if (IS_NESTED (func_type)) { - /* The Static chain register is the same as the IP register + /* The static chain register is the same as the IP register used as a scratch register during stack frame creation. To get around this need to find somewhere to store IP whilst the frame is being created. We try the following places in order: - 1. The last argument register. + 1. The last argument register r3. 2. A slot on the stack above the frame. (This only works if the function is not a varargs function). - 3. Register r3, after pushing the argument registers + 3. Register r3 again, after pushing the argument registers onto the stack. Note - we only need to tell the dwarf2 backend about the SP @@ -18158,7 +18178,7 @@ arm_expand_prologue (void) doesn't need to be unwound, as it doesn't contain a value inherited from the caller. */ - if (df_regs_ever_live_p (3) == false) + if (!arm_r3_live_at_start_p ()) insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); else if (args_to_push == 0) { @@ -18299,8 +18319,7 @@ arm_expand_prologue (void) if (IS_NESTED (func_type)) { /* Recover the static chain register. */ - if (!df_regs_ever_live_p (3) - || saved_pretend_args) + if (!arm_r3_live_at_start_p () || saved_pretend_args) insn = gen_rtx_REG (SImode, 3); else /* if (crtl->args.pretend_args_size == 0) */ { @@ -19592,7 +19611,7 @@ thumb2_final_prescan_insn (rtx insn) break; /* Allow up to 4 conditionally executed instructions in a block. 
*/ n = get_attr_ce_count (insn); - if (arm_condexec_masklen + n > 4) + if (arm_condexec_masklen + n > MAX_INSN_PER_IT_BLOCK) break; predicate = COND_EXEC_TEST (body); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index b7db3616cdf..3f0e021f3ed 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -93,6 +93,15 @@ ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. (define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + ;; Operand number of an input operand that is shifted. Zero if the ;; given instruction does not shift one of its input operands. 
(define_attr "shift" "" (const_int 0)) @@ -103,6 +112,8 @@ (define_attr "fpu" "none,vfp" (const (symbol_ref "arm_fpu_attr"))) +(define_attr "predicated" "yes,no" (const_string "no")) + ; LENGTH of an instruction (in bytes) (define_attr "length" "" (const_int 4)) @@ -190,6 +201,15 @@ (cond [(eq_attr "insn_enabled" "no") (const_string "no") + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -2163,29 +2183,28 @@ ) (define_insn_and_split "*anddi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w") - (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0") - (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] "TARGET_32BIT && !TARGET_IWMMXT" { switch (which_alternative) { - case 0: - case 1: + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); case 2: - case 3: /* fall through */ - return "#"; - case 4: /* fall through */ - case 8: return "vand\t%P0, %P1, %P2"; + case 3: + case 4: case 5: /* fall through */ - case 9: return neon_output_logic_immediate ("vand", &operands[2], - DImode, 1, VALID_NEON_QREG_MODE (DImode)); - case 6: return "#"; - case 7: return "#"; + return "#"; default: gcc_unreachable (); } } - "TARGET_32BIT && !TARGET_IWMMXT" + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" [(set (match_dup 3) 
(match_dup 4)) (set (match_dup 5) (match_dup 6))] " @@ -2201,19 +2220,11 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*, + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "length" "8,8,8,8,*,*,8,8,*,*") - (set (attr "insn_enabled") (if_then_else - (lt (symbol_ref "which_alternative") - (const_int 4)) - (if_then_else (match_test "!TARGET_NEON") - (const_string "yes") - (const_string "no")) - (if_then_else (match_test "TARGET_NEON") - (const_string "yes") - (const_string "no"))))] + (set_attr "length" "*,*,8,8,8,8,*,*") + ] ) (define_insn_and_split "*anddi_zesidi_di" @@ -2997,14 +3008,47 @@ "" ) -(define_insn "*iordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) 
(match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) (define_insn "*iordi_zesidi_di" @@ -3137,19 +3181,49 @@ (define_expand "xordi3" [(set (match_operand:DI 0 "s_register_operand" "") (xor:DI (match_operand:DI 1 "s_register_operand" "") - (match_operand:DI 2 "s_register_operand" "")))] + (match_operand:DI 2 "arm_xordi_operand" "")))] "TARGET_32BIT" "" ) -(define_insn "*xordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, 
operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*xordi_zesidi_di" @@ -12097,6 +12171,7 @@ (const_int 0)])] "TARGET_32BIT" "" +[(set_attr "predicated" "yes")] ) (define_insn "force_register_use" diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index afb42421c06..b9ae2b09682 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -239,6 +239,10 @@ mword-relocations Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) Only generate absolute relocations on word sized values. +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + mfix-cortex-m3-ldrd Target Report Var(fix_cm3_ldrd) Init(2) Avoid overlapping destination and address registers on LDRD instructions diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 7e7b3e69e0a..7cd8e31c97f 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -260,6 +260,18 @@ (and (match_code "const_int") (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." 
+ (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + (define_constraint "Di" "@internal In ARM/Thumb-2 state a const_int or const_double where both the high diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md index 8ebdfc81761..ad137d492e4 100644 --- a/gcc/config/arm/ldmstm.md +++ b/gcc/config/arm/ldmstm.md @@ -37,7 +37,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia" [(match_parallel 0 "load_multiple_operation" @@ -74,7 +75,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -108,7 +110,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -125,7 +128,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -302,7 +306,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm4_db_update" [(match_parallel 0 "load_multiple_operation" @@ -323,7 +328,8 @@ "TARGET_32BIT && XVECLEN 
(operands[0], 0) == 5" "ldm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db" [(match_parallel 0 "store_multiple_operation" @@ -338,7 +344,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db_update" [(match_parallel 0 "store_multiple_operation" @@ -355,7 +362,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -477,7 +485,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia" [(match_parallel 0 "load_multiple_operation" @@ -508,7 +517,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -537,7 +547,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -552,7 +563,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr 
"predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -704,7 +716,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm3_db_update" [(match_parallel 0 "load_multiple_operation" @@ -722,7 +735,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db" [(match_parallel 0 "store_multiple_operation" @@ -735,7 +749,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db_update" [(match_parallel 0 "store_multiple_operation" @@ -750,7 +765,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -855,7 +871,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia" [(match_parallel 0 "load_multiple_operation" @@ -880,7 +897,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -904,7 
+922,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -917,7 +936,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -1044,7 +1064,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm2_db_update" [(match_parallel 0 "load_multiple_operation" @@ -1059,7 +1080,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db" [(match_parallel 0 "store_multiple_operation" @@ -1070,7 +1092,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db_update" [(match_parallel 0 "store_multiple_operation" @@ -1083,7 +1106,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index f91a6f7d08b..e814df0d264 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -679,29 
+679,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))] - "TARGET_NEON" -{ - switch (which_alternative) - { - case 0: /* fall through */ - case 4: return "vorr\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 5: return neon_output_logic_immediate ("vorr", &operands[2], - DImode, 0, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; - default: gcc_unreachable (); - } -} - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] -) - ;; The concrete forms of the Neon immediate-logic instructions are vbic and ;; vorr. We support the pseudo-instruction vand instead, because that ;; corresponds to the canonical form the middle-end expects to use for @@ -805,21 +782,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w") - (match_operand:DI 2 "s_register_operand" "w,r,r,w")))] - "TARGET_NEON" - "@ - veor\t%P0, %P1, %P2 - # - # - veor\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "length" "*,8,8,*") - (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] -) - (define_insn "one_cmpl<mode>2" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] @@ -5617,7 +5579,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2])); DONE; }) @@ -5628,7 +5590,7 @@ (match_operand:SI 3 
"immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 92de9fe8bd9..d169cb27035 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -42,6 +42,17 @@ (ior (match_operand 0 "imm_for_neon_inv_logic_operand") (match_operand 0 "s_register_operand"))) +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + ;; Any hard register. (define_predicate "arm_hard_register_operand" (match_code "reg") @@ -162,6 +173,17 @@ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) (match_operand 0 "neon_inv_logic_op2"))) +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + (define_predicate "arm_adddi_operand" (ior (match_operand 0 "s_register_operand") (and (match_code "const_int") @@ -535,17 +557,6 @@ (ior (match_operand 0 "s_register_operand") (match_operand 0 "imm_for_neon_rshift_operand"))) -(define_predicate "imm_for_neon_logic_operand" - (match_code "const_vector") -{ - return (TARGET_NEON - && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); -}) - -(define_predicate "neon_logic_op2" - (ior (match_operand 0 "imm_for_neon_logic_operand") - (match_operand 0 "s_register_operand"))) - ;; Predicates for named expanders that overlap 
multiple ISAs. (define_predicate "cmpdi_operand" diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md index 980234836c9..8f7bd71c317 100644 --- a/gcc/config/arm/sync.md +++ b/gcc/config/arm/sync.md @@ -124,7 +124,8 @@ UNSPEC_LL))] "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "s_register_operand" "") ;; bool out @@ -361,7 +362,8 @@ VUNSPEC_LL)))] "TARGET_HAVE_LDREXBH" "ldrex<sync_sfx>%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -371,7 +373,8 @@ VUNSPEC_LAX)))] "TARGET_HAVE_LDACQ" "ldaex<sync_sfx>%?\\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -380,7 +383,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREX" "ldrex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -389,7 +393,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ" "ldaex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -398,7 +403,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREXD" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -407,7 +413,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ 
&& ARM_DOUBLEWORD_ALIGN" "ldaexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -431,7 +438,8 @@ } return "strex<sync_sfx>%?\t%0, %2, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusivedi" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -448,7 +456,8 @@ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); return "stlexd%?\t%0, %2, %3, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -459,4 +468,5 @@ VUNSPEC_SLX))] "TARGET_HAVE_LDACQ" "stlex<sync_sfx>%?\t%0, %2, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h index ce331cbe363..c30a9718e76 100644 --- a/gcc/config/c6x/c6x.h +++ b/gcc/config/c6x/c6x.h @@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch; Really only externally visible arrays must be aligned this way, as only those are directly visible from another compilation unit. But we don't have that information available here. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ ? 
BITS_PER_UNIT * 8 : (ALIGN)) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ef4dc761d5a..f228e87a2e4 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -207,7 +207,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* RTX_CODE */ #ifdef TREE_CODE -extern int ix86_data_alignment (tree, int); +extern int ix86_data_alignment (tree, int, bool); extern unsigned int ix86_local_alignment (tree, enum machine_mode, unsigned int); extern unsigned int ix86_minimum_alignment (tree, enum machine_mode, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e8f47c9d417..45e88996ad2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2108,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for a conditional move. */ - m_ATOM + m_ATOM, + + /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for + fp converts to destination register. */ + m_SLM + }; /* Feature tests against the various architecture variations. */ @@ -17392,10 +17397,24 @@ distance_agu_use (unsigned int regno0, rtx insn) static bool ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, - unsigned int regno2, int split_cost) + unsigned int regno2, int split_cost, bool has_scale) { int dist_define, dist_use; + /* For Silvermont if using a 2-source or 3-source LEA for + non-destructive destination purposes, or due to wanting + ability to use SCALE, the use of LEA is justified. 
*/ + if (ix86_tune == PROCESSOR_SLM) + { + if (has_scale) + return true; + if (split_cost < 1) + return false; + if (regno0 == regno1 || regno0 == regno2) + return false; + return true; + } + dist_define = distance_non_agu_define (regno1, regno2, insn); dist_use = distance_agu_use (regno0, insn); @@ -17484,7 +17503,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[]) if (regno0 == regno1 || regno0 == regno2) return false; else - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); } /* Return true if we should emit lea instruction instead of mov @@ -17506,7 +17525,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[]) regno0 = true_regnum (operands[0]); regno1 = true_regnum (operands[1]); - return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0); + return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); } /* Return true if we need to split lea into a sequence of @@ -17585,7 +17604,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) split_cost -= 1; } - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, + parts.scale > 1); } /* Emit x86 binary operand CODE in mode MODE, where the first operand @@ -17770,7 +17790,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[]) if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; - return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0); + return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); } /* Return true if destination reg of SET_BODY is shift count of @@ -24368,6 +24388,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn) return false; } +/* Helper function for exact_store_load_dependency. + Return true if addr is found in insn. 
*/ +static bool +exact_dependency_1 (rtx addr, rtx insn) +{ + enum rtx_code code; + const char *format_ptr; + int i, j; + + code = GET_CODE (insn); + switch (code) + { + case MEM: + if (rtx_equal_p (addr, insn)) + return true; + break; + case REG: + CASE_CONST_ANY: + case SYMBOL_REF: + case CODE_LABEL: + case PC: + case CC0: + case EXPR_LIST: + return false; + default: + break; + } + + format_ptr = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); i++) + { + switch (*format_ptr++) + { + case 'e': + if (exact_dependency_1 (addr, XEXP (insn, i))) + return true; + break; + case 'E': + for (j = 0; j < XVECLEN (insn, i); j++) + if (exact_dependency_1 (addr, XVECEXP (insn, i, j))) + return true; + break; + } + } + return false; +} + +/* Return true if there exists exact dependency for store & load, i.e. + the same memory address is used in them. */ +static bool +exact_store_load_dependency (rtx store, rtx load) +{ + rtx set1, set2; + + set1 = single_set (store); + if (!set1) + return false; + if (!MEM_P (SET_DEST (set1))) + return false; + set2 = single_set (load); + if (!set2) + return false; + if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2))) + return true; + return false; +} + static int ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { @@ -24519,6 +24606,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) else cost = 0; } + break; + + case PROCESSOR_SLM: + if (!reload_completed) + return cost; + + /* Increase cost of integer loads. */ + memory = get_attr_memory (dep_insn); + if (memory == MEMORY_LOAD || memory == MEMORY_BOTH) + { + enum attr_unit unit = get_attr_unit (dep_insn); + if (unit == UNIT_INTEGER && cost == 1) + { + if (memory == MEMORY_LOAD) + cost = 3; + else + { + /* Increase cost of ld/st for short int types only + because of store forwarding issue. 
*/ + rtx set = single_set (dep_insn); + if (set && (GET_MODE (SET_DEST (set)) == QImode + || GET_MODE (SET_DEST (set)) == HImode)) + { + /* Increase cost of store/load insn if exact + dependence exists and it is load insn. */ + enum attr_memory insn_memory = get_attr_memory (insn); + if (insn_memory == MEMORY_LOAD + && exact_store_load_dependency (dep_insn, insn)) + cost = 3; + } + } + } + } default: break; @@ -24565,110 +24685,204 @@ ia32_multipass_dfa_lookahead (void) execution. It is applied if (1) IMUL instruction is on the top of list; (2) There exists the only producer of independent IMUL instruction in - ready list; - (3) Put found producer on the top of ready list. - Returns issue rate. */ - + ready list. + Return index of IMUL producer if it was found and -1 otherwise. */ static int -ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, - int clock_var ATTRIBUTE_UNUSED) +do_reorder_for_imul (rtx *ready, int n_ready) { - static int issue_rate = -1; - int n_ready = *pn_ready; - rtx insn, insn1, insn2; - int i; + rtx insn, set, insn1, insn2; sd_iterator_def sd_it; dep_t dep; int index = -1; + int i; - /* Set up issue rate. */ - issue_rate = ix86_issue_rate(); - - /* Do reodering for Atom only. */ if (ix86_tune != PROCESSOR_ATOM) - return issue_rate; - /* Do not perform ready list reodering for pre-reload schedule pass. */ - if (!reload_completed) - return issue_rate; - /* Nothing to do if ready list contains only 1 instruction. */ - if (n_ready <= 1) - return issue_rate; + return index; /* Check that IMUL instruction is on the top of ready list. 
*/ insn = ready[n_ready - 1]; - if (!NONDEBUG_INSN_P (insn)) - return issue_rate; - insn = PATTERN (insn); - if (GET_CODE (insn) == PARALLEL) - insn = XVECEXP (insn, 0, 0); - if (GET_CODE (insn) != SET) - return issue_rate; - if (!(GET_CODE (SET_SRC (insn)) == MULT - && GET_MODE (SET_SRC (insn)) == SImode)) - return issue_rate; + set = single_set (insn); + if (!set) + return index; + if (!(GET_CODE (SET_SRC (set)) == MULT + && GET_MODE (SET_SRC (set)) == SImode)) + return index; /* Search for producer of independent IMUL instruction. */ - for (i = n_ready - 2; i>= 0; i--) + for (i = n_ready - 2; i >= 0; i--) { insn = ready[i]; if (!NONDEBUG_INSN_P (insn)) - continue; + continue; /* Skip IMUL instruction. */ insn2 = PATTERN (insn); if (GET_CODE (insn2) == PARALLEL) - insn2 = XVECEXP (insn2, 0, 0); + insn2 = XVECEXP (insn2, 0, 0); if (GET_CODE (insn2) == SET - && GET_CODE (SET_SRC (insn2)) == MULT - && GET_MODE (SET_SRC (insn2)) == SImode) - continue; + && GET_CODE (SET_SRC (insn2)) == MULT + && GET_MODE (SET_SRC (insn2)) == SImode) + continue; FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) - { - rtx con; + { + rtx con; con = DEP_CON (dep); if (!NONDEBUG_INSN_P (con)) continue; - insn1 = PATTERN (con); - if (GET_CODE (insn1) == PARALLEL) - insn1 = XVECEXP (insn1, 0, 0); - - if (GET_CODE (insn1) == SET - && GET_CODE (SET_SRC (insn1)) == MULT - && GET_MODE (SET_SRC (insn1)) == SImode) - { - sd_iterator_def sd_it1; - dep_t dep1; - /* Check if there is no other dependee for IMUL. */ - index = i; - FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) - { - rtx pro; - pro = DEP_PRO (dep1); + insn1 = PATTERN (con); + if (GET_CODE (insn1) == PARALLEL) + insn1 = XVECEXP (insn1, 0, 0); + + if (GET_CODE (insn1) == SET + && GET_CODE (SET_SRC (insn1)) == MULT + && GET_MODE (SET_SRC (insn1)) == SImode) + { + sd_iterator_def sd_it1; + dep_t dep1; + /* Check if there is no other dependee for IMUL. 
*/ + index = i; + FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) + { + rtx pro; + pro = DEP_PRO (dep1); if (!NONDEBUG_INSN_P (pro)) continue; - if (pro != insn) - index = -1; - } - if (index >= 0) - break; - } - } + if (pro != insn) + index = -1; + } + if (index >= 0) + break; + } + } if (index >= 0) - break; + break; } - if (index < 0) - return issue_rate; /* Didn't find IMUL producer. */ + return index; +} - if (sched_verbose > 1) - fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n", - INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1])); +/* Try to find the best candidate on the top of ready list if two insns + have the same priority - candidate is best if its dependees were + scheduled earlier. Applied for Silvermont only. + Return true if top 2 insns must be interchanged. */ +static bool +swap_top_of_ready_list (rtx *ready, int n_ready) +{ + rtx top = ready[n_ready - 1]; + rtx next = ready[n_ready - 2]; + rtx set; + sd_iterator_def sd_it; + dep_t dep; + int clock1 = -1; + int clock2 = -1; + #define INSN_TICK(INSN) (HID (INSN)->tick) - /* Put IMUL producer (ready[index]) at the top of ready list. */ - insn1= ready[index]; - for (i = index; i < n_ready - 1; i++) - ready[i] = ready[i + 1]; - ready[n_ready - 1] = insn1; + if (ix86_tune != PROCESSOR_SLM) + return false; + + if (!NONDEBUG_INSN_P (top)) + return false; + if (!NONJUMP_INSN_P (top)) + return false; + if (!NONDEBUG_INSN_P (next)) + return false; + if (!NONJUMP_INSN_P (next)) + return false; + set = single_set (top); + if (!set) + return false; + set = single_set (next); + if (!set) + return false; + if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next)) + { + if (INSN_PRIORITY (top) != INSN_PRIORITY (next)) + return false; + /* Determine winner more precise. 
*/ + FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock1) + clock1 = INSN_TICK (pro); + } + FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock2) + clock2 = INSN_TICK (pro); + } + + if (clock1 == clock2) + { + /* Determine winner - load must win. */ + enum attr_memory memory1, memory2; + memory1 = get_attr_memory (top); + memory2 = get_attr_memory (next); + if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD) + return true; + } + return (bool) (clock2 < clock1); + } + return false; + #undef INSN_TICK +} + +/* Perform possible reodering of ready list for Atom/Silvermont only. + Return issue rate. */ +static int +ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, + int clock_var) +{ + int issue_rate = -1; + int n_ready = *pn_ready; + int i; + rtx insn; + int index = -1; + + /* Set up issue rate. */ + issue_rate = ix86_issue_rate (); + + /* Do reodering for Atom/SLM only. */ + if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM) + return issue_rate; + + /* Nothing to do if ready list contains only 1 instruction. */ + if (n_ready <= 1) + return issue_rate; + + /* Do reodering for post-reload scheduler only. */ + if (!reload_completed) + return issue_rate; + + if ((index = do_reorder_for_imul (ready, n_ready)) >= 0) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n", + INSN_UID (ready[index])); + + /* Put IMUL producer (ready[index]) at the top of ready list. 
*/ + insn = ready[index]; + for (i = index; i < n_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[n_ready - 1] = insn; + return issue_rate; + } + if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready)) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n", + INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2])); + /* Swap 2 top elements of ready list. */ + insn = ready[n_ready - 1]; + ready[n_ready - 1] = ready[n_ready - 2]; + ready[n_ready - 2] = insn; + } return issue_rate; } @@ -25161,11 +25375,12 @@ ix86_constant_alignment (tree exp, int align) instead of that alignment to align the object. */ int -ix86_data_alignment (tree type, int align) +ix86_data_alignment (tree type, int align, bool opt) { int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); - if (AGGREGATE_TYPE_P (type) + if (opt + && AGGREGATE_TYPE_P (type) && TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align @@ -25177,14 +25392,17 @@ ix86_data_alignment (tree type, int align) to 16byte boundary. */ if (TARGET_64BIT) { - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) return 128; } + if (!opt) + return align; + if (TREE_CODE (type) == ARRAY_TYPE) { if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) @@ -29834,11 +30052,11 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMD, M_CPU_TYPE_START, M_INTEL_ATOM, - M_INTEL_SLM, M_INTEL_CORE2, M_INTEL_COREI7, M_AMDFAM10H, M_AMDFAM15H, + M_INTEL_SLM, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -33737,6 +33955,8 @@ static inline bool inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, enum machine_mode mode, int strict) { + if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) + return false; if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 776582a66de..7d940f98804 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -333,6 +333,7 @@ enum ix86_tune_indices { X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_GENERAL_REGS_SSE_SPILL, X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, + X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, X86_TUNE_LAST }; @@ -443,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] +#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \ + ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] /* Feature tests against the various architecture variations. 
*/ enum ix86_arch_indices { @@ -856,7 +859,18 @@ enum target_cpu_default cause character arrays to be word-aligned so that `strcpy' calls that copy constants to character arrays can be done inline. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN)) +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), true) + +/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates + some alignment increase, instead of optimization only purposes. E.g. + AMD x86-64 psABI says that variables with array type larger than 15 bytes + must be aligned to 16 byte boundaries. + + If this macro is not defined, then ALIGN is used. */ + +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), false) /* If defined, a C expression to compute the alignment for a local variable. TYPE is the data type, and ALIGN is the alignment that diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 28b0c78093a..e97a4570501 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3625,6 +3625,18 @@ CONST0_RTX (V4SFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. +(define_peephole2 + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF + (match_operand:SF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_extend:DF (match_dup 2)))] + "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") + (define_insn "*extendsfdf2_mixed" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") (float_extend:DF @@ -3766,6 +3778,18 @@ CONST0_RTX (V2DFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. 
+(define_peephole2 + [(set (match_operand:SF 0 "register_operand") + (float_truncate:SF + (match_operand:DF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_truncate:SF (match_dup 2)))] + "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0) (float_truncate:SF (match_operand:DF 1))) @@ -16567,6 +16591,7 @@ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && (<MODE>mode != QImode || immediate_operand (operands[2], QImode) || q_regs_operand (operands[2], QImode)) @@ -16631,6 +16656,7 @@ || immediate_operand (operands[2], SImode) || q_regs_operand (operands[2], SImode)) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && ix86_match_ccmode (peep2_next_insn (3), (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def index f08fad6778d..d7db0ba96d6 100644 --- a/gcc/config/mips/mips-cpus.def +++ b/gcc/config/mips/mips-cpus.def @@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0) MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0) MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0) MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0) +MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0) /* ST Loongson 2E/2F processors. 
*/ MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY) MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY) diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md index 002c9992001..49a08689638 100644 --- a/gcc/config/mips/mips-dsp.md +++ b/gcc/config/mips/mips-dsp.md @@ -1131,8 +1131,7 @@ "ISA_HAS_L<SHORT:SIZE><U>X" "l<SHORT:size><u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_expand "mips_lhx" [(match_operand:SI 0 "register_operand") @@ -1165,8 +1164,7 @@ "ISA_HAS_L<GPR:SIZE>X" "l<GPR:size>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_insn "*mips_lw<u>x_<P:mode>_ext" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1176,8 +1174,7 @@ "ISA_HAS_LW<U>X && TARGET_64BIT" "lw<u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "DI") - (set_attr "length" "4")]) + (set_attr "mode" "DI")]) ;; Table 2-8. 
MIPS DSP ASE Instructions: Branch ;; BPOSGE32 diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 9c70cc4324f..a22c7829b77 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -481,7 +481,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) (define_insn_and_split "mips_cabs_cond_4s" @@ -510,7 +510,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt index 0d7fa26510d..8ed412cc15e 100644 --- a/gcc/config/mips/mips-tables.opt +++ b/gcc/config/mips/mips-tables.opt @@ -208,425 +208,431 @@ EnumValue Enum(mips_arch_opt_value) String(4700) Value(22) EnumValue -Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical +Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical +Enum(mips_arch_opt_value) String(5900) Value(23) EnumValue -Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical +Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical EnumValue -Enum(mips_arch_opt_value) String(r8k) Value(25) +Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical EnumValue -Enum(mips_arch_opt_value) String(8000) Value(25) +Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical EnumValue -Enum(mips_arch_opt_value) String(8k) Value(25) +Enum(mips_arch_opt_value) String(r8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical +Enum(mips_arch_opt_value) String(8000) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10k) Value(26) +Enum(mips_arch_opt_value) String(8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(10000) Value(26) 
+Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical EnumValue -Enum(mips_arch_opt_value) String(10k) Value(26) +Enum(mips_arch_opt_value) String(r10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical +Enum(mips_arch_opt_value) String(10000) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12k) Value(27) +Enum(mips_arch_opt_value) String(10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(12000) Value(27) +Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical EnumValue -Enum(mips_arch_opt_value) String(12k) Value(27) +Enum(mips_arch_opt_value) String(r12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical +Enum(mips_arch_opt_value) String(12000) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14k) Value(28) +Enum(mips_arch_opt_value) String(12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(14000) Value(28) +Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical EnumValue -Enum(mips_arch_opt_value) String(14k) Value(28) +Enum(mips_arch_opt_value) String(r14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical +Enum(mips_arch_opt_value) String(14000) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16k) Value(29) +Enum(mips_arch_opt_value) String(14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(16000) Value(29) +Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical EnumValue -Enum(mips_arch_opt_value) String(16k) Value(29) +Enum(mips_arch_opt_value) String(r16k) Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical +Enum(mips_arch_opt_value) String(16000) Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5k) Value(30) +Enum(mips_arch_opt_value) String(16k) Value(30) EnumValue -Enum(mips_arch_opt_value) String(5000) Value(30) +Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical EnumValue -Enum(mips_arch_opt_value) String(5k) Value(30) 
+Enum(mips_arch_opt_value) String(vr5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5000) Value(30) +Enum(mips_arch_opt_value) String(5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5k) Value(30) +Enum(mips_arch_opt_value) String(5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical +Enum(mips_arch_opt_value) String(r5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(5400) Value(31) +Enum(mips_arch_opt_value) String(r5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5400) Value(31) +Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical EnumValue -Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical +Enum(mips_arch_opt_value) String(5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(5500) Value(32) +Enum(mips_arch_opt_value) String(r5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(r5500) Value(32) +Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical EnumValue -Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical +Enum(mips_arch_opt_value) String(5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(rm7k) Value(33) +Enum(mips_arch_opt_value) String(r5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(7000) Value(33) +Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical EnumValue -Enum(mips_arch_opt_value) String(7k) Value(33) +Enum(mips_arch_opt_value) String(rm7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7000) Value(33) +Enum(mips_arch_opt_value) String(7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7k) Value(33) +Enum(mips_arch_opt_value) String(7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical +Enum(mips_arch_opt_value) String(r7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9k) Value(34) +Enum(mips_arch_opt_value) String(r7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(9000) Value(34) +Enum(mips_arch_opt_value) String(rm9000) 
Value(35) Canonical EnumValue -Enum(mips_arch_opt_value) String(9k) Value(34) +Enum(mips_arch_opt_value) String(rm9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9000) Value(34) +Enum(mips_arch_opt_value) String(9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9k) Value(34) +Enum(mips_arch_opt_value) String(9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical +Enum(mips_arch_opt_value) String(r9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r4kc) Value(35) +Enum(mips_arch_opt_value) String(r9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4km) Value(36) Canonical +Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4km) Value(36) +Enum(mips_arch_opt_value) String(r4kc) Value(36) EnumValue -Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical +Enum(mips_arch_opt_value) String(4km) Value(37) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kp) Value(37) +Enum(mips_arch_opt_value) String(r4km) Value(37) EnumValue -Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical +Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksc) Value(38) +Enum(mips_arch_opt_value) String(r4kp) Value(38) EnumValue -Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical +Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical +Enum(mips_arch_opt_value) String(r4ksc) Value(39) EnumValue -Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical +Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical +Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kec) Value(42) +Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical 
+Enum(mips_arch_opt_value) String(4kec) Value(43) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kem) Value(43) +Enum(mips_arch_opt_value) String(r4kec) Value(43) EnumValue -Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical +Enum(mips_arch_opt_value) String(4kem) Value(44) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kep) Value(44) +Enum(mips_arch_opt_value) String(r4kem) Value(44) EnumValue -Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical +Enum(mips_arch_opt_value) String(4kep) Value(45) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksd) Value(45) +Enum(mips_arch_opt_value) String(r4kep) Value(45) EnumValue -Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical +Enum(mips_arch_opt_value) String(4ksd) Value(46) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kc) Value(46) +Enum(mips_arch_opt_value) String(r4ksd) Value(46) EnumValue -Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical +Enum(mips_arch_opt_value) String(24kc) Value(47) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf2_1) Value(47) +Enum(mips_arch_opt_value) String(r24kc) Value(47) EnumValue -Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical +Enum(mips_arch_opt_value) String(24kf2_1) Value(48) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf) Value(48) +Enum(mips_arch_opt_value) String(r24kf2_1) Value(48) EnumValue -Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical +Enum(mips_arch_opt_value) String(24kf) Value(49) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf1_1) Value(49) +Enum(mips_arch_opt_value) String(r24kf) Value(49) EnumValue -Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical +Enum(mips_arch_opt_value) String(24kf1_1) Value(50) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kfx) Value(50) +Enum(mips_arch_opt_value) String(r24kf1_1) Value(50) EnumValue -Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical +Enum(mips_arch_opt_value) 
String(24kfx) Value(51) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kx) Value(51) +Enum(mips_arch_opt_value) String(r24kfx) Value(51) EnumValue -Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical +Enum(mips_arch_opt_value) String(24kx) Value(52) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kec) Value(52) +Enum(mips_arch_opt_value) String(r24kx) Value(52) EnumValue -Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical +Enum(mips_arch_opt_value) String(24kec) Value(53) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef2_1) Value(53) +Enum(mips_arch_opt_value) String(r24kec) Value(53) EnumValue -Enum(mips_arch_opt_value) String(24kef) Value(54) Canonical +Enum(mips_arch_opt_value) String(24kef2_1) Value(54) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef) Value(54) +Enum(mips_arch_opt_value) String(r24kef2_1) Value(54) EnumValue -Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical +Enum(mips_arch_opt_value) String(24kef) Value(55) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef1_1) Value(55) +Enum(mips_arch_opt_value) String(r24kef) Value(55) EnumValue -Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical +Enum(mips_arch_opt_value) String(24kef1_1) Value(56) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kefx) Value(56) +Enum(mips_arch_opt_value) String(r24kef1_1) Value(56) EnumValue -Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical +Enum(mips_arch_opt_value) String(24kefx) Value(57) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kex) Value(57) +Enum(mips_arch_opt_value) String(r24kefx) Value(57) EnumValue -Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical +Enum(mips_arch_opt_value) String(24kex) Value(58) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kc) Value(58) +Enum(mips_arch_opt_value) String(r24kex) Value(58) EnumValue -Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical +Enum(mips_arch_opt_value) 
String(34kc) Value(59) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf2_1) Value(59) +Enum(mips_arch_opt_value) String(r34kc) Value(59) EnumValue -Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical +Enum(mips_arch_opt_value) String(34kf2_1) Value(60) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf) Value(60) +Enum(mips_arch_opt_value) String(r34kf2_1) Value(60) EnumValue -Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical +Enum(mips_arch_opt_value) String(34kf) Value(61) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf1_1) Value(61) +Enum(mips_arch_opt_value) String(r34kf) Value(61) EnumValue -Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical +Enum(mips_arch_opt_value) String(34kf1_1) Value(62) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kfx) Value(62) +Enum(mips_arch_opt_value) String(r34kf1_1) Value(62) EnumValue -Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical +Enum(mips_arch_opt_value) String(34kfx) Value(63) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kx) Value(63) +Enum(mips_arch_opt_value) String(r34kfx) Value(63) EnumValue -Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical +Enum(mips_arch_opt_value) String(34kx) Value(64) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kn) Value(64) +Enum(mips_arch_opt_value) String(r34kx) Value(64) EnumValue -Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical +Enum(mips_arch_opt_value) String(34kn) Value(65) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kc) Value(65) +Enum(mips_arch_opt_value) String(r34kn) Value(65) EnumValue -Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical +Enum(mips_arch_opt_value) String(74kc) Value(66) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf2_1) Value(66) +Enum(mips_arch_opt_value) String(r74kc) Value(66) EnumValue -Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical +Enum(mips_arch_opt_value) String(74kf2_1) Value(67) 
Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf) Value(67) +Enum(mips_arch_opt_value) String(r74kf2_1) Value(67) EnumValue -Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical +Enum(mips_arch_opt_value) String(74kf) Value(68) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf1_1) Value(68) +Enum(mips_arch_opt_value) String(r74kf) Value(68) EnumValue -Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical +Enum(mips_arch_opt_value) String(74kf1_1) Value(69) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kfx) Value(69) +Enum(mips_arch_opt_value) String(r74kf1_1) Value(69) EnumValue -Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical +Enum(mips_arch_opt_value) String(74kfx) Value(70) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kx) Value(70) +Enum(mips_arch_opt_value) String(r74kfx) Value(70) EnumValue -Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical +Enum(mips_arch_opt_value) String(74kx) Value(71) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf3_2) Value(71) +Enum(mips_arch_opt_value) String(r74kx) Value(71) EnumValue -Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical +Enum(mips_arch_opt_value) String(74kf3_2) Value(72) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kc) Value(72) +Enum(mips_arch_opt_value) String(r74kf3_2) Value(72) EnumValue -Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical +Enum(mips_arch_opt_value) String(1004kc) Value(73) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73) +Enum(mips_arch_opt_value) String(r1004kc) Value(73) EnumValue -Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical +Enum(mips_arch_opt_value) String(1004kf2_1) Value(74) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf) Value(74) +Enum(mips_arch_opt_value) String(r1004kf2_1) Value(74) EnumValue -Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical +Enum(mips_arch_opt_value) String(1004kf) 
Value(75) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75) +Enum(mips_arch_opt_value) String(r1004kf) Value(75) EnumValue -Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical +Enum(mips_arch_opt_value) String(1004kf1_1) Value(76) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kc) Value(76) +Enum(mips_arch_opt_value) String(r1004kf1_1) Value(76) EnumValue -Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical +Enum(mips_arch_opt_value) String(5kc) Value(77) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kf) Value(77) +Enum(mips_arch_opt_value) String(r5kc) Value(77) EnumValue -Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical +Enum(mips_arch_opt_value) String(5kf) Value(78) Canonical EnumValue -Enum(mips_arch_opt_value) String(r20kc) Value(78) +Enum(mips_arch_opt_value) String(r5kf) Value(78) EnumValue -Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical +Enum(mips_arch_opt_value) String(20kc) Value(79) Canonical EnumValue -Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical +Enum(mips_arch_opt_value) String(r20kc) Value(79) EnumValue -Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical +Enum(mips_arch_opt_value) String(sb1) Value(80) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71k) Value(81) +Enum(mips_arch_opt_value) String(sb1a) Value(81) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical +Enum(mips_arch_opt_value) String(sr71000) Value(82) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical +Enum(mips_arch_opt_value) String(sr71k) Value(82) EnumValue -Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical +Enum(mips_arch_opt_value) String(xlr) Value(83) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon+) Value(85) Canonical +Enum(mips_arch_opt_value) String(loongson3a) Value(84) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical +Enum(mips_arch_opt_value) 
String(octeon) Value(85) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical +Enum(mips_arch_opt_value) String(octeon+) Value(86) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon2) Value(87) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(xlp) Value(88) Canonical diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 222c768b9b9..bd1d10b0e4e 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* R5900 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult_sf */ + COSTS_N_INSNS (256), /* fp_mult_df */ + COSTS_N_INSNS (8), /* fp_div_sf */ + COSTS_N_INSNS (256), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (256), /* int_mult_di */ + COSTS_N_INSNS (37), /* int_div_si */ + COSTS_N_INSNS (256), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* R7000 */ /* The only costs that are changed here are integer multiplication. */ @@ -12450,7 +12463,10 @@ mips_start_ll_sc_sync_block (void) if (!ISA_HAS_LL_SC) { output_asm_insn (".set\tpush", 0); - output_asm_insn (".set\tmips2", 0); + if (TARGET_64BIT) + output_asm_insn (".set\tmips3", 0); + else + output_asm_insn (".set\tmips2", 0); } } @@ -13005,6 +13021,7 @@ mips_issue_rate (void) case PROCESSOR_R4130: case PROCESSOR_R5400: case PROCESSOR_R5500: + case PROCESSOR_R5900: case PROCESSOR_R7000: case PROCESSOR_R9000: case PROCESSOR_OCTEON: @@ -16025,8 +16042,9 @@ mips_reorg_process_insns (void) cfun->machine->all_noreorder_p = false; /* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder - because we rely on the assembler to work around some errata. */ - if (TARGET_FIX_VR4120 || TARGET_FIX_24K) + because we rely on the assembler to work around some errata. + The r5900 too has several bugs. 
*/ + if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900) cfun->machine->all_noreorder_p = false; /* The same is true for -mfix-vr4130 if we might generate MFLO or diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index a8cf1dbedc7..ff631c1a30b 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -222,6 +222,7 @@ struct mips_cpu_info { #define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130) #define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400) #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500) +#define TARGET_MIPS5900 (mips_arch == PROCESSOR_R5900) #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000) #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000) #define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON \ @@ -399,6 +400,9 @@ struct mips_cpu_info { if (TARGET_MCU) \ builtin_define ("__mips_mcu"); \ \ + if (TARGET_EVA) \ + builtin_define ("__mips_eva"); \ + \ if (TARGET_DSP) \ { \ builtin_define ("__mips_dsp"); \ @@ -803,6 +807,7 @@ struct mips_cpu_info { #define ISA_HAS_MUL3 ((TARGET_MIPS3900 \ || TARGET_MIPS5400 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_MIPS7000 \ || TARGET_MIPS9000 \ || TARGET_MAD \ @@ -817,6 +822,22 @@ struct mips_cpu_info { && TARGET_OCTEON \ && !TARGET_MIPS16) +/* ISA supports instructions DMULT and DMULTU. */ +#define ISA_HAS_DMULT (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions MULT and MULTU. + This is always true, but the macro is needed for ISA_HAS_<D>MULT + in mips.md. */ +#define ISA_HAS_MULT (1) + +/* ISA supports instructions DDIV and DDIVU. */ +#define ISA_HAS_DDIV (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions DIV and DIVU. + This is always true, but the macro is needed for ISA_HAS_<D>DIV + in mips.md. 
*/ +#define ISA_HAS_DIV (1) + #define ISA_HAS_DIV3 ((TARGET_LOONGSON_2EF \ || TARGET_LOONGSON_3A) \ && !TARGET_MIPS16) @@ -833,7 +854,9 @@ struct mips_cpu_info { /* ISA has the integer conditional move instructions introduced in mips4 and ST Loongson 2E/2F. */ -#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF) +#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE \ + || TARGET_MIPS5900 \ + || TARGET_LOONGSON_2EF) /* ISA has LDC1 and SDC1. */ #define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16) @@ -946,6 +969,7 @@ struct mips_cpu_info { /* ISA has data prefetch instructions. This controls use of 'pref'. */ #define ISA_HAS_PREFETCH ((ISA_MIPS4 \ || TARGET_LOONGSON_2EF \ + || TARGET_MIPS5900 \ || ISA_MIPS32 \ || ISA_MIPS32R2 \ || ISA_MIPS64 \ @@ -1007,15 +1031,18 @@ struct mips_cpu_info { and "addiu $4,$4,1". */ #define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \ && !TARGET_MIPS3900 \ + && !TARGET_MIPS5900 \ && !TARGET_MIPS16 \ && !TARGET_MICROMIPS) /* Likewise mtc1 and mfc1. */ #define ISA_HAS_XFER_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* Likewise floating-point comparisons. */ #define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* True if mflo and mfhi can be immediately followed by instructions @@ -1035,6 +1062,7 @@ struct mips_cpu_info { || ISA_MIPS64 \ || ISA_MIPS64R2 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_LOONGSON_2EF) /* ISA includes synci, jr.hb and jalr.hb. */ @@ -1052,7 +1080,7 @@ struct mips_cpu_info { /* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC instructions. */ -#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16) +#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16) #define GENERATE_LL_SC \ (target_flags_explicit & MASK_LLSC \ ? 
TARGET_LLSC && !TARGET_MIPS16 \ @@ -1125,6 +1153,7 @@ struct mips_cpu_info { %{mdsp} %{mno-dsp} \ %{mdspr2} %{mno-dspr2} \ %{mmcu} %{mno-mcu} \ +%{meva} %{mno-eva} \ %{msmartmips} %{mno-smartmips} \ %{mmt} %{mno-mt} \ %{mfix-vr4120} %{mfix-vr4130} \ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 6f6484b0d8c..ce322d8bc36 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -55,6 +55,7 @@ r5000 r5400 r5500 + r5900 r7000 r8000 r9000 @@ -406,8 +407,12 @@ ;; Is this an extended instruction in mips16 mode? (define_attr "extended_mips16" "no,yes" - (if_then_else (ior (eq_attr "move_type" "sll0") - (eq_attr "jal" "direct")) + (if_then_else (ior ;; In general, constant-pool loads are extended + ;; instructions. We don't yet optimize for 16-bit + ;; PC-relative references. + (eq_attr "move_type" "sll0,loadpool") + (eq_attr "jal" "direct") + (eq_attr "got" "load")) (const_string "yes") (const_string "no"))) @@ -420,14 +425,89 @@ (match_test "TARGET_MICROMIPS"))) (const_string "yes") (const_string "no"))) - -;; Length of instruction in bytes. -(define_attr "length" "" - (cond [(and (eq_attr "extended_mips16" "yes") - (match_test "TARGET_MIPS16")) - (const_int 4) - (and (eq_attr "compression" "micromips,all") +;; The number of individual instructions that a non-branch pattern generates, +;; using units of BASE_INSN_LENGTH. +(define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") + (const_int 0) + + ;; Extended instructions count as 2. + (and (eq_attr "extended_mips16" "yes") + (match_test "TARGET_MIPS16")) + (const_int 2) + + ;; A GOT load followed by an add of $gp. This is not used for MIPS16. + (eq_attr "got" "xgot_high") + (const_int 2) + + ;; SHIFT_SHIFTs are decomposed into two separate instructions. + ;; They are extended instructions on MIPS16 targets. 
+ (eq_attr "move_type" "shift_shift") + (if_then_else (match_test "TARGET_MIPS16") + (const_int 4) + (const_int 2)) + + ;; Check for doubleword moves that are decomposed into two + ;; instructions. The individual instructions are unextended + ;; MIPS16 ones. + (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") + (eq_attr "dword_mode" "yes")) + (const_int 2) + + ;; Constants, loads and stores are handled by external routines. + (and (eq_attr "move_type" "const,constN") + (eq_attr "dword_mode" "yes")) + (symbol_ref "mips_split_const_insns (operands[1])") + (eq_attr "move_type" "const,constN") + (symbol_ref "mips_const_insns (operands[1])") + (eq_attr "move_type" "load,fpload") + (symbol_ref "mips_load_store_insns (operands[1], insn)") + (eq_attr "move_type" "store,fpstore") + (symbol_ref "mips_load_store_insns (operands[0], insn) + + (TARGET_FIX_24K ? 1 : 0)") + + ;; In the worst case, a call macro will take 8 instructions: + ;; + ;; lui $25,%call_hi(FOO) + ;; addu $25,$25,$28 + ;; lw $25,%call_lo(FOO)($25) + ;; nop + ;; jalr $25 + ;; nop + ;; lw $gp,X($sp) + ;; nop + (eq_attr "jal_macro" "yes") + (const_int 8) + + ;; Various VR4120 errata require a nop to be inserted after a macc + ;; instruction. The assembler does this for us, so account for + ;; the worst-case length here. + (and (eq_attr "type" "imadd") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + ;; VR4120 errata MD(4): if there are consecutive dmult instructions, + ;; the result of the second one is missed. The assembler should work + ;; around this by inserting a nop after the first dmult. + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "DI") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + (eq_attr "type" "idiv,idiv3") + (symbol_ref "mips_idiv_insns ()") + + (not (eq_attr "sync_mem" "none")) + (symbol_ref "mips_sync_loop_insns (insn, operands)")] + (const_int 1))) + +;; Length of instruction in bytes. 
The default is derived from "insn_count", +;; but there are special cases for branches (which must be handled here) +;; and for compressed single instructions. +(define_attr "length" "" + (cond [(and (eq_attr "compression" "micromips,all") (eq_attr "dword_mode" "no") (match_test "TARGET_MICROMIPS")) (const_int 2) @@ -580,95 +660,8 @@ (const_int 20) (match_test "Pmode == SImode") (const_int 16) - ] (const_int 24)) - - ;; "Ghost" instructions occupy no space. - (eq_attr "type" "ghost") - (const_int 0) - - ;; GOT loads are extended MIPS16 instructions and 4-byte - ;; microMIPS instructions. - (eq_attr "got" "load") - (const_int 4) - - ;; A GOT load followed by an add of $gp. - (eq_attr "got" "xgot_high") - (const_int 8) - - ;; In general, constant-pool loads are extended instructions. - (eq_attr "move_type" "loadpool") - (const_int 4) - - ;; SHIFT_SHIFTs are decomposed into two separate instructions. - ;; They are extended instructions on MIPS16 targets. - (eq_attr "move_type" "shift_shift") - (const_int 8) - - ;; Check for doubleword moves that are decomposed into two - ;; instructions. The individual instructions are unextended - ;; MIPS16 ones or 2-byte microMIPS ones. - (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") - (eq_attr "dword_mode" "yes")) - (if_then_else (match_test "TARGET_COMPRESSION") - (const_int 4) - (const_int 8)) - - ;; Doubleword CONST{,N} moves are split into two word - ;; CONST{,N} moves. - (and (eq_attr "move_type" "const,constN") - (eq_attr "dword_mode" "yes")) - (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH") - - ;; Otherwise, constants, loads and stores are handled by external - ;; routines. 
- (eq_attr "move_type" "const,constN") - (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH") - (eq_attr "move_type" "load,fpload") - (symbol_ref "mips_load_store_insns (operands[1], insn) - * BASE_INSN_LENGTH") - (eq_attr "move_type" "store,fpstore") - (symbol_ref "mips_load_store_insns (operands[0], insn) - * BASE_INSN_LENGTH - + (TARGET_FIX_24K ? NOP_INSN_LENGTH : 0)") - - ;; In the worst case, a call macro will take 8 instructions: - ;; - ;; lui $25,%call_hi(FOO) - ;; addu $25,$25,$28 - ;; lw $25,%call_lo(FOO)($25) - ;; nop - ;; jalr $25 - ;; nop - ;; lw $gp,X($sp) - ;; nop - (eq_attr "jal_macro" "yes") - (const_int 32) - - ;; Various VR4120 errata require a nop to be inserted after a macc - ;; instruction. The assembler does this for us, so account for - ;; the worst-case length here. - (and (eq_attr "type" "imadd") - (match_test "TARGET_FIX_VR4120")) - (const_int 8) - - ;; VR4120 errata MD(4): if there are consecutive dmult instructions, - ;; the result of the second one is missed. The assembler should work - ;; around this by inserting a nop after the first dmult. - (and (eq_attr "type" "imul,imul3") - (and (eq_attr "mode" "DI") - (match_test "TARGET_FIX_VR4120"))) - (const_int 8) - - (eq_attr "type" "idiv,idiv3") - (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH") - - (not (eq_attr "sync_mem" "none")) - (symbol_ref "mips_sync_loop_insns (insn, operands) - * BASE_INSN_LENGTH") - - (match_test "TARGET_MIPS16") - (const_int 2) - ] (const_int 4))) + ] (const_int 24))] + (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) ;; Attribute describing the processor. (define_enum_attr "cpu" "processor" @@ -701,16 +694,11 @@ (const_string "hilo")] (const_string "none"))) -;; Is it a single instruction? -(define_attr "single_insn" "no,yes" - (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4) - ? SINGLE_INSN_YES : SINGLE_INSN_NO)")) - ;; Can the instruction be put into a delay slot? 
(define_attr "can_delay" "no,yes" (if_then_else (and (eq_attr "type" "!branch,call,jump") - (and (eq_attr "hazard" "none") - (eq_attr "single_insn" "yes"))) + (eq_attr "hazard" "none") + (match_test "get_attr_insn_count (insn) == 1")) (const_string "yes") (const_string "no"))) @@ -755,7 +743,9 @@ ;; This mode iterator allows :MOVECC to be used anywhere that a ;; conditional-move-type condition is needed. (define_mode_iterator MOVECC [SI (DI "TARGET_64BIT") - (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")]) + (CC "TARGET_HARD_FLOAT + && !TARGET_LOONGSON_2EF + && !TARGET_MIPS5900")]) ;; 32-bit integer moves for which we provide move patterns. (define_mode_iterator IMOVE32 @@ -1417,7 +1407,7 @@ "mul.<fmt>\t%0,%1,%2\;nop" [(set_attr "type" "fmul") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=f") @@ -1478,7 +1468,7 @@ [(set (match_operand:GPR 0 "register_operand") (mult:GPR (match_operand:GPR 1 "register_operand") (match_operand:GPR 2 "register_operand")))] - "" + "ISA_HAS_<D>MULT" { rtx lo; @@ -1524,7 +1514,7 @@ { if (which_alternative == 1) return "<d>mult\t%1,%2"; - if (<MODE>mode == SImode && TARGET_MIPS3900) + if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900)) return "mult\t%0,%1,%2"; return "<d>mul\t%0,%1,%2"; } @@ -1558,7 +1548,7 @@ [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d")))] - "!TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && !TARGET_FIX_R4000" "<d>mult\t%1,%2" [(set_attr "type" "imul") (set_attr "mode" "<MODE>")]) @@ -1568,11 +1558,11 @@ (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (clobber (match_scratch:GPR 3 "=l"))] - "TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && TARGET_FIX_R4000" "<d>mult\t%1,%2\;mflo\t%0" [(set_attr "type" "imul") (set_attr "mode" "<MODE>") - (set_attr 
"length" "8")]) + (set_attr "insn_count" "2")]) ;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead ;; of "mult; mflo". They have the same latency, but the first form gives @@ -1632,7 +1622,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; The same idea applies here. The middle alternative needs one less ;; clobber than the final alternative, so we add "*?" as a counterweight. @@ -1651,7 +1641,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,4,8")]) + (set_attr "insn_count" "1,1,2")]) ;; Split *mul_acc_si if both the source and destination accumulator ;; values are GPRs. @@ -1732,7 +1722,7 @@ "" [(set_attr "type" "imadd") (set_attr "accum_in" "1") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) ;; Patterns generated by the define_peephole2 below. @@ -1868,7 +1858,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "1") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; Split *mul_sub_si if both the source and destination accumulator ;; values are GPRs. 
@@ -1949,7 +1939,7 @@ "mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) (define_insn_and_split "<u>mulsidi3_64bit" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1968,10 +1958,10 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "ISA_HAS_EXT_INS") - (const_int 16) - (const_int 28)))]) + (const_int 4) + (const_int 7)))]) (define_expand "<u>mulsidi3_64bit_mips16" [(set (match_operand:DI 0 "register_operand") @@ -2035,7 +2025,7 @@ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) (sign_extend:DI (match_operand:SI 2 "register_operand" "d")))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT && ISA_HAS_DMUL3" + "ISA_HAS_DMUL3" "dmul\t%0,%1,%2" [(set_attr "type" "imul3") (set_attr "mode" "DI")]) @@ -2122,7 +2112,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>mulsi3_highpart_split" [(set (match_operand:SI 0 "register_operand") @@ -2189,7 +2179,7 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))) (const_int 64))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { if (TARGET_MIPS16) emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1], @@ -2208,7 +2198,7 @@ (any_extend:TI (match_operand:DI 2 "register_operand" "d"))) (const_int 64)))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_MIPS16 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { return TARGET_FIX_R4000 ? 
"dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; } @@ -2221,7 +2211,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>muldi3_highpart_split" [(set (match_operand:DI 0 "register_operand") @@ -2244,7 +2234,7 @@ [(set (match_operand:TI 0 "register_operand") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { rtx hilo; @@ -2266,7 +2256,7 @@ [(set (match_operand:TI 0 "muldiv_target_operand" "=x") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2" @@ -2278,13 +2268,13 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d")))) (clobber (match_scratch:TI 3 "=x"))] - "TARGET_64BIT + "ISA_HAS_DMULT && TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) ;; The R4650 supports a 32-bit multiply/ 64-bit accumulate ;; instruction. The HI/LO registers are used as a 64-bit accumulator. 
@@ -2535,10 +2525,10 @@ } [(set_attr "type" "fdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*recip<mode>3" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2553,10 +2543,10 @@ } [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; VR4120 errata MD(A1): signed division instructions do not work correctly ;; with negative operands. We use special libgcc functions instead. @@ -2574,7 +2564,7 @@ (set (match_operand:GPR 3 "register_operand" "=d") (mod:GPR (match_dup 1) (match_dup 2)))] - "!TARGET_FIX_VR4120" + "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120" "#" "&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)" [(const_int 0)] @@ -2586,7 +2576,8 @@ } [(set_attr "type" "idiv") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + ;; Worst case for MIPS16. + (set_attr "insn_count" "3")]) ;; See the comment above "divmod<mode>4" for the MIPS16 handling. (define_insn_and_split "udivmod<mode>4" @@ -2596,7 +2587,7 @@ (set (match_operand:GPR 3 "register_operand" "=d") (umod:GPR (match_dup 1) (match_dup 2)))] - "" + "ISA_HAS_<D>DIV" "#" "(TARGET_MIPS16 && cse_not_expected) || reload_completed" [(const_int 0)] @@ -2606,9 +2597,10 @@ emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); DONE; } - [(set_attr "type" "idiv") - (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>") + ;; Worst case for MIPS16. 
+ (set_attr "insn_count" "3")]) (define_expand "<u>divmod<mode>4_split" [(set (match_operand:GPR 0 "register_operand") @@ -2641,7 +2633,7 @@ [(any_div:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))] UNSPEC_SET_HILO))] - "" + "ISA_HAS_<GPR:D>DIV" { return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); } [(set_attr "type" "idiv") (set_attr "mode" "<GPR:MODE>")]) @@ -2668,10 +2660,10 @@ } [(set_attr "type" "fsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>a" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2686,10 +2678,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>b" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2704,10 +2696,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; ;; .................... 
@@ -3500,7 +3492,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "DF") (set_attr "cnv_mode" "D2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "register_operand") @@ -3537,7 +3529,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "SF") (set_attr "cnv_mode" "S2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_insn "fix_truncdfdi2" @@ -4015,7 +4007,7 @@ operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID); } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Use a scratch register to reduce the latency of the above pattern ;; on superscalar machines. The optimized sequence is: @@ -4070,7 +4062,7 @@ operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW); } - [(set_attr "length" "24")]) + [(set_attr "insn_count" "6")]) ;; Split HIGHs into: ;; @@ -5080,7 +5072,7 @@ return ".cprestore\t%1"; } [(set_attr "type" "store") - (set_attr "length" "4,12")]) + (set_attr "insn_count" "1,3")]) (define_insn "use_cprestore_<mode>" [(set (reg:P CPRESTORE_SLOT_REGNUM) @@ -5141,7 +5133,7 @@ "\tjr.hb\t$31\n" "\tnop%>%)"; } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Cache operations for R4000-style caches. (define_insn "mips_cache" @@ -5334,8 +5326,7 @@ ;; not have and immediate). We recognize a shift of a load in order ;; to make it simple enough for combine to understand. ;; -;; The length here is the worst case: the length of the split version -;; will be more accurate. +;; The instruction count here is the worst case. 
(define_insn_and_split "" [(set (match_operand:SI 0 "register_operand" "=d") (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m") @@ -5348,7 +5339,8 @@ "" [(set_attr "type" "load") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set (attr "insn_count") + (symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))]) (define_insn "rotr<mode>3" [(set (match_operand:GPR 0 "register_operand" "=d") @@ -5987,7 +5979,7 @@ return "j\t%4"; } - [(set_attr "length" "32")]) + [(set_attr "insn_count" "16")]) ;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well. ;; While it is possible to either pull it off the stack (in the @@ -6878,11 +6870,8 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "unknown") - ; Since rdhwr always generates a trap for now, putting it in a delay - ; slot would make the kernel's emulation of it much slower. - (set_attr "can_delay" "no") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "*tls_get_tp_<mode>_split" [(set (reg:P TLS_GET_TP_REGNUM) @@ -6890,7 +6879,8 @@ "HAVE_AS_TLS && !TARGET_MIPS16" ".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop" [(set_attr "type" "unknown") - ; See tls_get_tp_<mode> + ; Since rdhwr always generates a trap for now, putting it in a delay + ; slot would make the kernel's emulation of it much slower. (set_attr "can_delay" "no") (set_attr "mode" "<MODE>")]) @@ -6922,7 +6912,7 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "multi") - (set_attr "length" "8") + (set_attr "insn_count" "4") (set_attr "mode" "<MODE>")]) (define_insn "*tls_get_tp_mips16_call_<mode>" @@ -6934,7 +6924,7 @@ "HAVE_AS_TLS && TARGET_MIPS16" { return MIPS_CALL ("jal", operands, 0, -1); } [(set_attr "type" "call") - (set_attr "length" "6") + (set_attr "insn_count" "3") (set_attr "mode" "<MODE>")]) ;; Named pattern for expanding thread pointer reference. 
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index e11710db3c0..08ab29b1810 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -141,6 +141,10 @@ membedded-data Target Report Var(TARGET_EMBEDDED_DATA) Use ROM instead of RAM +meva +Target Report Var(TARGET_EVA) +Use Enhanced Virtual Addressing instructions + mexplicit-relocs Target Report Mask(EXPLICIT_RELOCS) Use NewABI-style %reloc() assembly operators diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index 1af09e559b0..bd37067dfc4 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -313,7 +313,7 @@ mmix_init_machine_status (void) return ggc_alloc_cleared_machine_function (); } -/* DATA_ALIGNMENT. +/* DATA_ABI_ALIGNMENT. We have trouble getting the address of stuff that is located at other than 32-bit alignments (GETA requirements), so try to give everything at least 32-bit alignment. */ diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h index 4ca1a2b8c86..c5edc5777a9 100644 --- a/gcc/config/mmix/mmix.h +++ b/gcc/config/mmix/mmix.h @@ -164,7 +164,7 @@ struct GTY(()) machine_function /* Copied from elfos.h. */ #define MAX_OFILE_ALIGNMENT (32768 * 8) -#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \ mmix_data_alignment (TYPE, BASIC_ALIGN) #define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index 2e18bebf3d8..c2ed7389bc4 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace) } } +/* Returns TRUE for valid addresses. */ +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode +static bool +rl78_valid_pointer_mode (enum machine_mode m) +{ + return (m == HImode || m == SImode); +} + /* Return the appropriate mode for a named address address. 
*/ #undef TARGET_ADDR_SPACE_ADDRESS_MODE #define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode @@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) } + +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode + +static enum machine_mode +rl78_unwind_word_mode (void) +{ + return HImode; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rl78.h" diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md index b3cfe6d1bbc..efc26210498 100644 --- a/gcc/config/rl78/rl78.md +++ b/gcc/config/rl78/rl78.md @@ -235,6 +235,24 @@ [(set_attr "valloc" "macax")] ) +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "general_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + ] + "" ; mulu supported by all targets + "" +) + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "general_operand" "") + (match_operand:HI 2 "nonmemory_operand" ""))) + ] + "! 
RL78_MUL_NONE" + "" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "register_operand" "=&v") (mult:SI (match_operand:SI 1 "nonmemory_operand" "vi") @@ -244,6 +262,58 @@ "" ) +(define_insn "*mulqi3_rl78" + [(set (match_operand:QI 0 "register_operand" "=&v") + (mult:QI (match_operand:QI 1 "general_operand" "+viU") + (match_operand:QI 2 "general_operand" "vi"))) + ] + "" ; mulu supported by all targets + "; mulqi macro %0 = %1 * %2 + mov a, %h1 + mov x, a + mov a, %h2 + mulu x ; ax = a * x + mov a, x + mov %h0, a + ; end of mulqi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_rl78" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_RL78" + "; mulhi macro %0 = %1 * %2 + movw ax, %h1 + movw bc, %h2 + mulhu ; bcax = bc * ax + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_g13" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_G13" + "; mulhi macro %0 = %1 * %2 + mov a, #0x00 + mov !0xf00e8, a ; MDUC + movw ax, %h1 + movw 0xffff0, ax ; MDAL + movw ax, %h2 + movw 0xffff2, ax ; MDAH + nop ; mdb = mdal * mdah + movw ax, 0xffff6 ; MDBL + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "umul")] +) + ;; 0xFFFF0 is MACR(L). 0xFFFF2 is MACR(H) but we don't care about it ;; because we're only using the lower 16 bits (which is the upper 16 ;; bits of the result). diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 19a4ae9297d..4b91c5c5e24 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -323,15 +323,31 @@ #ifdef _ARCH_PWR8 /* Vector additions added in ISA 2.07. 
*/ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc #define vec_vaddudm __builtin_vec_vaddudm +#define vec_vclz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vgbbd __builtin_vec_vgbbd #define vec_vmaxsd __builtin_vec_vmaxsd #define vec_vmaxud __builtin_vec_vmaxud #define vec_vminsd __builtin_vec_vminsd #define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow #define vec_vpksdss __builtin_vec_vpksdss #define vec_vpksdus __builtin_vec_vpksdus #define vec_vpkudum __builtin_vec_vpkudum #define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw #define vec_vrld __builtin_vec_vrld #define vec_vsld __builtin_vec_vsld #define vec_vsrad __builtin_vec_vsrad diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 78d29001440..6607e450be3 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -128,6 +128,7 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD ]) (define_c_enum "unspecv" @@ -941,6 +942,31 @@ "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI 
+ (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "vec_widen_umult_even_v16qi" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1017,10 +1043,13 @@ ;; logical ops. Have the logical ops follow the memory ops in ;; terms of whether to prefer VSX or Altivec +;; AND has a clobber to be consistant with VSX, which adds splitters for using +;; the GPR registers. (define_insn "*altivec_and<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] + (match_operand:VM 2 "register_operand" "v"))) + (clobber (match_scratch:CC 3 "=X"))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vand %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1050,8 +1079,8 @@ (define_insn "*altivec_nor<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] + (and:VM (not:VM (match_operand:VM 1 "register_operand" "v")) + (not:VM (match_operand:VM 2 "register_operand" "v"))))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vnor %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -2370,3 +2399,34 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" 
"=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c index e608dce184c..1a173d0b1cc 100644 --- a/gcc/config/rs6000/driver-rs6000.c +++ b/gcc/config/rs6000/driver-rs6000.c @@ -167,7 +167,7 @@ elf_platform (void) if (fd != -1) { - char buf[1024]; + static char buf[1024]; ElfW(auxv_t) *av; ssize_t n; diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 3f280581feb..79f0f0b5f00 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -136,8 +136,11 @@ extern int dot_symbols; SET_CMODEL (CMODEL_MEDIUM); \ if (rs6000_current_cmodel != CMODEL_SMALL) \ { \ - TARGET_NO_FP_IN_TOC = 0; \ - TARGET_NO_SUM_IN_TOC = 0; \ + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ + TARGET_NO_FP_IN_TOC \ + = rs6000_current_cmodel == CMODEL_MEDIUM; \ + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ + TARGET_NO_SUM_IN_TOC = 0; \ } \ } \ } \ diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 078c9387350..12a602b78c4 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -207,7 +207,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return INT_REGNO_P (REGNO (op)); @@ -1121,9 +1121,16 @@ GET_MODE (XEXP (op, 0))), 1")))) +;; Return 1 if OP is a valid comparison operator for "cbranch" instructions. 
+;; If we're assuming that FP operations cannot generate user-visible traps, +;; then on e500 we can use the ordered-signaling instructions to implement +;; the unordered-quiet FP comparison predicates modulo a reversal. (define_predicate "rs6000_cbranch_operator" (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS") - (match_operand 0 "ordered_comparison_operator") + (if_then_else (match_test "flag_trapping_math") + (match_operand 0 "ordered_comparison_operator") + (ior (match_operand 0 "ordered_comparison_operator") + (match_code ("unlt,unle,ungt,unge")))) (match_operand 0 "comparison_operator"))) ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 7a80eff8924..1a5a709751d 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1234,10 +1234,23 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") +/* 1 argument VSX instructions added in ISA 2.07. */ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + /* 1 argument altivec instructions added in ISA 2.07. */ BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) /* 2 argument altivec instructions added in ISA 2.07. 
*/ BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) @@ -1245,6 +1258,8 @@ BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) @@ -1255,6 +1270,27 @@ BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + /* Vector comparison instructions added in ISA 2.07. 
*/ BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) @@ -1268,13 +1304,29 @@ BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) /* ISA 2.07 vector overloaded 1 argument functions. */ BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") /* ISA 2.07 vector overloaded 2 argument functions. */ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 801b5bb225d..593b772ebd1 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -3515,6 +3515,404 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Power8 vector overloaded functions. 
*/ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { 
P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { 
P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VMINSD, 
P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, 
RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { 
P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, 0, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + /* Crypto builtins. */ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, @@ -3822,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); @@ -3901,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { 
+ stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index e143a4ca203..d528a4fd87a 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -30,21 +30,22 @@ /* Processor type. Order must match cpu attribute in MD file. */ enum processor_type { - PROCESSOR_RS64A, - PROCESSOR_MPCCORE, - PROCESSOR_PPC403, - PROCESSOR_PPC405, - PROCESSOR_PPC440, - PROCESSOR_PPC476, PROCESSOR_PPC601, PROCESSOR_PPC603, PROCESSOR_PPC604, PROCESSOR_PPC604e, PROCESSOR_PPC620, PROCESSOR_PPC630, + PROCESSOR_PPC750, PROCESSOR_PPC7400, PROCESSOR_PPC7450, + + PROCESSOR_PPC403, + PROCESSOR_PPC405, + PROCESSOR_PPC440, + PROCESSOR_PPC476, + PROCESSOR_PPC8540, PROCESSOR_PPC8548, PROCESSOR_PPCE300C2, @@ -53,16 +54,21 @@ enum processor_type PROCESSOR_PPCE500MC64, PROCESSOR_PPCE5500, PROCESSOR_PPCE6500, + PROCESSOR_POWER4, PROCESSOR_POWER5, PROCESSOR_POWER6, PROCESSOR_POWER7, + PROCESSOR_POWER8, + + PROCESSOR_RS64A, + PROCESSOR_MPCCORE, PROCESSOR_CELL, PROCESSOR_PPCA2, - PROCESSOR_TITAN, - PROCESSOR_POWER8 + PROCESSOR_TITAN }; + /* FP processor type. 
*/ enum fpu_type_t { diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a41ed..02836ecea6d 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index cb6876051d7..55273ab81bd 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); don't link in rs6000-c.c, so we can't call it directly. */ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. 
*/ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + +/* Direct moves to/from vsx/gpr registers that need an additional register to + do the move. */ +static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES]; +static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES]; +static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES]; + /* Target cpu costs. */ @@ -1042,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1587,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return ALTIVEC_REGNO_P (last_regno); } - /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode - can only go in GPRs. */ + /* Allow TImode in all VSX registers if the user asked for it. */ if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) return 1; @@ -2154,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. 
*/ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else + { + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; + } + /* Precalculate vector information, this must be set up before the rs6000_hard_regno_nregs_internal below. */ for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -2305,7 +2374,15 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWZX) rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; - /* Set up the reload helper functions. */ + /* Setup the direct move combinations. */ + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + reload_fpr_gpr[m] = CODE_FOR_nothing; + reload_gpr_vsx[m] = CODE_FOR_nothing; + reload_vsx_gpr[m] = CODE_FOR_nothing; + } + + /* Set up the reload helper and direct move functions. 
*/ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) @@ -2329,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store; rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load; } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti; + reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df; + reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di; + reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf; + reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si; + reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi; + reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi; + reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf; + + reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti; + reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df; + reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di; + reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf; + reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si; + reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi; + reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi; + reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf; + } + else + { + reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi; + reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd; + reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf; + } + } } else { @@ -2356,6 +2469,13 @@ 
rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store; @@ -2916,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; } + /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, + silently turn off quad memory mode. */ + if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) + warning (0, N_("-mquad-memory requires 64-bit mode")); + + rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -3042,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p) /* Place FP constants in the constant pool instead of TOC if section anchors enabled. 
*/ - if (flag_section_anchors) + if (flag_section_anchors + && !global_options_set.x_TARGET_NO_FP_IN_TOC) TARGET_NO_FP_IN_TOC = 1; if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) @@ -4082,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); switch (fn) { + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZLL: + case BUILT_IN_CLZL: + case BUILT_IN_CLZ: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZD]; + } + break; case BUILT_IN_COPYSIGN: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -4097,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; break; + case BUILT_IN_POPCOUNTIMAX: + case BUILT_IN_POPCOUNTLL: + case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNT: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD]; + } + break; case BUILT_IN_SQRT: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -4955,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals) { rtx freg = gen_reg_rtx (V4SFmode); rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0)); + 
rtx cvt = ((TARGET_XSCVDPSPN) + ? gen_vsx_xscvdpspn_scalar (freg, sreg) + : gen_vsx_xscvdpsp_scalar (freg, sreg)); - emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg)); + emit_insn (cvt); emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx)); } else @@ -5339,6 +5505,72 @@ gpr_or_gpr_p (rtx op0, rtx op1) || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); } +/* Return true if this is a move direct operation between GPR registers and + floating point/VSX registers. */ + +bool +direct_move_p (rtx op0, rtx op1) +{ + int regno0, regno1; + + if (!REG_P (op0) || !REG_P (op1)) + return false; + + if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) + return false; + + regno0 = REGNO (op0); + regno1 = REGNO (op1); + if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) + return false; + + if (INT_REGNO_P (regno0)) + return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); + + else if (INT_REGNO_P (regno1)) + { + if (TARGET_MFPGPR && FP_REGNO_P (regno0)) + return true; + + else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) + return true; + } + + return false; +} + +/* Return true if this is a load or store quad operation. */ + +bool +quad_load_store_p (rtx op0, rtx op1) +{ + bool ret; + + if (!TARGET_QUAD_MEMORY) + ret = false; + + else if (REG_P (op0) && MEM_P (op1)) + ret = (quad_int_reg_operand (op0, GET_MODE (op0)) + && quad_memory_operand (op1, GET_MODE (op1)) + && !reg_overlap_mentioned_p (op0, op1)); + + else if (MEM_P (op0) && REG_P (op1)) + ret = (quad_memory_operand (op0, GET_MODE (op0)) + && quad_int_reg_operand (op1, GET_MODE (op1))); + + else + ret = false; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n========== quad_load_store, return %s\n", + ret ? "true" : "false"); + debug_rtx (gen_rtx_SET (VOIDmode, op0, op1)); + } + + return ret; +} + /* Given an address, return a constant offset term if one exists. 
*/ static rtx @@ -5474,91 +5706,102 @@ virtual_stack_registers_memory_p (rtx op) && regnum <= LAST_VIRTUAL_POINTER_REGISTER); } -/* Return true if memory accesses to OP are known to never straddle - a 32k boundary. */ +/* Return true if a MODE sized memory accesses to OP plus OFFSET + is known to not straddle a 32k boundary. */ static bool offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, enum machine_mode mode) { tree decl, type; - unsigned HOST_WIDE_INT dsize, dalign; + unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; if (GET_CODE (op) != SYMBOL_REF) return false; + dsize = GET_MODE_SIZE (mode); decl = SYMBOL_REF_DECL (op); if (!decl) { - if (GET_MODE_SIZE (mode) == 0) + if (dsize == 0) return false; /* -fsection-anchors loses the original SYMBOL_REF_DECL when replacing memory addresses with an anchor plus offset. We could find the decl by rummaging around in the block->objects VEC for the given offset but that seems like too much work. */ - dalign = 1; + dalign = BITS_PER_UNIT; if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) && SYMBOL_REF_ANCHOR_P (op) && SYMBOL_REF_BLOCK (op) != NULL) { struct object_block *block = SYMBOL_REF_BLOCK (op); - HOST_WIDE_INT lsb, mask; - /* Given the alignment of the block.. */ dalign = block->alignment; - mask = dalign / BITS_PER_UNIT - 1; - - /* ..and the combined offset of the anchor and any offset - to this block object.. */ offset += SYMBOL_REF_BLOCK_OFFSET (op); - lsb = offset & -offset; + } + else if (CONSTANT_POOL_ADDRESS_P (op)) + { + /* It would be nice to have get_pool_align().. */ + enum machine_mode cmode = get_pool_mode (op); - /* ..find how many bits of the alignment we know for the - object. 
*/ - mask &= lsb - 1; - dalign = mask + 1; + dalign = GET_MODE_ALIGNMENT (cmode); } - return dalign >= GET_MODE_SIZE (mode); } - - if (DECL_P (decl)) + else if (DECL_P (decl)) { - if (TREE_CODE (decl) == FUNCTION_DECL) - return true; + dalign = DECL_ALIGN (decl); - if (!DECL_SIZE_UNIT (decl)) - return false; + if (dsize == 0) + { + /* Allow BLKmode when the entire object is known to not + cross a 32k boundary. */ + if (!DECL_SIZE_UNIT (decl)) + return false; - if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) - return false; + if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) + return false; - dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); - if (dsize > 32768) - return false; + dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + if (dsize > 32768) + return false; - dalign = DECL_ALIGN_UNIT (decl); - return dalign >= dsize; + return dalign / BITS_PER_UNIT >= dsize; + } } + else + { + type = TREE_TYPE (decl); - type = TREE_TYPE (decl); + dalign = TYPE_ALIGN (type); + if (CONSTANT_CLASS_P (decl)) + dalign = CONSTANT_ALIGNMENT (decl, dalign); + else + dalign = DATA_ALIGNMENT (decl, dalign); - if (TREE_CODE (decl) == STRING_CST) - dsize = TREE_STRING_LENGTH (decl); - else if (TYPE_SIZE_UNIT (type) - && host_integerp (TYPE_SIZE_UNIT (type), 1)) - dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); - else - return false; - if (dsize > 32768) - return false; + if (dsize == 0) + { + /* BLKmode, check the entire object. */ + if (TREE_CODE (decl) == STRING_CST) + dsize = TREE_STRING_LENGTH (decl); + else if (TYPE_SIZE_UNIT (type) + && host_integerp (TYPE_SIZE_UNIT (type), 1)) + dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + else + return false; + if (dsize > 32768) + return false; + + return dalign / BITS_PER_UNIT >= dsize; + } + } + + /* Find how many bits of the alignment we know for this access. 
*/ + mask = dalign / BITS_PER_UNIT - 1; + lsb = offset & -offset; + mask &= lsb - 1; + dalign = mask + 1; - dalign = TYPE_ALIGN (type); - if (CONSTANT_CLASS_P (decl)) - dalign = CONSTANT_ALIGNMENT (decl, dalign); - else - dalign = DATA_ALIGNMENT (decl, dalign); - dalign /= BITS_PER_UNIT; return dalign >= dsize; } @@ -5846,8 +6089,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) return force_reg (Pmode, XEXP (x, 0)); + /* For TImode with load/store quad, restrict addresses to just a single + pointer, so it works with both GPRs and VSX registers. */ /* Make sure both operands are registers. */ - else if (GET_CODE (x) == PLUS) + else if (GET_CODE (x) == PLUS + && (mode != TImode || !TARGET_QUAD_MEMORY)) return gen_rtx_PLUS (Pmode, force_reg (Pmode, XEXP (x, 0)), force_reg (Pmode, XEXP (x, 1))); @@ -6504,7 +6750,6 @@ use_toc_relative_ref (rtx sym) && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), get_pool_mode (sym))) || (TARGET_CMODEL == CMODEL_MEDIUM - && !CONSTANT_POOL_ADDRESS_P (sym) && SYMBOL_REF_LOCAL_P (sym))); } @@ -6802,6 +7047,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; + /* For TImode, if we have load/store quad, only allow register indirect + addresses. This will allow the values to go in either GPRs or VSX + registers without reloading. The vector types would tend to go into VSX + registers, so we allow REG+REG, while TImode seems somewhat split, in that + some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY) + return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! reg_ok_strict && reg_offset_p @@ -9314,20 +9566,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode, if (! 
no_rtl && first_reg_offset < GP_ARG_NUM_REG && cfun->va_list_gpr_size) { - int nregs = GP_ARG_NUM_REG - first_reg_offset; + int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset; if (va_list_gpr_counter_field) - { - /* V4 va_list_gpr_size counts number of registers needed. */ - if (nregs > cfun->va_list_gpr_size) - nregs = cfun->va_list_gpr_size; - } + /* V4 va_list_gpr_size counts number of registers needed. */ + n_gpr = cfun->va_list_gpr_size; else - { - /* char * va_list instead counts number of bytes needed. */ - if (nregs > cfun->va_list_gpr_size / reg_size) - nregs = cfun->va_list_gpr_size / reg_size; - } + /* char * va_list instead counts number of bytes needed. */ + n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size; + + if (nregs > n_gpr) + nregs = n_gpr; mem = gen_rtx_MEM (BLKmode, plus_constant (Pmode, save_area, @@ -12857,6 +13106,7 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, { /* unsigned 1 argument functions. */ case CRYPTO_BUILTIN_VSBOX: + case P8V_BUILTIN_VGBBD: h.uns_p[0] = 1; h.uns_p[1] = 1; break; @@ -13947,29 +14197,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) return NULL_TREE; } -enum reload_reg_type { - GPR_REGISTER_TYPE, - VECTOR_REGISTER_TYPE, - OTHER_REGISTER_TYPE -}; +/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work + on traditional floating point registers, and the VMRGOW/VMRGEW instructions + only work on the traditional altivec registers, note if an altivec register + was choosen. 
*/ -static enum reload_reg_type -rs6000_reload_register_type (enum reg_class rclass) +static enum rs6000_reg_type +register_to_reg_type (rtx reg, bool *is_altivec) { - switch (rclass) + HOST_WIDE_INT regno; + enum reg_class rclass; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (!REG_P (reg)) + return NO_REG_TYPE; + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER) { - case GENERAL_REGS: - case BASE_REGS: - return GPR_REGISTER_TYPE; + if (!lra_in_progress && !reload_in_progress && !reload_completed) + return PSEUDO_REG_TYPE; - case FLOAT_REGS: - case ALTIVEC_REGS: - case VSX_REGS: - return VECTOR_REGISTER_TYPE; + regno = true_regnum (reg); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + return PSEUDO_REG_TYPE; + } - default: - return OTHER_REGISTER_TYPE; + gcc_assert (regno >= 0); + + if (is_altivec && ALTIVEC_REGNO_P (regno)) + *is_altivec = true; + + rclass = rs6000_regno_regclass[regno]; + return reg_class_to_reg_type[(int)rclass]; +} + +/* Helper function for rs6000_secondary_reload to return true if a move to a + different register classe is really a simple move. */ + +static bool +rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode) +{ + int size; + + /* Add support for various direct moves available. In this function, we only + look at cases where we don't need any extra registers, and one or more + simple move insns are issued. At present, 32-bit integers are not allowed + in FPR/VSX registers. Single precision binary floating is not a simple + move because we need to convert to the single precision memory layout. + The 4-byte SDmode can be moved. 
*/ + size = GET_MODE_SIZE (mode); + if (TARGET_DIRECT_MOVE + && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 + && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) + || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) + || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + return false; +} + +/* Power8 helper function for rs6000_secondary_reload, handle all of the + special direct moves that involve allocating an extra register, return the + insn code of the helper function if there is such a function or + CODE_FOR_nothing if not. */ + +static bool +rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + bool ret = false; + enum insn_code icode = CODE_FOR_nothing; + int cost = 0; + int size = GET_MODE_SIZE (mode); + + if (TARGET_POWERPC64) + { + if (size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. 
*/ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (mode == SFmode) + { + if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* xscvdpspn, mfvsrd, and. */ + icode = reload_gpr_vsx[(int)mode]; + } + + else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 2; /* mtvsrz, xscvspdpn. */ + icode = reload_vsx_gpr[(int)mode]; + } + } } + + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (!TARGET_POWERPC64 && size == 8) + { + /* Handle moving 64-bit values from GPRs to floating point registers on + power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit + values back together. Altivec register classes must be handled + specially since a different instruction is used, and the secondary + reload support requires a single instruction class in the scratch + register constraint. However, right now TFmode is not allowed in + Altivec registers, so the pattern will never match. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) + { + cost = 3; /* 2 mtvsrwz's, 1 fmrgow. 
*/ + icode = reload_fpr_gpr[(int)mode]; + } + } + + if (icode != CODE_FOR_nothing) + { + ret = true; + if (sri) + { + sri->icode = icode; + sri->extra_cost = cost; + } + } + + return ret; +} + +/* Return whether a move between two register classes can be done either + directly (simple move) or via a pattern that uses a single extra temporary + (using power8's direct move in this case. */ + +static bool +rs6000_secondary_reload_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + /* Fall back to load/store reloads if either type is not a register. */ + if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) + return false; + + /* If we haven't allocated registers yet, assume the move can be done for the + standard register types. */ + if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) + || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) + || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) + return true; + + /* Moves to the same set of registers is a simple move for non-specialized + registers. */ + if (to_type == from_type && IS_STD_REG_TYPE (to_type)) + return true; + + /* Check whether a simple move can be done directly. */ + if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) + { + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + return true; + } + + /* Now check if we can do it in a few steps. */ + return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, + altivec_p); } /* Inform reload about cases where moving X with a mode MODE to a register in @@ -13995,11 +14442,32 @@ rs6000_secondary_reload (bool in_p, bool default_p = false; sri->icode = CODE_FOR_nothing; - - /* Convert vector loads and stores into gprs to use an additional base - register. 
*/ icode = rs6000_vector_reload[mode][in_p != false]; - if (icode != CODE_FOR_nothing) + + if (REG_P (x) || register_operand (x, mode)) + { + enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; + bool altivec_p = (rclass == ALTIVEC_REGS); + enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); + + if (!in_p) + { + enum rs6000_reg_type exchange = to_type; + to_type = from_type; + from_type = exchange; + } + + if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, + altivec_p)) + { + icode = (enum insn_code)sri->icode; + default_p = false; + ret = NO_REGS; + } + } + + /* Handle vector moves with reload helper functions. */ + if (ret == ALL_REGS && icode != CODE_FOR_nothing) { ret = NO_REGS; sri->icode = CODE_FOR_nothing; @@ -14011,12 +14479,21 @@ rs6000_secondary_reload (bool in_p, /* Loads to and stores from gprs can do reg+offset, and wouldn't need an extra register in that case, but it would need an extra - register if the addressing is reg+reg or (reg+reg)&(-16). */ + register if the addressing is reg+reg or (reg+reg)&(-16). Special + case load/store quad. 
*/ if (rclass == GENERAL_REGS || rclass == BASE_REGS) { - if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) + if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY + && GET_MODE_SIZE (mode) == 16 + && quad_memory_operand (x, mode)) + { + sri->icode = icode; + sri->extra_cost = 2; + } + + else if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (PTImode, addr, + false, true)) { sri->icode = icode; /* account for splitting the loads, and converting the @@ -14030,7 +14507,7 @@ rs6000_secondary_reload (bool in_p, else if ((rclass == FLOAT_REGS || rclass == NO_REGS) && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && (legitimate_indirect_address_p (addr, false) - || legitimate_indirect_address_p (XEXP (addr, 0), false) + || legitimate_indirect_address_p (addr, false) || rs6000_legitimate_offset_address_p (mode, addr, false, true))) @@ -14082,12 +14559,12 @@ rs6000_secondary_reload (bool in_p, else { enum reg_class xclass = REGNO_REG_CLASS (regno); - enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass); - enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass); + enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass]; + enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass]; /* If memory is needed, use default_secondary_reload to create the stack slot. 
*/ - if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE) + if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1)) default_p = true; else ret = NO_REGS; @@ -14097,7 +14574,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { @@ -14136,7 +14613,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (!TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { @@ -14699,42 +15176,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) set and vice versa. */ static bool -rs6000_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - if (class1 == class2) - return false; - - /* Under VSX, there are 3 register classes that values could be in (VSX_REGS, - ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy - between these classes. But we need memory for other things that can go in - FLOAT_REGS like SFmode. 
*/ - if (TARGET_VSX - && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode)) - && (class1 == VSX_REGS || class1 == ALTIVEC_REGS - || class1 == FLOAT_REGS)) - return (class2 != VSX_REGS && class2 != ALTIVEC_REGS - && class2 != FLOAT_REGS); + enum rs6000_reg_type from_type, to_type; + bool altivec_p = ((from_class == ALTIVEC_REGS) + || (to_class == ALTIVEC_REGS)); - if (class1 == VSX_REGS || class2 == VSX_REGS) - return true; - - if (class1 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + /* If a simple/direct move is available, we don't need secondary memory */ + from_type = reg_class_to_reg_type[(int)from_class]; + to_type = reg_class_to_reg_type[(int)to_class]; - if (class2 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + if (rs6000_secondary_reload_move (to_type, from_type, mode, + (secondary_reload_info *)0, altivec_p)) + return false; - if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS) + /* If we have a floating point or vector register class, we need to use + memory to transfer the data. */ + if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) return true; return false; @@ -14742,17 +15202,19 @@ rs6000_secondary_memory_needed (enum reg_class class1, /* Debug version of rs6000_secondary_memory_needed. */ static bool -rs6000_debug_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_debug_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - bool ret = rs6000_secondary_memory_needed (class1, class2, mode); + bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); fprintf (stderr, - "rs6000_secondary_memory_needed, return: %s, class1 = %s, " - "class2 = %s, mode = %s\n", - ret ? 
"true" : "false", reg_class_names[class1], - reg_class_names[class2], GET_MODE_NAME (mode)); + "rs6000_secondary_memory_needed, return: %s, from_class = %s, " + "to_class = %s, mode = %s\n", + ret ? "true" : "false", + reg_class_names[from_class], + reg_class_names[to_class], + GET_MODE_NAME (mode)); return ret; } @@ -14958,6 +15420,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from, return ret; } +/* Return a string to do a move operation of 128 bits of data. */ + +const char * +rs6000_output_move_128bit (rtx operands[]) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + enum machine_mode mode = GET_MODE (dest); + int dest_regno; + int src_regno; + bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p; + + if (REG_P (dest)) + { + dest_regno = REGNO (dest); + dest_gpr_p = INT_REGNO_P (dest_regno); + dest_fp_p = FP_REGNO_P (dest_regno); + dest_av_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_av_p; + } + else + { + dest_regno = -1; + dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false; + } + + if (REG_P (src)) + { + src_regno = REGNO (src); + src_gpr_p = INT_REGNO_P (src_regno); + src_fp_p = FP_REGNO_P (src_regno); + src_av_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_av_p; + } + else + { + src_regno = -1; + src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false; + } + + /* Register moves. */ + if (dest_regno >= 0 && src_regno >= 0) + { + if (dest_gpr_p) + { + if (src_gpr_p) + return "#"; + + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) + return "#"; + } + + else if (TARGET_VSX && dest_vsx_p) + { + if (src_vsx_p) + return "xxlor %x0,%x1,%x1"; + + else if (TARGET_DIRECT_MOVE && src_gpr_p) + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p && src_av_p) + return "vor %0,%1,%1"; + + else if (dest_fp_p && src_fp_p) + return "#"; + } + + /* Loads. 
*/ + else if (dest_regno >= 0 && MEM_P (src)) + { + if (dest_gpr_p) + { + if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 + && quad_memory_operand (src, mode) + && !reg_overlap_mentioned_p (dest, src)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "lvx %0,%y1"; + + else if (TARGET_VSX && dest_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "lxvw4x %x0,%y1"; + else + return "lxvd2x %x0,%y1"; + } + + else if (TARGET_ALTIVEC && dest_av_p) + return "lvx %0,%y1"; + + else if (dest_fp_p) + return "#"; + } + + /* Stores. */ + else if (src_regno >= 0 && MEM_P (dest)) + { + if (src_gpr_p) + { + if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 + && quad_memory_operand (dest, mode)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && src_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "stvx %1,%y0"; + + else if (TARGET_VSX && src_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "stxvw4x %x1,%y0"; + else + return "stxvd2x %x1,%y0"; + } + + else if (TARGET_ALTIVEC && src_av_p) + return "stvx %1,%y0"; + + else if (src_fp_p) + return "#"; + } + + /* Constants. 
*/ + else if (dest_regno >= 0 + && (GET_CODE (src) == CONST_INT + || GET_CODE (src) == CONST_DOUBLE + || GET_CODE (src) == CONST_VECTOR)) + { + if (dest_gpr_p) + return "#"; + + else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) + return "xxlxor %x0,%x0,%x0"; + + else if (TARGET_ALTIVEC && dest_av_p) + return output_vec_const_move (operands); + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n===== Bad 128 bit move:\n"); + debug_rtx (gen_rtx_SET (VOIDmode, dest, src)); + } + + gcc_unreachable (); +} + + /* Given a comparison operation, return the bit number in CCR to test. We know this is a valid comparison. @@ -15674,11 +16300,6 @@ print_operand (FILE *file, rtx x, int code) TOCs and the like. */ gcc_assert (GET_CODE (x) == SYMBOL_REF); - /* Mark the decl as referenced so that cgraph will output the - function. */ - if (SYMBOL_REF_DECL (x)) - mark_decl_referenced (SYMBOL_REF_DECL (x)); - /* For macho, check to see if we need a stub. */ if (TARGET_MACHO) { @@ -16087,16 +16708,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) { rtx cmp, or_result, compare_result2; enum machine_mode op_mode = GET_MODE (op0); + bool reverse_p; if (op_mode == VOIDmode) op_mode = GET_MODE (op1); + /* First reverse the condition codes that aren't directly supported. */ + switch (code) + { + case NE: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + code = reverse_condition_maybe_unordered (code); + reverse_p = true; + break; + + case EQ: + case LT: + case LE: + case GT: + case GE: + reverse_p = false; + break; + + default: + gcc_unreachable (); + } + /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only. This explains the following mess. 
*/ switch (code) { - case EQ: case UNEQ: case NE: case LTGT: + case EQ: switch (op_mode) { case SFmode: @@ -16122,7 +16768,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case GT: case GTU: case UNGT: case UNGE: case GE: case GEU: + case GT: + case GE: switch (op_mode) { case SFmode: @@ -16148,7 +16795,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case LT: case LTU: case UNLT: case UNLE: case LE: case LEU: + case LT: + case LE: switch (op_mode) { case SFmode: @@ -16173,24 +16821,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) gcc_unreachable (); } break; + default: gcc_unreachable (); } /* Synthesize LE and GE from LT/GT || EQ. */ - if (code == LE || code == GE || code == LEU || code == GEU) + if (code == LE || code == GE) { emit_insn (cmp); - switch (code) - { - case LE: code = LT; break; - case GE: code = GT; break; - case LEU: code = LT; break; - case GEU: code = GT; break; - default: gcc_unreachable (); - } - compare_result2 = gen_reg_rtx (CCFPmode); /* Do the EQ. */ @@ -16217,23 +16857,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) default: gcc_unreachable (); } + emit_insn (cmp); /* OR them together. */ or_result = gen_reg_rtx (CCFPmode); cmp = gen_e500_cr_ior_compare (or_result, compare_result, - compare_result2); + compare_result2); compare_result = or_result; - code = EQ; - } - else - { - if (code == NE || code == LTGT) - code = NE; - else - code = EQ; } + code = reverse_p ? NE : EQ; + emit_insn (cmp); } else @@ -17113,7 +17748,8 @@ emit_unlikely_jump (rtx cond, rtx label) } /* A subroutine of the atomic operation splitters. Emit a load-locked - instruction in MODE. */ + instruction in MODE. For QI/HImode, possibly use a pattern than includes + the zero_extend operation. 
*/ static void emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) @@ -17122,12 +17758,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) switch (mode) { + case QImode: + fn = gen_load_lockedqi; + break; + case HImode: + fn = gen_load_lockedhi; + break; case SImode: - fn = gen_load_lockedsi; + if (GET_MODE (mem) == QImode) + fn = gen_load_lockedqi_si; + else if (GET_MODE (mem) == HImode) + fn = gen_load_lockedhi_si; + else + fn = gen_load_lockedsi; break; case DImode: fn = gen_load_lockeddi; break; + case TImode: + fn = gen_load_lockedti; + break; default: gcc_unreachable (); } @@ -17144,12 +17794,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) switch (mode) { + case QImode: + fn = gen_store_conditionalqi; + break; + case HImode: + fn = gen_store_conditionalhi; + break; case SImode: fn = gen_store_conditionalsi; break; case DImode: fn = gen_store_conditionaldi; break; + case TImode: + fn = gen_store_conditionalti; + break; default: gcc_unreachable (); } @@ -17246,8 +17905,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) shift = gen_reg_rtx (SImode); addr = gen_lowpart (SImode, addr); emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask))); - shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), - shift, 1, OPTAB_LIB_WIDEN); + if (WORDS_BIG_ENDIAN) + shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), + shift, 1, OPTAB_LIB_WIDEN); *pshift = shift; /* Mask for insertion. 
*/ @@ -17295,7 +17955,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) { rtx boolval, retval, mem, oldval, newval, cond; rtx label1, label2, x, mask, shift; - enum machine_mode mode; + enum machine_mode mode, orig_mode; enum memmodel mod_s, mod_f; bool is_weak; @@ -17307,22 +17967,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) is_weak = (INTVAL (operands[5]) != 0); mod_s = (enum memmodel) INTVAL (operands[6]); mod_f = (enum memmodel) INTVAL (operands[7]); - mode = GET_MODE (mem); + orig_mode = mode = GET_MODE (mem); mask = shift = NULL_RTX; if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask OLDVAL into position with the word. */ + /* Before power8, we didn't have access to lbarx/lharx, so generate a + lwarx and shift/mask operations. With power8, we need to do the + comparison in SImode, but the store is still done in QI/HImode. */ oldval = convert_modes (SImode, mode, oldval, 1); - oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); - /* Shift and mask NEWVAL into position within the word. */ - newval = convert_modes (SImode, mode, newval, 1); - newval = expand_simple_binop (SImode, ASHIFT, newval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (!TARGET_SYNC_HI_QI) + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask OLDVAL into position with the word. */ + oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + /* Shift and mask NEWVAL into position within the word. */ + newval = convert_modes (SImode, mode, newval, 1); + newval = expand_simple_binop (SImode, ASHIFT, newval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + } /* Prepare to adjust the return value. 
*/ retval = gen_reg_rtx (SImode); @@ -17351,7 +18018,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) } cond = gen_reg_rtx (CCmode); - x = gen_rtx_COMPARE (CCmode, x, oldval); + /* If we have TImode, synthesize a comparison. */ + if (mode != TImode) + x = gen_rtx_COMPARE (CCmode, x, oldval); + else + { + rtx xor1_result = gen_reg_rtx (DImode); + rtx xor2_result = gen_reg_rtx (DImode); + rtx or_result = gen_reg_rtx (DImode); + rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); + rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); + rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); + rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); + + emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); + emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); + emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); + x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); + } + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); @@ -17361,7 +18046,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (mask) x = rs6000_mask_atomic_subword (retval, newval, mask); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (orig_mode, cond, mem, x); if (!is_weak) { @@ -17379,6 +18064,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (shift) rs6000_finish_atomic_subword (operands[1], retval, shift); + else if (mode != GET_MODE (operands[1])) + convert_move (operands[1], retval, 1); /* In all cases, CR0 contains EQ on success, and NE on failure. 
*/ x = gen_rtx_EQ (SImode, cond, const0_rtx); @@ -17402,7 +18089,7 @@ rs6000_expand_atomic_exchange (rtx operands[]) mode = GET_MODE (mem); mask = shift = NULL_RTX; - if (mode == QImode || mode == HImode) + if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) { mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); @@ -17451,53 +18138,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, { enum memmodel model = (enum memmodel) INTVAL (model_rtx); enum machine_mode mode = GET_MODE (mem); + enum machine_mode store_mode = mode; rtx label, x, cond, mask, shift; rtx before = orig_before, after = orig_after; mask = shift = NULL_RTX; + /* On power8, we want to use SImode for the operation. On previous systems, + use the operation in a subword and shift/mask to get the proper byte or + halfword. */ if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask VAL into position with the word. */ - val = convert_modes (SImode, mode, val, 1); - val = expand_simple_binop (SImode, ASHIFT, val, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (TARGET_SYNC_HI_QI) + { + val = convert_modes (SImode, mode, val, 1); - switch (code) + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + mode = SImode; + } + else { - case IOR: - case XOR: - /* We've already zero-extended VAL. That is sufficient to - make certain that it does not affect other bits. */ - mask = NULL; - break; + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - case AND: - /* If we make certain that all of the other bits in VAL are - set, that will be sufficient to not affect other bits. */ - x = gen_rtx_NOT (SImode, mask); - x = gen_rtx_IOR (SImode, x, val); - emit_insn (gen_rtx_SET (VOIDmode, val, x)); - mask = NULL; - break; + /* Shift and mask VAL into position with the word. 
*/ + val = convert_modes (SImode, mode, val, 1); + val = expand_simple_binop (SImode, ASHIFT, val, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); - case NOT: - case PLUS: - case MINUS: - /* These will all affect bits outside the field and need - adjustment via MASK within the loop. */ - break; + switch (code) + { + case IOR: + case XOR: + /* We've already zero-extended VAL. That is sufficient to + make certain that it does not affect other bits. */ + mask = NULL; + break; - default: - gcc_unreachable (); - } + case AND: + /* If we make certain that all of the other bits in VAL are + set, that will be sufficient to not affect other bits. */ + x = gen_rtx_NOT (SImode, mask); + x = gen_rtx_IOR (SImode, x, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + mask = NULL; + break; - /* Prepare to adjust the return value. */ - before = gen_reg_rtx (SImode); - if (after) - after = gen_reg_rtx (SImode); - mode = SImode; + case NOT: + case PLUS: + case MINUS: + /* These will all affect bits outside the field and need + adjustment via MASK within the loop. */ + break; + + default: + gcc_unreachable (); + } + + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + store_mode = mode = SImode; + } } mem = rs6000_pre_atomic_barrier (mem, model); @@ -17530,9 +18234,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, NULL_RTX, 1, OPTAB_LIB_WIDEN); x = rs6000_mask_atomic_subword (before, x, mask); } + else if (store_mode != mode) + x = convert_modes (store_mode, mode, x, 1); cond = gen_reg_rtx (CCmode); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (store_mode, cond, mem, x); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); emit_unlikely_jump (x, label); @@ -17541,11 +18247,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, if (shift) { + /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and + then do the calcuations in a SImode register. 
*/ if (orig_before) rs6000_finish_atomic_subword (orig_before, before, shift); if (orig_after) rs6000_finish_atomic_subword (orig_after, after, shift); } + else if (store_mode != mode) + { + /* QImode/HImode on machines with lbarx/lharx where we do the native + operation and then do the calcuations in a SImode register. */ + if (orig_before) + convert_move (orig_before, before, 1); + if (orig_after) + convert_move (orig_after, after, 1); + } else if (orig_after && after != orig_after) emit_move_insn (orig_after, after); } @@ -22560,7 +23277,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode) fputs (DOUBLE_INT_ASM_OP, file); else fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); - fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + if (WORDS_BIG_ENDIAN) + fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + else + fprintf (file, "0x%lx\n", l & 0xffffffff); return; } else @@ -27197,26 +27917,31 @@ bool altivec_expand_vec_perm_const (rtx operands[4]) { struct altivec_perm_insn { + HOST_WIDE_INT mask; enum insn_code impl; unsigned char perm[16]; }; static const struct altivec_perm_insn patterns[] = { - { CODE_FOR_altivec_vpkuhum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum, { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, - { CODE_FOR_altivec_vpkuwum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum, { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, - { CODE_FOR_altivec_vmrghb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb, { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, - { CODE_FOR_altivec_vmrghh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh, { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, - { CODE_FOR_altivec_vmrghw, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw, { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, - { CODE_FOR_altivec_vmrglb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb, { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } 
}, - { CODE_FOR_altivec_vmrglh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh, { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, - { CODE_FOR_altivec_vmrglw, - { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } } + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, + { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, + { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } }; unsigned int i, j, elt, which; @@ -27316,6 +28041,9 @@ altivec_expand_vec_perm_const (rtx operands[4]) { bool swapped; + if ((patterns[j].mask & rs6000_isa_flags) == 0) + continue; + elt = patterns[j].perm[0]; if (perm[0] == elt) swapped = false; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 76f3bf99250..021e72a80e3 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -715,6 +715,11 @@ extern unsigned char rs6000_recip_bits[]; instructions for them. Might as well be consistent with bits and bytes. */ #define WORDS_BIG_ENDIAN 1 +/* This says that for the IBM long double the larger magnitude double + comes first. It's really a two element double array, and arrays + don't index differently between little- and big-endian. */ +#define LONG_DOUBLE_LARGE_FIRST 1 + #define MAX_BITS_PER_WORD 64 /* Width of a word, in units (bytes). */ @@ -1114,10 +1119,10 @@ extern unsigned rs6000_pointer_size; #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N) /* Alternate name for any vector register supporting logical operations, no - matter which instruction set(s) are available. Under VSX, we allow GPRs as - well as vector registers on 64-bit systems. We don't allow 32-bit systems, - due to the number of registers involved, and the number of instructions to - load/store the values.. */ + matter which instruction set(s) are available. 
For 64-bit mode, we also + allow logical operations in the GPRS. This is to allow atomic quad word + builtins not to need the VSX registers for lqarx/stqcx. It also helps with + __int128_t arguments that are passed in GPRs. */ #define VLOGICAL_REGNO_P(N) \ (ALTIVEC_REGNO_P (N) \ || (TARGET_VSX && FP_REGNO_P (N)) \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 013a0e38551..7fe23bff351 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -127,6 +127,13 @@ UNSPEC_LFIWZX UNSPEC_FCTIWUZ UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_FUSION_GPR ]) ;; @@ -166,9 +173,14 @@ (const_int 4))) ;; Processor type -- this attribute must exactly match the processor_type -;; enumeration in rs6000.h. - -(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8" +;; enumeration in rs6000-opts.h. +(define_attr "cpu" + "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630, + ppc750,ppc7400,ppc7450, + ppc403,ppc405,ppc440,ppc476, + ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500, + power4,power5,power6,power7,power8, + rs64a,mpccore,cell,ppca2,titan" (const (symbol_ref "rs6000_cpu_attr"))) @@ -227,6 +239,12 @@ ; extend modes for DImode (define_mode_iterator QHSI [QI HI SI]) +; QImode or HImode for small atomic ops +(define_mode_iterator QHI [QI HI]) + +; HImode or SImode for sign extended fusion ops +(define_mode_iterator HSI [HI SI]) + ; SImode or DImode, even if DImode doesn't fit in GPRs. 
(define_mode_iterator SDI [SI DI]) @@ -268,6 +286,15 @@ (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "")]) + ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") (DF "") @@ -284,11 +311,16 @@ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) +(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) + +; Definitions for 32-bit fpr direct move +(define_mode_attr f32_dm [(SF "wn") (SD "wm")]) ; These modes do not fit in integer registers in 32-bit mode. 
; but on e500v2, the gpr are 64 bit registers @@ -368,7 +400,7 @@ (define_insn "*zero_extend<mode>di2_internal1" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)" "@ l<wd>z%U1%X1 %0,%1 rldicl %0,%1,0,<dbits>" @@ -434,6 +466,29 @@ (const_int 0)))] "") +(define_insn "*zero_extendsidi2_lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWZX" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + mtvsrwz %x0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_u") + (const_string "load"))) + (const_string "*") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + (define_insn "extendqidi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -581,10 +636,33 @@ "TARGET_POWERPC64" "") -(define_insn "" +(define_insn "*extendsidi2_lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWAX" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + mtvsrwa %x0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_u") + (const_string "load_ext"))) + (const_string "exts") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + 
+(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] - "TARGET_POWERPC64 && rs6000_gen_cell_microcode" + "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 extsw %0,%1" @@ -598,7 +676,7 @@ (const_string "load_ext"))) (const_string "exts")])]) -(define_insn "" +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64 && !rs6000_gen_cell_microcode" @@ -2035,7 +2113,9 @@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] "TARGET_CMPB && TARGET_POPCNTB" - "prty<wd> %0,%1") + "prty<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "parity<mode>2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -4316,7 +4396,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -4348,7 +4428,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -5104,6 +5184,41 @@ "frsqrtes %0,%1" [(set_attr "type" "fp")]) +;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in +;; builtins.c and optabs.c that are not correct for IBM long double +;; when little-endian. 
+(define_expand "signbittf2" + [(set (match_dup 2) + (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (set (match_dup 3) + (subreg:DI (match_dup 2) 0)) + (set (match_dup 4) + (match_dup 5)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (match_dup 6))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64) + { + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63)); + operands[6] = gen_rtx_SUBREG (SImode, operands[4], + WORDS_BIG_ENDIAN ? 4 : 0); + } + else + { + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_rtx_SUBREG (SImode, operands[3], + WORDS_BIG_ENDIAN ? 0 : 4); + operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31)); + } +}) + (define_expand "copysign<mode>3" [(set (match_dup 3) (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) @@ -5553,12 +5668,15 @@ ; We don't define lfiwax/lfiwzx with the normal definition, because we ; don't want to support putting SImode in FPR registers. (define_insn "lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWAX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" - "lfiwax %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1" + [(set_attr "type" "fpload,fpload,mffgpr")]) ; This split must be run before register allocation because it allocates the ; memory slot that is needed to move values to/from the FPR. 
We don't allocate @@ -5580,7 +5698,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, false); else { @@ -5629,12 +5748,15 @@ (set_attr "type" "fpload")]) (define_insn "lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWZX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" - "lfiwzx %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1" + [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") @@ -5651,7 +5773,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, true); else { @@ -5942,7 +6065,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -6036,7 +6159,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -8285,6 +8408,18 @@ (compare:CC (match_dup 0) (const_int 0)))] "") + +;; Eqv operation. 
+(define_insn "*eqv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR + (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] + "" + "eqv %0,%1,%2" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + ;; Now define ways of moving data around. @@ -8490,7 +8625,7 @@ cmp<wd>i %2,%0,0 mr. %0,%1 #" - [(set_attr "type" "cmp,compare,cmp") + [(set_attr "type" "cmp,fast_compare,cmp") (set_attr "length" "4,4,8")]) (define_split @@ -8680,8 +8815,8 @@ }") (define_insn "mov<mode>_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -8694,6 +8829,10 @@ xxlxor %x0,%x0,%x0 <f32_li> <f32_si> + <f32_lv> + <f32_sv> + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 mt%0 %1 mf%1 %0 nop @@ -8732,16 +8871,20 @@ (match_test "update_address_mem (operands[0], VOIDmode)") (const_string "fpstore_u") (const_string "fpstore"))) + (const_string "fpload") + (const_string "fpstore") + (const_string "mftgpr") + (const_string "mffgpr") (const_string "mtjmpr") (const_string "mfjmpr") (const_string "*") (const_string "*") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")]) (define_insn "*mov<mode>_softfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") - (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))] + (match_operand:FMOVE32 1 "input_operand" 
"r,r,h,m,r,I,L,G,Fn,0"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" @@ -8954,8 +9097,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov<mode>_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -8980,7 +9123,9 @@ # # mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9038,8 +9183,10 @@ (const_string "*") (const_string "*") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov<mode>_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") @@ -9154,8 +9301,8 @@ "&& reload_completed" [(pc)] { - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word), operands[1]); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word), @@ -9384,8 +9531,8 @@ && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -9419,6 +9566,216 @@ }) +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is set up for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependency between the two, but we could potentially +;; schedule other instructions between the two instructions. TFmode is +;; currently limited to traditional FPR registers. If/when this is changed, we +;; will need to revisit %L to make sure it works with VSX registers, or add an +;; %x version of %L.
+ +(define_insn "p8_fmrgow_<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%L1" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_mtvsrwz_1" + [(set (match_operand:TF 0 "register_operand" "=d") + (unspec:TF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrwz_2" + [(set (match_operand:TF 0 "register_operand" "+d") + (unspec:TF [(match_dup 0) + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=ws") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_<mode> (dest, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_1" + [(set (match_operand:TF 0 "register_operand" "=ws") + (unspec:TF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrd_2" + [(set (match_operand:TF 0 "register_operand" "+ws") + (unspec:TF 
[(match_dup 0) + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_xxpermdi_<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")] + UNSPEC_P8V_XXPERMDI))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "xxpermdi %x0,%1,%L1,0" + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "reload_vsx_from_gpr<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=ws"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DImode, src); + rtx gpr_lo_reg = gen_lowpart (DImode, src); + + emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a VSX from a GPR register. Because scalar floating point +;; type is stored internally as double precision in the VSX registers, we have +;; to convert it from the vector format. 
+ +(define_insn_and_split "reload_vsx_from_gprsf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0); + rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_move_insn (op0_di, op2); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "two")]) + +;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a +;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value, +;; and then doing a move of that. 
+(define_insn "p8_mfvsrd_3_<mode>" + [(set (match_operand:DF 0 "register_operand" "=r") + (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_gpr_from_vsx<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DFmode, dest); + rtx gpr_lo_reg = gen_lowpart (DFmode, dest); + + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src)); + emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3))); + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a GPR from a VSX register. Because scalar floating point +;; type is stored internally as double precision, we have to convert it to the +;; vector format. 
+ +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Next come the multi-word integer load and store and the load and store ;; multiple insns. @@ -9467,7 +9824,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "") (match_operand:DI 1 "const_int_operand" ""))] "! 
TARGET_POWERPC64 && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 1))] " @@ -9485,13 +9843,14 @@ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") (match_operand:DIFD 1 "input_operand" ""))] "reload_completed && !TARGET_POWERPC64 - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -9513,7 +9872,9 @@ nop xxlxor %x0,%x0,%x0 mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9562,8 +9923,10 @@ (const_string "*") (const_string "vecsimple") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
@@ -9652,19 +10015,23 @@ (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r"))] - "(TARGET_POWERPC64 - && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode)) + [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") + (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" - "#" - [(set_attr "type" "store,load,*")]) +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "store,load,*,*") + (set_attr "length" "8")]) (define_split - [(set (match_operand:TI2 0 "gpc_reg_operand" "") + [(set (match_operand:TI2 0 "int_reg_operand" "") (match_operand:TI2 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (<MODE>mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9691,7 +10058,9 @@ [(set (match_operand:TI2 0 "nonimmediate_operand" "") (match_operand:TI2 1 "input_operand" ""))] "reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12554,8 +12923,8 @@ (match_dup 13)] { REAL_VALUE_TYPE rv; - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word); diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md index cec2b430b82..bf10a5dc180 100644 --- a/gcc/config/rs6000/spe.md +++ b/gcc/config/rs6000/spe.md @@ -2604,8 +2604,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -2627,8 +2627,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md index 252e2690a98..8616b3eca5f 100644 --- a/gcc/config/rs6000/sync.md +++ b/gcc/config/rs6000/sync.md @@ -18,14 +18,23 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. 
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")]) -(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")]) +(define_mode_attr larx [(QI "lbarx") + (HI "lharx") + (SI "lwarx") + (DI "ldarx") + (TI "lqarx")]) + +(define_mode_attr stcx [(QI "stbcx.") + (HI "sthcx.") + (SI "stwcx.") + (DI "stdcx.") + (TI "stqcx.")]) (define_code_iterator FETCHOP [plus minus ior xor and]) (define_code_attr fetchop_name [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) (define_code_attr fetchop_pred - [(plus "add_operand") (minus "gpc_reg_operand") + [(plus "add_operand") (minus "int_reg_operand") (ior "logical_operand") (xor "logical_operand") (and "and_operand")]) (define_expand "mem_thread_fence" @@ -129,16 +138,7 @@ case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: case MEMMODEL_SEQ_CST: - if (GET_MODE (operands[0]) == QImode) - emit_insn (gen_loadsync_qi (operands[0])); - else if (GET_MODE (operands[0]) == HImode) - emit_insn (gen_loadsync_hi (operands[0])); - else if (GET_MODE (operands[0]) == SImode) - emit_insn (gen_loadsync_si (operands[0])); - else if (GET_MODE (operands[0]) == DImode) - emit_insn (gen_loadsync_di (operands[0])); - else - gcc_unreachable (); + emit_insn (gen_loadsync_<mode> (operands[0])); break; default: gcc_unreachable (); @@ -170,35 +170,109 @@ DONE; }) -;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve -;; opcode that is "phased-in". Not implemented as of Power7, so not yet used, -;; but let's prepare the macros anyway. +;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions +;; other than the quad memory operations, which have special restrictions. +;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased +;; in and did not show up until power8. TImode atomic lqarx/stqcx. require +;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI") + (HI "TARGET_SYNC_HI_QI") + SI + (DI "TARGET_POWERPC64")]) + +;; Types that we should provide atomic instructions for. -(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")]) +(define_mode_iterator AINT [QI + HI + SI + (DI "TARGET_POWERPC64") + (TI "TARGET_SYNC_TI")]) (define_insn "load_locked<mode>" - [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r") + [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r") (unspec_volatile:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))] "" "<larx> %0,%y1" [(set_attr "type" "load_l")]) +(define_insn "load_locked<QHI:mode>_si" + [(set (match_operand:SI 0 "int_reg_operand" "=r") + (unspec_volatile:SI + [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_HI_QI" + "<QHI:larx> %0,%y1" + [(set_attr "type" "load_l")]) + +;; Use PTImode to get even/odd register pairs +(define_expand "load_lockedti" + [(use (match_operand:TI 0 "quad_int_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" ""))] + "TARGET_SYNC_TI" +{ + /* Use a temporary register to force getting an even register for the + lqarx/stqcx. instructions. Normal optimizations will eliminate this + extra copy.
*/ + rtx pti = gen_reg_rtx (PTImode); + emit_insn (gen_load_lockedpti (pti, operands[1])); + emit_move_insn (operands[0], gen_lowpart (TImode, pti)); + DONE; +}) + +(define_insn "load_lockedpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec_volatile:PTI + [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1]) + && quad_int_reg_operand (operands[0], PTImode)" + "lqarx %0,%y1" + [(set_attr "type" "load_l")]) + (define_insn "store_conditional<mode>" [(set (match_operand:CC 0 "cc_reg_operand" "=x") (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) (set (match_operand:ATOMIC 1 "memory_operand" "=Z") - (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))] + (match_operand:ATOMIC 2 "int_reg_operand" "r"))] "" "<stcx> %2,%y1" [(set_attr "type" "store_c")]) +(define_expand "store_conditionalti" + [(use (match_operand:CC 0 "cc_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" "")) + (use (match_operand:TI 2 "quad_int_reg_operand" ""))] + "TARGET_SYNC_TI" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0)); + rtx pti_op2 = gen_reg_rtx (PTImode); + + /* Use a temporary register to force getting an even register for the + lqarx/stqcx. instructions. Normal optimizations will eliminate this + extra copy. */ + emit_move_insn (pti_op2, gen_lowpart (PTImode, op2)); + emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2)); + DONE; +}) + +(define_insn "store_conditionalpti" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) + (set (match_operand:PTI 1 "memory_operand" "=Z") + (match_operand:PTI 2 "quad_int_reg_operand" "r"))] + "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)" + "stqcx.
%2,%y1" + [(set_attr "type" "store_c")]) + (define_expand "atomic_compare_and_swap<mode>" - [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out - (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out - (match_operand:INT1 2 "memory_operand" "") ;; memory - (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected - (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired + [(match_operand:SI 0 "int_reg_operand" "") ;; bool out + (match_operand:AINT 1 "int_reg_operand" "") ;; val out + (match_operand:AINT 2 "memory_operand" "") ;; memory + (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected + (match_operand:AINT 4 "int_reg_operand" "") ;; desired (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; model succ (match_operand:SI 7 "const_int_operand" "")] ;; model fail @@ -209,9 +283,9 @@ }) (define_expand "atomic_exchange<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; input + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; input (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -220,9 +294,9 @@ }) (define_expand "atomic_<fetchop_name><mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 0) - (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 0) + (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand (match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -232,8 +306,8 @@ }) (define_expand "atomic_nand<mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "int_reg_operand" "") ;; operand 
(match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -243,10 +317,10 @@ }) (define_expand "atomic_fetch_<fetchop_name><mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -256,9 +330,9 @@ }) (define_expand "atomic_fetch_nand<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -268,10 +342,10 @@ }) (define_expand "atomic_<fetchop_name>_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -281,9 +355,9 @@ }) (define_expand "atomic_nand_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand 
(match_operand:SI 3 "const_int_operand" "")] ;; model "" { diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux index 017a293cde3..62a5b941389 100644 --- a/gcc/config/rs6000/t-linux +++ b/gcc/config/rs6000/t-linux @@ -2,7 +2,7 @@ # or soft-float. ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float))) ifneq (,$(findstring spe,$(target))) -MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1) +MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1) else MULTIARCH_DIRNAME = powerpc-linux-gnu endif diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 8b8b3427454..6cfebdeebdc 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -126,7 +126,9 @@ (match_operand:VEC_L 1 "input_operand" ""))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); @@ -730,9 +732,10 @@ "") (define_expand "and<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] + [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" ""))) + (clobber (match_scratch:CC 3 ""))])] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -746,8 +749,8 @@ (define_expand "nor<mode>3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" ""))))] + (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")) + (not:VEC_L 
(match_operand:VEC_L 2 "vlogical_operand" ""))))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -760,6 +763,47 @@ && (<MODE>mode != TImode || TARGET_POWERPC64)" "") +;; Power8 vector logical instructions. +(define_expand "eqv<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (not:VEC_L + (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "") + (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Rewrite nand into canonical form +(define_expand "nand<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; The canonical form is to have the negated element first, so we need to +;; reverse arguments.
+(define_expand "orc<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (match_operand:VEC_L 2 "register_operand" "")))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Vector count leading zeros +(define_expand "clz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + +;; Vector population count +(define_expand "popcount<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + + ;; Same size conversions (define_expand "float<VEC_int><mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 4adf6e5ac55..b87da826a95 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -36,6 +36,10 @@ ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +;; Like VSX_L, but don't support TImode for doing logical instructions in +;; 32-bit +(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF]) + ;; Iterator for memory move. Handle TImode specially to allow ;; it to use gprs as well as vsx registers. 
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -191,6 +195,8 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVSPDPN + UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP UNSPEC_VSX_CVUXWDP UNSPEC_VSX_CVSXDSP @@ -207,112 +213,31 @@ ;; VSX moves (define_insn "*vsx_mov<mode>" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v") + (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" { - switch (which_alternative) - { - case 0: - case 3: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stx<VSm>x %x1,%y0"; - - case 1: - case 4: - gcc_assert (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) != PRE_INC - && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY); - return "lx<VSm>x %x0,%y1"; - - case 2: - case 5: - return "xxlor %x0,%x1,%x1"; - - case 6: - case 7: - case 8: - case 11: - return "#"; - - case 9: - case 10: - return "xxlxor %x0,%x0,%x0"; - - case 12: - return output_vec_const_move (operands); - - case 13: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stvx %1,%y0"; - - case 14: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - 
return "lvx %0,%y1"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal ;; use of TImode is for unions. However for plain data movement, slightly ;; favor the vector loads (define_insn "*vsx_movti_64bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r") - (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") + (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" { - switch (which_alternative) - { - case 0: - return "stxvd2x %x1,%y0"; - - case 1: - return "lxvd2x %x0,%y1"; - - case 2: - return "xxlor %x0,%x1,%x1"; - - case 3: - return "xxlxor %x0,%x0,%x0"; - - case 4: - return output_vec_const_move (operands); - - case 5: - return "stvx %1,%y0"; - - case 6: - return "lvx %0,%y1"; - - case 7: - case 8: - case 9: - case 10: - return "#"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*") - (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") + (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) (define_insn "*vsx_movti_32bit" [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, 
v,wZ,Q,Y,????r,????r,????r,r") @@ -1003,6 +928,40 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs +(define_insn "vsx_xscvdpspn" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa") + (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvspdpn" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvdpspn_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Used by direct move to move a SFmode value from GPR to VSX register +(define_insn "vsx_xscvspdpn_directmove" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1088,70 +1047,368 @@ (set_attr "fp_type" "<VSfptype_simple>")]) -;; Logical operations -;; Do not support TImode logical instructions on 32-bit at present, because the -;; compiler will see that we have a TImode and when it wanted DImode, and -;; convert the DImode to TImode, store it on the stack, and load it in a VSX -;; register. 
-(define_insn "*vsx_and<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (and:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +;; Logical operations. Do not support TImode logical instructions on 32-bit at +;; present, because the compiler will see that we have a TImode and when it +;; wanted DImode, and convert the DImode to TImode, store it on the stack, and +;; load it in a VSX register or generate extra logical instructions in GPR +;; registers. + +;; When we are splitting the operations to GPRs, we use three alternatives, two +;; where the first/second inputs and output are in the same register, and the +;; third where the output specifies an early clobber so that we don't have to +;; worry about overlapping registers. + +(define_insn "*vsx_and<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))) + (clobber (match_scratch:CC 3 "X"))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxland %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_ior<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_and<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))) + (clobber (match_scratch:CC 3 "X,X,X,X"))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P 
(<MODE>mode)" + "@ + xxland %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6))) + (clobber (match_dup 3))]) + (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9))) + (clobber (match_dup 3))])] +{ + operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_ior<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_ior<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (ior:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = 
simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); -(define_insn "*vsx_xor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (xor:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" + if (operands[5] == constm1_rtx) + emit_move_insn (operands[3], constm1_rtx); + + else if (operands[5] == const0_rtx) + { + if (!rtx_equal_p (operands[3], operands[4])) + emit_move_insn (operands[3], operands[4]); + } + else + emit_insn (gen_iordi3 (operands[3], operands[4], operands[5])); + + /* Second half: the destination is operands[6]; operands[8] is the source half of operand 2. */ + if (operands[8] == constm1_rtx) + emit_move_insn (operands[6], constm1_rtx); + + else if (operands[8] == const0_rtx) + { + if (!rtx_equal_p (operands[6], operands[7])) + emit_move_insn (operands[6], operands[7]); + } + else + emit_insn (gen_iordi3 (operands[6], operands[7], operands[8])); + DONE; +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_xor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64" "xxlxor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_one_cmpl<mode>2" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_xor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (xor:VSX_L + 
(match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlxor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5))) + (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_one_cmpl<mode>2_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x1" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r") + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x1 + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 2) (not:DI (match_dup 3))) + (set (match_dup 4) (not:DI (match_dup 5)))] +{ + operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[4] = simplify_subreg 
(DImode, operands[0], <MODE>mode, 8); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two") + (set_attr "length" "4,8,8")]) -(define_insn "*vsx_nor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (ior:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +;; The 32-bit nor pattern uses the VSX_L2 iterator for every operand, like the other 32-bit logical patterns. +(define_insn "*vsx_nor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")) + (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_nor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + 
[(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_andc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 + (match_operand:VSX_L2 2 "vlogical_operand" "wa")) + (match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_andc<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") +;; andc is operand 1 AND (NOT operand 2), so the GPR split complements the operand 2 halves (dups 5 and 8). The all-GPR alternative is early-clobber so the output cannot overlap an input. +(define_insn_and_split "*vsx_andc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") (and:VSX_L (not:VSX_L - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")) - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" - "xxlandc %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlandc %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 5)) (match_dup 4))) + (set (match_dup 6) (and:DI (not:DI (match_dup 8)) (match_dup 7)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Power8 vector logical instructions. 
+(define_insn "*vsx_eqv<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxleqv %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +;; The all-GPR alternative is early-clobber so the two-instruction split cannot overwrite an input half it still needs. +(define_insn_and_split "*vsx_eqv<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (not:VSX_L + (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxleqv %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5)))) + (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite nand into canonical form +(define_insn "*vsx_nand<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlnand %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + 
+;; The all-GPR alternative is early-clobber so the two-instruction split cannot overwrite an input half it still needs. +(define_insn_and_split "*vsx_nand<mode>3_64bit" + [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,&?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "register_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnand %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite or complement into canonical form, by reversing the arguments +(define_insn "*vsx_orc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlorc %x0,%x2,%x1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +;; The all-GPR alternative is early-clobber, as in the other 64-bit logical splits. +(define_insn_and_split "*vsx_orc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlorc 
%x0,%x2,%x1 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) ;; Permute operations diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 30c34901f8d..358345a4437 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2017,14 +2017,18 @@ s390_decompose_address (rtx addr, struct s390_address *out) Thus we don't check the displacement for validity here. If after elimination the displacement turns out to be invalid after all, this is fixed up by reload in any case. */ - if (base != arg_pointer_rtx - && indx != arg_pointer_rtx - && base != return_address_pointer_rtx - && indx != return_address_pointer_rtx - && base != frame_pointer_rtx - && indx != frame_pointer_rtx - && base != virtual_stack_vars_rtx - && indx != virtual_stack_vars_rtx) + /* LRA maintains always displacements up to date and we need to + know the displacement is right during all LRA not only at the + final elimination. 
*/ + if (lra_in_progress + || (base != arg_pointer_rtx + && indx != arg_pointer_rtx + && base != return_address_pointer_rtx + && indx != return_address_pointer_rtx + && base != frame_pointer_rtx + && indx != frame_pointer_rtx + && base != virtual_stack_vars_rtx + && indx != virtual_stack_vars_rtx)) if (!DISP_IN_RANGE (offset)) return false; } @@ -3189,7 +3193,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, /* We need a scratch register when loading a PLUS expression which is not a legitimate operand of the LOAD ADDRESS instruction. */ - if (in_p && s390_plus_operand (x, mode)) + /* LRA can deal with transformation of plus op very well -- so we + don't need to prompt LRA in this case. */ + if (! lra_in_progress && in_p && s390_plus_operand (x, mode)) sri->icode = (TARGET_64BIT ? CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus); @@ -7868,6 +7874,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode) return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; } +/* Return true if we use LRA instead of reload pass. */ +static bool +s390_lra_p (void) +{ + return s390_lra_flag; +} + /* Return true if register FROM can be eliminated via register TO. */ static bool @@ -11105,6 +11118,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p +#undef TARGET_LRA_P +#define TARGET_LRA_P s390_lra_p + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE s390_can_eliminate diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 43e24d5d112..b0e530f0ed4 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -221,7 +221,7 @@ enum processor_flags /* Alignment on even addresses for LARL instruction. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) -#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 
16 : (ALIGN) /* Alignment is not required by the hardware. */ #define STRICT_ALIGNMENT 0 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index cad4f5f579a..0141b9813ef 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -277,7 +277,8 @@ (define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12" (const (symbol_ref "s390_tune_attr"))) -(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12" +(define_attr "cpu_facility" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12" (const_string "standard")) (define_attr "enabled" "" @@ -304,6 +305,10 @@ (match_test "TARGET_DFP")) (const_int 1) + (and (eq_attr "cpu_facility" "cpu_zarch") + (match_test "TARGET_CPU_ZARCH")) + (const_int 1) + (and (eq_attr "cpu_facility" "z10") (match_test "TARGET_Z10")) (const_int 1) @@ -2690,7 +2695,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -2899,7 +2904,7 @@ "(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -3075,7 +3080,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (reg:CCU CC_REGNUM) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index b326441173c..ba38e6e14ed 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -149,3 +149,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1) Set the branch costs for conditional branch instructions. 
Reasonable values are small, non-negative integers. The default branch cost is 1. + +mlra +Target Report Var(s390_lra_flag) Init(1) Save +Use LRA instead of reload |