author     Aldy Hernandez <aldyh@redhat.com>   2013-06-28 09:57:43 -0700
committer  Aldy Hernandez <aldyh@redhat.com>   2013-06-28 09:57:43 -0700
commit     7fb75753fa7e7c54af3b5e0aea65d8051feac55d (patch)
tree       568d89cbf5521cbb882c33a3a42fb332ff2e49b8 /gcc/config
parent     db2127098137dea6c246041e0d763a57a174fa3c (diff)
parent     2814409c2f46b5f71706f08358f395dddc9d8a81 (diff)
download   gcc-7fb75753fa7e7c54af3b5e0aea65d8051feac55d.tar.gz
Merge remote-tracking branch 'origin/gomp-4_0-branch' into cilk-in-gomp
Conflicts:
gcc/Makefile.in
gcc/c-family/c-common.h
gcc/c/c-parser.c
gcc/cp/Make-lang.in
gcc/cp/cp-tree.h
gcc/gimple.h
gcc/omp-low.c
gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
Diffstat (limited to 'gcc/config')
158 files changed, 9750 insertions, 2818 deletions
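For orientation before the raw hunks: the largest interface change in the aarch64 portion below replaces the old aarch64_simd_immediate_valid_for_move query (an int return code plus five separate nullable out-parameters: modconst, elementwidth, elementchar, mvn, shift) with aarch64_simd_valid_immediate, which returns bool and optionally fills a single struct simd_immediate_info { rtx value; int shift; int element_width; bool mvn; }. The following is only an illustrative sketch of that caller-side pattern, not GCC code: it stands in a plain int for rtx and invents a toy validity rule so it compiles on its own; the real struct, the new function, and its callers (aarch64_output_simd_mov_immediate, the Dn constraint, aarch64_legitimate_constant_p) appear in the hunks further down.

```c
/* Illustrative sketch only: "rtx" is stood in by int so this compiles
   outside GCC, and the validity rule is a placeholder.  The real struct
   and call sites are in the aarch64.c hunks below.  */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef int rtx;                    /* placeholder for GCC's rtx */

struct simd_immediate_info          /* mirrors the struct added in aarch64.c */
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
};

/* New-style query: one bool result, one optional info block.
   INFO may be NULL when the caller only needs a yes/no answer.  */
static bool
simd_valid_immediate (rtx op, bool inverse, struct simd_immediate_info *info)
{
  if (inverse)
    return false;                   /* stand-in validity rule */
  if (info)
    {
      info->value = op;
      info->shift = 0;
      info->element_width = 32;
      info->mvn = false;
    }
  return true;
}

int
main (void)
{
  struct simd_immediate_info info;

  /* Caller pattern used by aarch64_output_simd_mov_immediate below:
     validate once, then read every field from the single struct.  */
  bool is_valid = simd_valid_immediate (42, false, &info);
  assert (is_valid);
  printf ("element_width=%d shift=%d mvn=%d\n",
          info.element_width, info.shift, (int) info.mvn);

  /* Yes/no-only callers (e.g. the Dn constraint check) pass NULL.  */
  return simd_valid_immediate (42, false, NULL) ? 0 : 1;
}
```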
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4fdfe247a21..2a0e5fdc391 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1245,6 +1245,16 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return AARCH64_FIND_FRINT_VARIANT (sqrt); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode && out_n == C \ && in_mode == N##Fmode && in_n == C) case BUILT_IN_LFLOOR: diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 001842e43b4..e5ae556736c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -68,6 +68,13 @@ enum aarch64_symbol_context Each of of these represents a thread-local symbol, and corresponds to the thread local storage relocation operator for the symbol being referred to. + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + SYMBOL_FORCE_TO_MEM : Global variables are addressed using constant pool. All variable addresses are spilled into constant pools. The constant pools themselves are addressed using PC @@ -81,6 +88,7 @@ enum aarch64_symbol_type SYMBOL_SMALL_TLSDESC, SYMBOL_SMALL_GOTTPREL, SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, SYMBOL_FORCE_TO_MEM }; @@ -136,6 +144,8 @@ struct tune_params HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_constant_address_p (rtx); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); @@ -146,6 +156,10 @@ bool aarch64_is_long_call_p (rtx); bool aarch64_label_mentioned_p (rtx); bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); bool aarch64_pad_arg_upward (enum machine_mode, const_tree); bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); @@ -154,9 +168,9 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); bool aarch64_symbolic_address_p (rtx); -bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context, - enum aarch64_symbol_type *); bool aarch64_uimm12_shift (HOST_WIDE_INT); const char *aarch64_output_casesi (rtx *); enum aarch64_symbol_type aarch64_classify_symbol (rtx, @@ -219,6 +233,8 @@ void aarch64_split_128bit_move (rtx, rtx); bool aarch64_split_128bit_move_p (rtx, rtx); +void 
aarch64_split_simd_combine (rtx, rtx, rtx); + void aarch64_split_simd_move (rtx, rtx); /* Check for a legitimate floating point constant for FMOV. */ @@ -254,6 +270,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); - -char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index e4201732bcd..4d9b966d0ac 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -49,6 +49,7 @@ BUILTIN_VDQF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) + VAR1 (UNOP, clz, 2, v4si) BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) @@ -63,7 +64,7 @@ BUILTIN_VQ (REINTERP, reinterpretv2df, 0) BUILTIN_VDQ_I (BINOP, dup_lane, 0) - BUILTIN_SDQ_I (BINOP, dup_lane, 0) + BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0) /* Implemented by aarch64_<sur>q<r>shl<mode>. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0) BUILTIN_VSDQ_I (BINOP, uqshl, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9069a73c46c..08826b5dd9f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -357,29 +357,18 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_dup_lane<mode>" - [(set (match_operand:ALLX 0 "register_operand" "=w") +(define_insn "aarch64_dup_lane_scalar<mode>" + [(set (match_operand:<VEL> 0 "register_operand" "=w, r") (vec_select:<VEL> - (match_operand:<VCON> 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + (match_operand:VDQ 1 "register_operand" "w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i")]) ))] "TARGET_SIMD" - "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "<MODE>")] -) - -(define_insn "aarch64_dup_lanedi" - [(set (match_operand:DI 0 "register_operand" "=w,r") - (vec_select:DI - (match_operand:V2DI 1 "register_operand" "w,w") - (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] - "TARGET_SIMD" "@ - dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2] - umov\t%0, %1.d[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "DI")] + dup\\t%<Vetype>0, %1.<Vetype>[%2] + umov\\t%<vw>0, %1.<Vetype>[%2]" + [(set_attr "simd_type" "simd_dup, simd_movgp") + (set_attr "simd_mode" "<MODE>")] ) (define_insn "aarch64_simd_dup<mode>" @@ -409,7 +398,7 @@ case 4: return "ins\t%0.d[0], %1"; case 5: return "mov\t%0, %1"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 64); default: gcc_unreachable (); } @@ -440,7 +429,7 @@ case 5: return "#"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128); + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); default: gcc_unreachable (); } @@ -1058,9 +1047,9 @@ (vec_duplicate:<VHALF> (const_int 0))))] "TARGET_SIMD" "@ - mov\\t%d0, %d1 - fmov\t%d0, %1 - dup\t%d0, %1" + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" [(set_attr "v8type" "*,fmov,*") (set_attr "simd_type" "simd_dup,*,simd_dup") (set_attr "simd_mode" "<MODE>") @@ -1190,6 +1179,104 @@ ;; Widening arithmetic. 
+(define_insn "*aarch64_<su>mlal_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand:<VWIDE> 3 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> @@ -1611,6 +1698,15 @@ DONE; }) +(define_insn "clz<mode>2" + [(set (match_operand:VDQ_BHSI 0 
"register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "simd_type" "simd_cls") + (set_attr "simd_mode" "<MODE>")] +) + ;; 'across lanes' max and min ops. (define_insn "reduc_<maxmin_uns>_<mode>" @@ -2209,15 +2305,29 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_combine<mode>" +(define_insn_and_split "aarch64_combine<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=&w") (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] "TARGET_SIMD" - "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]" - [(set_attr "simd_type" "simd_ins") - (set_attr "simd_mode" "<MODE>")] -) + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "aarch64_simd_combine<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); + DONE; + }) ;; <su><addsub>l<q>. @@ -3280,7 +3390,8 @@ (COMPARISONS:DI (match_operand:DI 1 "register_operand" "w,w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> @@ -3291,15 +3402,7 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (COMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); @@ -3332,7 +3435,8 @@ (UCOMPARISONS:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> @@ -3342,17 +3446,9 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (UCOMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { - enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); + enum machine_mode mode = CCmode; rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); @@ -3385,7 +3481,8 @@ (and:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "register_operand" "w,r")) - (const_int 0))))] + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cmtst\t%d0, %d1, %d2 @@ -3395,16 +3492,7 @@ happening in the 'w' constraint cases. 
*/ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (and:DI (match_dup 1) - (match_dup 2)) - (const_int 0))) - (set (match_dup 0) - (neg:DI - (ne:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a0aff58668b..072f5401279 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1,4 +1,5 @@ -/* Machine description for AArch64 architecture. + +1;3201;0c/* Machine description for AArch64 architecture. Copyright (C) 2009-2013 Free Software Foundation, Inc. Contributed by ARM Ltd. @@ -87,6 +88,14 @@ struct aarch64_address_info { enum aarch64_symbol_type symbol_type; }; +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; +}; + /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -103,8 +112,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); -static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *, - int *, unsigned char *, int *, int *); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); static bool aarch64_const_vec_all_same_int_p (rtx, @@ -524,13 +531,15 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, return; } + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + case SYMBOL_SMALL_GOT: { rtx tmp_reg = dest; if (can_create_pseudo_p ()) - { - tmp_reg = gen_reg_rtx (Pmode); - } + tmp_reg = gen_reg_rtx (Pmode); emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm)); emit_insn (gen_ldr_got_small (dest, tmp_reg, imm)); return; @@ -692,6 +701,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src) || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); } +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + /* Split a complex SIMD move. 
*/ void @@ -738,10 +790,10 @@ aarch64_split_simd_move (rtx dst, rtx src) } static rtx -aarch64_force_temporary (rtx x, rtx value) +aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value) { if (can_create_pseudo_p ()) - return force_reg (Pmode, value); + return force_reg (mode, value); else { x = aarch64_emit_move (x, value); @@ -753,15 +805,16 @@ aarch64_force_temporary (rtx x, rtx value) static rtx aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) { - if (!aarch64_plus_immediate (GEN_INT (offset), DImode)) + if (!aarch64_plus_immediate (GEN_INT (offset), mode)) { rtx high; /* Load the full offset into a register. This might be improvable in the future. */ high = GEN_INT (offset); offset = 0; - high = aarch64_force_temporary (temp, high); - reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); + high = aarch64_force_temporary (mode, temp, high); + reg = aarch64_force_temporary (mode, temp, + gen_rtx_PLUS (mode, high, reg)); } return plus_constant (mode, reg, offset); } @@ -800,7 +853,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) && targetm.cannot_force_const_mem (mode, imm)) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -817,7 +870,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) if (offset != const0_rtx) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -826,6 +879,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) case SYMBOL_SMALL_TPREL: case SYMBOL_SMALL_ABSOLUTE: + case SYMBOL_TINY_ABSOLUTE: aarch64_load_symref_appropriately (dest, imm, sty); return; @@ -2634,12 +2688,14 @@ static bool aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; + if (GET_CODE (x) == HIGH) return true; split_const (x, &base, &offset); if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) - return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); return aarch64_tls_referenced_p (x); } @@ -3077,10 +3133,13 @@ aarch64_symbolic_address_p (rtx x) /* Classify the base of symbolic expression X, given that X appears in context CONTEXT. */ -static enum aarch64_symbol_type -aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context) + +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx x, + enum aarch64_symbol_context context) { rtx offset; + split_const (x, &x, &offset); return aarch64_classify_symbol (x, context); } @@ -3428,13 +3487,13 @@ aarch64_print_operand (FILE *f, rtx x, char code) break; case 'X': - /* Print integer constant in hex. */ + /* Print bottom 16 bits of integer constant in hex. */ if (GET_CODE (x) != CONST_INT) { output_operand_lossage ("invalid operand for '%%%c'", code); return; } - asm_fprintf (f, "0x%wx", UINTVAL (x)); + asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff); break; case 'w': @@ -5016,6 +5075,7 @@ aarch64_classify_tls_symbol (rtx x) /* Return the method that should be used to access SYMBOL_REF or LABEL_REF X in context CONTEXT. 
*/ + enum aarch64_symbol_type aarch64_classify_symbol (rtx x, enum aarch64_symbol_context context ATTRIBUTE_UNUSED) @@ -5029,6 +5089,8 @@ aarch64_classify_symbol (rtx x, case AARCH64_CMODEL_TINY_PIC: case AARCH64_CMODEL_TINY: + return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; @@ -5038,70 +5100,46 @@ aarch64_classify_symbol (rtx x, } } - gcc_assert (GET_CODE (x) == SYMBOL_REF); - - switch (aarch64_cmodel) + if (GET_CODE (x) == SYMBOL_REF) { - case AARCH64_CMODEL_LARGE: - return SYMBOL_FORCE_TO_MEM; - - case AARCH64_CMODEL_TINY: - case AARCH64_CMODEL_SMALL: - - /* This is needed to get DFmode, TImode constants to be loaded off - the constant pool. Is it necessary to dump TImode values into - the constant pool. We don't handle TImode constant loads properly - yet and hence need to use the constant pool. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (aarch64_cmodel == AARCH64_CMODEL_LARGE + || CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_FORCE_TO_MEM; if (aarch64_tls_symbol_p (x)) return aarch64_classify_tls_symbol (x); - if (SYMBOL_REF_WEAK (x)) - return SYMBOL_FORCE_TO_MEM; - - return SYMBOL_SMALL_ABSOLUTE; - - case AARCH64_CMODEL_TINY_PIC: - case AARCH64_CMODEL_SMALL_PIC: - - if (CONSTANT_POOL_ADDRESS_P (x)) - return SYMBOL_FORCE_TO_MEM; + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; - if (aarch64_tls_symbol_p (x)) - return aarch64_classify_tls_symbol (x); + case AARCH64_CMODEL_SMALL: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; - if (!aarch64_symbol_binds_local_p (x)) - return SYMBOL_SMALL_GOT; + case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_TINY_ABSOLUTE; - return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_SMALL_ABSOLUTE; - default: - gcc_unreachable (); + default: + gcc_unreachable (); + } } + /* By default push everything into the constant pool. */ return SYMBOL_FORCE_TO_MEM; } -/* Return true if X is a symbolic constant that can be used in context - CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ - -bool -aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context, - enum aarch64_symbol_type *symbol_type) -{ - rtx offset; - split_const (x, &x, &offset); - if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) - *symbol_type = aarch64_classify_symbol (x, context); - else - return false; - - /* No checking of offset at this point. */ - return true; -} - bool aarch64_constant_address_p (rtx x) { @@ -5152,8 +5190,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) /* This could probably go away because we now decompose CONST_INTs according to expand_mov_immediate. */ if ((GET_CODE (x) == CONST_VECTOR - && aarch64_simd_valid_immediate (x, mode, false, - NULL, NULL, NULL, NULL, NULL) != -1) + && aarch64_simd_valid_immediate (x, mode, false, NULL)) || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) return !targetm.cannot_force_const_mem (mode, x); @@ -5984,32 +6021,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) return false; } -/* Return quad mode as the preferred SIMD mode. */ +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. 
*/ static enum machine_mode -aarch64_preferred_simd_mode (enum machine_mode mode) +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) { + gcc_assert (width == 64 || width == 128); if (TARGET_SIMD) - switch (mode) - { - case DFmode: - return V2DFmode; - case SFmode: - return V4SFmode; - case SImode: - return V4SImode; - case HImode: - return V8HImode; - case QImode: - return V16QImode; - case DImode: - return V2DImode; - break; - - default:; - } + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } return word_mode; } +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */ static unsigned int @@ -6097,7 +6159,7 @@ aarch64_mangle_type (const_tree type) } /* Return the equivalent letter for size. */ -static unsigned char +static char sizetochar (int size) { switch (size) @@ -6144,15 +6206,10 @@ aarch64_vect_float_const_representable_p (rtx x) return aarch64_float_const_representable_p (x0); } -/* TODO: This function returns values similar to those - returned by neon_valid_immediate in gcc/config/arm/arm.c - but the API here is different enough that these magic numbers - are not used. It should be sufficient to return true or false. */ -static int -aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) +/* Return true for valid and false for invalid. */ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -6163,7 +6220,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, { \ immtype = (CLASS); \ elsize = (ELSIZE); \ - elchar = sizetochar (elsize); \ eshift = (SHIFT); \ emvn = (NEG); \ break; \ @@ -6172,36 +6228,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned char bytes[16]; - unsigned char elchar = 0; int immtype = -1, matches; unsigned int invmask = inverse ? 0xff : 0; int eshift, emvn; if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); - int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); - - if (!(simd_imm_zero - || aarch64_vect_float_const_representable_p (op))) - return -1; - - if (modconst) - *modconst = CONST_VECTOR_ELT (op, 0); - - if (elementwidth) - *elementwidth = elem_width; - - if (elementchar) - *elementchar = sizetochar (elem_width); + if (! 
(aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; - if (shift) - *shift = 0; + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } - if (simd_imm_zero) - return 19; - else - return 18; + return true; } /* Splat vector constant out into a byte vector. */ @@ -6299,23 +6344,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, if (immtype == -1 || (immtype >= 12 && immtype <= 15) || immtype == 18) - return -1; - - - if (elementwidth) - *elementwidth = elsize; - - if (elementchar) - *elementchar = elchar; - - if (mvn) - *mvn = emvn; - - if (shift) - *shift = eshift; + return false; - if (modconst) + if (info) { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + unsigned HOST_WIDE_INT imm = 0; /* Un-invert bytes of recognized vector, if necessary. */ @@ -6332,68 +6368,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); - *modconst = GEN_INT (imm); - } - else - { - unsigned HOST_WIDE_INT imm = 0; - for (i = 0; i < elsize / BITS_PER_UNIT; i++) - imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); /* Construct 'abcdefgh' because the assembler cannot handle - generic constants. */ - gcc_assert (shift != NULL && mvn != NULL); - if (*mvn) + generic constants. */ + if (info->mvn) imm = ~imm; - imm = (imm >> *shift) & 0xff; - *modconst = GEN_INT (imm); - } + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } } - return immtype; + return true; #undef CHECK } -/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction - (or, implicitly, MVNI) immediate. Write back width per element - to *ELEMENTWIDTH, and a modified constant (whatever should be output - for a MOVI instruction) in *MODCONST. */ -int -aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) -{ - rtx tmpconst; - int tmpwidth; - unsigned char tmpwidthc; - int tmpmvn = 0, tmpshift = 0; - int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, - &tmpwidth, &tmpwidthc, - &tmpmvn, &tmpshift); - - if (retval == -1) - return 0; - - if (modconst) - *modconst = tmpconst; - - if (elementwidth) - *elementwidth = tmpwidth; - - if (elementchar) - *elementchar = tmpwidthc; - - if (mvn) - *mvn = tmpmvn; - - if (shift) - *shift = tmpshift; - - return 1; -} - static bool aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT minval, @@ -6455,6 +6450,25 @@ aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED) return true; } +bool +aarch64_mov_operand_p (rtx x, + enum aarch64_symbol_context context, + enum machine_mode mode) +{ + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) + return true; + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + + return aarch64_classify_symbolic_expression (x, context) + == SYMBOL_TINY_ABSOLUTE; +} + /* Return a const_int vector of VAL. 
*/ rtx aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) @@ -6479,9 +6493,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) gcc_assert (!VECTOR_MODE_P (mode)); vmode = aarch64_preferred_simd_mode (mode); rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); - int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0, - NULL, NULL, NULL, NULL); - return retval; + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); } /* Construct and return a PARALLEL RTX vector. */ @@ -6709,8 +6721,7 @@ aarch64_simd_make_constant (rtx vals) gcc_unreachable (); if (const_vec != NULL_RTX - && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, - NULL, NULL, NULL)) + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) /* Load using MOVI/MVNI. */ return const_vec; else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) @@ -7268,49 +7279,78 @@ aarch64_float_const_representable_p (rtx x) } char* -aarch64_output_simd_mov_immediate (rtx *const_vector, +aarch64_output_simd_mov_immediate (rtx const_vector, enum machine_mode mode, unsigned width) { - int is_valid; - unsigned char widthc; - int lane_width_bits; + bool is_valid; static char templ[40]; - int shift = 0, mvn = 0; const char *mnemonic; unsigned int lane_count = 0; + char element_char; + + struct simd_immediate_info info; - is_valid = - aarch64_simd_immediate_valid_for_move (*const_vector, mode, - const_vector, &lane_width_bits, - &widthc, &mvn, &shift); + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); gcc_assert (is_valid); + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + mode = GET_MODE_INNER (mode); if (mode == SFmode || mode == DFmode) { - bool zero_p = - aarch64_float_const_zero_rtx_p (*const_vector); - gcc_assert (shift == 0); - mnemonic = zero_p ? "movi" : "fmov"; + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } } - else - mnemonic = mvn ? "mvni" : "movi"; - gcc_assert (lane_width_bits != 0); - lane_count = width / lane_width_bits; + mnemonic = info.mvn ? 
"mvni" : "movi"; if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); - else if (shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", - mnemonic, lane_count, widthc, shift); + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", lsl %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), info.shift); else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", - mnemonic, lane_count, widthc); + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); return templ; } +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + /* Split operands into moves from op[1] + op[2] into op[0]. */ void diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b27bcdaa97c..e88e5be894e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -774,17 +774,34 @@ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))] "(register_operand (operands[0], <MODE>mode) || aarch64_reg_or_zero (operands[1], <MODE>mode))" - "@ - mov\\t%w0, %w1 - mov\\t%w0, %1 - movi\\t%0.<Vallxd>, %1 - ldr<size>\\t%w0, %1 - ldr\\t%<size>0, %1 - str<size>\\t%w1, %0 - str\\t%<size>1, %0 - umov\\t%w0, %1.<v>[0] - dup\\t%0.<Vallxd>, %w1 - dup\\t%0, %1.<v>[0]" +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + <MODE>mode); + case 3: + return "ldr<size>\t%w0, %1"; + case 4: + return "ldr\t%<size>0, %1"; + case 5: + return "str<size>\t%w1, %0"; + case 6: + return "str\t%<size>1, %0"; + case 7: + return "umov\t%w0, %1.<v>[0]"; + case 8: + return "dup\t%0.<Vallxd>, %w1"; + case 9: + return "dup\t%0, %1.<v>[0]"; + default: + gcc_unreachable (); + } +} [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") @@ -829,8 +846,8 @@ ) (define_insn "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r, r, *w, r,*w,w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,Usa,Ush,rZ,*w,*w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -850,7 +867,8 @@ movi\\t%d0, %1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") - (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")] + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm<mode>" @@ -858,9 +876,8 @@ (const_int 16) (match_operand:GPI 1 "const_int_operand" "n")) (match_operand:GPI 2 "const_int_operand" "n"))] - "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) 
- && INTVAL (operands[1]) % 16 == 0 - && UINTVAL (operands[2]) <= 0xffff" + "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) + && UINTVAL (operands[1]) % 16 == 0" "movk\\t%<w>0, %X2, lsl %1" [(set_attr "v8type" "movk") (set_attr "mode" "<MODE>")] @@ -3164,6 +3181,50 @@ (set_attr "mode" "<MODE>")] ) +;; Bitfield Insert (insv) +(define_expand "insv<mode>" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPI 3 "general_operand"))] + "" +{ + unsigned HOST_WIDE_INT width = UINTVAL (operands[1]); + unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]); + rtx value = operands[3]; + + if (width == 0 || (pos + width) > GET_MODE_BITSIZE (<MODE>mode)) + FAIL; + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1; + + /* Prefer AND/OR for inserting all zeros or all ones. */ + if ((UINTVAL (value) & mask) == 0 + || (UINTVAL (value) & mask) == mask) + FAIL; + + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (<MODE>mode, value); +}) + +(define_insn "*insv_reg<mode>" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (<MODE>mode)))" + "bfi\\t%<w>0, %<w>3, %2, %1" + [(set_attr "v8type" "bfm") + (set_attr "mode" "<MODE>")] +) + (define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (ashift:GPI (ANY_EXTEND:GPI diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 608db35b3dd..760ba3dc1e1 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -20234,49 +20234,49 @@ vcvtpq_u64_f64 (float64x2_t __a) __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) vdupb_lane_s8 (int8x16_t a, int const b) { - return __builtin_aarch64_dup_laneqi (a, b); + return __builtin_aarch64_dup_lane_scalarv16qi (a, b); } __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vdupb_lane_u8 (uint8x16_t a, int const b) { - return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b); + return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b); } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) vduph_lane_s16 (int16x8_t a, int const b) { - return __builtin_aarch64_dup_lanehi (a, b); + return __builtin_aarch64_dup_lane_scalarv8hi (a, b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vduph_lane_u16 (uint16x8_t a, int const b) { - return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b); + return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b); } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vdups_lane_s32 (int32x4_t a, int const b) { - return __builtin_aarch64_dup_lanesi (a, b); + return __builtin_aarch64_dup_lane_scalarv4si (a, b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vdups_lane_u32 (uint32x4_t a, int const b) { - return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b); + return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b); } __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) vdupd_lane_s64 (int64x2_t a, int const b) { - return __builtin_aarch64_dup_lanedi (a, b); + return __builtin_aarch64_dup_lane_scalarv2di (a, b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vdupd_lane_u64 (uint64x2_t a, int const b) { - return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b); + return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b); } /* vldn */ diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 18ac16a3160..7cafc08fdd9 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -75,11 +75,6 @@ "Integer constant zero." (match_test "op == const0_rtx")) -(define_constraint "Usa" - "A constraint that matches an absolute symbolic address." - (and (match_code "const,symbol_ref") - (match_test "aarch64_symbolic_address_p (op)"))) - (define_constraint "Ush" "A constraint that matches an absolute symbolic address high part." (and (match_code "high") @@ -148,9 +143,8 @@ "@internal A constraint that matches vector of immediates." (and (match_code "const_vector") - (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op), - NULL, NULL, NULL, - NULL, NULL) != 0"))) + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) (define_constraint "Dh" "@internal diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 860d4d9a187..8e40c5de5d4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -385,7 +385,8 @@ ;; Double modes of vector modes (lower case). (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") (V2SI "v4si") (V2SF "v4sf") - (SI "v2si") (DI "v2di")]) + (SI "v2si") (DI "v2di") + (DF "v2df")]) ;; Narrowed modes for VDN. 
(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 8514e8f8fbd..3e2b6b34357 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -115,16 +115,11 @@ (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, 0)"))) -(define_predicate "aarch64_const_address" - (and (match_code "symbol_ref") - (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)"))) - (define_predicate "aarch64_valid_symref" (match_code "const, symbol_ref, label_ref") { - enum aarch64_symbol_type symbol_type; - return (aarch64_symbolic_constant_p (op, SYMBOL_CONTEXT_ADR, &symbol_type) - && symbol_type != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); }) (define_predicate "aarch64_tls_ie_symref" @@ -170,15 +165,10 @@ }) (define_predicate "aarch64_mov_operand" - (and (match_code "reg,subreg,mem,const_int,symbol_ref,high") + (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high") (ior (match_operand 0 "register_operand") (ior (match_operand 0 "memory_operand") - (ior (match_test "GET_CODE (op) == HIGH - && aarch64_valid_symref (XEXP (op, 0), - GET_MODE (XEXP (op, 0)))") - (ior (match_test "CONST_INT_P (op) - && aarch64_move_imm (INTVAL (op), mode)") - (match_test "aarch64_const_address (op, mode)"))))))) + (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)"))))) (define_predicate "aarch64_movti_operand" (and (match_code "reg,subreg,mem,const_int") diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 69007a93385..5f5b33e347b 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) break; case GE: case GT: case GEU: case GTU: - /* These must be swapped. */ - if (op1 != CONST0_RTX (cmp_mode)) - { - code = swap_condition (code); - tem = op0, op0 = op1, op1 = tem; - } + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; break; default: @@ -3067,12 +3067,9 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) operands[1] = op1; out = gen_reg_rtx (DImode); - /* What's actually returned is -1,0,1, not a proper boolean value, - so use an EXPR_LIST as with a generic libcall instead of a - comparison type expression. */ - note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX); - note = gen_rtx_EXPR_LIST (VOIDmode, op0, note); - note = gen_rtx_EXPR_LIST (VOIDmode, func, note); + /* What's actually returned is -1,0,1, not a proper boolean value. 
*/ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); alpha_emit_xfloating_libcall (func, out, operands, 2, note); return out; diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 439752780a0..b020b457df2 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -23,6 +23,7 @@ ;; Uses of UNSPEC in this file: (define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE UNSPEC_ARG_HOME UNSPEC_LDGP1 UNSPEC_INSXH diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md index 10da396ab66..12bbbaf9083 100644 --- a/gcc/config/arm/arm-fixed.md +++ b/gcc/config/arm/arm-fixed.md @@ -19,12 +19,13 @@ ;; This file contains ARM instructions that support fixed-point operations. (define_insn "add<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "add%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "add<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -32,7 +33,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "usadd<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -40,7 +42,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ssadd<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -48,15 +51,17 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sub<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "sub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "sub<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -64,7 +69,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "ssub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ussub<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -73,7 +79,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sssub<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ 
-81,7 +88,8 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Fractional multiplies. @@ -374,6 +382,7 @@ "TARGET_32BIT && arm_arch6" "ssat%?\\t%0, #16, %2%S1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat") (set_attr "shift" "1") (set_attr "type" "alu_shift")]) @@ -384,4 +393,5 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat")]) diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md index 9705f751ae6..8a3335055d1 100644 --- a/gcc/config/arm/arm-generic.md +++ b/gcc/config/arm/arm-generic.md @@ -114,7 +114,9 @@ (define_insn_reservation "mult" 16 (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))) + (and (eq_attr "ldsched" "no") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "core*16") (define_insn_reservation "mult_ldsched_strongarm" 3 @@ -122,7 +124,8 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) "core*2") (define_insn_reservation "mult_ldsched" 4 @@ -130,13 +133,17 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "!strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) "core*4") (define_insn_reservation "multi_cycle" 32 (and (eq_attr "generic_sched" "yes") (and (eq_attr "core_cycles" "multi") - (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4"))) + (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\ + store1,store2,store3,store4") + (not (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))))) "core*32") (define_insn_reservation "single_cycle" 1 diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml index 2bc9702bee2..e615437b125 100644 --- a/gcc/config/arm/arm-ldmstm.ml +++ b/gcc/config/arm/arm-ldmstm.ml @@ -146,12 +146,15 @@ let can_thumb addrmode update is_store = | IA, true, true -> true | _ -> false +exception InvalidAddrMode of string;; + let target addrmode thumb = match addrmode, thumb with IA, true -> "TARGET_THUMB1" | IA, false -> "TARGET_32BIT" | DB, false -> "TARGET_32BIT" | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = let astr = string_of_addrmode addrmode in @@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = done; Printf.printf "}\"\n"; Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; - begin if not thumb then + if not thumb then begin Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; end; Printf.printf "])\n\n" diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index c791341f69b..ef94bbcea25 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -24,12 +24,13 @@ extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); extern int use_return_insn (int, rtx); +extern bool use_simple_return_p (void); extern enum reg_class arm_regno_class (int); 
extern void arm_load_pic_register (unsigned long); extern int arm_volatile_func (void); extern void arm_expand_prologue (void); extern void arm_expand_epilogue (bool); -extern void thumb2_expand_return (void); +extern void thumb2_expand_return (bool); extern const char *arm_strip_name_encoding (const char *); extern void arm_asm_output_labelref (FILE *, const char *); extern void thumb2_asm_output_opcode (FILE *); @@ -94,7 +95,7 @@ extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); -extern int neon_vector_mem_operand (rtx, int); +extern int neon_vector_mem_operand (rtx, int, bool); extern int neon_struct_mem_operand (rtx); extern int arm_no_early_store_addr_dep (rtx, rtx); extern int arm_early_store_addr_dep (rtx, rtx); @@ -227,6 +228,8 @@ extern const char *arm_mangle_type (const_tree); extern void arm_order_regs_for_local_alloc (void); +extern int arm_max_conditional_execute (); + /* Vectorizer cost model implementation. */ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding @@ -256,8 +259,7 @@ struct tune_params bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); int constant_limit; - /* Maximum number of instructions to conditionalise in - arm_final_prescan_insn. */ + /* Maximum number of instructions to conditionalise. */ int max_insns_skipped; int num_prefetch_slots; int l1_cache_size; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 44286926eb6..e6fd42079cb 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23,6 +23,7 @@ #include "config.h" #include "system.h" #include "coretypes.h" +#include "hash-table.h" #include "tm.h" #include "rtl.h" #include "tree.h" @@ -661,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_ASAN_SHADOW_OFFSET #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -1054,7 +1059,7 @@ const struct tune_params arm_cortex_a15_tune = arm_9e_rtx_costs, NULL, 1, /* Constant limit. */ - 5, /* Max cond insns. */ + 2, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, @@ -1870,6 +1875,11 @@ arm_option_override (void) arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + + if (!TARGET_THUMB2) + arm_restrict_it = 0; /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ @@ -2168,6 +2178,14 @@ arm_option_override (void) global_options.x_param_values, global_options_set.x_param_values); + /* Disable shrink-wrap when optimizing function for size, since it tends to + generate additional returns. */ + if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) + flag_shrink_wrap = false; + /* TBD: Dwarf info for apcs frame is not handled yet. */ + if (TARGET_APCS_FRAME) + flag_shrink_wrap = false; + /* Register global variables with the garbage collector. */ arm_add_gc_roots (); } @@ -2517,6 +2535,18 @@ use_return_insn (int iscond, rtx sibling) return 1; } +/* Return TRUE if we should try to use a simple_return insn, i.e. 
perform + shrink-wrapping if possible. This is the case if we need to emit a + prologue, which we can test by looking at the offsets. */ +bool +use_simple_return_p (void) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + return offsets->outgoing_args != 0; +} + /* Return TRUE if int I is a valid immediate ARM constant. */ int @@ -2656,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) switch (code) { case AND: + case IOR: + case XOR: return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); case PLUS: @@ -3816,36 +3848,48 @@ arm_function_value(const_tree type, const_tree func, return arm_libcall_value_1 (mode); } -static int -libcall_eq (const void *p1, const void *p2) +/* libcall hashtable helpers. */ + +struct libcall_hasher : typed_noop_remove <rtx_def> +{ + typedef rtx_def value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline bool +libcall_hasher::equal (const value_type *p1, const compare_type *p2) { - return rtx_equal_p ((const_rtx) p1, (const_rtx) p2); + return rtx_equal_p (p1, p2); } -static hashval_t -libcall_hash (const void *p1) +inline hashval_t +libcall_hasher::hash (const value_type *p1) { - return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE); + return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE); } +typedef hash_table <libcall_hasher> libcall_table_type; + static void -add_libcall (htab_t htab, rtx libcall) +add_libcall (libcall_table_type htab, rtx libcall) { - *htab_find_slot (htab, libcall, INSERT) = libcall; + *htab.find_slot (libcall, INSERT) = libcall; } static bool arm_libcall_uses_aapcs_base (const_rtx libcall) { static bool init_done = false; - static htab_t libcall_htab; + static libcall_table_type libcall_htab; if (!init_done) { init_done = true; - libcall_htab = htab_create (31, libcall_hash, libcall_eq, - NULL); + libcall_htab.create (31); add_libcall (libcall_htab, convert_optab_libfunc (sfloat_optab, SFmode, SImode)); add_libcall (libcall_htab, @@ -3904,7 +3948,7 @@ arm_libcall_uses_aapcs_base (const_rtx libcall) DFmode)); } - return libcall && htab_find (libcall_htab, libcall) != NULL; + return libcall && libcall_htab.find (libcall) != NULL; } static rtx @@ -7819,7 +7863,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) && GET_CODE (SET_SRC (x)) == VEC_SELECT) { *total = rtx_cost (SET_DEST (x), code, 0, speed); - if (!neon_vector_mem_operand (SET_DEST (x), 2)) + if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) *total += COSTS_N_INSNS (1); return true; } @@ -7830,7 +7874,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) { rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); *total = rtx_cost (mem, code, 0, speed); - if (!neon_vector_mem_operand (mem, 2)) + if (!neon_vector_mem_operand (mem, 2, true)) *total += COSTS_N_INSNS (1); return true; } @@ -9101,6 +9145,12 @@ arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) return cost; } +int +arm_max_conditional_execute (void) +{ + return max_insns_skipped; +} + static int arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) { @@ -10002,7 +10052,7 @@ arm_coproc_mem_operand (rtx op, bool wb) 2 - Element/structure loads (vld1) */ int -neon_vector_mem_operand (rtx op, int type) +neon_vector_mem_operand (rtx op, int type, bool strict) { rtx ind; @@ -10014,7 
+10064,7 @@ neon_vector_mem_operand (rtx op, int type) || reg_mentioned_p (virtual_outgoing_args_rtx, op) || reg_mentioned_p (virtual_stack_dynamic_rtx, op) || reg_mentioned_p (virtual_stack_vars_rtx, op))) - return FALSE; + return !strict; /* Constants are converted into offsets from labels. */ if (!MEM_P (op)) @@ -10124,7 +10174,7 @@ coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) { if (!TARGET_NEON_FP16) return GENERAL_REGS; - if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) return NO_REGS; return GENERAL_REGS; } @@ -16135,25 +16185,34 @@ arm_compute_save_reg0_reg12_mask (void) return save_reg_mask; } +/* Return true if r3 is live at the start of the function. */ + +static bool +arm_r3_live_at_start_p (void) +{ + /* Just look at cfg info, which is still close enough to correct at this + point. This gives false positives for broken functions that might use + uninitialized data that happens to be allocated in r3, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3); +} /* Compute the number of bytes used to store the static chain register on the - stack, above the stack frame. We need to know this accurately to get the - alignment of the rest of the stack frame correct. */ + stack, above the stack frame. We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ -static int arm_compute_static_chain_stack_bytes (void) +static int +arm_compute_static_chain_stack_bytes (void) { - unsigned long func_type = arm_current_func_type (); - int static_chain_stack_bytes = 0; - - if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM && - IS_NESTED (func_type) && - df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0) - static_chain_stack_bytes = 4; + /* See the defining assertion in arm_expand_prologue. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM + && IS_NESTED (arm_current_func_type ()) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) + return 4; - return static_chain_stack_bytes; + return 0; } - /* Compute a bit mask of which registers need to be saved on the stack for the current function. This is used by arm_get_frame_offsets, which may add extra registers. */ @@ -17122,6 +17181,19 @@ emit_multi_reg_push (unsigned long mask) return par; } +/* Add a REG_CFA_ADJUST_CFA REG note to INSN. + SIZE is the offset to be adjusted. + DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */ +static void +arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src) +{ + rtx dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf); +} + /* Generate and emit an insn pattern that we will recognize as a pop_multi. SAVED_REGS_MASK shows which registers need to be restored. 
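The const_ok_for_dimode_op hunk above extends the 64-bit constant test from AND to IOR and XOR: a DImode constant qualifies when each 32-bit half is either a valid immediate for the 32-bit opcode or 0xFFFFFFFF. A minimal standalone C sketch of that split, approximating const_ok_for_op with the basic ARM rotated-immediate test (the real helper also accepts inverted forms handled by BIC and friends):

#include <stdint.h>

/* An ARM data-processing immediate is an 8-bit value rotated right by an
   even amount.  */
static int
arm_dp_immediate_p (uint32_t val)
{
  for (unsigned rot = 0; rot < 32; rot += 2)
    {
      /* Undo a rotate-right by ROT; if the result fits in 8 bits, VAL is
         encodable.  */
      uint32_t undone = rot ? (val << rot) | (val >> (32 - rot)) : val;
      if (undone <= 0xff)
        return 1;
    }
  return 0;
}

/* Each 32-bit half must be an encodable immediate or all-ones, mirroring
   the AND/IOR/XOR case in const_ok_for_dimode_op.  */
static int
dimode_logic_const_ok_p (uint64_t i)
{
  uint32_t lo = (uint32_t) i;
  uint32_t hi = (uint32_t) (i >> 32);
  return (arm_dp_immediate_p (hi) || hi == 0xffffffffu)
         && (arm_dp_immediate_p (lo) || lo == 0xffffffffu);
}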
@@ -17212,6 +17284,9 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask) par = emit_insn (par); REG_NOTES (par) = dwarf; + if (!return_in_pc) + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, + stack_pointer_rtx, stack_pointer_rtx); } /* Generate and emit an insn pattern that we will recognize as a pop_multi @@ -17282,6 +17357,9 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) par = emit_insn (par); REG_NOTES (par) = dwarf; + + arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs, + base_reg, base_reg); } /* Generate and emit a pattern that will be recognized as LDRD pattern. If even @@ -17357,6 +17435,7 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) pattern can be emitted now. */ par = emit_insn (par); REG_NOTES (par) = dwarf; + RTX_FRAME_RELATED_P (par) = 1; } i++; @@ -17373,7 +17452,12 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, 4 * i)); RTX_FRAME_RELATED_P (tmp) = 1; - emit_insn (tmp); + tmp = emit_insn (tmp); + if (!return_in_pc) + { + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i, + stack_pointer_rtx, stack_pointer_rtx); + } dwarf = NULL_RTX; @@ -17407,9 +17491,11 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) else { par = emit_insn (tmp); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } - REG_NOTES (par) = dwarf; } else if ((num_regs % 2) == 1 && return_in_pc) { @@ -17568,11 +17654,27 @@ thumb_force_lr_save (void) || df_regs_ever_live_p (LR_REGNUM)); } +/* We do not know if r3 will be available because + we do have an indirect tailcall happening in this + particular case. */ +static bool +is_indirect_tailcall_p (rtx call) +{ + rtx pat = PATTERN (call); + + /* Indirect tail call. */ + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + + pat = XEXP (XEXP (pat, 0), 0); + return REG_P (pat); +} /* Return true if r3 is used by any of the tail call insns in the current function. */ static bool -any_sibcall_uses_r3 (void) +any_sibcall_could_use_r3 (void) { edge_iterator ei; edge e; @@ -17586,7 +17688,8 @@ any_sibcall_uses_r3 (void) if (!CALL_P (call)) call = prev_nonnote_nondebug_insn (call); gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); - if (find_regno_fusage (call, USE, 3)) + if (find_regno_fusage (call, USE, 3) + || is_indirect_tailcall_p (call)) return true; } return false; @@ -17753,7 +17856,7 @@ arm_get_frame_offsets (void) /* If it is safe to use r3, then do so. This sometimes generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ - if (! any_sibcall_uses_r3 () + if (! any_sibcall_could_use_r3 () && arm_size_return_regs () <= 12 && (offsets->saved_regs_mask & (1 << 3)) == 0 && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd)) @@ -18064,16 +18167,16 @@ arm_expand_prologue (void) } else if (IS_NESTED (func_type)) { - /* The Static chain register is the same as the IP register + /* The static chain register is the same as the IP register used as a scratch register during stack frame creation. To get around this need to find somewhere to store IP whilst the frame is being created. We try the following places in order: - 1. The last argument register. + 1. The last argument register r3. 2. A slot on the stack above the frame. (This only works if the function is not a varargs function). - 3. Register r3, after pushing the argument registers + 3. 
Register r3 again, after pushing the argument registers onto the stack. Note - we only need to tell the dwarf2 backend about the SP @@ -18081,7 +18184,7 @@ arm_expand_prologue (void) doesn't need to be unwound, as it doesn't contain a value inherited from the caller. */ - if (df_regs_ever_live_p (3) == false) + if (!arm_r3_live_at_start_p ()) insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); else if (args_to_push == 0) { @@ -18222,8 +18325,7 @@ arm_expand_prologue (void) if (IS_NESTED (func_type)) { /* Recover the static chain register. */ - if (!df_regs_ever_live_p (3) - || saved_pretend_args) + if (!arm_r3_live_at_start_p () || saved_pretend_args) insn = gen_rtx_REG (SImode, 3); else /* if (crtl->args.pretend_args_size == 0) */ { @@ -19471,6 +19573,13 @@ thumb2_final_prescan_insn (rtx insn) enum arm_cond_code code; int n; int mask; + int max; + + /* Maximum number of conditionally executed instructions in a block + is minimum of the two max values: maximum allowed in an IT block + and maximum that is beneficial according to the cost model and tune. */ + max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ? + max_insns_skipped : MAX_INSN_PER_IT_BLOCK; /* Remove the previous insn from the count of insns to be output. */ if (arm_condexec_count) @@ -19513,9 +19622,9 @@ thumb2_final_prescan_insn (rtx insn) /* ??? Recognize conditional jumps, and combine them with IT blocks. */ if (GET_CODE (body) != COND_EXEC) break; - /* Allow up to 4 conditionally executed instructions in a block. */ + /* Maximum number of conditionally executed instructions in a block. */ n = get_attr_ce_count (insn); - if (arm_condexec_masklen + n > 4) + if (arm_condexec_masklen + n > max) break; predicate = COND_EXEC_TEST (body); @@ -23978,7 +24087,7 @@ thumb1_expand_prologue (void) all we really need to check here is if single register is to be returned, or multiple register return. */ void -thumb2_expand_return (void) +thumb2_expand_return (bool simple_return) { int i, num_regs; unsigned long saved_regs_mask; @@ -23991,7 +24100,7 @@ thumb2_expand_return (void) if (saved_regs_mask & (1 << i)) num_regs++; - if (saved_regs_mask) + if (!simple_return && saved_regs_mask) { if (num_regs == 1) { @@ -24269,6 +24378,7 @@ arm_expand_epilogue (bool really_return) if (frame_pointer_needed) { + rtx insn; /* Restore stack pointer if necessary. */ if (TARGET_ARM) { @@ -24279,9 +24389,12 @@ arm_expand_epilogue (bool really_return) /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ @@ -24291,16 +24404,25 @@ arm_expand_epilogue (bool really_return) { /* In Thumb-2 mode, the frame pointer points to the last saved register. 
*/ - amount = offsets->locals_base - offsets->saved_regs; - if (amount) - emit_insn (gen_addsi3 (hard_frame_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + amount = offsets->locals_base - offsets->saved_regs; + if (amount) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + hard_frame_pointer_rtx, + hard_frame_pointer_rtx); + } /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + insn = emit_insn (gen_movsi (stack_pointer_rtx, + hard_frame_pointer_rtx)); + arm_add_cfa_adjust_cfa_note (insn, 0, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -24313,12 +24435,15 @@ arm_expand_epilogue (bool really_return) amount = offsets->outgoing_args - offsets->saved_regs; if (amount) { + rtx tmp; /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (amount))); + tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (tmp, amount, + stack_pointer_rtx, stack_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -24371,6 +24496,8 @@ arm_expand_epilogue (bool really_return) REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (V2SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } if (saved_regs_mask) @@ -24418,6 +24545,9 @@ arm_expand_epilogue (bool really_return) REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, + stack_pointer_rtx); } } } @@ -24442,9 +24572,33 @@ arm_expand_epilogue (bool really_return) } if (crtl->args.pretend_args_size) - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (crtl->args.pretend_args_size))); + { + int i, j; + rtx dwarf = NULL_RTX; + rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + + RTX_FRAME_RELATED_P (tmp) = 1; + + if (cfun->machine->uses_anonymous_args) + { + /* Restore pretend args. Refer arm_expand_prologue on how to save + pretend_args in stack. 
*/ + int num_regs = crtl->args.pretend_args_size / 4; + saved_regs_mask = (0xf0 >> num_regs) & 0xf; + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + rtx reg = gen_rtx_REG (SImode, i); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + j++; + } + REG_NOTES (tmp) = dwarf; + } + arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size, + stack_pointer_rtx, stack_pointer_rtx); + } if (!really_return) return; @@ -25861,9 +26015,8 @@ arm_dwarf_register_span (rtx rtl) nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); - regno = (regno - FIRST_VFP_REGNUM) / 2; for (i = 0; i < nregs; i++) - XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i); return p; } @@ -26113,9 +26266,17 @@ arm_unwind_emit (FILE * asm_out_file, rtx insn) handled_one = true; break; + /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P + to get correct dwarf information for shrink-wrap. We should not + emit unwind information for it because these are used either for + pretend arguments or notes to adjust sp and restore registers from + stack. */ + case REG_CFA_ADJUST_CFA: + case REG_CFA_RESTORE: + return; + case REG_CFA_DEF_CFA: case REG_CFA_EXPRESSION: - case REG_CFA_ADJUST_CFA: case REG_CFA_OFFSET: /* ??? Only handling here what we actually emit. */ gcc_unreachable (); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 3a49a90c184..387d2717431 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -183,6 +183,11 @@ extern arm_cc arm_current_cc; #define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) +/* The maximaum number of instructions that is beneficial to + conditionally execute. */ +#undef MAX_CONDITIONAL_EXECUTE +#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute () + extern int arm_target_label; extern int arm_ccfsm_state; extern GTY(()) rtx arm_target_insn; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 4b45c984bf4..c464eddebd4 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -93,6 +93,15 @@ ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. (define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + ;; Operand number of an input operand that is shifted. Zero if the ;; given instruction does not shift one of its input operands. (define_attr "shift" "" (const_int 0)) @@ -103,6 +112,8 @@ (define_attr "fpu" "none,vfp" (const (symbol_ref "arm_fpu_attr"))) +(define_attr "predicated" "yes,no" (const_string "no")) + ; LENGTH of an instruction (in bytes) (define_attr "length" "" (const_int 4)) @@ -190,6 +201,15 @@ (cond [(eq_attr "insn_enabled" "no") (const_string "no") + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -230,53 +250,91 @@ ;; scheduling information. 
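The epilogue hunk above that restores pretend args derives the register set from the number of pushed words with (0xf0 >> num_regs) & 0xf, i.e. the highest-numbered argument registers up to r3. A throwaway C harness (the mask expression is taken from the hunk, the rest is illustrative) makes the mapping visible:

#include <stdio.h>

int
main (void)
{
  /* pretend_args_size is a multiple of 4; each word corresponds to one
     argument register pushed by the prologue.  */
  for (int num_regs = 1; num_regs <= 4; num_regs++)
    {
      unsigned mask = (0xf0 >> num_regs) & 0xf;
      printf ("num_regs=%d mask=0x%x regs:", num_regs, mask);
      for (int i = 0; i < 4; i++)
        if (mask & (1u << i))
          printf (" r%d", i);
      printf ("\n");   /* 1 -> r3, 2 -> r2 r3, 3 -> r1 r2 r3, 4 -> r0 r1 r2 r3 */
    }
  return 0;
}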
(define_attr "insn" - "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other" + "mov,mvn,clz,mrs,msr,xtab,sat,other" (const_string "other")) -; TYPE attribute is used to detect floating point instructions which, if -; running on a co-processor can run in parallel with other, basic instructions -; If write-buffer scheduling is enabled then it can also be used in the -; scheduling of writes. - -; Classification of each insn -; Note: vfp.md has different meanings for some of these, and some further -; types as well. See that file for details. -; simple_alu_imm a simple alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand and has an immediate -; operand. This currently only tracks very basic immediate -; alu operations. -; alu_reg any alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand -; and does not have an immediate operand. This is -; also the default -; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB -; alu_shift any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a constant -; alu_shift_reg any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a register value -; mult a multiply instruction -; block blockage insn, this blocks all functional units -; float a floating point arithmetic operation (subject to expansion) -; fdivd DFmode floating point division -; fdivs SFmode floating point division -; f_load[sd] A single/double load from memory. Used for VFP unit. -; f_store[sd] A single/double store to memory. Used for VFP unit. -; f_flag a transfer of co-processor flags to the CPSR -; f_2_r transfer float to core (no memory needed) -; r_2_f transfer core to float -; f_cvt convert floating<->integral -; branch a branch -; call a subroutine call -; load_byte load byte(s) from memory to arm registers -; load1 load 1 word from memory to arm registers -; load2 load 2 words from memory to arm registers -; load3 load 3 words from memory to arm registers -; load4 load 4 words from memory to arm registers -; store store 1 word to memory from arm registers -; store2 store 2 words -; store3 store 3 words -; store4 store 4 (or more) words +; TYPE attribute is used to classify instructions for use in scheduling. ; +; Instruction classification: +; +; alu_reg any alu instruction that doesn't hit memory or fp +; regs or have a shifted source operand and does not have +; an immediate operand. This is also the default. +; alu_shift any data instruction that doesn't hit memory or fp. +; regs, but has a source operand shifted by a constant. +; alu_shift_reg any data instruction that doesn't hit memory or fp. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; f_2_r transfer from float to core (no memory needed). +; f_cvt conversion between float and integral. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. 
+; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcpys single precision floating point cpy. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmul[d,s] double/single floating point multiply. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov integer move. +; mul integer multiply. +; muls integer multiply, flag setting. +; r_2_f transfer from core to float. +; sdiv signed division. +; simple_alu_imm simple alu instruction that doesn't hit memory or fp +; regs or have a shifted source operand and has an +; immediate operand. This currently only tracks very basic +; immediate alu operations. +; simple_alu_shift simple alu instruction with a shifted source operand. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. +; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. 
(define_attr "type" "simple_alu_imm,\ @@ -284,7 +342,6 @@ simple_alu_shift,\ alu_shift,\ alu_shift_reg,\ - mult,\ block,\ float,\ fdivd,\ @@ -328,18 +385,57 @@ ffarithd,\ fcmps,\ fcmpd,\ - fcpys" - (if_then_else - (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,\ - umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") - (const_string "mult") - (const_string "alu_reg"))) + fcpys,\ + smulxy,\ + smlaxy,\ + smlalxy,\ + smulwy,\ + smlawx,\ + mul,\ + muls,\ + mla,\ + mlas,\ + umull,\ + umulls,\ + umlal,\ + umlals,\ + smull,\ + smulls,\ + smlal,\ + smlals,\ + smlawy,\ + smuad,\ + smuadx,\ + smlad,\ + smladx,\ + smusd,\ + smusdx,\ + smlsd,\ + smlsdx,\ + smmul,\ + smmulr,\ + smmla,\ + umaal,\ + smlald,\ + smlsld,\ + sdiv,\ + udiv" + (const_string "alu_reg")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) ; Is this an (integer side) multiply with a 64-bit result? (define_attr "mul64" "no,yes" (if_then_else - (eq_attr "insn" - "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") (const_string "yes") (const_string "no"))) @@ -1464,18 +1560,21 @@ (match_operand:SI 1 "s_register_operand" "%0,r")))] "TARGET_32BIT && !arm_arch6" "mul%?\\t%0, %2, %1" - [(set_attr "insn" "mul") + [(set_attr "type" "mul") (set_attr "predicable" "yes")] ) (define_insn "*arm_mulsi3_v6" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (mult:SI (match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r") + (match_operand:SI 2 "s_register_operand" "l,0,r")))] "TARGET_32BIT && arm_arch6" "mul%?\\t%0, %1, %2" - [(set_attr "insn" "mul") - (set_attr "predicable" "yes")] + [(set_attr "type" "mul") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,yes,no")] ) ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands @@ -1495,7 +1594,7 @@ return \"mul\\t%0, %2\"; " [(set_attr "length" "4,4,2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*thumb_mulsi3_v6" @@ -1508,7 +1607,7 @@ mul\\t%0, %1 mul\\t%0, %1" [(set_attr "length" "2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0" @@ -1522,7 +1621,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0_v6" @@ -1536,7 +1635,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch" @@ -1549,7 +1648,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch_v6" @@ -1562,7 +1661,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) ;; Unnamed templates to match MLA instruction. 
@@ -1575,7 +1674,7 @@ (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] "TARGET_32BIT && !arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") + [(set_attr "type" "mla") (set_attr "predicable" "yes")] ) @@ -1587,8 +1686,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_32BIT && arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulsi3addsi_compare0" @@ -1605,7 +1705,7 @@ "TARGET_ARM && arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_v6" @@ -1622,7 +1722,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch" @@ -1637,7 +1737,7 @@ "TARGET_ARM && !arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch_v6" @@ -1652,7 +1752,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3subsi" @@ -1663,8 +1763,9 @@ (match_operand:SI 1 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch_thumb2" "mls%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "maddsidi4" @@ -1686,7 +1787,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") + [(set_attr "type" "smlal") (set_attr "predicable" "yes")] ) @@ -1699,8 +1800,9 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; 32x32->64 widening multiply. 
@@ -1725,7 +1827,7 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1736,8 +1838,9 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsidi3" @@ -1756,7 +1859,7 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1767,8 +1870,9 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umaddsidi4" @@ -1790,7 +1894,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") + [(set_attr "type" "umlal") (set_attr "predicable" "yes")] ) @@ -1803,8 +1907,9 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "umlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "smulsi3_highpart" @@ -1832,7 +1937,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1847,8 +1952,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsi3_highpart" @@ -1876,7 +1982,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1891,8 +1997,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "mulhisi3" @@ -1903,7 +2010,7 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smulbb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") + [(set_attr "type" "smulxy") (set_attr "predicable" "yes")] ) @@ -1916,8 +2023,9 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smultb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulhisi3bt" @@ -1929,8 +2037,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smulbt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr 
"predicable_short_it" "no")] ) (define_insn "*mulhisi3tt" @@ -1943,8 +2052,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smultt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhisi4" @@ -1956,8 +2066,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlabb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; Note: there is no maddhisi4ibt because this one is canonical form @@ -1971,8 +2082,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*maddhisi4tt" @@ -1986,22 +2098,24 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatt%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhidi4" [(set (match_operand:DI 0 "s_register_operand" "=r") (plus:DI (mult:DI (sign_extend:DI - (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:HI 1 "s_register_operand" "r")) (sign_extend:DI (match_operand:HI 2 "s_register_operand" "r"))) (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlalbb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Note: there is no maddhidi4ibt because this one is canonical form (define_insn "*maddhidi4tb" @@ -2016,8 +2130,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*maddhidi4tt" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -2033,8 +2148,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltt%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "mulsf3" [(set (match_operand:SF 0 "s_register_operand" "") @@ -2163,29 +2279,28 @@ ) (define_insn_and_split "*anddi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w") - (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0") - (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] "TARGET_32BIT && !TARGET_IWMMXT" { switch (which_alternative) { - case 0: - case 1: + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); case 2: - case 3: /* fall 
through */ - return "#"; - case 4: /* fall through */ - case 8: return "vand\t%P0, %P1, %P2"; + case 3: + case 4: case 5: /* fall through */ - case 9: return neon_output_logic_immediate ("vand", &operands[2], - DImode, 1, VALID_NEON_QREG_MODE (DImode)); - case 6: return "#"; - case 7: return "#"; + return "#"; default: gcc_unreachable (); } } - "TARGET_32BIT && !TARGET_IWMMXT" + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" [(set (match_dup 3) (match_dup 4)) (set (match_dup 5) (match_dup 6))] " @@ -2201,19 +2316,11 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*, + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "length" "8,8,8,8,*,*,8,8,*,*") - (set (attr "insn_enabled") (if_then_else - (lt (symbol_ref "which_alternative") - (const_int 4)) - (if_then_else (match_test "!TARGET_NEON") - (const_string "yes") - (const_string "no")) - (if_then_else (match_test "TARGET_NEON") - (const_string "yes") - (const_string "no"))))] + (set_attr "length" "*,*,8,8,8,8,*,*") + ] ) (define_insn_and_split "*anddi_zesidi_di" @@ -2399,7 +2506,7 @@ [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (zero_extract:SI (match_operand:SI 0 "s_register_operand" "r") - (match_operand 1 "const_int_operand" "n") + (match_operand 1 "const_int_operand" "n") (match_operand 2 "const_int_operand" "n")) (const_int 0)))] "TARGET_32BIT @@ -2415,6 +2522,7 @@ " [(set_attr "conds" "set") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "simple_alu_imm")] ) @@ -2842,7 +2950,8 @@ "arm_arch_thumb2" "bfc%?\t%0, %2, %1" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "insv_t2" @@ -2853,7 +2962,8 @@ "arm_arch_thumb2" "bfi%?\t%0, %3, %2, %1" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; constants for op 2 will never be given to these patterns. 
@@ -2880,7 +2990,7 @@ [(set_attr "length" "8") (set_attr "predicable" "yes")] ) - + (define_insn_and_split "*anddi_notzesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (zero_extend:DI @@ -2905,9 +3015,10 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI @@ -2928,16 +3039,18 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn "andsi_notsi_si" [(set (match_operand:SI 0 "s_register_operand" "=r") (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_32BIT" "bic%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "thumb1_bicsi3" @@ -2997,14 +3110,47 @@ "" ) -(define_insn "*iordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) (define_insn "*iordi_zesidi_di" @@ -3017,7 +3163,8 @@ orr%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*iordi_sesidi_di" @@ -3137,19 +3284,49 @@ (define_expand "xordi3" [(set (match_operand:DI 0 "s_register_operand" "") (xor:DI (match_operand:DI 1 "s_register_operand" "") - (match_operand:DI 2 "s_register_operand" "")))] + (match_operand:DI 2 "arm_xordi_operand" "")))] "TARGET_32BIT" "" ) -(define_insn "*xordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (xor:DI 
(match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*xordi_zesidi_di" @@ -3162,7 +3339,8 @@ eor%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*xordi_sesidi_di" @@ -3292,7 +3470,8 @@ "" [(set_attr "length" "8") (set_attr "ce_count" "2") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; ??? 
Are these four splitters still beneficial when the Thumb-2 bitfield @@ -3428,7 +3607,8 @@ (const_int 0)))] "TARGET_32BIT" "bic%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*smax_m1" @@ -3437,7 +3617,8 @@ (const_int -1)))] "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn_and_split "*arm_smax_insn" @@ -3485,7 +3666,8 @@ (const_int 0)))] "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn_and_split "*arm_smin_insn" @@ -4160,6 +4342,7 @@ "TARGET_32BIT" "mvn%?\\t%0, %1%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "1") (set_attr "insn" "mvn") (set_attr "arch" "32,a") @@ -4373,6 +4556,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load1")]) (define_insn "unaligned_loadhis" @@ -4385,6 +4569,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_loadhiu" @@ -4397,6 +4582,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_storesi" @@ -4408,6 +4594,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) (define_insn "unaligned_storehi" @@ -4419,6 +4606,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) ;; Unaligned double-word load and store. 
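The *anddi3_insn, *iordi3_insn and *xordi3_insn splitters above lower a core-register DImode logical operation into two independent SImode operations on the low and high words (gen_lowpart/gen_highpart plus simplify_gen_binary). The identity they rely on is easy to check in standalone C; XOR is shown, AND and IOR split identically:

#include <assert.h>
#include <stdint.h>

static uint64_t
xor64_via_halves (uint64_t a, uint64_t b)
{
  /* Operate on each 32-bit word separately, then reassemble.  */
  uint32_t lo = (uint32_t) a ^ (uint32_t) b;
  uint32_t hi = (uint32_t) (a >> 32) ^ (uint32_t) (b >> 32);
  return ((uint64_t) hi << 32) | lo;
}

int
main (void)
{
  uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
  assert (xor64_via_halves (a, b) == (a ^ b));
  return 0;
}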
@@ -4487,7 +4675,8 @@ "arm_arch_thumb2" "sbfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "extzv_t2" @@ -4498,7 +4687,8 @@ "arm_arch_thumb2" "ubfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) @@ -4510,7 +4700,8 @@ "TARGET_IDIV" "sdiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "sdiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "sdiv")] ) (define_insn "udivsi3" @@ -4520,7 +4711,8 @@ "TARGET_IDIV" "udiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "udiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "udiv")] ) @@ -4582,11 +4774,14 @@ ) (define_insn "*arm_negsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (neg:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "rsb%?\\t%0, %1, #0" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4")] ) (define_insn "*thumb1_negsi2" @@ -4904,11 +5099,14 @@ ) (define_insn "*arm_one_cmplsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (not:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "mvn%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") (set_attr "insn" "mvn")] ) @@ -5234,7 +5432,8 @@ "TARGET_INT_SIMD" "uxtah%?\\t%0, %2, %1" [(set_attr "type" "alu_shift") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "zero_extendqisi2" @@ -5327,6 +5526,7 @@ "TARGET_INT_SIMD" "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "xtab") (set_attr "type" "alu_shift")] ) @@ -5379,7 +5579,8 @@ "TARGET_32BIT" "tst%?\\t%0, #255" [(set_attr "conds" "set") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "extendhisi2" @@ -5565,6 +5766,7 @@ ldr%(sh%)\\t%0, %1" [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] ) @@ -5677,7 +5879,8 @@ "sxtab%?\\t%0, %2, %1" [(set_attr "type" "alu_shift") (set_attr "insn" "xtab") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_split @@ -6133,6 +6336,7 @@ "arm_arch_thumb2" "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4")] ) @@ -7012,26 +7216,28 @@ " ) - (define_insn "*arm_movqi_insn" - [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,Uu,r,m") - (match_operand:QI 1 "general_operand" "r,I,K,Uu,l,m,r"))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m") + (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))] "TARGET_32BIT && ( register_operand (operands[0], QImode) || register_operand (operands[1], QImode))" "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + 
mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0" - [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1") - (set_attr "insn" "mov,mov,mvn,*,*,*,*") + [(set_attr "type" "*,*,simple_alu_imm,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1") + (set_attr "insn" "mov,mov,mov,mov,mvn,*,*,*,*") (set_attr "predicable" "yes") - (set_attr "arch" "any,any,any,t2,t2,any,any") - (set_attr "length" "4,4,4,2,2,4,4")] + (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") + (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") + (set_attr "length" "2,4,4,2,4,2,2,4,4")] ) (define_insn "*thumb1_movqi_insn" @@ -8701,7 +8907,7 @@ (define_expand "movsfcc" [(set (match_operand:SF 0 "s_register_operand" "") - (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "") (match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT" @@ -8723,7 +8929,7 @@ (define_expand "movdfcc" [(set (match_operand:DF 0 "s_register_operand" "") - (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "") (match_operand:DF 2 "s_register_operand" "") (match_operand:DF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" @@ -9276,17 +9482,17 @@ [(set_attr "type" "call")] ) -(define_expand "return" - [(return)] +(define_expand "<return_str>return" + [(returns)] "(TARGET_ARM || (TARGET_THUMB2 && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL && !IS_STACKALIGN (arm_current_func_type ()))) - && USE_RETURN_INSN (FALSE)" + <return_cond_false>" " { if (TARGET_THUMB2) { - thumb2_expand_return (); + thumb2_expand_return (<return_simple_p>); DONE; } } @@ -9311,13 +9517,13 @@ (set_attr "predicable" "yes")] ) -(define_insn "*cond_return" +(define_insn "*cond_<return_str>return" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) - (return) + (returns) (pc)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "TARGET_ARM <return_cond_true>" "* { if (arm_ccfsm_state == 2) @@ -9325,20 +9531,21 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, false, false); + return output_return_instruction (operands[0], true, false, + <return_simple_p>); }" [(set_attr "conds" "use") (set_attr "length" "12") (set_attr "type" "load1")] ) -(define_insn "*cond_return_inverted" +(define_insn "*cond_<return_str>return_inverted" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) (pc) - (return)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + (returns)))] + "TARGET_ARM <return_cond_true>" "* { if (arm_ccfsm_state == 2) @@ -9346,7 +9553,8 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, true, false); + return output_return_instruction (operands[0], true, true, + <return_simple_p>); }" [(set_attr "conds" "use") (set_attr "length" "12") @@ -9908,6 +10116,16 @@ (eq:SI (match_operand:SI 1 "s_register_operand" "") (const_int 0))) (clobber (reg:CC CC_REGNUM))] + "arm_arch5 && TARGET_32BIT" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI 
(match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT && reload_completed" [(parallel [(set (reg:CC CC_REGNUM) @@ -9948,7 +10166,7 @@ (set (match_dup 0) (const_int 1)))]) (define_insn_and_split "*compare_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") (match_operator:SI 1 "arm_comparison_operator" [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_add_operand" "rI,L")])) @@ -9977,29 +10195,87 @@ ;; Attempt to improve the sequence generated by the compare_scc splitters ;; not to use conditional execution. + +;; Rd = (eq (reg1) (const_int0)) // ARMv5 +;; clz Rd, reg1 +;; lsr Rd, Rd, #5 (define_peephole2 [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "arm_rhs_operand" ""))) + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +;; Rd = (eq (reg1) (const_int0)) // !ARMv5 +;; negs Rd, reg1 +;; adc Rd, Rd, reg1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) (set (match_operand:SI 0 "register_operand" "") (const_int 0))) (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) (set (match_dup 0) (const_int 1))) - (match_scratch:SI 3 "r")] - "TARGET_32BIT" + (match_scratch:SI 2 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" [(parallel [(set (reg:CC CC_REGNUM) - (compare:CC (match_dup 1) (match_dup 2))) - (set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))]) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) + +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 +;; sub Rd, Reg1, reg2 +;; clz Rd, Rd +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (clz:SI (match_dup 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + + +;; Rd = (eq (reg1) (reg2/imm)) // ! 
ARMv5 +;; sub T1, Reg1, reg2 +;; negs Rd, T1 +;; adc Rd, Rd, T1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 3 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2))) (parallel [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (match_dup 3))) (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) - (parallel - [(set (match_dup 0) - (plus:SI (plus:SI (match_dup 0) (match_dup 3)) - (geu:SI (reg:CC CC_REGNUM) (const_int 0)))) - (clobber (reg:CC CC_REGNUM))])]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) (define_insn "*cond_move" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") @@ -10400,7 +10676,7 @@ ) (define_insn_and_split "*ior_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (ior:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -10438,7 +10714,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -10456,7 +10732,7 @@ (set_attr "length" "16")]) (define_insn_and_split "*and_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -10496,7 +10772,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -10518,7 +10794,7 @@ ;; need only zero the value if false (if true, then the value is already ;; correct). 
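The new peepholes above spell out two branch-free forms of Rd = (reg == 0): on ARMv5 and later, clz Rd, reg; lsr Rd, Rd, #5 (CLZ of zero is 32, and only 32 survives a right shift by 5), and without CLZ, negs Rd, reg; adc Rd, Rd, reg, where the carry out of 0 - reg is set exactly when reg is zero; the register/immediate comparisons reduce to the same idioms after an initial subtraction. A standalone C check of both identities (clz32 models the CLZ instruction, which unlike __builtin_clz is defined for a zero input):

#include <assert.h>
#include <stdint.h>

static unsigned
clz32 (uint32_t x)
{
  return x ? (unsigned) __builtin_clz (x) : 32;
}

static uint32_t
eq0_clz (uint32_t x)              /* clz rd, x ; lsr rd, rd, #5 */
{
  return clz32 (x) >> 5;
}

static uint32_t
eq0_negs_adc (uint32_t x)         /* negs rd, x ; adc rd, rd, x */
{
  uint32_t rd = 0u - x;           /* NEGS result */
  uint32_t carry = (x == 0);      /* NEGS sets C (no borrow) only for x == 0 */
  return rd + x + carry;          /* ADC: (0 - x) + x + C == C */
}

int
main (void)
{
  uint32_t tests[] = { 0, 1, 5, 0x80000000u, 0xffffffffu };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    {
      uint32_t expect = tests[i] == 0;
      assert (eq0_clz (tests[i]) == expect);
      assert (eq0_negs_adc (tests[i]) == expect);
    }
  return 0;
}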
(define_insn_and_split "*and_scc_scc_nodom" - [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r,r,0") (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) @@ -12095,6 +12371,7 @@ (const_int 0)])] "TARGET_32BIT" "" +[(set_attr "predicated" "yes")] ) (define_insn "force_register_use" @@ -12365,7 +12642,8 @@ false, true))" "ldrd%?\t%0, %3, [%1, %2]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -12379,7 +12657,8 @@ operands[1], 0, false, true))" "ldrd%?\t%0, %2, [%1]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base_neg" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -12393,7 +12672,8 @@ operands[1], -4, false, true))" "ldrd%?\t%0, %2, [%1, #-4]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -12410,7 +12690,8 @@ false, false))" "strd%?\t%2, %4, [%0, %1]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base" [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk")) @@ -12424,7 +12705,8 @@ operands[0], 0, false, false))" "strd%?\t%1, %2, [%0]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base_neg" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -12438,7 +12720,8 @@ operands[0], -4, false, false))" "strd%?\t%1, %2, [%0, #-4]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Load the load/store double peephole optimizations. diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index afb42421c06..b9ae2b09682 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -239,6 +239,10 @@ mword-relocations Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) Only generate absolute relocations on word sized values. +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + mfix-cortex-m3-ldrd Target Report Var(fix_cm3_ldrd) Init(2) Avoid overlapping destination and address registers on LDRD instructions diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index ab65978aebc..94e8c35f839 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -96,7 +96,7 @@ ;; until after the memory stage. (define_insn_reservation "1020mult1" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "1020a_e,1020a_m,1020a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +104,7 @@ ;; the execute stage. 
(define_insn_reservation "1020mult2" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "1020a_e*2,1020a_m,1020a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +112,7 @@ ;; the memory stage. (define_insn_reservation "1020mult3" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "1020a_e*2,1020a_m,1020a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +120,7 @@ ;; available after three iterations. (define_insn_reservation "1020mult4" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "1020a_e*4,1020a_m,1020a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +135,7 @@ ;; available after the memory cycle. (define_insn_reservation "1020mult5" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "1020a_e*3,1020a_m,1020a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +143,7 @@ ;; The value result is available after four iterations. (define_insn_reservation "1020mult6" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "1020a_e*5,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md index 3fa4bd0c378..67b985ce68e 100644 --- a/gcc/config/arm/arm1026ejs.md +++ b/gcc/config/arm/arm1026ejs.md @@ -96,7 +96,7 @@ ;; until after the memory stage. (define_insn_reservation "mult1" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "a_e,a_m,a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +104,7 @@ ;; the execute stage. (define_insn_reservation "mult2" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "a_e*2,a_m,a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +112,7 @@ ;; the memory stage. (define_insn_reservation "mult3" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "a_e*2,a_m,a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +120,7 @@ ;; available after three iterations. (define_insn_reservation "mult4" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "a_e*4,a_m,a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +135,7 @@ ;; available after the memory cycle. (define_insn_reservation "mult5" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "a_e*3,a_m,a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +143,7 @@ ;; The value result is available after four iterations. 
(define_insn_reservation "mult6" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "a_e*5,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md index b5802e03919..3030182acca 100644 --- a/gcc/config/arm/arm1136jfs.md +++ b/gcc/config/arm/arm1136jfs.md @@ -129,13 +129,13 @@ ;; Multiply and multiply-accumulate results are available after four stages. (define_insn_reservation "11_mult1" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "e_1*2,e_2,e_3,e_wb") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult2" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult1,11_mult2" @@ -160,13 +160,13 @@ ;; the two multiply-accumulate instructions. (define_insn_reservation "11_mult3" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smull,umull,smlal,umlal")) + (eq_attr "type" "smull,umull,smlal,umlal")) "e_1*3,e_2,e_3,e_wb*2") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult4" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulls,umulls,smlals,umlals")) + (eq_attr "type" "smulls,umulls,smlals,umlals")) "e_1*3,e_2,e_3,e_wb*2") (define_bypass 4 "11_mult3,11_mult4" @@ -190,7 +190,8 @@ ;; cycles. (define_insn_reservation "11_mult5" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx")) + (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\ + smusd,smusdx,smlsd,smlsdx")) "e_1,e_2,e_3,e_wb") (define_bypass 2 "11_mult5" @@ -211,14 +212,14 @@ ;; The same idea, then the 32-bit result is added to a 64-bit quantity. (define_insn_reservation "11_mult6" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e_1*2,e_2,e_3,e_wb*2") ;; Signed 32x32 multiply, then the most significant 32 bits are extracted ;; and are available after the memory stage. 
(define_insn_reservation "11_mult7" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smmul,smmulr")) + (eq_attr "type" "smmul,smmulr")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult6,11_mult7" diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md index 1fc82d3db7f..4db404e766f 100644 --- a/gcc/config/arm/arm926ejs.md +++ b/gcc/config/arm/arm926ejs.md @@ -81,32 +81,32 @@ (define_insn_reservation "9_mult1" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "e*2,m,w") (define_insn_reservation "9_mult2" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e*3,m,w") (define_insn_reservation "9_mult3" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "e*3,m,w") (define_insn_reservation "9_mult4" 5 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "e*4,m,w") (define_insn_reservation "9_mult5" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smulxy,smlaxy,smlawx")) + (eq_attr "type" "smulxy,smlaxy,smlawx")) "e,m,w") (define_insn_reservation "9_mult6" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 7e7b3e69e0a..251d4975b7c 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -260,6 +260,18 @@ (and (match_code "const_int") (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + (define_constraint "Di" "@internal In ARM/Thumb-2 state a const_int or const_double where both the high @@ -317,6 +329,9 @@ (and (match_code "const_double") (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) +(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") + (define_memory_constraint "Ua" "@internal An address valid for loading/storing register exclusive" @@ -346,21 +361,21 @@ In ARM/Thumb-2 state a valid address for Neon doubleword vector load/store instructions." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0, true)"))) (define_memory_constraint "Um" "@internal In ARM/Thumb-2 state a valid address for Neon element and structure load/store instructions." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) (define_memory_constraint "Us" "@internal In ARM/Thumb-2 state a valid address for non-offset loads/stores of quad-word values in four ARM registers." 
(and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)"))) (define_memory_constraint "Uq" "@internal diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index f0c1985fab5..981d055c668 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -87,28 +87,26 @@ ;; 32-bit multiplies (define_insn_reservation "cortex_a15_mult32" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "no")))) + (and (eq_attr "mul32" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx") ;; 64-bit multiplies (define_insn_reservation "cortex_a15_mult64" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "yes")))) + (and (eq_attr "mul64" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx*2") ;; Integer divide (define_insn_reservation "cortex_a15_udiv" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "ca15_issue1,ca15_mx") (define_insn_reservation "cortex_a15_sdiv" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "ca15_issue1,ca15_mx") ;; Block all issue pipes for a cycle diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md index 41a2c37e8fa..963d5babd7b 100644 --- a/gcc/config/arm/cortex-a5.md +++ b/gcc/config/arm/cortex-a5.md @@ -80,7 +80,8 @@ (define_insn_reservation "cortex_a5_mul" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "mult")) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a5_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md index b6a291e017b..e67fe55ecd3 100644 --- a/gcc/config/arm/cortex-a53.md +++ b/gcc/config/arm/cortex-a53.md @@ -89,7 +89,8 @@ (define_insn_reservation "cortex_a53_mul" 3 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "mult")) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a53_single_issue") ;; A multiply with a single-register result or an MLA, followed by an @@ -103,12 +104,12 @@ ;; Punt with a high enough latency for divides. (define_insn_reservation "cortex_a53_udiv" 8 (and (eq_attr "tune" "cortexa53") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7") (define_insn_reservation "cortex_a53_sdiv" 9 (and (eq_attr "tune" "cortexa53") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8") diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md index 3750f74f2c6..960174fb90a 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -127,8 +127,9 @@ (define_insn_reservation "cortex_a7_mul" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "mult") - (eq_attr "neon_type" "none"))) + (and (eq_attr "neon_type" "none") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_a7_both") ;; Forward the result of a multiply operation to the accumulator @@ -140,7 +141,7 @@ ;; The latency depends on the operands, so we use an estimate here. 
(define_insn_reservation "cortex_a7_idiv" 5 (and (eq_attr "tune" "cortexa7") - (eq_attr "insn" "udiv,sdiv")) + (eq_attr "type" "udiv,sdiv")) "cortex_a7_both*5") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md index bd1132a18c3..8d3e98734ce 100644 --- a/gcc/config/arm/cortex-a8.md +++ b/gcc/config/arm/cortex-a8.md @@ -139,22 +139,22 @@ (define_insn_reservation "cortex_a8_mul" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mul,smulxy,smmul")) + (eq_attr "type" "mul,smulxy,smmul")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mla" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mull" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy")) + (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy")) "cortex_a8_multiply_3") (define_insn_reservation "cortex_a8_smulwy" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smulwy,smuad,smusd")) + (eq_attr "type" "smulwy,smuad,smusd")) "cortex_a8_multiply") ;; smlald and smlsld are multiply-accumulate instructions but do not @@ -162,7 +162,7 @@ ;; cannot go in cortex_a8_mla above. (See below for bypass details.) (define_insn_reservation "cortex_a8_smlald" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_a8_multiply_2") ;; A multiply with a single-register result or an MLA, followed by an diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md index abbaa8d4e1e..05c114dc366 100644 --- a/gcc/config/arm/cortex-a9.md +++ b/gcc/config/arm/cortex-a9.md @@ -130,29 +130,29 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") ;; We get 16*16 multiply / mac results in 3 cycles. (define_insn_reservation "cortex_a9_mult16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smulxy")) + (eq_attr "type" "smulxy")) "cortex_a9_mult16") ;; The 16*16 mac is slightly different that it ;; reserves M1 and M2 in the same cycle. (define_insn_reservation "cortex_a9_mac16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smlaxy")) + (eq_attr "type" "smlaxy")) "cortex_a9_mac16") (define_insn_reservation "cortex_a9_multiply" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mul,smmul,smmulr")) + (eq_attr "type" "mul,smmul,smmulr")) "cortex_a9_mult") (define_insn_reservation "cortex_a9_mac" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_a9_mac") (define_insn_reservation "cortex_a9_multiply_long" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) "cortex_a9_mult_long") ;; An instruction with a result in E2 can be forwarded diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md index 47b03644f73..dc3a3299572 100644 --- a/gcc/config/arm/cortex-m4.md +++ b/gcc/config/arm/cortex-m4.md @@ -31,7 +31,10 @@ ;; ALU and multiply is one cycle. 
(define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult")) + (ior (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,\ + alu_shift,alu_shift_reg") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_m4_ex") ;; Byte, half-word and word load is two cycles. diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 84e4a3a1e60..6d37079f2b3 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -128,32 +128,32 @@ (define_insn_reservation "cortex_r4_mul_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mul,smmul")) + (eq_attr "type" "mul,smmul")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mul_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + (eq_attr "type" "smulxy,smulwy,smuad,smusd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mla_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mla_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + (eq_attr "type" "smlaxy,smlawy,smlad,smlsd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_smlald" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mull" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smull,umull,umlal,umaal")) + (eq_attr "type" "smull,umull,umlal,umaal")) "cortex_r4_mul_2") ;; A multiply or an MLA with a single-register result, followed by an @@ -196,12 +196,12 @@ ;; This gives a latency of nine for udiv and ten for sdiv. (define_insn_reservation "cortex_r4_udiv" 9 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "cortex_r4_div_9") (define_insn_reservation "cortex_r4_sdiv" 10 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "cortex_r4_div_10") ;; Branches. We assume correct prediction. 
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md index e03894aa61c..efc6a1db959 100644 --- a/gcc/config/arm/fa526.md +++ b/gcc/config/arm/fa526.md @@ -76,12 +76,12 @@ (define_insn_reservation "526_mult1" 2 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy")) + (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy")) "fa526_core") (define_insn_reservation "526_mult2" 5 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ umlals,smulls,smlals,smlawx")) "fa526_core*4") diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md index d53617a78e3..dec26c5c3ac 100644 --- a/gcc/config/arm/fa606te.md +++ b/gcc/config/arm/fa606te.md @@ -71,22 +71,22 @@ (define_insn_reservation "606te_mult1" 2 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "fa606te_core") (define_insn_reservation "606te_mult2" 3 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy")) + (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy")) "fa606te_core*2") (define_insn_reservation "606te_mult3" 4 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "mul,mla,muls,mlas")) + (eq_attr "type" "mul,mla,muls,mlas")) "fa606te_core*3") (define_insn_reservation "606te_mult4" 5 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) "fa606te_core*4") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md index 690cb46d878..818ad607b47 100644 --- a/gcc/config/arm/fa626te.md +++ b/gcc/config/arm/fa626te.md @@ -82,22 +82,22 @@ (define_insn_reservation "626te_mult1" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fa626te_core") (define_insn_reservation "626te_mult2" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fa626te_core") (define_insn_reservation "626te_mult3" 3 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fa626te_core*2") (define_insn_reservation "626te_mult4" 4 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fa626te_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md index 07ab018f667..8790b035aa5 100644 --- a/gcc/config/arm/fa726te.md +++ b/gcc/config/arm/fa726te.md @@ -115,7 +115,7 @@ (define_insn_reservation "726te_mult_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ + (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy")) "fa726te_issue+fa726te_mac_pipe") diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md index 8691450c3c7..f3b7dadcba2 100644 --- a/gcc/config/arm/fmp626.md +++ b/gcc/config/arm/fmp626.md @@ -77,22 +77,22 @@ (define_insn_reservation "mp626_mult1" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fmp626_core") 
(define_insn_reservation "mp626_mult2" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fmp626_core") (define_insn_reservation "mp626_mult3" 3 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fmp626_core*2") (define_insn_reservation "mp626_mult4" 4 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fmp626_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index b3ad42b376f..d84929f3d1f 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -496,3 +496,11 @@ (define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) +;; Both kinds of return insn. +(define_code_iterator returns [return simple_return]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) +(define_code_attr return_simple_p [(return "false") (simple_return "true")]) +(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") + (simple_return " && use_simple_return_p ()")]) +(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") + (simple_return " && use_simple_return_p ()")]) diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md index 8ebdfc81761..ad137d492e4 100644 --- a/gcc/config/arm/ldmstm.md +++ b/gcc/config/arm/ldmstm.md @@ -37,7 +37,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia" [(match_parallel 0 "load_multiple_operation" @@ -74,7 +75,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -108,7 +110,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -125,7 +128,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -302,7 +306,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm4_db_update" [(match_parallel 0 "load_multiple_operation" @@ -323,7 +328,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db" [(match_parallel 0 "store_multiple_operation" @@ -338,7 +344,8 @@ 
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db_update" [(match_parallel 0 "store_multiple_operation" @@ -355,7 +362,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -477,7 +485,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia" [(match_parallel 0 "load_multiple_operation" @@ -508,7 +517,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -537,7 +547,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -552,7 +563,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -704,7 +716,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm3_db_update" [(match_parallel 0 "load_multiple_operation" @@ -722,7 +735,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db" [(match_parallel 0 "store_multiple_operation" @@ -735,7 +749,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db_update" [(match_parallel 0 "store_multiple_operation" @@ -750,7 +765,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -855,7 +871,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia" [(match_parallel 0 "load_multiple_operation" @@ -880,7 +897,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + 
(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -904,7 +922,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -917,7 +936,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -1044,7 +1064,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm2_db_update" [(match_parallel 0 "load_multiple_operation" @@ -1059,7 +1080,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db" [(match_parallel 0 "store_multiple_operation" @@ -1070,7 +1092,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db_update" [(match_parallel 0 "store_multiple_operation" @@ -1083,7 +1106,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md index 39f4c584515..4004fa59409 100644 --- a/gcc/config/arm/marvell-pj4.md +++ b/gcc/config/arm/marvell-pj4.md @@ -95,10 +95,14 @@ "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") (define_insn_reservation "pj4_ir_mul" 3 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "mult")) "pj4_is,pj4_mul,nothing*2,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "pj4_is,pj4_mul,nothing*2,pj4_cp") (define_insn_reservation "pj4_ir_div" 20 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "insn" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") ;; Branches and calls. 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index f91a6f7d08b..2761adb286a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -241,8 +241,8 @@ }) (define_expand "movmisalign<mode>" - [(set (match_operand:VDQX 0 "neon_struct_or_register_operand") - (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_or_register_operand")] + [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" { @@ -255,7 +255,7 @@ }) (define_insn "*movmisalign<mode>_neon_store" - [(set (match_operand:VDX 0 "neon_struct_operand" "=Um") + [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" @@ -263,15 +263,16 @@ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" - [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(match_operand:VDX 1 "neon_struct_operand" " Um")] + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" + " Um")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vld1.<V_sz_elem>\t{%P0}, %A1" [(set_attr "neon_type" "neon_vld1_1_2_regs")]) (define_insn "*movmisalign<mode>_neon_store" - [(set (match_operand:VQX 0 "neon_struct_operand" "=Um") + [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" @@ -279,8 +280,9 @@ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" - [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(match_operand:VQX 1 "neon_struct_operand" " Um")] + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" + " Um")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vld1.<V_sz_elem>\t{%q0}, %A1" @@ -679,29 +681,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))] - "TARGET_NEON" -{ - switch (which_alternative) - { - case 0: /* fall through */ - case 4: return "vorr\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 5: return neon_output_logic_immediate ("vorr", &operands[2], - DImode, 0, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; - default: gcc_unreachable (); - } -} - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] -) - ;; The concrete forms of the Neon immediate-logic instructions are vbic and ;; vorr. 
We support the pseudo-instruction vand instead, because that ;; corresponds to the canonical form the middle-end expects to use for @@ -805,21 +784,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w") - (match_operand:DI 2 "s_register_operand" "w,r,r,w")))] - "TARGET_NEON" - "@ - veor\t%P0, %P1, %P2 - # - # - veor\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "length" "*,8,8,*") - (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] -) - (define_insn "one_cmpl<mode>2" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] @@ -5617,7 +5581,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2])); DONE; }) @@ -5628,7 +5592,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 92de9fe8bd9..f4a4515fa39 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -42,6 +42,17 @@ (ior (match_operand 0 "imm_for_neon_inv_logic_operand") (match_operand 0 "s_register_operand"))) +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + ;; Any hard register. (define_predicate "arm_hard_register_operand" (match_code "reg") @@ -162,6 +173,17 @@ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) (match_operand 0 "neon_inv_logic_op2"))) +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + (define_predicate "arm_adddi_operand" (ior (match_operand 0 "s_register_operand") (and (match_code "const_int") @@ -299,6 +321,12 @@ || maybe_get_arm_condition_code (op) == ARM_NE || maybe_get_arm_condition_code (op) == ARM_VC"))) +(define_special_predicate "arm_cond_move_operator" + (if_then_else (match_test "arm_restrict_it") + (and (match_test "TARGET_FPU_ARMV8") + (match_operand 0 "arm_vsel_comparison_operator")) + (match_operand 0 "expandable_comparison_operator"))) + (define_special_predicate "noov_comparison_operator" (match_code "lt,ge,eq,ne")) @@ -535,17 +563,6 @@ (ior (match_operand 0 "s_register_operand") (match_operand 0 "imm_for_neon_rshift_operand"))) -(define_predicate "imm_for_neon_logic_operand" - (match_code "const_vector") -{ - return (TARGET_NEON - && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); -}) - -(define_predicate "neon_logic_op2" - (ior (match_operand 0 "imm_for_neon_logic_operand") - (match_operand 0 "s_register_operand"))) - ;; Predicates for named expanders that overlap multiple ISAs. 
(define_predicate "cmpdi_operand" @@ -623,10 +640,14 @@ (define_predicate "neon_struct_operand" (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_predicate "neon_permissive_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)"))) -(define_predicate "neon_struct_or_register_operand" - (ior (match_operand 0 "neon_struct_operand") +(define_predicate "neon_perm_struct_or_reg_operand" + (ior (match_operand 0 "neon_permissive_struct_operand") (match_operand 0 "s_register_operand"))) (define_special_predicate "add_operator" diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md index 980234836c9..8f7bd71c317 100644 --- a/gcc/config/arm/sync.md +++ b/gcc/config/arm/sync.md @@ -124,7 +124,8 @@ UNSPEC_LL))] "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "s_register_operand" "") ;; bool out @@ -361,7 +362,8 @@ VUNSPEC_LL)))] "TARGET_HAVE_LDREXBH" "ldrex<sync_sfx>%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -371,7 +373,8 @@ VUNSPEC_LAX)))] "TARGET_HAVE_LDACQ" "ldaex<sync_sfx>%?\\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -380,7 +383,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREX" "ldrex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -389,7 +393,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ" "ldaex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -398,7 +403,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREXD" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -407,7 +413,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" "ldaexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -431,7 +438,8 @@ } return "strex<sync_sfx>%?\t%0, %2, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusivedi" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -448,7 +456,8 @@ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); return "stlexd%?\t%0, %2, %3, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -459,4 +468,5 @@ VUNSPEC_SLX))] 
"TARGET_HAVE_LDACQ" "stlex<sync_sfx>%?\t%0, %2, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index fe075e5862a..246f0f5b540 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -78,8 +78,8 @@ $(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ $(srcdir)/config/arm/arm-tables.opt -arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ - $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ +arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ insn-config.h conditions.h output.h dumpfile.h \ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index ca4eedb037b..cd5837480b8 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -267,8 +267,8 @@ ;; regs. The high register alternatives are not taken into account when ;; choosing register preferences in order to reflect their expense. (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m") - (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] "TARGET_THUMB2 && ! TARGET_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_VFP) && ( register_operand (operands[0], SImode) @@ -276,16 +276,19 @@ "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 movw%?\\t%0, %1 ldr%?\\t%0, %1 ldr%?\\t%0, %1 str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,*,simple_alu_imm,*,load1,load1,store1,store1") + [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,simple_alu_imm,*,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") (set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*") - (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] ) (define_insn "tls_load_dot_plus_four" @@ -390,26 +393,32 @@ ) (define_insn_and_split "*thumb2_movsicc_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,l") (if_then_else:SI (match_operator 3 "arm_comparison_operator" [(match_operand 4 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") - (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + (match_operand:SI 1 "arm_not_operand" "0 ,Py,0 ,0,rI,K,rI,rI,K ,K,r,lPy") + (match_operand:SI 2 "arm_not_operand" "Py,0 ,rI,K,0 ,0,rI,K ,rI,K,r,lPy")))] "TARGET_THUMB2" "@ it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%D3\;mov%D3\\t%0, %2 it\\t%D3\;mvn%D3\\t%0, #%B2 it\\t%d3\;mov%d3\\t%0, %1 it\\t%d3\;mvn%d3\\t%0, #%B1 # # # + # + # #" - ; alt 4: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - ; alt 5: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 - ; alt 6: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 - ; alt 7: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 
+ ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 + ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2" "&& reload_completed" [(const_int 0)] { @@ -440,7 +449,8 @@ operands[2]))); DONE; } - [(set_attr "length" "6,6,6,6,10,10,10,10") + [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6,6") + (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes,yes") (set_attr "conds" "use")] ) @@ -491,29 +501,30 @@ (define_insn_and_split "*thumb2_and_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)]) (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_THUMB2" - "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1" + "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0" "&& reload_completed" - [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0))) - (cond_exec (match_dup 4) (set (match_dup 0) - (and:SI (match_dup 3) (const_int 1))))] + [(set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))] { enum machine_mode mode = GET_MODE (operands[2]); enum rtx_code rc = GET_CODE (operands[1]); - operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); if (mode == CCFPmode || mode == CCFPEmode) rc = reverse_condition_maybe_unordered (rc); else rc = reverse_condition (rc); - operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set (attr "length") (if_then_else (match_test "arm_restrict_it") + (const_int 8) + (const_int 10)))] ) (define_insn_and_split "*thumb2_ior_scc" @@ -649,7 +660,7 @@ ) (define_insn_and_split "*thumb2_negscc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (neg:SI (match_operator 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_rhs_operand" "rI")]))) @@ -671,7 +682,7 @@ GEN_INT (31)))); DONE; } - else if (GET_CODE (operands[3]) == NE) + else if (GET_CODE (operands[3]) == NE && !arm_restrict_it) { /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */ if (CONST_INT_P (operands[2])) @@ -691,7 +702,7 @@ } else { - /* Emit: cmp\\t%1, %2\;ite\\t%D3\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */ + /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/ enum rtx_code rc = reverse_condition (GET_CODE (operands[3])); enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]); rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM); @@ -699,21 +710,15 @@ emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx), gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); - rc = GET_CODE (operands[3]); - emit_insn (gen_rtx_COND_EXEC (VOIDmode, - gen_rtx_fmt_ee (rc, - VOIDmode, - tmp1, - const0_rtx), - gen_rtx_SET (VOIDmode, - operands[0], - GEN_INT (~0)))); DONE; } FAIL; @@ -1063,7 +1068,7 @@ "mul%!\\t%0, %2, %0" [(set_attr "predicable" "yes") (set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0" [(set 
(reg:CC_NOOV CC_REGNUM) @@ -1076,7 +1081,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -1088,7 +1093,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_cbz" [(set (pc) (if_then_else diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 1930cddb835..9ac887e9b19 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -18,31 +18,6 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. */ -;; The VFP "type" attributes differ from those used in the FPA model. -;; fcpys Single precision cpy. -;; ffariths Single precision abs, neg. -;; ffarithd Double precision abs, neg, cpy. -;; fadds Single precision add/sub. -;; faddd Double precision add/sub. -;; fconsts Single precision load immediate. -;; fconstd Double precision load immediate. -;; fcmps Single precision comparison. -;; fcmpd Double precision comparison. -;; fmuls Single precision multiply. -;; fmuld Double precision multiply. -;; fmacs Single precision multiply-accumulate. -;; fmacd Double precision multiply-accumulate. -;; ffmas Single precision fused multiply-accumulate. -;; ffmad Double precision fused multiply-accumulate. -;; fdivs Single precision sqrt or division. -;; fdivd Double precision sqrt or division. -;; f_flag fmstat operation -;; f_load[sd] Floating point load from memory. -;; f_store[sd] Floating point store to memory. -;; f_2_r Transfer vfp to arm reg. -;; r_2_f Transfer arm to vfp reg. -;; f_cvt Convert floating<->integral - ;; SImode moves ;; ??? For now do not allow loading constants into vfp regs. This causes ;; problems because small constants get converted into adds. @@ -87,45 +62,52 @@ ;; See thumb2.md:thumb2_movsi_insn for an explanation of the split ;; high/low register alternatives for loads and stores here. +;; The l/Py alternative should come after r/I to ensure that the short variant +;; is chosen with length 2 when the instruction is predicated for +;; arm_restrict_it. 
(define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" "* switch (which_alternative) { - case 0: case 1: - return \"mov%?\\t%0, %1\"; + case 0: + case 1: case 2: - return \"mvn%?\\t%0, #%B1\"; + return \"mov%?\\t%0, %1\"; case 3: - return \"movw%?\\t%0, %1\"; + return \"mvn%?\\t%0, #%B1\"; case 4: + return \"movw%?\\t%0, %1\"; case 5: - return \"ldr%?\\t%0, %1\"; case 6: + return \"ldr%?\\t%0, %1\"; case 7: - return \"str%?\\t%1, %0\"; case 8: - return \"fmsr%?\\t%0, %1\\t%@ int\"; + return \"str%?\\t%1, %0\"; case 9: - return \"fmrs%?\\t%0, %1\\t%@ int\"; + return \"fmsr%?\\t%0, %1\\t%@ int\"; case 10: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 11: return \"fcpys%?\\t%0, %1\\t%@ int\"; - case 11: case 12: + case 12: case 13: return output_move_vfp (operands); default: gcc_unreachable (); } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") - (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") - (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "type" "*,*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "insn" "mov,mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -412,6 +394,7 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") @@ -420,7 +403,6 @@ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) - ;; DFmode moves (define_insn "*movdf_vfp" @@ -550,7 +532,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" "@ it\\t%D3\;fcpys%D3\\t%0, %2 it\\t%d3\;fcpys%d3\\t%0, %1 @@ -598,7 +580,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" "@ it\\t%D3\;fcpyd%D3\\t%P0, %P2 it\\t%d3\;fcpyd%d3\\t%P0, %P1 @@ -624,6 +606,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabss%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -633,6 +616,7 @@ "TARGET_32BIT && 
TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffarithd")] ) @@ -644,6 +628,7 @@ fnegs%?\\t%0, %1 eor%?\\t%0, %1, #-2147483648" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -689,6 +674,7 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4,4,8") (set_attr "type" "ffarithd")] ) @@ -703,6 +689,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -713,6 +700,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -724,6 +712,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -734,6 +723,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -747,6 +737,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fdivs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -757,6 +748,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fdivd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -770,6 +762,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -780,6 +773,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuld")] ) @@ -790,6 +784,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -800,6 +795,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuld")] ) @@ -815,6 +811,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -826,6 +823,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -838,6 +836,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -849,6 +848,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -861,6 +861,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -872,6 +873,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmacd%?\\t%P0, 
%P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -886,6 +888,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -898,6 +901,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -911,6 +915,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -923,6 +928,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -934,6 +940,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -946,6 +953,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -958,6 +966,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtds%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -967,6 +976,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtsd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -976,6 +986,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f32.f16\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -985,6 +996,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f16.f32\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -994,6 +1006,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftosizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1003,6 +1016,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1013,6 +1027,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftouizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1022,6 +1037,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1032,6 +1048,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1041,6 +1058,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1051,6 +1069,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fuitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr 
"predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1060,6 +1079,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1072,6 +1092,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsqrts%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -1081,6 +1102,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -1168,6 +1190,7 @@ fcmps%?\\t%0, %1 fcmpzs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1180,6 +1203,7 @@ fcmpes%?\\t%0, %1 fcmpezs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1192,6 +1216,7 @@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1204,6 +1229,7 @@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1263,6 +1289,7 @@ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1" [(set_attr "predicable" "<vrint_predicable>") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_rint<vfp_type>")] ) diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h index ce331cbe363..c30a9718e76 100644 --- a/gcc/config/c6x/c6x.h +++ b/gcc/config/c6x/c6x.h @@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch; Really only externally visible arrays must be aligned this way, as only those are directly visible from another compilation unit. But we don't have that information available here. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ ? BITS_PER_UNIT * 8 : (ALIGN)) diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h index 311292c7a44..297b98dd0d8 100644 --- a/gcc/config/i386/ammintrin.h +++ b/gcc/config/i386/ammintrin.h @@ -27,13 +27,15 @@ #ifndef _AMMINTRIN_H_INCLUDED #define _AMMINTRIN_H_INCLUDED -#ifndef __SSE4A__ -# error "SSE4A instruction set not enabled" -#else - /* We need definitions from the SSE3, SSE2 and SSE header files*/ #include <pmmintrin.h> +#ifndef __SSE4A__ +#pragma GCC push_options +#pragma GCC target("sse4a") +#define __DISABLE_SSE4A__ +#endif /* __SSE4A__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_sd (double * __P, __m128d __Y) { @@ -83,6 +85,9 @@ _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned cons (unsigned int)(I), (unsigned int)(L))) #endif -#endif /* __SSE4A__ */ +#ifdef __DISABLE_SSE4A__ +#undef __DISABLE_SSE4A__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4A__ */ #endif /* _AMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 1537bf5add0..4030dfe2bc2 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -25,6 +25,15 @@ # error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 
#endif +#ifndef _AVX2INTRIN_H_INCLUDED +#define _AVX2INTRIN_H_INCLUDED + +#ifndef __AVX2__ +#pragma GCC push_options +#pragma GCC target("avx2") +#define __DISABLE_AVX2__ +#endif /* __AVX2__ */ + /* Sum absolute 8-bit integer difference of adjacent groups of 4 byte integers in the first 2 operands. Starting offsets within operands are determined by the 3rd mask operand. */ @@ -1871,3 +1880,10 @@ _mm256_mask_i64gather_epi32 (__m128i src, int const *base, (__v4si)(__m128i)MASK, \ (int)SCALE) #endif /* __OPTIMIZE__ */ + +#ifdef __DISABLE_AVX2__ +#undef __DISABLE_AVX2__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX2__ */ + +#endif /* _AVX2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index b75de451af9..7f2109a7299 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -28,6 +28,15 @@ # error "Never use <avxintrin.h> directly; include <immintrin.h> instead." #endif +#ifndef _AVXINTRIN_H_INCLUDED +#define _AVXINTRIN_H_INCLUDED + +#ifndef __AVX__ +#pragma GCC push_options +#pragma GCC target("avx") +#define __DISABLE_AVX__ +#endif /* __AVX__ */ + /* Internal data types for implementing the intrinsics. */ typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); @@ -1424,3 +1433,10 @@ _mm256_castsi128_si256 (__m128i __A) { return (__m256i) __builtin_ia32_si256_si ((__v4si)__A); } + +#ifdef __DISABLE_AVX__ +#undef __DISABLE_AVX__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX__ */ + +#endif /* _AVXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h index 929ea20b970..0c6cb9616c8 100644 --- a/gcc/config/i386/bmi2intrin.h +++ b/gcc/config/i386/bmi2intrin.h @@ -25,13 +25,15 @@ # error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __BMI2__ -# error "BMI2 instruction set not enabled" -#endif /* __BMI2__ */ - #ifndef _BMI2INTRIN_H_INCLUDED #define _BMI2INTRIN_H_INCLUDED +#ifndef __BMI2__ +#pragma GCC push_options +#pragma GCC target("bmi2") +#define __DISABLE_BMI2__ +#endif /* __BMI2__ */ + extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bzhi_u32 (unsigned int __X, unsigned int __Y) @@ -99,4 +101,9 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) #endif /* !__x86_64__ */ +#ifdef __DISABLE_BMI2__ +#undef __DISABLE_BMI2__ +#pragma GCC pop_options +#endif /* __DISABLE_BMI2__ */ + #endif /* _BMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h index 0087f5c06e0..281ebaaf4f2 100644 --- a/gcc/config/i386/bmiintrin.h +++ b/gcc/config/i386/bmiintrin.h @@ -25,13 +25,15 @@ # error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 
#endif -#ifndef __BMI__ -# error "BMI instruction set not enabled" -#endif /* __BMI__ */ - #ifndef _BMIINTRIN_H_INCLUDED #define _BMIINTRIN_H_INCLUDED +#ifndef __BMI__ +#pragma GCC push_options +#pragma GCC target("bmi") +#define __DISABLE_BMI__ +#endif /* __BMI__ */ + extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u16 (unsigned short __X) { @@ -52,6 +54,12 @@ __bextr_u32 (unsigned int __X, unsigned int __Y) } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z) +{ + return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u32 (unsigned int __X) { return __X & -__X; @@ -91,6 +99,12 @@ __bextr_u64 (unsigned long long __X, unsigned long long __Y) } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z) +{ + return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u64 (unsigned long long __X) { return __X & -__X; @@ -116,4 +130,9 @@ __tzcnt_u64 (unsigned long long __X) #endif /* __x86_64__ */ +#ifdef __DISABLE_BMI__ +#undef __DISABLE_BMI__ +#pragma GCC pop_options +#endif /* __DISABLE_BMI__ */ + #endif /* _BMIINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index e28f098b03a..249c4cd1d53 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -674,8 +674,14 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Sandy Bridge. */ cpu = "corei7-avx"; else if (has_sse4_2) - /* Assume Core i7. */ - cpu = "corei7"; + { + if (has_movbe) + /* Assume SLM. */ + cpu = "slm"; + else + /* Assume Core i7. */ + cpu = "corei7"; + } else if (has_ssse3) { if (has_movbe) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index cf404a13536..c30f05657d6 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -27,13 +27,15 @@ #ifndef _EMMINTRIN_H_INCLUDED #define _EMMINTRIN_H_INCLUDED -#ifndef __SSE2__ -# error "SSE2 instruction set not enabled" -#else - /* We need definitions from the SSE header files*/ #include <xmmintrin.h> +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + /* SSE2 */ typedef double __v2df __attribute__ ((__vector_size__ (16))); typedef long long __v2di __attribute__ ((__vector_size__ (16))); @@ -1515,6 +1517,9 @@ _mm_castsi128_pd(__m128i __A) return (__m128d) __A; } -#endif /* __SSE2__ */ +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ #endif /* _EMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/f16cintrin.h b/gcc/config/i386/f16cintrin.h index 88903c16231..76f35fa1eac 100644 --- a/gcc/config/i386/f16cintrin.h +++ b/gcc/config/i386/f16cintrin.h @@ -25,13 +25,15 @@ # error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead." 
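A side note on the recurring header change above (an editorial illustration, not part of this commit): the x86 intrinsic headers touched here drop their old "#error ... instruction set not enabled" guards and instead enable the required ISA temporarily with "#pragma GCC push_options" / "#pragma GCC target(...)", restoring the caller's options with "#pragma GCC pop_options". The intrinsics are therefore always declared, so a file compiled without, say, -mbmi can still use them inside a function that opts in through the target attribute. A minimal sketch, assuming a GCC of this vintage:

    #include <x86intrin.h>

    /* __blsi_u32 is declared in bmiintrin.h; it is visible here because the
       header no longer #errors out, and the target attribute enables BMI for
       this one function only.  */
    __attribute__ ((target ("bmi")))
    unsigned int
    lowest_set_bit (unsigned int x)
    {
      return __blsi_u32 (x);   /* isolates the lowest set bit: x & -x */
    }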
#endif -#ifndef __F16C__ -# error "F16C instruction set not enabled" -#else - #ifndef _F16CINTRIN_H_INCLUDED #define _F16CINTRIN_H_INCLUDED +#ifndef __F16C__ +#pragma GCC push_options +#pragma GCC target("f16c") +#define __DISABLE_F16C__ +#endif /* __F16C__ */ + extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _cvtsh_ss (unsigned short __S) { @@ -88,5 +90,9 @@ _mm256_cvtps_ph (__m256 __A, const int __I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I))) #endif /* __OPTIMIZE */ +#ifdef __DISABLE_F16C__ +#undef __DISABLE_F16C__ +#pragma GCC pop_options +#endif /* __DISABLE_F16C__ */ + #endif /* _F16CINTRIN_H_INCLUDED */ -#endif /* __F16C__ */ diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h index 00ba7813123..e615f3e7ba0 100644 --- a/gcc/config/i386/fma4intrin.h +++ b/gcc/config/i386/fma4intrin.h @@ -28,13 +28,15 @@ #ifndef _FMA4INTRIN_H_INCLUDED #define _FMA4INTRIN_H_INCLUDED -#ifndef __FMA4__ -# error "FMA4 instruction set not enabled" -#else - /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ #include <ammintrin.h> +#ifndef __FMA4__ +#pragma GCC push_options +#pragma GCC target("fma4") +#define __DISABLE_FMA4__ +#endif /* __FMA4__ */ + /* 128b Floating point multiply/add type instructions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) @@ -231,6 +233,9 @@ _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C) return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#endif +#ifdef __DISABLE_FMA4__ +#undef __DISABLE_FMA4__ +#pragma GCC pop_options +#endif /* __DISABLE_FMA4__ */ #endif diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h index 6ede84b18d4..97de93fd146 100644 --- a/gcc/config/i386/fmaintrin.h +++ b/gcc/config/i386/fmaintrin.h @@ -29,8 +29,10 @@ #define _FMAINTRIN_H_INCLUDED #ifndef __FMA__ -# error "FMA instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("fma") +#define __DISABLE_FMA__ +#endif /* __FMA__ */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -292,6 +294,9 @@ _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) -(__v8sf)__C); } -#endif +#ifdef __DISABLE_FMA__ +#undef __DISABLE_FMA__ +#pragma GCC pop_options +#endif /* __DISABLE_FMA__ */ #endif diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h index 9b63222c835..41d4085b010 100644 --- a/gcc/config/i386/fxsrintrin.h +++ b/gcc/config/i386/fxsrintrin.h @@ -28,6 +28,12 @@ #ifndef _FXSRINTRIN_H_INCLUDED #define _FXSRINTRIN_H_INCLUDED +#ifndef __FXSR__ +#pragma GCC push_options +#pragma GCC target("fxsr") +#define __DISABLE_FXSR__ +#endif /* __FXSR__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _fxsave (void *__P) @@ -58,4 +64,10 @@ _fxrstor64 (void *__P) } #endif +#ifdef __DISABLE_FXSR__ +#undef __DISABLE_FXSR__ +#pragma GCC pop_options +#endif /* __DISABLE_FXSR__ */ + + #endif /* _FXSRINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h index 35063e68b1e..4a91c843685 100644 --- a/gcc/config/i386/gnu.h +++ b/gcc/config/i386/gnu.h @@ -36,6 +36,12 @@ along with GCC. If not, see <http://www.gnu.org/licenses/>. #endif #ifdef TARGET_LIBC_PROVIDES_SSP + +/* Not supported yet. */ +# undef TARGET_THREAD_SSP_OFFSET + /* Not supported yet. 
*/ -#undef TARGET_THREAD_SSP_OFFSET +# undef TARGET_CAN_SPLIT_STACK +# undef TARGET_THREAD_SPLIT_STACK_OFFSET + #endif diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 51fec844bdf..31dd28a94cb 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -149,6 +149,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__atom"); def_or_undef (parse_in, "__atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__slm"); + def_or_undef (parse_in, "__slm__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: break; @@ -241,6 +245,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ATOM: def_or_undef (parse_in, "__tune_atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__tune_slm__"); + break; case PROCESSOR_GENERIC32: case PROCESSOR_GENERIC64: break; @@ -369,20 +376,23 @@ ix86_pragma_target_parse (tree args, tree pop_target) if (! args) { - cur_tree = ((pop_target) - ? pop_target - : target_option_default_node); + cur_tree = (pop_target ? pop_target : target_option_default_node); cl_target_option_restore (&global_options, TREE_TARGET_OPTION (cur_tree)); } else { cur_tree = ix86_valid_target_attribute_tree (args); - if (!cur_tree) - return false; + if (!cur_tree || cur_tree == error_mark_node) + { + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (prev_tree)); + return false; + } } target_option_current_node = cur_tree; + ix86_reset_previous_fndecl (); /* Figure out the previous/current isa, arch, tune and the differences. */ prev_opt = TREE_TARGET_OPTION (prev_tree); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ef4dc761d5a..09667893910 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -40,6 +40,8 @@ extern void ix86_output_addr_diff_elt (FILE *, int, int); extern enum calling_abi ix86_cfun_abi (void); extern enum calling_abi ix86_function_type_abi (const_tree); +extern void ix86_reset_previous_fndecl (void); + #ifdef RTX_CODE extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); @@ -207,7 +209,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* RTX_CODE */ #ifdef TREE_CODE -extern int ix86_data_alignment (tree, int); +extern int ix86_data_alignment (tree, int, bool); extern unsigned int ix86_local_alignment (tree, enum machine_mode, unsigned int); extern unsigned int ix86_minimum_alignment (tree, enum machine_mode, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3470fef77b2..2a65fc2a6de 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1482,6 +1482,79 @@ struct processor_costs atom_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static const +struct processor_costs slm_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. 
*/ static const struct processor_costs generic64_cost = { @@ -1735,6 +1808,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_HASWELL (1<<PROCESSOR_HASWELL) #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL) #define m_ATOM (1<<PROCESSOR_ATOM) +#define m_SLM (1<<PROCESSOR_SLM) #define m_GEODE (1<<PROCESSOR_GEODE) #define m_K6 (1<<PROCESSOR_K6) @@ -1778,7 +1852,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_486 | m_PENT, /* X86_TUNE_UNROLL_STRLEN */ - m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, + m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based on simulation result. But after P4 was made, no performance benefit @@ -1790,11 +1864,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ~m_386, /* X86_TUNE_USE_SAHF */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid partial dependencies. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial register stalls on Generic32 compilation setting as well. However @@ -1817,13 +1891,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_386 | m_486 | m_K6_GEODE, /* X86_TUNE_USE_SIMODE_FIOP */ - ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC), + ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC), /* X86_TUNE_USE_MOV0 */ m_K6, /* X86_TUNE_USE_CLTD */ - ~(m_PENT | m_ATOM | m_K6), + ~(m_PENT | m_ATOM | m_SLM | m_K6), /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ m_PENT4, @@ -1838,7 +1912,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ~(m_PENT | m_PPRO), /* X86_TUNE_PROMOTE_QIMODE */ - m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_FAST_PREFIX */ ~(m_386 | m_486 | m_PENT), @@ -1879,10 +1953,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred for DFmode copies */ - ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC), + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC), /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a conflict here in between PPro/Pentium4 based chips that thread 128bit @@ -1893,13 +1967,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { shows that disabling this option on P4 brings over 20% SPECfp regression, while enabling it on K8 brings roughly 2.4% regression that can be partly masked by careful scheduling of moves. 
*/ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC, /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */ - m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER, + m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM, /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */ - m_COREI7 | m_BDVER, + m_COREI7 | m_BDVER | m_SLM, /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */ m_BDVER , @@ -1917,7 +1991,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_PPRO | m_P4_NOCONA, /* X86_TUNE_MEMORY_MISMATCH_STALL */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_PROLOGUE_USING_MOVE */ m_PPRO | m_ATHLON_K8, @@ -1942,16 +2016,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more than 4 branch instructions in the 16 byte window. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_SCHEDULE */ - m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_USE_BT */ - m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_USE_INCDEC */ - ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GENERIC), + ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC), /* X86_TUNE_PAD_RETURNS */ m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC, @@ -1960,7 +2034,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_ATOM, /* X86_TUNE_EXT_80387_CONSTANTS */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, /* X86_TUNE_AVOID_VECTOR_DECODE */ m_CORE_ALL | m_K8 | m_GENERIC64, @@ -2005,13 +2079,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ - m_ATOM, + m_ATOM | m_SLM, /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector instructions. */ ~m_ATOM, - /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching + /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching at -O3. For the moment, the prefetching seems badly tuned for Intel chips. */ m_K6_GEODE | m_AMD_MULTIPLE, @@ -2026,7 +2100,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations during reassociation of fp computation. */ - m_ATOM | m_HASWELL, + m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2, /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE regs instead of memory. */ @@ -2034,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for a conditional move. */ - m_ATOM + m_ATOM, + + /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for + fp converts to destination register. */ + m_SLM + }; /* Feature tests against the various architecture variations. 
*/ @@ -2060,10 +2139,10 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { }; static const unsigned int x86_accumulate_outgoing_args - = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC; + = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_arch_always_fancy_math_387 - = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC; + = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_avx256_split_unaligned_load = m_COREI7 | m_GENERIC; @@ -2458,7 +2537,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&bdver3_cost, 16, 10, 16, 7, 11}, {&btver1_cost, 16, 10, 16, 7, 11}, {&btver2_cost, 16, 10, 16, 7, 11}, - {&atom_cost, 16, 15, 16, 7, 16} + {&atom_cost, 16, 15, 16, 7, 16}, + {&slm_cost, 16, 15, 16, 7, 16} }; static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = @@ -2479,6 +2559,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "corei7", "core-avx2", "atom", + "slm", "geode", "k6", "k6-2", @@ -2940,6 +3021,10 @@ ix86_option_override_internal (bool main_args_p) {"atom", PROCESSOR_ATOM, CPU_ATOM, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR}, + {"slm", PROCESSOR_SLM, CPU_SLM, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE + | PTA_FXSR}, {"geode", PROCESSOR_GEODE, CPU_GEODE, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, @@ -4564,6 +4649,13 @@ ix86_can_inline_p (tree caller, tree callee) /* Remember the last target of ix86_set_current_function. */ static GTY(()) tree ix86_previous_fndecl; +/* Invalidate ix86_previous_fndecl cache. */ +void +ix86_reset_previous_fndecl (void) +{ + ix86_previous_fndecl = NULL_TREE; +} + /* Establish appropriate back-end context for processing the function FNDECL. The argument might be NULL to indicate processing at top level, outside of any function scope. */ @@ -6413,7 +6505,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, /* Likewise, error if the ABI requires us to return values in the x87 registers and the user specified -mno-80387. */ - if (!TARGET_80387 && in_return) + if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) for (i = 0; i < n; i++) if (regclass[i] == X86_64_X87_CLASS || regclass[i] == X86_64_X87UP_CLASS @@ -17312,10 +17404,24 @@ distance_agu_use (unsigned int regno0, rtx insn) static bool ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, - unsigned int regno2, int split_cost) + unsigned int regno2, int split_cost, bool has_scale) { int dist_define, dist_use; + /* For Silvermont if using a 2-source or 3-source LEA for + non-destructive destination purposes, or due to wanting + ability to use SCALE, the use of LEA is justified. 
*/ + if (ix86_tune == PROCESSOR_SLM) + { + if (has_scale) + return true; + if (split_cost < 1) + return false; + if (regno0 == regno1 || regno0 == regno2) + return false; + return true; + } + dist_define = distance_non_agu_define (regno1, regno2, insn); dist_use = distance_agu_use (regno0, insn); @@ -17404,7 +17510,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[]) if (regno0 == regno1 || regno0 == regno2) return false; else - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); } /* Return true if we should emit lea instruction instead of mov @@ -17426,7 +17532,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[]) regno0 = true_regnum (operands[0]); regno1 = true_regnum (operands[1]); - return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0); + return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); } /* Return true if we need to split lea into a sequence of @@ -17505,7 +17611,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) split_cost -= 1; } - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, + parts.scale > 1); } /* Emit x86 binary operand CODE in mode MODE, where the first operand @@ -17690,7 +17797,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[]) if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; - return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0); + return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); } /* Return true if destination reg of SET_BODY is shift count of @@ -24199,6 +24306,7 @@ ix86_issue_rate (void) { case PROCESSOR_PENTIUM: case PROCESSOR_ATOM: + case PROCESSOR_SLM: case PROCESSOR_K6: case PROCESSOR_BTVER2: return 2; @@ -24287,6 +24395,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn) return false; } +/* Helper function for exact_store_load_dependency. + Return true if addr is found in insn. */ +static bool +exact_dependency_1 (rtx addr, rtx insn) +{ + enum rtx_code code; + const char *format_ptr; + int i, j; + + code = GET_CODE (insn); + switch (code) + { + case MEM: + if (rtx_equal_p (addr, insn)) + return true; + break; + case REG: + CASE_CONST_ANY: + case SYMBOL_REF: + case CODE_LABEL: + case PC: + case CC0: + case EXPR_LIST: + return false; + default: + break; + } + + format_ptr = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); i++) + { + switch (*format_ptr++) + { + case 'e': + if (exact_dependency_1 (addr, XEXP (insn, i))) + return true; + break; + case 'E': + for (j = 0; j < XVECLEN (insn, i); j++) + if (exact_dependency_1 (addr, XVECEXP (insn, i, j))) + return true; + break; + } + } + return false; +} + +/* Return true if there exists exact dependency for store & load, i.e. + the same memory address is used in them. */ +static bool +exact_store_load_dependency (rtx store, rtx load) +{ + rtx set1, set2; + + set1 = single_set (store); + if (!set1) + return false; + if (!MEM_P (SET_DEST (set1))) + return false; + set2 = single_set (load); + if (!set2) + return false; + if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2))) + return true; + return false; +} + static int ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { @@ -24438,6 +24613,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) else cost = 0; } + break; + + case PROCESSOR_SLM: + if (!reload_completed) + return cost; + + /* Increase cost of integer loads. 
*/ + memory = get_attr_memory (dep_insn); + if (memory == MEMORY_LOAD || memory == MEMORY_BOTH) + { + enum attr_unit unit = get_attr_unit (dep_insn); + if (unit == UNIT_INTEGER && cost == 1) + { + if (memory == MEMORY_LOAD) + cost = 3; + else + { + /* Increase cost of ld/st for short int types only + because of store forwarding issue. */ + rtx set = single_set (dep_insn); + if (set && (GET_MODE (SET_DEST (set)) == QImode + || GET_MODE (SET_DEST (set)) == HImode)) + { + /* Increase cost of store/load insn if exact + dependence exists and it is load insn. */ + enum attr_memory insn_memory = get_attr_memory (insn); + if (insn_memory == MEMORY_LOAD + && exact_store_load_dependency (dep_insn, insn)) + cost = 3; + } + } + } + } default: break; @@ -24466,6 +24674,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_COREI7: case PROCESSOR_HASWELL: case PROCESSOR_ATOM: + case PROCESSOR_SLM: /* Generally, we want haifa-sched:max_issue() to look ahead as far as many instructions can be executed on a cycle, i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */ @@ -24483,110 +24692,204 @@ ia32_multipass_dfa_lookahead (void) execution. It is applied if (1) IMUL instruction is on the top of list; (2) There exists the only producer of independent IMUL instruction in - ready list; - (3) Put found producer on the top of ready list. - Returns issue rate. */ - + ready list. + Return index of IMUL producer if it was found and -1 otherwise. */ static int -ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, - int clock_var ATTRIBUTE_UNUSED) +do_reorder_for_imul (rtx *ready, int n_ready) { - static int issue_rate = -1; - int n_ready = *pn_ready; - rtx insn, insn1, insn2; - int i; + rtx insn, set, insn1, insn2; sd_iterator_def sd_it; dep_t dep; int index = -1; + int i; - /* Set up issue rate. */ - issue_rate = ix86_issue_rate(); - - /* Do reodering for Atom only. */ if (ix86_tune != PROCESSOR_ATOM) - return issue_rate; - /* Do not perform ready list reodering for pre-reload schedule pass. */ - if (!reload_completed) - return issue_rate; - /* Nothing to do if ready list contains only 1 instruction. */ - if (n_ready <= 1) - return issue_rate; + return index; /* Check that IMUL instruction is on the top of ready list. */ insn = ready[n_ready - 1]; - if (!NONDEBUG_INSN_P (insn)) - return issue_rate; - insn = PATTERN (insn); - if (GET_CODE (insn) == PARALLEL) - insn = XVECEXP (insn, 0, 0); - if (GET_CODE (insn) != SET) - return issue_rate; - if (!(GET_CODE (SET_SRC (insn)) == MULT - && GET_MODE (SET_SRC (insn)) == SImode)) - return issue_rate; + set = single_set (insn); + if (!set) + return index; + if (!(GET_CODE (SET_SRC (set)) == MULT + && GET_MODE (SET_SRC (set)) == SImode)) + return index; /* Search for producer of independent IMUL instruction. */ - for (i = n_ready - 2; i>= 0; i--) + for (i = n_ready - 2; i >= 0; i--) { insn = ready[i]; if (!NONDEBUG_INSN_P (insn)) - continue; + continue; /* Skip IMUL instruction. 
*/ insn2 = PATTERN (insn); if (GET_CODE (insn2) == PARALLEL) - insn2 = XVECEXP (insn2, 0, 0); + insn2 = XVECEXP (insn2, 0, 0); if (GET_CODE (insn2) == SET - && GET_CODE (SET_SRC (insn2)) == MULT - && GET_MODE (SET_SRC (insn2)) == SImode) - continue; + && GET_CODE (SET_SRC (insn2)) == MULT + && GET_MODE (SET_SRC (insn2)) == SImode) + continue; FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) - { - rtx con; + { + rtx con; con = DEP_CON (dep); if (!NONDEBUG_INSN_P (con)) continue; - insn1 = PATTERN (con); - if (GET_CODE (insn1) == PARALLEL) - insn1 = XVECEXP (insn1, 0, 0); - - if (GET_CODE (insn1) == SET - && GET_CODE (SET_SRC (insn1)) == MULT - && GET_MODE (SET_SRC (insn1)) == SImode) - { - sd_iterator_def sd_it1; - dep_t dep1; - /* Check if there is no other dependee for IMUL. */ - index = i; - FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) - { - rtx pro; - pro = DEP_PRO (dep1); + insn1 = PATTERN (con); + if (GET_CODE (insn1) == PARALLEL) + insn1 = XVECEXP (insn1, 0, 0); + + if (GET_CODE (insn1) == SET + && GET_CODE (SET_SRC (insn1)) == MULT + && GET_MODE (SET_SRC (insn1)) == SImode) + { + sd_iterator_def sd_it1; + dep_t dep1; + /* Check if there is no other dependee for IMUL. */ + index = i; + FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) + { + rtx pro; + pro = DEP_PRO (dep1); if (!NONDEBUG_INSN_P (pro)) continue; - if (pro != insn) - index = -1; - } - if (index >= 0) - break; - } - } + if (pro != insn) + index = -1; + } + if (index >= 0) + break; + } + } if (index >= 0) - break; + break; } - if (index < 0) - return issue_rate; /* Didn't find IMUL producer. */ + return index; +} + +/* Try to find the best candidate on the top of ready list if two insns + have the same priority - candidate is best if its dependees were + scheduled earlier. Applied for Silvermont only. + Return true if top 2 insns must be interchanged. */ +static bool +swap_top_of_ready_list (rtx *ready, int n_ready) +{ + rtx top = ready[n_ready - 1]; + rtx next = ready[n_ready - 2]; + rtx set; + sd_iterator_def sd_it; + dep_t dep; + int clock1 = -1; + int clock2 = -1; + #define INSN_TICK(INSN) (HID (INSN)->tick) - if (sched_verbose > 1) - fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n", - INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1])); + if (ix86_tune != PROCESSOR_SLM) + return false; - /* Put IMUL producer (ready[index]) at the top of ready list. */ - insn1= ready[index]; - for (i = index; i < n_ready - 1; i++) - ready[i] = ready[i + 1]; - ready[n_ready - 1] = insn1; + if (!NONDEBUG_INSN_P (top)) + return false; + if (!NONJUMP_INSN_P (top)) + return false; + if (!NONDEBUG_INSN_P (next)) + return false; + if (!NONJUMP_INSN_P (next)) + return false; + set = single_set (top); + if (!set) + return false; + set = single_set (next); + if (!set) + return false; + if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next)) + { + if (INSN_PRIORITY (top) != INSN_PRIORITY (next)) + return false; + /* Determine winner more precise. */ + FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock1) + clock1 = INSN_TICK (pro); + } + FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock2) + clock2 = INSN_TICK (pro); + } + + if (clock1 == clock2) + { + /* Determine winner - load must win. 
*/ + enum attr_memory memory1, memory2; + memory1 = get_attr_memory (top); + memory2 = get_attr_memory (next); + if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD) + return true; + } + return (bool) (clock2 < clock1); + } + return false; + #undef INSN_TICK +} + +/* Perform possible reodering of ready list for Atom/Silvermont only. + Return issue rate. */ +static int +ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, + int clock_var) +{ + int issue_rate = -1; + int n_ready = *pn_ready; + int i; + rtx insn; + int index = -1; + + /* Set up issue rate. */ + issue_rate = ix86_issue_rate (); + + /* Do reodering for Atom/SLM only. */ + if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM) + return issue_rate; + + /* Nothing to do if ready list contains only 1 instruction. */ + if (n_ready <= 1) + return issue_rate; + + /* Do reodering for post-reload scheduler only. */ + if (!reload_completed) + return issue_rate; + + if ((index = do_reorder_for_imul (ready, n_ready)) >= 0) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n", + INSN_UID (ready[index])); + + /* Put IMUL producer (ready[index]) at the top of ready list. */ + insn = ready[index]; + for (i = index; i < n_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[n_ready - 1] = insn; + return issue_rate; + } + if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready)) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n", + INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2])); + /* Swap 2 top elements of ready list. */ + insn = ready[n_ready - 1]; + ready[n_ready - 1] = ready[n_ready - 2]; + ready[n_ready - 2] = insn; + } return issue_rate; } @@ -25079,11 +25382,12 @@ ix86_constant_alignment (tree exp, int align) instead of that alignment to align the object. */ int -ix86_data_alignment (tree type, int align) +ix86_data_alignment (tree type, int align, bool opt) { int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); - if (AGGREGATE_TYPE_P (type) + if (opt + && AGGREGATE_TYPE_P (type) && TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align @@ -25095,14 +25399,17 @@ ix86_data_alignment (tree type, int align) to 16byte boundary. */ if (TARGET_64BIT) { - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) return 128; } + if (!opt) + return align; + if (TREE_CODE (type) == ARRAY_TYPE) { if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) @@ -25614,8 +25921,6 @@ enum ix86_builtins IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS, - IX86_BUILTIN_CMPNGTSS, - IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS, IX86_BUILTIN_CMPUNORDSS, @@ -27252,8 +27557,6 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, @@ -29468,7 +29771,7 @@ ix86_get_function_versions_dispatcher (void *decl) dispatcher_version_info = insert_new_cgraph_node_version (dispatcher_node); dispatcher_version_info->next = default_version_info; - dispatcher_node->local.finalized = 1; + dispatcher_node->symbol.definition = 1; /* Set the dispatcher for all the versions. */ it_v = default_version_info; @@ -29623,7 +29926,7 @@ ix86_generate_version_dispatcher_body (void *node_p) default_ver_decl = node_version_info->next->this_node->symbol.decl; /* node is going to be an alias, so remove the finalized bit. 
*/ - node->local.finalized = false; + node->symbol.definition = false; resolver_decl = make_resolver_func (default_ver_decl, node->symbol.decl, &empty_bb); @@ -29756,6 +30059,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_INTEL_COREI7, M_AMDFAM10H, M_AMDFAM15H, + M_INTEL_SLM, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -29778,6 +30082,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"amd", M_AMD}, {"intel", M_INTEL}, {"atom", M_INTEL_ATOM}, + {"slm", M_INTEL_SLM}, {"core2", M_INTEL_CORE2}, {"corei7", M_INTEL_COREI7}, {"nehalem", M_INTEL_COREI7_NEHALEM}, @@ -29817,6 +30122,9 @@ fold_builtin_cpu (tree fndecl, tree *args) tree __cpu_model_var = make_var_decl (__processor_model_type, "__cpu_model"); + + varpool_add_new_variable (__cpu_model_var); + gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; @@ -33650,6 +33958,8 @@ static inline bool inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, enum machine_mode mode, int strict) { + if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) + return false; if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) @@ -35564,6 +35874,46 @@ ix86_pad_short_function (void) } } +/* Fix up a Windows system unwinder issue. If an EH region falls thru into + the epilogue, the Windows system unwinder will apply epilogue logic and + produce incorrect offsets. This can be avoided by adding a nop between + the last insn that can throw and the first insn of the epilogue. */ + +static void +ix86_seh_fixup_eh_fallthru (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + rtx insn, next; + + /* Find the beginning of the epilogue. */ + for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) + break; + if (insn == NULL) + continue; + + /* We only care about preceeding insns that can throw. */ + insn = prev_active_insn (insn); + if (insn == NULL || !can_throw_internal (insn)) + continue; + + /* Do not separate calls from their debug information. */ + for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) + if (NOTE_P (next) + && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION + || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)) + insn = next; + else + break; + + emit_insn_after (gen_nops (const1_rtx), insn); + } +} + /* Implement machine specific optimizations. We implement padding of returns for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ static void @@ -35573,6 +35923,9 @@ ix86_reorg (void) with old MDEP_REORGS that are not CFG based. Recompute it now. 
*/ compute_bb_for_insn (); + if (TARGET_SEH && current_function_has_exception_handlers ()) + ix86_seh_fixup_eh_fallthru (); + if (optimize && optimize_function_for_speed_p (cfun)) { if (TARGET_PAD_SHORT_FUNCTION) @@ -42682,6 +43035,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_REGISTER_PRIORITY #define TARGET_REGISTER_PRIORITY ix86_register_priority +#undef TARGET_REGISTER_USAGE_LEVELING_P +#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true + #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 6055b99a55b..7d940f98804 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -257,6 +257,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1) #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) +#define TARGET_SLM (ix86_tune == PROCESSOR_SLM) /* Feature tests against the various tunings. */ enum ix86_tune_indices { @@ -332,6 +333,7 @@ enum ix86_tune_indices { X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_GENERAL_REGS_SSE_SPILL, X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, + X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, X86_TUNE_LAST }; @@ -442,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] +#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \ + ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -623,6 +627,7 @@ enum target_cpu_default TARGET_CPU_DEFAULT_corei7, TARGET_CPU_DEFAULT_haswell, TARGET_CPU_DEFAULT_atom, + TARGET_CPU_DEFAULT_slm, TARGET_CPU_DEFAULT_geode, TARGET_CPU_DEFAULT_k6, @@ -854,7 +859,18 @@ enum target_cpu_default cause character arrays to be word-aligned so that `strcpy' calls that copy constants to character arrays can be done inline. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN)) +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), true) + +/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates + some alignment increase, instead of optimization only purposes. E.g. + AMD x86-64 psABI says that variables with array type larger than 15 bytes + must be aligned to 16 byte boundaries. + + If this macro is not defined, then ALIGN is used. */ + +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), false) /* If defined, a C expression to compute the alignment for a local variable. TYPE is the data type, and ALIGN is the alignment that @@ -2131,6 +2147,7 @@ enum processor_type PROCESSOR_BTVER1, PROCESSOR_BTVER2, PROCESSOR_ATOM, + PROCESSOR_SLM, PROCESSOR_max }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ce77f15f009..a6e2946584b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -323,7 +323,7 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. 
Refinements due to arguments to be @@ -964,6 +964,7 @@ (include "btver2.md") (include "geode.md") (include "atom.md") +(include "slm.md") (include "core2.md") @@ -3624,6 +3625,18 @@ CONST0_RTX (V4SFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. +(define_peephole2 + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF + (match_operand:SF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_extend:DF (match_dup 2)))] + "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") + (define_insn "*extendsfdf2_mixed" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") (float_extend:DF @@ -3765,6 +3778,18 @@ CONST0_RTX (V2DFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. +(define_peephole2 + [(set (match_operand:SF 0 "register_operand") + (float_truncate:SF + (match_operand:DF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_truncate:SF (match_dup 2)))] + "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0) (float_truncate:SF (match_operand:DF 1))) @@ -11654,8 +11679,8 @@ (define_insn "bmi_bextr_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r,r") - (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r,r") - (match_operand:SWI48 2 "nonimmediate_operand" "r,m")] + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m") + (match_operand:SWI48 2 "register_operand" "r,r")] UNSPEC_BEXTR)) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI" @@ -11708,9 +11733,9 @@ ;; BMI2 instructions. (define_insn "bmi2_bzhi_<mode>3" [(set (match_operand:SWI48 0 "register_operand" "=r") - (and:SWI48 (match_operand:SWI48 1 "register_operand" "r") - (lshiftrt:SWI48 (const_int -1) - (match_operand:SWI48 2 "nonimmediate_operand" "rm")))) + (and:SWI48 (lshiftrt:SWI48 (const_int -1) + (match_operand:SWI48 2 "register_operand" "r")) + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI2" "bzhi\t{%2, %1, %0|%0, %1, %2}" @@ -16566,6 +16591,7 @@ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && (<MODE>mode != QImode || immediate_operand (operands[2], QImode) || q_regs_operand (operands[2], QImode)) @@ -16630,6 +16656,7 @@ || immediate_operand (operands[2], SImode) || q_regs_operand (operands[2], SImode)) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && ix86_match_ccmode (peep2_next_insn (3), (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index 131af0be2b7..b26dc46d256 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -49,7 +49,12 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ + /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -71,7 +76,11 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } -#endif /* SSE4.2 */ + +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ /* 32bit popcnt */ extern __inline int @@ -186,7 +195,12 @@ __bswapq (long long __X) return __builtin_bswap64 (__X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ + /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -194,7 +208,11 @@ __crc32q (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); } -#endif + +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ /* 64bit popcnt */ extern __inline long long diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index b137753a4f5..e825c34a256 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -24,71 +24,43 @@ #ifndef _IMMINTRIN_H_INCLUDED #define _IMMINTRIN_H_INCLUDED -#ifdef __MMX__ #include <mmintrin.h> -#endif -#ifdef __SSE__ #include <xmmintrin.h> -#endif -#ifdef __SSE2__ #include <emmintrin.h> -#endif -#ifdef __SSE3__ #include <pmmintrin.h> -#endif -#ifdef __SSSE3__ #include <tmmintrin.h> -#endif -#if defined (__SSE4_2__) || defined (__SSE4_1__) #include <smmintrin.h> -#endif -#if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> -#endif -#ifdef __AVX__ #include <avxintrin.h> -#endif -#ifdef __AVX2__ #include <avx2intrin.h> -#endif -#ifdef __LZCNT__ #include <lzcntintrin.h> -#endif -#ifdef __BMI__ #include <bmiintrin.h> -#endif -#ifdef __BMI2__ #include <bmi2intrin.h> -#endif -#ifdef __FMA__ #include <fmaintrin.h> -#endif -#ifdef __F16C__ #include <f16cintrin.h> -#endif -#ifdef __RTM__ #include <rtmintrin.h> -#endif -#ifdef __RTM__ #include <xtestintrin.h> -#endif -#ifdef __RDRND__ +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand16_step (unsigned short *__P) @@ -102,10 +74,18 @@ _rdrand32_step (unsigned int *__P) { return __builtin_ia32_rdrand32_step (__P); } -#endif /* __RDRND__ */ +#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ #ifdef __x86_64__ -#ifdef __FSGSBASE__ + +#ifndef __FSGSBASE__ +#pragma GCC push_options +#pragma GCC target("fsgsbase") +#define __DISABLE_FSGSBASE__ +#endif /* __FSGSBASE__ */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readfsbase_u32 (void) @@ -161,16 +141,27 @@ _writegsbase_u64 (unsigned long long __B) { __builtin_ia32_wrgsbase64 (__B); } -#endif /* __FSGSBASE__ */ - -#ifdef __RDRND__ +#ifdef __DISABLE_FSGSBASE__ +#undef __DISABLE_FSGSBASE__ +#pragma GCC pop_options +#endif /* __DISABLE_FSGSBASE__ */ + +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand64_step (unsigned long long *__P) { return __builtin_ia32_rdrand64_step (__P); } -#endif /* __RDRND__ */ 
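[Editorial aside, not part of the patch.] The recurring change in these intrinsic headers replaces the old `#error "... instruction set not enabled"` guards with `#pragma GCC push_options` / `#pragma GCC target("...")` plus a `__DISABLE_*__` marker, so the headers can always be included and the ISA is enabled only around their own definitions. The practical effect is that per-function target selection works without global -m flags. A minimal sketch, assuming an x86-64 GCC that carries these header changes; the function names are just examples:

```c
/* Illustrative sketch, not part of the patch.  Compiled without
   -msse4.2, this translation unit can still include the intrinsic
   headers; the SSE4.2 intrinsic is used only inside a function that
   enables the ISA via the target attribute, with a runtime check
   guarding the call.  */
#include <stdio.h>
#include <x86intrin.h>

__attribute__((target ("sse4.2")))
static unsigned int
crc32_sse42 (unsigned int crc, unsigned int value)
{
  return _mm_crc32_u32 (crc, value);
}

static unsigned int
crc32_generic (unsigned int crc, unsigned int value)
{
  /* Placeholder fallback; a real one would implement CRC32C in C.  */
  return crc ^ value;
}

int
main (void)
{
  unsigned int crc = 0xffffffffu;
  if (__builtin_cpu_supports ("sse4.2"))
    crc = crc32_sse42 (crc, 0x12345678u);
  else
    crc = crc32_generic (crc, 0x12345678u);
  printf ("crc = %#x\n", crc);
  return 0;
}
```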
+#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ + #endif /* __x86_64__ */ #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h index 8c708508a80..64ba7321fd9 100644 --- a/gcc/config/i386/lwpintrin.h +++ b/gcc/config/i386/lwpintrin.h @@ -29,8 +29,10 @@ #define _LWPINTRIN_H_INCLUDED #ifndef __LWP__ -# error "LWP instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("lwp") +#define __DISABLE_LWP__ +#endif /* __LWP__ */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __llwpcb (void *pcbAddress) @@ -95,6 +97,9 @@ __lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags) #endif #endif -#endif /* __LWP__ */ +#ifdef __DISABLE_LWP__ +#undef __DISABLE_LWP__ +#pragma GCC pop_options +#endif /* __DISABLE_LWP__ */ #endif /* _LWPINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h index 9382bb96ecc..22b9ee7999e 100644 --- a/gcc/config/i386/lzcntintrin.h +++ b/gcc/config/i386/lzcntintrin.h @@ -25,13 +25,16 @@ # error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __LZCNT__ -# error "LZCNT instruction is not enabled" -#endif /* __LZCNT__ */ #ifndef _LZCNTINTRIN_H_INCLUDED #define _LZCNTINTRIN_H_INCLUDED +#ifndef __LZCNT__ +#pragma GCC push_options +#pragma GCC target("lzcnt") +#define __DISABLE_LZCNT__ +#endif /* __LZCNT__ */ + extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt16 (unsigned short __X) { @@ -64,4 +67,9 @@ _lzcnt_u64 (unsigned long long __X) } #endif +#ifdef __DISABLE_LZCNT__ +#undef __DISABLE_LZCNT__ +#pragma GCC pop_options +#endif /* __DISABLE_LZCNT__ */ + #endif /* _LZCNTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h index 7e806b701a1..093d5e77932 100644 --- a/gcc/config/i386/mm3dnow.h +++ b/gcc/config/i386/mm3dnow.h @@ -27,11 +27,15 @@ #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED -#ifdef __3dNOW__ - #include <mmintrin.h> #include <prfchwintrin.h> +#ifndef __3dNOW__ +#pragma GCC push_options +#pragma GCC target("3dnow") +#define __DISABLE_3dNOW__ +#endif /* __3dNOW__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_femms (void) { @@ -205,6 +209,10 @@ _m_pswapd (__m64 __A) } #endif /* __3dNOW_A__ */ -#endif /* __3dNOW__ */ + +#ifdef __DISABLE_3dNOW__ +#undef __DISABLE_3dNOW__ +#pragma GCC pop_options +#endif /* __DISABLE_3dNOW__ */ #endif /* _MM3DNOW_H_INCLUDED */ diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index c76203b5477..c0729709373 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -28,8 +28,11 @@ #define _MMINTRIN_H_INCLUDED #ifndef __MMX__ -# error "MMX instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("mmx") +#define __DISABLE_MMX__ +#endif /* __MMX__ */ + /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); @@ -303,13 +306,21 @@ _m_paddd (__m64 __m1, __m64 __m2) } /* Add the 64-bit values in M1 to the 64-bit values in M2. 
*/ -#ifdef __SSE2__ +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } -#endif +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ @@ -407,13 +418,21 @@ _m_psubd (__m64 __m1, __m64 __m2) } /* Add the 64-bit values in M1 to the 64-bit values in M2. */ -#ifdef __SSE2__ +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_si64 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); } -#endif +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed saturating arithmetic. */ @@ -915,6 +934,9 @@ _mm_set1_pi8 (char __b) { return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); } +#ifdef __DISABLE_MMX__ +#undef __DISABLE_MMX__ +#pragma GCC pop_options +#endif /* __DISABLE_MMX__ */ -#endif /* __MMX__ */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/nmmintrin.h b/gcc/config/i386/nmmintrin.h index a4fbed26268..aefe3ef9e90 100644 --- a/gcc/config/i386/nmmintrin.h +++ b/gcc/config/i386/nmmintrin.h @@ -27,11 +27,7 @@ #ifndef _NMMINTRIN_H_INCLUDED #define _NMMINTRIN_H_INCLUDED -#ifndef __SSE4_2__ -# error "SSE4.2 instruction set not enabled" -#else /* We just include SSE4.1 header file. */ #include <smmintrin.h> -#endif /* __SSE4_2__ */ #endif /* _NMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h index 9c6956c1374..2447d5aa31b 100644 --- a/gcc/config/i386/pmmintrin.h +++ b/gcc/config/i386/pmmintrin.h @@ -27,13 +27,15 @@ #ifndef _PMMINTRIN_H_INCLUDED #define _PMMINTRIN_H_INCLUDED -#ifndef __SSE3__ -# error "SSE3 instruction set not enabled" -#else - /* We need definitions from the SSE2 and SSE header files*/ #include <emmintrin.h> +#ifndef __SSE3__ +#pragma GCC push_options +#pragma GCC target("sse3") +#define __DISABLE_SSE3__ +#endif /* __SSE3__ */ + /* Additional bits in the MXCSR. */ #define _MM_DENORMALS_ZERO_MASK 0x0040 #define _MM_DENORMALS_ZERO_ON 0x0040 @@ -122,6 +124,9 @@ _mm_mwait (unsigned int __E, unsigned int __H) __builtin_ia32_mwait (__E, __H); } -#endif /* __SSE3__ */ +#ifdef __DISABLE_SSE3__ +#undef __DISABLE_SSE3__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE3__ */ #endif /* _PMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h index af7efdf5d10..ee3a8e0d076 100644 --- a/gcc/config/i386/popcntintrin.h +++ b/gcc/config/i386/popcntintrin.h @@ -21,13 +21,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __POPCNT__ -# error "POPCNT instruction set not enabled" -#endif /* __POPCNT__ */ - #ifndef _POPCNTINTRIN_H_INCLUDED #define _POPCNTINTRIN_H_INCLUDED +#ifndef __POPCNT__ +#pragma GCC push_options +#pragma GCC target("popcnt") +#define __DISABLE_POPCNT__ +#endif /* __POPCNT__ */ + /* Calculate a number of bits set to 1. 
*/ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_popcnt_u32 (unsigned int __X) @@ -43,4 +45,9 @@ _mm_popcnt_u64 (unsigned long long __X) } #endif +#ifdef __DISABLE_POPCNT__ +#undef __DISABLE_POPCNT__ +#pragma GCC pop_options +#endif /* __DISABLE_POPCNT__ */ + #endif /* _POPCNTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/prfchwintrin.h b/gcc/config/i386/prfchwintrin.h index b8011bb6bd1..73aa4cac7af 100644 --- a/gcc/config/i386/prfchwintrin.h +++ b/gcc/config/i386/prfchwintrin.h @@ -26,17 +26,24 @@ #endif -#if !defined (__PRFCHW__) && !defined (__3dNOW__) -# error "PRFCHW instruction not enabled" -#endif /* __PRFCHW__ or __3dNOW__*/ - #ifndef _PRFCHWINTRIN_H_INCLUDED #define _PRFCHWINTRIN_H_INCLUDED +#ifndef __PRFCHW__ +#pragma GCC push_options +#pragma GCC target("prfchw") +#define __DISABLE_PRFCHW__ +#endif /* __PRFCHW__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_prefetchw (void *__P) { __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } +#ifdef __DISABLE_PRFCHW__ +#undef __DISABLE_PRFCHW__ +#pragma GCC pop_options +#endif /* __DISABLE_PRFCHW__ */ + #endif /* _PRFCHWINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h index f30c237a6cb..3d040ab3af6 100644 --- a/gcc/config/i386/rdseedintrin.h +++ b/gcc/config/i386/rdseedintrin.h @@ -25,12 +25,15 @@ # error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead." #endif +#ifndef _RDSEEDINTRIN_H_INCLUDED +#define _RDSEEDINTRIN_H_INCLUDED + #ifndef __RDSEED__ -# error "RDSEED instruction not enabled" +#pragma GCC push_options +#pragma GCC target("rdseed") +#define __DISABLE_RDSEED__ #endif /* __RDSEED__ */ -#ifndef _RDSEEDINTRIN_H_INCLUDED -#define _RDSEEDINTRIN_H_INCLUDED extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -55,4 +58,9 @@ _rdseed64_step (unsigned long long *p) } #endif +#ifdef __DISABLE_RDSEED__ +#undef __DISABLE_RDSEED__ +#pragma GCC pop_options +#endif /* __DISABLE_RDSEED__ */ + #endif /* _RDSEEDINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h index 003a7718db3..eb2812fd82e 100644 --- a/gcc/config/i386/rtmintrin.h +++ b/gcc/config/i386/rtmintrin.h @@ -25,13 +25,15 @@ # error "Never use <rtmintrin.h> directly; include <immintrin.h> instead." #endif -#ifndef __RTM__ -# error "RTM instruction set not enabled" -#endif /* __RTM__ */ - #ifndef _RTMINTRIN_H_INCLUDED #define _RTMINTRIN_H_INCLUDED +#ifndef __RTM__ +#pragma GCC push_options +#pragma GCC target("rtm") +#define __DISABLE_RTM__ +#endif /* __RTM__ */ + #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) @@ -74,4 +76,9 @@ _xabort (const unsigned int imm) #define _xabort(N) __builtin_ia32_xabort (N) #endif /* __OPTIMIZE__ */ +#ifdef __DISABLE_RTM__ +#undef __DISABLE_RTM__ +#pragma GCC pop_options +#endif /* __DISABLE_RTM__ */ + #endif /* _RTMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/slm.md b/gcc/config/i386/slm.md new file mode 100644 index 00000000000..3ac919e372c --- /dev/null +++ b/gcc/config/i386/slm.md @@ -0,0 +1,758 @@ +;; Slivermont(SLM) Scheduling +;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; Silvermont has 2 out-of-order IEC, 2 in-order FEC and 1 in-order MEC. + + +(define_automaton "slm") + +;; EU: Execution Unit +;; Silvermont EUs are connected by port 0 or port 1. + +;; SLM has two ports: port 0 and port 1 connecting to all execution units +(define_cpu_unit "slm-port-0,slm-port-1" "slm") + +(define_cpu_unit "slm-ieu-0, slm-ieu-1, + slm-imul, slm-feu-0, slm-feu-1" + "slm") + +(define_reservation "slm-all-ieu" "(slm-ieu-0 + slm-ieu-1 + slm-imul)") +(define_reservation "slm-all-feu" "(slm-feu-0 + slm-feu-1)") +(define_reservation "slm-all-eu" "(slm-all-ieu + slm-all-feu)") +(define_reservation "slm-fp-0" "(slm-port-0 + slm-feu-0)") + +;; Some EUs have duplicated copied and can be accessed via either +;; port 0 or port 1 +;; (define_reservation "slm-port-either" "(slm-port-0 | slm-port-1)" +(define_reservation "slm-port-dual" "(slm-port-0 + slm-port-1)") + +;;; fmul insn can have 4 or 5 cycles latency +(define_reservation "slm-fmul-5c" + "(slm-port-0 + slm-feu-0), slm-feu-0, nothing*3") +(define_reservation "slm-fmul-4c" "(slm-port-0 + slm-feu-0), nothing*3") + +;;; fadd can has 3 cycles latency depends on instruction forms +(define_reservation "slm-fadd-3c" "(slm-port-1 + slm-feu-1), nothing*2") +(define_reservation "slm-fadd-4c" + "(slm-port-1 + slm-feu-1), slm-feu-1, nothing*2") + +;;; imul insn has 3 cycles latency for SI operands +(define_reservation "slm-imul-32" + "(slm-port-1 + slm-imul), nothing*2") +(define_reservation "slm-imul-mem-32" + "(slm-port-1 + slm-imul + slm-port-0), nothing*2") +;;; imul has 4 cycles latency for DI operands with 1/2 tput +(define_reservation "slm-imul-64" + "(slm-port-1 + slm-imul), slm-imul, nothing*2") + +;;; dual-execution instructions can have 1,2,4,5 cycles latency depends on +;;; instruction forms +(define_reservation "slm-dual-1c" "(slm-port-dual + slm-all-eu)") +(define_reservation "slm-dual-2c" + "(slm-port-dual + slm-all-eu, nothing)") + +;;; Most of simple ALU instructions have 1 cycle latency. Some of them +;;; issue in port 0, some in port 0 and some in either port. +(define_reservation "slm-simple-0" "(slm-port-0 + slm-ieu-0)") +(define_reservation "slm-simple-1" "(slm-port-1 + slm-ieu-1)") +(define_reservation "slm-simple-either" "(slm-simple-0 | slm-simple-1)") + +;;; Complex macro-instruction has variants of latency, and uses both ports. 
+(define_reservation "slm-complex" "(slm-port-dual + slm-all-eu)") + +(define_insn_reservation "slm_other" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "!jeu"))) + "slm-complex, slm-all-eu*8") + +;; return has type "other" with atom_unit "jeu" +(define_insn_reservation "slm_other_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "jeu"))) + "slm-dual-1c") + +(define_insn_reservation "slm_multi" 9 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "multi")) + "slm-complex, slm-all-eu*8") + +;; Normal alu insns without carry +(define_insn_reservation "slm_alu" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Normal alu insns without carry, but use MEC. +(define_insn_reservation "slm_alu_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "slm_alu_carry" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "slm_alu_carry_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_alu1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))) + "slm-simple-either") + +;; bsf and bsf insn +(define_insn_reservation "slm_alu1_1" 10 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "1"))) + "slm-simple-1, slm-ieu-1*9") + +(define_insn_reservation "slm_alu1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; 16<-16, 32<-32 +(define_insn_reservation "slm_imovx" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 16<-16, 32<-32, mem +(define_insn_reservation "slm_imovx_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 +(define_insn_reservation "slm_imovx_2" 1 + (and (eq_attr "cpu" 
"slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem +(define_insn_reservation "slm_imovx_2_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-0") + +;; 16<-8 +(define_insn_reservation "slm_imovx_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (match_operand:HI 0 "register_operand") + (match_operand:QI 1 "general_operand")))) + "slm-simple-0, nothing*2") + +(define_insn_reservation "slm_lea" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "!HI"))) + "slm-simple-either") + +;; lea 16bit address is complex insn +(define_insn_reservation "slm_lea_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "HI"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_incdec" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_incdec_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing*2") + +;; simple shift instruction use SHIFT eu, none memory +(define_insn_reservation "slm_ishift" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; simple shift instruction use SHIFT eu, memory +(define_insn_reservation "slm_ishift_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; DF shift (prefixed with 0f) is complex insn with latency of 4 cycles +(define_insn_reservation "slm_ishift_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (eq_attr "prefix_0f" "1"))) + "slm-complex, slm-all-eu*3") + +(define_insn_reservation "slm_ishift1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_ishift1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_imul" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) + "slm-imul-32") + +(define_insn_reservation "slm_imul_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr 
"memory" "!none") (eq_attr "mode" "SI")))) + "slm-imul-mem-32") + +;; latency set to 4 as common 64x64 imul with 1/2 tput +(define_insn_reservation "slm_imul_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (eq_attr "mode" "!SI"))) + "slm-imul-64") + +(define_insn_reservation "slm_idiv" 33 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "idiv")) + "slm-complex, slm-all-eu*16, nothing*16") + +(define_insn_reservation "slm_icmp" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_icmp_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_ibr" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "!load"))) + "slm-simple-1") + +;; complex if jump target is from address +(define_insn_reservation "slm_ibr_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_setcc" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "!store"))) + "slm-simple-either") + +;; 2 cycles complex if target is in memory +(define_insn_reservation "slm_setcc_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "store"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_icmov" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "none"))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_icmov_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_push" 2 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "push")) + "slm-dual-2c") + +;; pop r64 is 1 cycle. UCODE if segreg, ignored +(define_insn_reservation "slm_pop" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "DI"))) + "slm-dual-1c") + +;; pop non-r64 is 2 cycles. 
UCODE if segreg, ignored +(define_insn_reservation "slm_pop_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "!DI"))) + "slm-dual-2c") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_call" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "call")) + "slm-dual-1c") + +(define_insn_reservation "slm_callv" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "callv")) + "slm-dual-1c") + +(define_insn_reservation "slm_leave" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "leave")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_str" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "str")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_sselog" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_sselog1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; not pmad, not psad +(define_insn_reservation "slm_sseiadd" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "!simul") + (eq_attr "atom_unit" "!complex"))))) + "slm-simple-either") + +;; pmad, psad and 64 +(define_insn_reservation "slm_sseiadd_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "DI"))))) + "slm-fmul-4c") + +;; pmad, psad and 128 +(define_insn_reservation "slm_sseiadd_3" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "TI"))))) + "slm-fmul-5c") + +;; if paddq(64 bit op), phadd/phsub +(define_insn_reservation "slm_sseiadd_4" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (ior (match_operand:V2DI 0 "register_operand") + (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; if immediate op. +(define_insn_reservation "slm_sseishft" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "!sishuf") + (match_operand 2 "immediate_operand")))) + "slm-simple-either") + +;; if palignr or psrldq +(define_insn_reservation "slm_sseishft_2" 1 + (and (eq_attr "cpu" "slm") + (ior (eq_attr "type" "sseishft1") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "sishuf") + (match_operand 2 "immediate_operand"))))) + "slm-simple-0") + +;; if reg/mem op +(define_insn_reservation "slm_sseishft_3" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (not (match_operand 2 "immediate_operand")))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_sseimul" 5 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "sseimul")) + "slm-fmul-5c") + +;; rcpss or rsqrtss +(define_insn_reservation "slm_sse" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) + "slm-fmul-4c") + +;; movshdup, movsldup. 
Suggest to type sseishft +(define_insn_reservation "slm_sse_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "movdup"))) + "slm-simple-0") + +;; lfence +(define_insn_reservation "slm_sse_3" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "lfence"))) + "slm-simple-either") + +;; sfence,clflush,mfence, prefetch +(define_insn_reservation "slm_sse_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (eq_attr "atom_sse_attr" "fence") + (eq_attr "atom_sse_attr" "prefetch")))) + "slm-simple-0") + +;; rcpps, rsqrtss, sqrt, ldmxcsr +(define_insn_reservation "slm_sse_5" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (ior (eq_attr "atom_sse_attr" "sqrt") + (eq_attr "atom_sse_attr" "mxcsr")) + (and (eq_attr "atom_sse_attr" "rcp") + (eq_attr "mode" "V4SF"))))) + "slm-complex, slm-all-eu*7, nothing") + +;; xmm->xmm +(define_insn_reservation "slm_ssemov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-either") + +;; reg->xmm +(define_insn_reservation "slm_ssemov_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "r")))) + "slm-simple-0") + +;; xmm->reg +(define_insn_reservation "slm_ssemov_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "r") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-0, nothing*2") + +;; mov mem +(define_insn_reservation "slm_ssemov_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) + "slm-simple-0") + +;; movu mem +(define_insn_reservation "slm_ssemov_5" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (ior (eq_attr "movu" "1") (eq_attr "memory" "!none")))) + "slm-simple-0, nothing") + +;; no memory simple +(define_insn_reservation "slm_sseadd" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; memory simple +(define_insn_reservation "slm_sseadd_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "!none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; maxps, minps, *pd, hadd, hsub +(define_insn_reservation "slm_sseadd_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; Except dppd/dpps +(define_insn_reservation "slm_ssemul" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "!SF"))) + "slm-fmul-5c") + +;; Except dppd/dpps, 4 cycle if mulss +(define_insn_reservation "slm_ssemul_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF"))) + "slm-fmul-4c") + +(define_insn_reservation "slm_ssecmp" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecmp")) + "slm-simple-either") + +(define_insn_reservation "slm_ssecomi" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecomi")) + "slm-simple-0") + +;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 
"register_operand") + (match_operand:V4SF 1 "register_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "register_operand"))))) + "slm-fp-0, slm-feu-0, nothing*3") + +;; memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt_mem" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "memory_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand"))))) +"slm-fp-0, slm-feu-0, nothing*3") + +;; cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "register_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "register_operand"))))) + "slm-fp-0, slm-feu-0") + +;; memory and cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "memory_operand"))))) + "slm-fp-0, slm-feu-0") + +;; otherwise. 4 cycles average for cvtss2sd +(define_insn_reservation "slm_ssecvt_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (not (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "nonimmediate_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "nonimmediate_operand")))))) + "slm-fp-0, nothing*3") + +;; memory and cvtsi2sd +(define_insn_reservation "slm_sseicvt" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "nonimmediate_operand")))) + "slm-fp-0") + +;; otherwise. 
8 cycles average for cvtsd2si +(define_insn_reservation "slm_sseicvt_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (not (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand"))))) + "slm-fp-0, nothing*3") + +(define_insn_reservation "slm_ssediv" 13 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssediv")) + "slm-fp-0, slm-feu-0*10, nothing*2") + +;; simple for fmov +(define_insn_reservation "slm_fmov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +;; simple for fmov +(define_insn_reservation "slm_fmov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +;; Define bypass here + +;; There will be 0 cycle stall from cmp/test to jcc + +;; There will be 1 cycle stall from flag producer to cmov and adc/sbb +(define_bypass 2 "slm_icmp, slm_test, slm_alu, slm_alu_carry, + slm_alu1, slm_negnot, slm_incdec, slm_ishift, + slm_ishift1, slm_rotate, slm_rotate1" + "slm_icmov, slm_alu_carry") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "slm_lea" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "slm_alu_carry, + slm_alu,slm_alu1,slm_negnot,slm_imov,slm_imovx, + slm_incdec,slm_ishift,slm_ishift1,slm_rotate, + slm_rotate1, slm_setcc, slm_icmov, slm_pop, + slm_alu_mem, slm_alu_carry_mem, slm_alu1_mem, + slm_imovx_mem, slm_imovx_2_mem, + slm_imov_mem, slm_icmov_mem, slm_fmov_mem" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1, + slm_ishift_mem, slm_ishift1_mem, + slm_rotate_mem, slm_rotate1_mem" + "ix86_dep_by_shift_count") diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 3ae916ce5d3..20fa2ca2f94 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -27,14 +27,16 @@ #ifndef _SMMINTRIN_H_INCLUDED #define _SMMINTRIN_H_INCLUDED -#ifndef __SSE4_1__ -# error "SSE4.1 instruction set not enabled" -#else - /* We need definitions from the SSSE3, SSE3, SSE2 and SSE header files. */ #include <tmmintrin.h> +#ifndef __SSE4_1__ +#pragma GCC push_options +#pragma GCC target("sse4.1") +#define __DISABLE_SSE4_1__ +#endif /* __SSE4_1__ */ + /* Rounding mode macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 @@ -582,7 +584,11 @@ _mm_stream_load_si128 (__m128i *__X) return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ /* These macros specify the source data format. */ #define _SIDD_UBYTE_OPS 0x00 @@ -792,9 +798,29 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); } -#ifdef __POPCNT__ +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ + +#ifdef __DISABLE_SSE4_1__ +#undef __DISABLE_SSE4_1__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_1__ */ + #include <popcntintrin.h> -#endif + +#ifndef __SSE4_1__ +#pragma GCC push_options +#pragma GCC target("sse4.1") +#define __DISABLE_SSE4_1__ +#endif /* __SSE4_1__ */ + +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_1__ */ /* Accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -823,8 +849,14 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) } #endif -#endif /* __SSE4_2__ */ +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ -#endif /* __SSE4_1__ */ +#ifdef __DISABLE_SSE4_1__ +#undef __DISABLE_SSE4_1__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_1__ */ #endif /* _SMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index f615ad7a2af..ba076a7f49a 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -22,7 +22,7 @@ LIMITS_H_TEST = true winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ - $(TM_P_H) $(HASHTAB_H) $(GGC_H) $(LTO_STREAMER_H) + $(TM_P_H) $(HASH_TABLE_H) $(GGC_H) $(LTO_STREAMER_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/i386/winnt.c diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index 43443e72a45..4d7b5987037 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -18,7 +18,7 @@ winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ - $(TM_P_H) $(HASHTAB_H) $(GGC_H) + $(TM_P_H) $(HASH_TABLE_H) $(GGC_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/i386/winnt.c diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h index 07c4f77fdd6..9235d6c713d 100644 --- a/gcc/config/i386/tbmintrin.h +++ b/gcc/config/i386/tbmintrin.h @@ -25,13 +25,15 @@ # error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." 
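[Editorial aside, not part of the patch.] The popcntintrin.h and smmintrin.h hunks above show the other half of the scheme: headers are included unconditionally and each one wraps its own definitions in push_options/target, popping and re-pushing around nested includes. User code can lean on the same idiom directly. A minimal sketch, assuming a GCC with these header changes and a CPU that actually supports POPCNT at run time:

```c
/* Illustrative sketch, not part of the patch.  The file as a whole can
   be built without -mpopcnt; the push_options/target region enables
   POPCNT only for the helper below, mirroring what the headers now do
   around their own intrinsics.  */
#include <popcntintrin.h>

#pragma GCC push_options
#pragma GCC target ("popcnt")

static int
popcount32 (unsigned int x)
{
  return _mm_popcnt_u32 (x);
}

#pragma GCC pop_options

int
main (void)
{
  return popcount32 (0xffu) == 8 ? 0 : 1;
}
```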
#endif -#ifndef __TBM__ -# error "TBM instruction set not enabled" -#endif /* __TBM__ */ - #ifndef _TBMINTRIN_H_INCLUDED #define _TBMINTRIN_H_INCLUDED +#ifndef __TBM__ +#pragma GCC push_options +#pragma GCC target("tbm") +#define __DISABLE_TBM__ +#endif /* __TBM__ */ + #ifdef __OPTIMIZE__ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextri_u32 (unsigned int __X, const unsigned int __I) @@ -169,4 +171,10 @@ __tzmsk_u64 (unsigned long long __X) #endif /* __x86_64__ */ + +#ifdef __DISABLE_TBM__ +#undef __DISABLE_TBM__ +#pragma GCC pop_options +#endif /* __DISABLE_TBM__ */ + #endif /* _TBMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index 767b199d3c4..3f63b4f8934 100644 --- a/gcc/config/i386/tmmintrin.h +++ b/gcc/config/i386/tmmintrin.h @@ -27,13 +27,15 @@ #ifndef _TMMINTRIN_H_INCLUDED #define _TMMINTRIN_H_INCLUDED -#ifndef __SSSE3__ -# error "SSSE3 instruction set not enabled" -#else - /* We need definitions from the SSE3, SSE2 and SSE header files*/ #include <pmmintrin.h> +#ifndef __SSSE3__ +#pragma GCC push_options +#pragma GCC target("ssse3") +#define __DISABLE_SSSE3__ +#endif /* __SSSE3__ */ + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi16 (__m128i __X, __m128i __Y) { @@ -239,6 +241,9 @@ _mm_abs_pi32 (__m64 __X) return (__m64) __builtin_ia32_pabsd ((__v2si)__X); } -#endif /* __SSSE3__ */ +#ifdef __DISABLE_SSSE3__ +#undef __DISABLE_SSSE3__ +#pragma GCC pop_options +#endif /* __DISABLE_SSSE3__ */ #endif /* _TMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c index f0f972c56d9..c9e3aa98a37 100644 --- a/gcc/config/i386/winnt.c +++ b/gcc/config/i386/winnt.c @@ -30,7 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "flags.h" #include "tm_p.h" #include "diagnostic-core.h" -#include "hashtab.h" +#include "hash-table.h" #include "langhooks.h" #include "ggc.h" #include "target.h" @@ -449,7 +449,7 @@ i386_pe_reloc_rw_mask (void) unsigned int i386_pe_section_type_flags (tree decl, const char *name, int reloc) { - static htab_t htab; + static hash_table <pointer_hash <unsigned int> > htab; unsigned int flags; unsigned int **slot; @@ -460,8 +460,8 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc) /* The names we put in the hashtable will always be the unique versions given to us by the stringtable, so we can just use their addresses as the keys. */ - if (!htab) - htab = htab_create (31, htab_hash_pointer, htab_eq_pointer, NULL); + if (!htab.is_created ()) + htab.create (31); if (decl && TREE_CODE (decl) == FUNCTION_DECL) flags = SECTION_CODE; @@ -480,7 +480,7 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc) flags |= SECTION_LINKONCE; /* See if we already have an entry for this section. */ - slot = (unsigned int **) htab_find_slot (htab, name, INSERT); + slot = htab.find_slot ((unsigned int *)name, INSERT); if (!*slot) { *slot = (unsigned int *) xmalloc (sizeof (unsigned int)); @@ -714,12 +714,29 @@ i386_pe_record_stub (const char *name) #ifdef CXX_WRAP_SPEC_LIST +/* Hashtable helpers. 
*/ + +struct wrapped_symbol_hasher : typed_noop_remove <char> +{ + typedef char value_type; + typedef char compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline hashval_t +wrapped_symbol_hasher::hash (const value_type *v) +{ + return htab_hash_string (v); +} + /* Hash table equality helper function. */ -static int -wrapper_strcmp (const void *x, const void *y) +inline bool +wrapped_symbol_hasher::equal (const value_type *x, const compare_type *y) { - return !strcmp ((const char *) x, (const char *) y); + return !strcmp (x, y); } /* Search for a function named TARGET in the list of library wrappers @@ -733,7 +750,7 @@ static const char * i386_find_on_wrapper_list (const char *target) { static char first_time = 1; - static htab_t wrappers; + static hash_table <wrapped_symbol_hasher> wrappers; if (first_time) { @@ -746,8 +763,7 @@ i386_find_on_wrapper_list (const char *target) char *bufptr; /* Breaks up the char array into separated strings strings and enter them into the hash table. */ - wrappers = htab_create_alloc (8, htab_hash_string, wrapper_strcmp, - 0, xcalloc, free); + wrappers.create (8); for (bufptr = wrapper_list_buffer; *bufptr; ++bufptr) { char *found = NULL; @@ -760,12 +776,12 @@ i386_find_on_wrapper_list (const char *target) if (*bufptr) *bufptr = 0; if (found) - *htab_find_slot (wrappers, found, INSERT) = found; + *wrappers.find_slot (found, INSERT) = found; } first_time = 0; } - return (const char *) htab_find (wrappers, target); + return wrappers.find (target); } #endif /* CXX_WRAP_SPEC_LIST */ diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h index 93c24f41ce6..defcfd82acc 100644 --- a/gcc/config/i386/wmmintrin.h +++ b/gcc/config/i386/wmmintrin.h @@ -30,13 +30,14 @@ /* We need definitions from the SSE2 header file. */ #include <emmintrin.h> -#if !defined (__AES__) && !defined (__PCLMUL__) -# error "AES/PCLMUL instructions not enabled" -#else - /* AES */ -#ifdef __AES__ +#ifndef __AES__ +#pragma GCC push_options +#pragma GCC target("aes") +#define __DISABLE_AES__ +#endif /* __AES__ */ + /* Performs 1 round of AES decryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -92,11 +93,20 @@ _mm_aeskeygenassist_si128 (__m128i __X, const int __C) ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \ (int)(C))) #endif -#endif /* __AES__ */ + +#ifdef __DISABLE_AES__ +#undef __DISABLE_AES__ +#pragma GCC pop_options +#endif /* __DISABLE_AES__ */ /* PCLMUL */ -#ifdef __PCLMUL__ +#ifndef __PCLMUL__ +#pragma GCC push_options +#pragma GCC target("pclmul") +#define __DISABLE_PCLMUL__ +#endif /* __PCLMUL__ */ + /* Performs carry-less integer multiplication of 64-bit halves of 128-bit input operands. The third parameter inducates which 64-bit haves of the input parameters v1 and v2 should be used. 
It must be @@ -113,8 +123,10 @@ _mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I) ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)(I))) #endif -#endif /* __PCLMUL__ */ -#endif /* __AES__/__PCLMUL__ */ +#ifdef __DISABLE_PCLMUL__ +#undef __DISABLE_PCLMUL__ +#pragma GCC pop_options +#endif /* __DISABLE_PCLMUL__ */ #endif /* _WMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 5bf29d5d361..46ced969a9f 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -26,96 +26,52 @@ #include <ia32intrin.h> -#ifdef __MMX__ #include <mmintrin.h> -#endif -#ifdef __SSE__ #include <xmmintrin.h> -#endif -#ifdef __SSE2__ #include <emmintrin.h> -#endif -#ifdef __SSE3__ #include <pmmintrin.h> -#endif -#ifdef __SSSE3__ #include <tmmintrin.h> -#endif -#ifdef __SSE4A__ #include <ammintrin.h> -#endif -#if defined (__SSE4_2__) || defined (__SSE4_1__) #include <smmintrin.h> -#endif -#if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> -#endif /* For including AVX instructions */ #include <immintrin.h> -#ifdef __3dNOW__ #include <mm3dnow.h> -#endif -#ifdef __FMA4__ #include <fma4intrin.h> -#endif -#ifdef __XOP__ #include <xopintrin.h> -#endif -#ifdef __LWP__ #include <lwpintrin.h> -#endif -#ifdef __BMI__ #include <bmiintrin.h> -#endif -#ifdef __BMI2__ #include <bmi2intrin.h> -#endif -#ifdef __TBM__ #include <tbmintrin.h> -#endif -#ifdef __LZCNT__ #include <lzcntintrin.h> -#endif -#ifdef __POPCNT__ #include <popcntintrin.h> -#endif -#ifdef __RDSEED__ #include <rdseedintrin.h> -#endif -#ifdef __PRFCHW__ #include <prfchwintrin.h> -#endif -#ifdef __FXSR__ #include <fxsrintrin.h> -#endif -#ifdef __XSAVE__ #include <xsaveintrin.h> -#endif -#ifdef __XSAVEOPT__ #include <xsaveoptintrin.h> -#endif #include <adxintrin.h> diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index a223562490e..14d1e7fe2b0 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -27,16 +27,18 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED -#ifndef __SSE__ -# error "SSE instruction set not enabled" -#else - /* We need type definitions from the MMX header file. */ #include <mmintrin.h> /* Get _mm_malloc () and _mm_free (). */ #include <mm_malloc.h> +#ifndef __SSE__ +#pragma GCC push_options +#pragma GCC target("sse") +#define __DISABLE_SSE__ +#endif /* __SSE__ */ + /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); @@ -1242,9 +1244,11 @@ do { \ } while (0) /* For backward source compatibility. */ -#ifdef __SSE2__ # include <emmintrin.h> -#endif -#endif /* __SSE__ */ +#ifdef __DISABLE_SSE__ +#undef __DISABLE_SSE__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE__ */ + #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h index 66b0f0de5c9..e0d148a0818 100644 --- a/gcc/config/i386/xopintrin.h +++ b/gcc/config/i386/xopintrin.h @@ -28,12 +28,14 @@ #ifndef _XOPMMINTRIN_H_INCLUDED #define _XOPMMINTRIN_H_INCLUDED -#ifndef __XOP__ -# error "XOP instruction set not enabled" -#else - #include <fma4intrin.h> +#ifndef __XOP__ +#pragma GCC push_options +#pragma GCC target("xop") +#define __DISABLE_XOP__ +#endif /* __XOP__ */ + /* Integer multiply/add intructions. 
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) @@ -830,6 +832,9 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) (int)(I))) #endif /* __OPTIMIZE__ */ -#endif /* __XOP__ */ +#ifdef __DISABLE_XOP__ +#undef __DISABLE_XOP__ +#pragma GCC pop_options +#endif /* __DISABLE_XOP__ */ #endif /* _XOPMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h index f5665894084..31c17b1d2c5 100644 --- a/gcc/config/i386/xsaveintrin.h +++ b/gcc/config/i386/xsaveintrin.h @@ -28,6 +28,12 @@ #ifndef _XSAVEINTRIN_H_INCLUDED #define _XSAVEINTRIN_H_INCLUDED +#ifndef __XSAVE__ +#pragma GCC push_options +#pragma GCC target("xsave") +#define __DISABLE_XSAVE__ +#endif /* __XSAVE__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsave (void *__P, long long __M) @@ -58,4 +64,9 @@ _xrstor64 (void *__P, long long __M) } #endif +#ifdef __DISABLE_XSAVE__ +#undef __DISABLE_XSAVE__ +#pragma GCC pop_options +#endif /* __DISABLE_XSAVE__ */ + #endif /* _XSAVEINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h index 0d73e341f3f..aa9538da33e 100644 --- a/gcc/config/i386/xsaveoptintrin.h +++ b/gcc/config/i386/xsaveoptintrin.h @@ -28,6 +28,12 @@ #ifndef _XSAVEOPTINTRIN_H_INCLUDED #define _XSAVEOPTINTRIN_H_INCLUDED +#ifndef __XSAVEOPT__ +#pragma GCC push_options +#pragma GCC target("xsaveopt") +#define __DISABLE_XSAVEOPT__ +#endif /* __XSAVEOPT__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsaveopt (void *__P, long long __M) @@ -44,4 +50,9 @@ _xsaveopt64 (void *__P, long long __M) } #endif +#ifdef __DISABLE_XSAVEOPT__ +#undef __DISABLE_XSAVEOPT__ +#pragma GCC pop_options +#endif /* __DISABLE_XSAVEOPT__ */ + #endif /* _XSAVEOPTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h index c82fb7a61ae..a6afa896b4f 100644 --- a/gcc/config/i386/xtestintrin.h +++ b/gcc/config/i386/xtestintrin.h @@ -25,13 +25,15 @@ # error "Never use <xtestintrin.h> directly; include <immintrin.h> instead." #endif -#ifndef __RTM__ -# error "RTM instruction set not enabled" -#endif /* __RTM__ */ - #ifndef _XTESTINTRIN_H_INCLUDED #define _XTESTINTRIN_H_INCLUDED +#ifndef __RTM__ +#pragma GCC push_options +#pragma GCC target("rtm") +#define __DISABLE_RTM__ +#endif /* __RTM__ */ + /* Return non-zero if the instruction executes inside an RTM or HLE code region. Return zero otherwise. */ extern __inline int @@ -41,4 +43,9 @@ _xtest (void) return __builtin_ia32_xtest (); } +#ifdef __DISABLE_RTM__ +#undef __DISABLE_RTM__ +#pragma GCC pop_options +#endif /* __DISABLE_RTM__ */ + #endif /* _XTESTINTRIN_H_INCLUDED */ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 144cf7ee5ee..a128b19c7ca 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. 
If not see #include "target-def.h" #include "common/common-target.h" #include "tm_p.h" -#include "hashtab.h" +#include "hash-table.h" #include "langhooks.h" #include "gimple.h" #include "intl.h" @@ -170,7 +170,7 @@ static ds_t ia64_get_insn_spec_ds (rtx); static ds_t ia64_get_insn_checked_ds (rtx); static bool ia64_skip_rtx_p (const_rtx); static int ia64_speculate_insn (rtx, ds_t, rtx *); -static bool ia64_needs_block_p (int); +static bool ia64_needs_block_p (ds_t); static rtx ia64_gen_spec_check (rtx, rtx, ds_t); static int ia64_spec_check_p (rtx); static int ia64_spec_check_src_p (rtx); @@ -257,8 +257,6 @@ static struct bundle_state *get_free_bundle_state (void); static void free_bundle_state (struct bundle_state *); static void initiate_bundle_states (void); static void finish_bundle_states (void); -static unsigned bundle_state_hash (const void *); -static int bundle_state_eq_p (const void *, const void *); static int insert_bundle_state (struct bundle_state *); static void initiate_bundle_state_table (void); static void finish_bundle_state_table (void); @@ -8341,9 +8339,7 @@ ia64_needs_block_p (ds_t ts) return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc); } -/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN. - If (LABEL != 0 || MUTATE_P), generate branchy recovery check. - Otherwise, generate a simple check. */ +/* Generate (or regenerate) a recovery check for INSN. */ static rtx ia64_gen_spec_check (rtx insn, rtx label, ds_t ds) { @@ -8528,18 +8524,21 @@ finish_bundle_states (void) } } -/* Hash table of the bundle states. The key is dfa_state and insn_num - of the bundle states. */ +/* Hashtable helpers. */ -static htab_t bundle_state_table; +struct bundle_state_hasher : typed_noop_remove <bundle_state> +{ + typedef bundle_state value_type; + typedef bundle_state compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; /* The function returns hash of BUNDLE_STATE. */ -static unsigned -bundle_state_hash (const void *bundle_state) +inline hashval_t +bundle_state_hasher::hash (const value_type *state) { - const struct bundle_state *const state - = (const struct bundle_state *) bundle_state; unsigned result, i; for (result = i = 0; i < dfa_state_size; i++) @@ -8550,19 +8549,20 @@ bundle_state_hash (const void *bundle_state) /* The function returns nonzero if the bundle state keys are equal. */ -static int -bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2) +inline bool +bundle_state_hasher::equal (const value_type *state1, + const compare_type *state2) { - const struct bundle_state *const state1 - = (const struct bundle_state *) bundle_state_1; - const struct bundle_state *const state2 - = (const struct bundle_state *) bundle_state_2; - return (state1->insn_num == state2->insn_num && memcmp (state1->dfa_state, state2->dfa_state, dfa_state_size) == 0); } +/* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ + +static hash_table <bundle_state_hasher> bundle_state_table; + /* The function inserts the BUNDLE_STATE into the hash table. The function returns nonzero if the bundle has been inserted into the table. The table contains the best bundle state with given key. 
*/ @@ -8570,39 +8570,35 @@ bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2) static int insert_bundle_state (struct bundle_state *bundle_state) { - void **entry_ptr; + struct bundle_state **entry_ptr; - entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT); + entry_ptr = bundle_state_table.find_slot (bundle_state, INSERT); if (*entry_ptr == NULL) { bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; index_to_bundle_states [bundle_state->insn_num] = bundle_state; - *entry_ptr = (void *) bundle_state; + *entry_ptr = bundle_state; return TRUE; } - else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost - || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost - && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num + else if (bundle_state->cost < (*entry_ptr)->cost + || (bundle_state->cost == (*entry_ptr)->cost + && ((*entry_ptr)->accumulated_insns_num > bundle_state->accumulated_insns_num - || (((struct bundle_state *) - *entry_ptr)->accumulated_insns_num + || ((*entry_ptr)->accumulated_insns_num == bundle_state->accumulated_insns_num - && (((struct bundle_state *) - *entry_ptr)->branch_deviation + && ((*entry_ptr)->branch_deviation > bundle_state->branch_deviation - || (((struct bundle_state *) - *entry_ptr)->branch_deviation + || ((*entry_ptr)->branch_deviation == bundle_state->branch_deviation - && ((struct bundle_state *) - *entry_ptr)->middle_bundle_stops + && (*entry_ptr)->middle_bundle_stops > bundle_state->middle_bundle_stops)))))) { struct bundle_state temp; - temp = *(struct bundle_state *) *entry_ptr; - *(struct bundle_state *) *entry_ptr = *bundle_state; - ((struct bundle_state *) *entry_ptr)->next = temp.next; + temp = **entry_ptr; + **entry_ptr = *bundle_state; + (*entry_ptr)->next = temp.next; *bundle_state = temp; } return FALSE; @@ -8613,8 +8609,7 @@ insert_bundle_state (struct bundle_state *bundle_state) static void initiate_bundle_state_table (void) { - bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p, - (htab_del) 0); + bundle_state_table.create (50); } /* Finish work with the hash table. */ @@ -8622,7 +8617,7 @@ initiate_bundle_state_table (void) static void finish_bundle_state_table (void) { - htab_delete (bundle_state_table); + bundle_state_table.dispose (); } diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index 5c3ac644be3..b009cdf2bc5 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -24,4 +24,5 @@ ia64-c.o: $(srcdir)/config/ia64/ia64-c.c $(CONFIG_H) $(SYSTEM_H) \ # genattrtab generates very long string literals. insn-attrtab.o-warn = -Wno-error -ia64.o: debug.h $(PARAMS_H) sel-sched.h reload.h $(OPTS_H) dumpfile.h +ia64.o: $(srcdir)/config/ia64/ia64.c debug.h $(PARAMS_H) sel-sched.h reload.h \ + $(OPTS_H) dumpfile.h $(HASH_TABLE_H) diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md index ddef8cc495d..1fe6119d075 100644 --- a/gcc/config/mips/constraints.md +++ b/gcc/config/mips/constraints.md @@ -92,6 +92,12 @@ ;; but the DSP version allows any accumulator target. (define_register_constraint "ka" "ISA_HAS_DSP_MULT ? ACC_REGS : MD_REGS") +;; The register class to use for an allocatable division result. +;; MIPS16 uses M16_REGS because LO is fixed. +(define_register_constraint "kl" + "TARGET_MIPS16 ? M16_REGS : TARGET_BIG_ENDIAN ? 
MD1_REG : MD0_REG" + "@internal") + (define_constraint "kf" "@internal" (match_operand 0 "force_to_mem_operand")) diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def index 9e5fd162189..a1c65915f78 100644 --- a/gcc/config/mips/mips-cpus.def +++ b/gcc/config/mips/mips-cpus.def @@ -43,7 +43,7 @@ MIPS_CPU ("mips4", PROCESSOR_R8000, 4, 0) that to a recommendation to avoid the instructions in code that isn't tuned to a specific processor. */ MIPS_CPU ("mips32", PROCESSOR_4KC, 32, PTF_AVOID_BRANCHLIKELY) -MIPS_CPU ("mips32r2", PROCESSOR_M4K, 33, PTF_AVOID_BRANCHLIKELY) +MIPS_CPU ("mips32r2", PROCESSOR_74KF2_1, 33, PTF_AVOID_BRANCHLIKELY) MIPS_CPU ("mips64", PROCESSOR_5KC, 64, PTF_AVOID_BRANCHLIKELY) /* ??? For now just tune the generic MIPS64r2 for 5KC as well. */ MIPS_CPU ("mips64r2", PROCESSOR_5KC, 65, PTF_AVOID_BRANCHLIKELY) @@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0) MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0) MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0) MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0) +MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0) /* ST Loongson 2E/2F processors. */ MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY) MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY) @@ -94,6 +95,8 @@ MIPS_CPU ("4ksc", PROCESSOR_4KC, 32, 0) MIPS_CPU ("m4k", PROCESSOR_M4K, 33, 0) MIPS_CPU ("m14kc", PROCESSOR_M4K, 33, 0) MIPS_CPU ("m14k", PROCESSOR_M4K, 33, 0) +MIPS_CPU ("m14ke", PROCESSOR_M4K, 33, 0) +MIPS_CPU ("m14kec", PROCESSOR_M4K, 33, 0) MIPS_CPU ("4kec", PROCESSOR_4KC, 33, 0) MIPS_CPU ("4kem", PROCESSOR_4KC, 33, 0) MIPS_CPU ("4kep", PROCESSOR_4KP, 33, 0) diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md index 002c9992001..49a08689638 100644 --- a/gcc/config/mips/mips-dsp.md +++ b/gcc/config/mips/mips-dsp.md @@ -1131,8 +1131,7 @@ "ISA_HAS_L<SHORT:SIZE><U>X" "l<SHORT:size><u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_expand "mips_lhx" [(match_operand:SI 0 "register_operand") @@ -1165,8 +1164,7 @@ "ISA_HAS_L<GPR:SIZE>X" "l<GPR:size>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_insn "*mips_lw<u>x_<P:mode>_ext" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1176,8 +1174,7 @@ "ISA_HAS_LW<U>X && TARGET_64BIT" "lw<u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "DI") - (set_attr "length" "4")]) + (set_attr "mode" "DI")]) ;; Table 2-8. 
MIPS DSP ASE Instructions: Branch ;; BPOSGE32 diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 9c70cc4324f..a22c7829b77 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -481,7 +481,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) (define_insn_and_split "mips_cabs_cond_4s" @@ -510,7 +510,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt index 0d7fa26510d..409356e1af7 100644 --- a/gcc/config/mips/mips-tables.opt +++ b/gcc/config/mips/mips-tables.opt @@ -208,425 +208,437 @@ EnumValue Enum(mips_arch_opt_value) String(4700) Value(22) EnumValue -Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical +Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical +Enum(mips_arch_opt_value) String(5900) Value(23) EnumValue -Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical +Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical EnumValue -Enum(mips_arch_opt_value) String(r8k) Value(25) +Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical EnumValue -Enum(mips_arch_opt_value) String(8000) Value(25) +Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical EnumValue -Enum(mips_arch_opt_value) String(8k) Value(25) +Enum(mips_arch_opt_value) String(r8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical +Enum(mips_arch_opt_value) String(8000) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10k) Value(26) +Enum(mips_arch_opt_value) String(8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(10000) Value(26) +Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical EnumValue -Enum(mips_arch_opt_value) String(10k) Value(26) +Enum(mips_arch_opt_value) String(r10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical +Enum(mips_arch_opt_value) String(10000) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12k) Value(27) +Enum(mips_arch_opt_value) String(10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(12000) Value(27) +Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical EnumValue -Enum(mips_arch_opt_value) String(12k) Value(27) +Enum(mips_arch_opt_value) String(r12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical +Enum(mips_arch_opt_value) String(12000) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14k) Value(28) +Enum(mips_arch_opt_value) String(12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(14000) Value(28) +Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical EnumValue -Enum(mips_arch_opt_value) String(14k) Value(28) +Enum(mips_arch_opt_value) String(r14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical +Enum(mips_arch_opt_value) String(14000) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16k) Value(29) +Enum(mips_arch_opt_value) String(14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(16000) Value(29) +Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical EnumValue -Enum(mips_arch_opt_value) String(16k) Value(29) +Enum(mips_arch_opt_value) String(r16k) 
Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical +Enum(mips_arch_opt_value) String(16000) Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5k) Value(30) +Enum(mips_arch_opt_value) String(16k) Value(30) EnumValue -Enum(mips_arch_opt_value) String(5000) Value(30) +Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical EnumValue -Enum(mips_arch_opt_value) String(5k) Value(30) +Enum(mips_arch_opt_value) String(vr5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5000) Value(30) +Enum(mips_arch_opt_value) String(5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5k) Value(30) +Enum(mips_arch_opt_value) String(5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical +Enum(mips_arch_opt_value) String(r5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(5400) Value(31) +Enum(mips_arch_opt_value) String(r5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5400) Value(31) +Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical EnumValue -Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical +Enum(mips_arch_opt_value) String(5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(5500) Value(32) +Enum(mips_arch_opt_value) String(r5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(r5500) Value(32) +Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical EnumValue -Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical +Enum(mips_arch_opt_value) String(5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(rm7k) Value(33) +Enum(mips_arch_opt_value) String(r5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(7000) Value(33) +Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical EnumValue -Enum(mips_arch_opt_value) String(7k) Value(33) +Enum(mips_arch_opt_value) String(rm7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7000) Value(33) +Enum(mips_arch_opt_value) String(7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7k) Value(33) +Enum(mips_arch_opt_value) String(7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical +Enum(mips_arch_opt_value) String(r7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9k) Value(34) +Enum(mips_arch_opt_value) String(r7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(9000) Value(34) +Enum(mips_arch_opt_value) String(rm9000) Value(35) Canonical EnumValue -Enum(mips_arch_opt_value) String(9k) Value(34) +Enum(mips_arch_opt_value) String(rm9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9000) Value(34) +Enum(mips_arch_opt_value) String(9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9k) Value(34) +Enum(mips_arch_opt_value) String(9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical +Enum(mips_arch_opt_value) String(r9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r4kc) Value(35) +Enum(mips_arch_opt_value) String(r9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4km) Value(36) Canonical +Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4km) Value(36) +Enum(mips_arch_opt_value) String(r4kc) Value(36) EnumValue -Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical +Enum(mips_arch_opt_value) String(4km) Value(37) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kp) Value(37) +Enum(mips_arch_opt_value) String(r4km) Value(37) EnumValue -Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical 
+Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksc) Value(38) +Enum(mips_arch_opt_value) String(r4kp) Value(38) EnumValue -Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical +Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical +Enum(mips_arch_opt_value) String(r4ksc) Value(39) EnumValue -Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical +Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical +Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kec) Value(42) +Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical +Enum(mips_arch_opt_value) String(m14ke) Value(43) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kem) Value(43) +Enum(mips_arch_opt_value) String(m14kec) Value(44) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical +Enum(mips_arch_opt_value) String(4kec) Value(45) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kep) Value(44) +Enum(mips_arch_opt_value) String(r4kec) Value(45) EnumValue -Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical +Enum(mips_arch_opt_value) String(4kem) Value(46) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksd) Value(45) +Enum(mips_arch_opt_value) String(r4kem) Value(46) EnumValue -Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical +Enum(mips_arch_opt_value) String(4kep) Value(47) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kc) Value(46) +Enum(mips_arch_opt_value) String(r4kep) Value(47) EnumValue -Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical +Enum(mips_arch_opt_value) String(4ksd) Value(48) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf2_1) Value(47) +Enum(mips_arch_opt_value) String(r4ksd) Value(48) EnumValue -Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical +Enum(mips_arch_opt_value) String(24kc) Value(49) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf) Value(48) +Enum(mips_arch_opt_value) String(r24kc) Value(49) EnumValue -Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical +Enum(mips_arch_opt_value) String(24kf2_1) Value(50) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf1_1) Value(49) +Enum(mips_arch_opt_value) String(r24kf2_1) Value(50) EnumValue -Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical +Enum(mips_arch_opt_value) String(24kf) Value(51) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kfx) Value(50) +Enum(mips_arch_opt_value) String(r24kf) Value(51) EnumValue -Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical +Enum(mips_arch_opt_value) String(24kf1_1) Value(52) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kx) Value(51) +Enum(mips_arch_opt_value) String(r24kf1_1) Value(52) EnumValue -Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical +Enum(mips_arch_opt_value) String(24kfx) Value(53) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kec) Value(52) +Enum(mips_arch_opt_value) String(r24kfx) Value(53) EnumValue -Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical +Enum(mips_arch_opt_value) String(24kx) Value(54) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef2_1) Value(53) +Enum(mips_arch_opt_value) String(r24kx) Value(54) EnumValue -Enum(mips_arch_opt_value) 
String(24kef) Value(54) Canonical +Enum(mips_arch_opt_value) String(24kec) Value(55) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef) Value(54) +Enum(mips_arch_opt_value) String(r24kec) Value(55) EnumValue -Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical +Enum(mips_arch_opt_value) String(24kef2_1) Value(56) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef1_1) Value(55) +Enum(mips_arch_opt_value) String(r24kef2_1) Value(56) EnumValue -Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical +Enum(mips_arch_opt_value) String(24kef) Value(57) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kefx) Value(56) +Enum(mips_arch_opt_value) String(r24kef) Value(57) EnumValue -Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical +Enum(mips_arch_opt_value) String(24kef1_1) Value(58) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kex) Value(57) +Enum(mips_arch_opt_value) String(r24kef1_1) Value(58) EnumValue -Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical +Enum(mips_arch_opt_value) String(24kefx) Value(59) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kc) Value(58) +Enum(mips_arch_opt_value) String(r24kefx) Value(59) EnumValue -Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical +Enum(mips_arch_opt_value) String(24kex) Value(60) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf2_1) Value(59) +Enum(mips_arch_opt_value) String(r24kex) Value(60) EnumValue -Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical +Enum(mips_arch_opt_value) String(34kc) Value(61) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf) Value(60) +Enum(mips_arch_opt_value) String(r34kc) Value(61) EnumValue -Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical +Enum(mips_arch_opt_value) String(34kf2_1) Value(62) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf1_1) Value(61) +Enum(mips_arch_opt_value) String(r34kf2_1) Value(62) EnumValue -Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical +Enum(mips_arch_opt_value) String(34kf) Value(63) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kfx) Value(62) +Enum(mips_arch_opt_value) String(r34kf) Value(63) EnumValue -Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical +Enum(mips_arch_opt_value) String(34kf1_1) Value(64) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kx) Value(63) +Enum(mips_arch_opt_value) String(r34kf1_1) Value(64) EnumValue -Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical +Enum(mips_arch_opt_value) String(34kfx) Value(65) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kn) Value(64) +Enum(mips_arch_opt_value) String(r34kfx) Value(65) EnumValue -Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical +Enum(mips_arch_opt_value) String(34kx) Value(66) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kc) Value(65) +Enum(mips_arch_opt_value) String(r34kx) Value(66) EnumValue -Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical +Enum(mips_arch_opt_value) String(34kn) Value(67) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf2_1) Value(66) +Enum(mips_arch_opt_value) String(r34kn) Value(67) EnumValue -Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical +Enum(mips_arch_opt_value) String(74kc) Value(68) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf) Value(67) +Enum(mips_arch_opt_value) String(r74kc) Value(68) EnumValue -Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical +Enum(mips_arch_opt_value) String(74kf2_1) Value(69) 
Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf1_1) Value(68) +Enum(mips_arch_opt_value) String(r74kf2_1) Value(69) EnumValue -Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical +Enum(mips_arch_opt_value) String(74kf) Value(70) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kfx) Value(69) +Enum(mips_arch_opt_value) String(r74kf) Value(70) EnumValue -Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical +Enum(mips_arch_opt_value) String(74kf1_1) Value(71) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kx) Value(70) +Enum(mips_arch_opt_value) String(r74kf1_1) Value(71) EnumValue -Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical +Enum(mips_arch_opt_value) String(74kfx) Value(72) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf3_2) Value(71) +Enum(mips_arch_opt_value) String(r74kfx) Value(72) EnumValue -Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical +Enum(mips_arch_opt_value) String(74kx) Value(73) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kc) Value(72) +Enum(mips_arch_opt_value) String(r74kx) Value(73) EnumValue -Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical +Enum(mips_arch_opt_value) String(74kf3_2) Value(74) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73) +Enum(mips_arch_opt_value) String(r74kf3_2) Value(74) EnumValue -Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical +Enum(mips_arch_opt_value) String(1004kc) Value(75) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf) Value(74) +Enum(mips_arch_opt_value) String(r1004kc) Value(75) EnumValue -Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical +Enum(mips_arch_opt_value) String(1004kf2_1) Value(76) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75) +Enum(mips_arch_opt_value) String(r1004kf2_1) Value(76) EnumValue -Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical +Enum(mips_arch_opt_value) String(1004kf) Value(77) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kc) Value(76) +Enum(mips_arch_opt_value) String(r1004kf) Value(77) EnumValue -Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical +Enum(mips_arch_opt_value) String(1004kf1_1) Value(78) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kf) Value(77) +Enum(mips_arch_opt_value) String(r1004kf1_1) Value(78) EnumValue -Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical +Enum(mips_arch_opt_value) String(5kc) Value(79) Canonical EnumValue -Enum(mips_arch_opt_value) String(r20kc) Value(78) +Enum(mips_arch_opt_value) String(r5kc) Value(79) EnumValue -Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical +Enum(mips_arch_opt_value) String(5kf) Value(80) Canonical EnumValue -Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical +Enum(mips_arch_opt_value) String(r5kf) Value(80) EnumValue -Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical +Enum(mips_arch_opt_value) String(20kc) Value(81) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71k) Value(81) +Enum(mips_arch_opt_value) String(r20kc) Value(81) EnumValue -Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical +Enum(mips_arch_opt_value) String(sb1) Value(82) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical +Enum(mips_arch_opt_value) String(sb1a) Value(83) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical +Enum(mips_arch_opt_value) String(sr71000) Value(84) Canonical EnumValue -Enum(mips_arch_opt_value) 
String(octeon+) Value(85) Canonical +Enum(mips_arch_opt_value) String(sr71k) Value(84) EnumValue -Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical +Enum(mips_arch_opt_value) String(xlr) Value(85) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical +Enum(mips_arch_opt_value) String(loongson3a) Value(86) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon) Value(87) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon+) Value(88) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon2) Value(89) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(xlp) Value(90) Canonical diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 1f2774638fc..bd1d10b0e4e 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "ggc.h" #include "gstab.h" -#include "hashtab.h" +#include "hash-table.h" #include "debug.h" #include "target.h" #include "target-def.h" @@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* R5900 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult_sf */ + COSTS_N_INSNS (256), /* fp_mult_df */ + COSTS_N_INSNS (8), /* fp_div_sf */ + COSTS_N_INSNS (256), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (256), /* int_mult_di */ + COSTS_N_INSNS (37), /* int_div_si */ + COSTS_N_INSNS (256), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* R7000 */ /* The only costs that are changed here are integer multiplication. */ @@ -1426,6 +1439,16 @@ mips_merge_decl_attributes (tree olddecl, tree newdecl) return merge_attributes (DECL_ATTRIBUTES (olddecl), DECL_ATTRIBUTES (newdecl)); } + +/* Implement TARGET_CAN_INLINE_P. */ + +static bool +mips_can_inline_p (tree caller, tree callee) +{ + if (mips_get_compress_mode (callee) != mips_get_compress_mode (caller)) + return false; + return default_target_can_inline_p (caller, callee); +} /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ @@ -12440,7 +12463,10 @@ mips_start_ll_sc_sync_block (void) if (!ISA_HAS_LL_SC) { output_asm_insn (".set\tpush", 0); - output_asm_insn (".set\tmips2", 0); + if (TARGET_64BIT) + output_asm_insn (".set\tmips3", 0); + else + output_asm_insn (".set\tmips2", 0); } } @@ -12995,6 +13021,7 @@ mips_issue_rate (void) case PROCESSOR_R4130: case PROCESSOR_R5400: case PROCESSOR_R5500: + case PROCESSOR_R5900: case PROCESSOR_R7000: case PROCESSOR_R9000: case PROCESSOR_OCTEON: @@ -15796,30 +15823,43 @@ mips_hash_base (rtx base) return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); } +/* Hashtable helpers. */ + +struct mips_lo_sum_offset_hasher : typed_free_remove <mips_lo_sum_offset> +{ + typedef mips_lo_sum_offset value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; + /* Hash-table callbacks for mips_lo_sum_offsets. 
*/ -static hashval_t -mips_lo_sum_offset_hash (const void *entry) +inline hashval_t +mips_lo_sum_offset_hasher::hash (const value_type *entry) { - return mips_hash_base (((const struct mips_lo_sum_offset *) entry)->base); + return mips_hash_base (entry->base); } -static int -mips_lo_sum_offset_eq (const void *entry, const void *value) +inline bool +mips_lo_sum_offset_hasher::equal (const value_type *entry, + const compare_type *value) { - return rtx_equal_p (((const struct mips_lo_sum_offset *) entry)->base, - (const_rtx) value); + return rtx_equal_p (entry->base, value); } +typedef hash_table <mips_lo_sum_offset_hasher> mips_offset_table; + /* Look up symbolic constant X in HTAB, which is a hash table of mips_lo_sum_offsets. If OPTION is NO_INSERT, return true if X can be paired with a recorded LO_SUM, otherwise record X in the table. */ static bool -mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option) +mips_lo_sum_offset_lookup (mips_offset_table htab, rtx x, + enum insert_option option) { rtx base, offset; - void **slot; + mips_lo_sum_offset **slot; struct mips_lo_sum_offset *entry; /* Split X into a base and offset. */ @@ -15828,7 +15868,7 @@ mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option) base = UNSPEC_ADDRESS (base); /* Look up the base in the hash table. */ - slot = htab_find_slot_with_hash (htab, base, mips_hash_base (base), option); + slot = htab.find_slot_with_hash (base, mips_hash_base (base), option); if (slot == NULL) return false; @@ -15858,7 +15898,8 @@ static int mips_record_lo_sum (rtx *loc, void *data) { if (GET_CODE (*loc) == LO_SUM) - mips_lo_sum_offset_lookup ((htab_t) data, XEXP (*loc, 1), INSERT); + mips_lo_sum_offset_lookup (*(mips_offset_table*) data, + XEXP (*loc, 1), INSERT); return 0; } @@ -15867,7 +15908,7 @@ mips_record_lo_sum (rtx *loc, void *data) LO_SUMs in the current function. */ static bool -mips_orphaned_high_part_p (htab_t htab, rtx insn) +mips_orphaned_high_part_p (mips_offset_table htab, rtx insn) { enum mips_symbol_type type; rtx x, set; @@ -15975,7 +16016,7 @@ mips_reorg_process_insns (void) { rtx insn, last_insn, subinsn, next_insn, lo_reg, delayed_reg; int hilo_delay; - htab_t htab; + mips_offset_table htab; /* Force all instructions to be split into their final form. */ split_all_insns_noflow (); @@ -16001,8 +16042,9 @@ mips_reorg_process_insns (void) cfun->machine->all_noreorder_p = false; /* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder - because we rely on the assembler to work around some errata. */ - if (TARGET_FIX_VR4120 || TARGET_FIX_24K) + because we rely on the assembler to work around some errata. + The r5900 too has several bugs. */ + if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900) cfun->machine->all_noreorder_p = false; /* The same is true for -mfix-vr4130 if we might generate MFLO or @@ -16012,14 +16054,13 @@ mips_reorg_process_insns (void) if (TARGET_FIX_VR4130 && !ISA_HAS_MACCHI) cfun->machine->all_noreorder_p = false; - htab = htab_create (37, mips_lo_sum_offset_hash, - mips_lo_sum_offset_eq, free); + htab.create (37); /* Make a first pass over the instructions, recording all the LO_SUMs. 
*/ for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn)) FOR_EACH_SUBINSN (subinsn, insn) if (USEFUL_INSN_P (subinsn)) - for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, htab); + for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, &htab); last_insn = 0; hilo_delay = 2; @@ -16076,7 +16117,7 @@ mips_reorg_process_insns (void) } } - htab_delete (htab); + htab.dispose (); } /* Return true if the function has a long branch instruction. */ @@ -18600,6 +18641,8 @@ mips_expand_vec_minmax (rtx target, rtx op0, rtx op1, #define TARGET_INSERT_ATTRIBUTES mips_insert_attributes #undef TARGET_MERGE_DECL_ATTRIBUTES #define TARGET_MERGE_DECL_ATTRIBUTES mips_merge_decl_attributes +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P mips_can_inline_p #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION mips_set_current_function diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 50a030f7f2c..d775a8c940b 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -222,6 +222,7 @@ struct mips_cpu_info { #define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130) #define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400) #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500) +#define TARGET_MIPS5900 (mips_arch == PROCESSOR_R5900) #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000) #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000) #define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON \ @@ -399,6 +400,9 @@ struct mips_cpu_info { if (TARGET_MCU) \ builtin_define ("__mips_mcu"); \ \ + if (TARGET_EVA) \ + builtin_define ("__mips_eva"); \ + \ if (TARGET_DSP) \ { \ builtin_define ("__mips_dsp"); \ @@ -614,39 +618,25 @@ struct mips_cpu_info { #endif #ifndef MULTILIB_ISA_DEFAULT -# if MIPS_ISA_DEFAULT == 1 -# define MULTILIB_ISA_DEFAULT "mips1" -# else -# if MIPS_ISA_DEFAULT == 2 -# define MULTILIB_ISA_DEFAULT "mips2" -# else -# if MIPS_ISA_DEFAULT == 3 -# define MULTILIB_ISA_DEFAULT "mips3" -# else -# if MIPS_ISA_DEFAULT == 4 -# define MULTILIB_ISA_DEFAULT "mips4" -# else -# if MIPS_ISA_DEFAULT == 32 -# define MULTILIB_ISA_DEFAULT "mips32" -# else -# if MIPS_ISA_DEFAULT == 33 -# define MULTILIB_ISA_DEFAULT "mips32r2" -# else -# if MIPS_ISA_DEFAULT == 64 -# define MULTILIB_ISA_DEFAULT "mips64" -# else -# if MIPS_ISA_DEFAULT == 65 -# define MULTILIB_ISA_DEFAULT "mips64r2" -# else -# define MULTILIB_ISA_DEFAULT "mips1" -# endif -# endif -# endif -# endif -# endif -# endif -# endif -# endif +#if MIPS_ISA_DEFAULT == 1 +#define MULTILIB_ISA_DEFAULT "mips1" +#elif MIPS_ISA_DEFAULT == 2 +#define MULTILIB_ISA_DEFAULT "mips2" +#elif MIPS_ISA_DEFAULT == 3 +#define MULTILIB_ISA_DEFAULT "mips3" +#elif MIPS_ISA_DEFAULT == 4 +#define MULTILIB_ISA_DEFAULT "mips4" +#elif MIPS_ISA_DEFAULT == 32 +#define MULTILIB_ISA_DEFAULT "mips32" +#elif MIPS_ISA_DEFAULT == 33 +#define MULTILIB_ISA_DEFAULT "mips32r2" +#elif MIPS_ISA_DEFAULT == 64 +#define MULTILIB_ISA_DEFAULT "mips64" +#elif MIPS_ISA_DEFAULT == 65 +#define MULTILIB_ISA_DEFAULT "mips64r2" +#else +#define MULTILIB_ISA_DEFAULT "mips1" +#endif #endif #ifndef MIPS_ABI_DEFAULT @@ -657,21 +647,13 @@ struct mips_cpu_info { #if MIPS_ABI_DEFAULT == ABI_32 #define MULTILIB_ABI_DEFAULT "mabi=32" -#endif - -#if MIPS_ABI_DEFAULT == ABI_O64 +#elif MIPS_ABI_DEFAULT == ABI_O64 #define MULTILIB_ABI_DEFAULT "mabi=o64" -#endif - -#if MIPS_ABI_DEFAULT == ABI_N32 +#elif MIPS_ABI_DEFAULT == ABI_N32 #define MULTILIB_ABI_DEFAULT "mabi=n32" -#endif - -#if MIPS_ABI_DEFAULT == ABI_64 +#elif MIPS_ABI_DEFAULT == ABI_64 #define MULTILIB_ABI_DEFAULT 
"mabi=64" -#endif - -#if MIPS_ABI_DEFAULT == ABI_EABI +#elif MIPS_ABI_DEFAULT == ABI_EABI #define MULTILIB_ABI_DEFAULT "mabi=eabi" #endif @@ -743,9 +725,9 @@ struct mips_cpu_info { #define MIPS_ISA_SYNCI_SPEC \ "%{msynci|mno-synci:;:%{mips32r2|mips64r2:-msynci;:-mno-synci}}" -#if MIPS_ABI_DEFAULT == ABI_O64 \ - || MIPS_ABI_DEFAULT == ABI_N32 \ - || MIPS_ABI_DEFAULT == ABI_64 +#if (MIPS_ABI_DEFAULT == ABI_O64 \ + || MIPS_ABI_DEFAULT == ABI_N32 \ + || MIPS_ABI_DEFAULT == ABI_64) #define OPT_ARCH64 "mabi=32|mgp32:;" #define OPT_ARCH32 "mabi=32|mgp32" #else @@ -781,7 +763,7 @@ struct mips_cpu_info { #define BASE_DRIVER_SELF_SPECS \ "%{!mno-dsp: \ %{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k*: -mdsp} \ - %{march=74k*:%{!mno-dspr2: -mdspr2 -mdsp}}}" + %{march=74k*|march=m14ke*: %{!mno-dspr2: -mdspr2 -mdsp}}}" #define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS @@ -825,6 +807,7 @@ struct mips_cpu_info { #define ISA_HAS_MUL3 ((TARGET_MIPS3900 \ || TARGET_MIPS5400 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_MIPS7000 \ || TARGET_MIPS9000 \ || TARGET_MAD \ @@ -839,6 +822,26 @@ struct mips_cpu_info { && TARGET_OCTEON \ && !TARGET_MIPS16) +/* ISA supports instructions DMULT and DMULTU. */ +#define ISA_HAS_DMULT (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions MULT and MULTU. + This is always true, but the macro is needed for ISA_HAS_<D>MULT + in mips.md. */ +#define ISA_HAS_MULT (1) + +/* ISA supports instructions DDIV and DDIVU. */ +#define ISA_HAS_DDIV (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions DIV and DIVU. + This is always true, but the macro is needed for ISA_HAS_<D>DIV + in mips.md. */ +#define ISA_HAS_DIV (1) + +#define ISA_HAS_DIV3 ((TARGET_LOONGSON_2EF \ + || TARGET_LOONGSON_3A) \ + && !TARGET_MIPS16) + /* ISA has the floating-point conditional move instructions introduced in mips4. */ #define ISA_HAS_FP_CONDMOVE ((ISA_MIPS4 \ @@ -851,7 +854,9 @@ struct mips_cpu_info { /* ISA has the integer conditional move instructions introduced in mips4 and ST Loongson 2E/2F. */ -#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF) +#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE \ + || TARGET_MIPS5900 \ + || TARGET_LOONGSON_2EF) /* ISA has LDC1 and SDC1. */ #define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16) @@ -964,6 +969,7 @@ struct mips_cpu_info { /* ISA has data prefetch instructions. This controls use of 'pref'. */ #define ISA_HAS_PREFETCH ((ISA_MIPS4 \ || TARGET_LOONGSON_2EF \ + || TARGET_MIPS5900 \ || ISA_MIPS32 \ || ISA_MIPS32R2 \ || ISA_MIPS64 \ @@ -1025,15 +1031,18 @@ struct mips_cpu_info { and "addiu $4,$4,1". */ #define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \ && !TARGET_MIPS3900 \ + && !TARGET_MIPS5900 \ && !TARGET_MIPS16 \ && !TARGET_MICROMIPS) /* Likewise mtc1 and mfc1. */ #define ISA_HAS_XFER_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* Likewise floating-point comparisons. */ #define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* True if mflo and mfhi can be immediately followed by instructions @@ -1053,6 +1062,7 @@ struct mips_cpu_info { || ISA_MIPS64 \ || ISA_MIPS64R2 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_LOONGSON_2EF) /* ISA includes synci, jr.hb and jalr.hb. */ @@ -1070,7 +1080,7 @@ struct mips_cpu_info { /* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC instructions. 
*/ -#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16) +#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16) #define GENERATE_LL_SC \ (target_flags_explicit & MASK_LLSC \ ? TARGET_LLSC && !TARGET_MIPS16 \ @@ -1143,6 +1153,7 @@ struct mips_cpu_info { %{mdsp} %{mno-dsp} \ %{mdspr2} %{mno-dspr2} \ %{mmcu} %{mno-mcu} \ +%{meva} %{mno-eva} \ %{msmartmips} %{mno-smartmips} \ %{mmt} %{mno-mt} \ %{mfix-vr4120} %{mfix-vr4130} \ @@ -1361,8 +1372,8 @@ struct mips_cpu_info { #define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE #ifdef IN_LIBGCC2 -#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \ - || (defined _ABI64 && _MIPS_SIM == _ABI64) +#if ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \ + || (defined _ABI64 && _MIPS_SIM == _ABI64)) # define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 # else # define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 @@ -2868,9 +2879,8 @@ while (0) jal " USER_LABEL_PREFIX #FUNC "\n\ .set pop\n\ " TEXT_SECTION_ASM_OP); -#endif /* Switch to #elif when we're no longer limited by K&R C. */ -#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \ - || (defined _ABI64 && _MIPS_SIM == _ABI64) +#elif ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \ + || (defined _ABI64 && _MIPS_SIM == _ABI64)) #define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ asm (SECTION_OP "\n\ .set push\n\ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 7284e5f3384..b832dda27f0 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -55,6 +55,7 @@ r5000 r5400 r5500 + r5900 r7000 r8000 r9000 @@ -406,8 +407,12 @@ ;; Is this an extended instruction in mips16 mode? (define_attr "extended_mips16" "no,yes" - (if_then_else (ior (eq_attr "move_type" "sll0") - (eq_attr "jal" "direct")) + (if_then_else (ior ;; In general, constant-pool loads are extended + ;; instructions. We don't yet optimize for 16-bit + ;; PC-relative references. + (eq_attr "move_type" "sll0,loadpool") + (eq_attr "jal" "direct") + (eq_attr "got" "load")) (const_string "yes") (const_string "no"))) @@ -420,14 +425,89 @@ (match_test "TARGET_MICROMIPS"))) (const_string "yes") (const_string "no"))) - -;; Length of instruction in bytes. -(define_attr "length" "" - (cond [(and (eq_attr "extended_mips16" "yes") - (match_test "TARGET_MIPS16")) - (const_int 4) - (and (eq_attr "compression" "micromips,all") +;; The number of individual instructions that a non-branch pattern generates, +;; using units of BASE_INSN_LENGTH. +(define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") + (const_int 0) + + ;; Extended instructions count as 2. + (and (eq_attr "extended_mips16" "yes") + (match_test "TARGET_MIPS16")) + (const_int 2) + + ;; A GOT load followed by an add of $gp. This is not used for MIPS16. + (eq_attr "got" "xgot_high") + (const_int 2) + + ;; SHIFT_SHIFTs are decomposed into two separate instructions. + ;; They are extended instructions on MIPS16 targets. + (eq_attr "move_type" "shift_shift") + (if_then_else (match_test "TARGET_MIPS16") + (const_int 4) + (const_int 2)) + + ;; Check for doubleword moves that are decomposed into two + ;; instructions. The individual instructions are unextended + ;; MIPS16 ones. + (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") + (eq_attr "dword_mode" "yes")) + (const_int 2) + + ;; Constants, loads and stores are handled by external routines. 
+ (and (eq_attr "move_type" "const,constN") + (eq_attr "dword_mode" "yes")) + (symbol_ref "mips_split_const_insns (operands[1])") + (eq_attr "move_type" "const,constN") + (symbol_ref "mips_const_insns (operands[1])") + (eq_attr "move_type" "load,fpload") + (symbol_ref "mips_load_store_insns (operands[1], insn)") + (eq_attr "move_type" "store,fpstore") + (symbol_ref "mips_load_store_insns (operands[0], insn) + + (TARGET_FIX_24K ? 1 : 0)") + + ;; In the worst case, a call macro will take 8 instructions: + ;; + ;; lui $25,%call_hi(FOO) + ;; addu $25,$25,$28 + ;; lw $25,%call_lo(FOO)($25) + ;; nop + ;; jalr $25 + ;; nop + ;; lw $gp,X($sp) + ;; nop + (eq_attr "jal_macro" "yes") + (const_int 8) + + ;; Various VR4120 errata require a nop to be inserted after a macc + ;; instruction. The assembler does this for us, so account for + ;; the worst-case length here. + (and (eq_attr "type" "imadd") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + ;; VR4120 errata MD(4): if there are consecutive dmult instructions, + ;; the result of the second one is missed. The assembler should work + ;; around this by inserting a nop after the first dmult. + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "DI") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + (eq_attr "type" "idiv,idiv3") + (symbol_ref "mips_idiv_insns ()") + + (not (eq_attr "sync_mem" "none")) + (symbol_ref "mips_sync_loop_insns (insn, operands)")] + (const_int 1))) + +;; Length of instruction in bytes. The default is derived from "insn_count", +;; but there are special cases for branches (which must be handled here) +;; and for compressed single instructions. +(define_attr "length" "" + (cond [(and (eq_attr "compression" "micromips,all") (eq_attr "dword_mode" "no") (match_test "TARGET_MICROMIPS")) (const_int 2) @@ -580,95 +660,8 @@ (const_int 20) (match_test "Pmode == SImode") (const_int 16) - ] (const_int 24)) - - ;; "Ghost" instructions occupy no space. - (eq_attr "type" "ghost") - (const_int 0) - - ;; GOT loads are extended MIPS16 instructions and 4-byte - ;; microMIPS instructions. - (eq_attr "got" "load") - (const_int 4) - - ;; A GOT load followed by an add of $gp. - (eq_attr "got" "xgot_high") - (const_int 8) - - ;; In general, constant-pool loads are extended instructions. - (eq_attr "move_type" "loadpool") - (const_int 4) - - ;; SHIFT_SHIFTs are decomposed into two separate instructions. - ;; They are extended instructions on MIPS16 targets. - (eq_attr "move_type" "shift_shift") - (const_int 8) - - ;; Check for doubleword moves that are decomposed into two - ;; instructions. The individual instructions are unextended - ;; MIPS16 ones or 2-byte microMIPS ones. - (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") - (eq_attr "dword_mode" "yes")) - (if_then_else (match_test "TARGET_COMPRESSION") - (const_int 4) - (const_int 8)) - - ;; Doubleword CONST{,N} moves are split into two word - ;; CONST{,N} moves. - (and (eq_attr "move_type" "const,constN") - (eq_attr "dword_mode" "yes")) - (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH") - - ;; Otherwise, constants, loads and stores are handled by external - ;; routines. - (eq_attr "move_type" "const,constN") - (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH") - (eq_attr "move_type" "load,fpload") - (symbol_ref "mips_load_store_insns (operands[1], insn) - * BASE_INSN_LENGTH") - (eq_attr "move_type" "store,fpstore") - (symbol_ref "mips_load_store_insns (operands[0], insn) - * BASE_INSN_LENGTH - + (TARGET_FIX_24K ? 
NOP_INSN_LENGTH : 0)") - - ;; In the worst case, a call macro will take 8 instructions: - ;; - ;; lui $25,%call_hi(FOO) - ;; addu $25,$25,$28 - ;; lw $25,%call_lo(FOO)($25) - ;; nop - ;; jalr $25 - ;; nop - ;; lw $gp,X($sp) - ;; nop - (eq_attr "jal_macro" "yes") - (const_int 32) - - ;; Various VR4120 errata require a nop to be inserted after a macc - ;; instruction. The assembler does this for us, so account for - ;; the worst-case length here. - (and (eq_attr "type" "imadd") - (match_test "TARGET_FIX_VR4120")) - (const_int 8) - - ;; VR4120 errata MD(4): if there are consecutive dmult instructions, - ;; the result of the second one is missed. The assembler should work - ;; around this by inserting a nop after the first dmult. - (and (eq_attr "type" "imul,imul3") - (and (eq_attr "mode" "DI") - (match_test "TARGET_FIX_VR4120"))) - (const_int 8) - - (eq_attr "type" "idiv,idiv3") - (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH") - - (not (eq_attr "sync_mem" "none")) - (symbol_ref "mips_sync_loop_insns (insn, operands) - * BASE_INSN_LENGTH") - - (match_test "TARGET_MIPS16") - (const_int 2) - ] (const_int 4))) + ] (const_int 24))] + (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) ;; Attribute describing the processor. (define_enum_attr "cpu" "processor" @@ -701,16 +694,11 @@ (const_string "hilo")] (const_string "none"))) -;; Is it a single instruction? -(define_attr "single_insn" "no,yes" - (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4) - ? SINGLE_INSN_YES : SINGLE_INSN_NO)")) - ;; Can the instruction be put into a delay slot? (define_attr "can_delay" "no,yes" (if_then_else (and (eq_attr "type" "!branch,call,jump") - (and (eq_attr "hazard" "none") - (eq_attr "single_insn" "yes"))) + (eq_attr "hazard" "none") + (match_test "get_attr_insn_count (insn) == 1")) (const_string "yes") (const_string "no"))) @@ -755,7 +743,9 @@ ;; This mode iterator allows :MOVECC to be used anywhere that a ;; conditional-move-type condition is needed. (define_mode_iterator MOVECC [SI (DI "TARGET_64BIT") - (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")]) + (CC "TARGET_HARD_FLOAT + && !TARGET_LOONGSON_2EF + && !TARGET_MIPS5900")]) ;; 32-bit integer moves for which we provide move patterns. 
(define_mode_iterator IMOVE32 @@ -1417,7 +1407,7 @@ "mul.<fmt>\t%0,%1,%2\;nop" [(set_attr "type" "fmul") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=f") @@ -1478,7 +1468,7 @@ [(set (match_operand:GPR 0 "register_operand") (mult:GPR (match_operand:GPR 1 "register_operand") (match_operand:GPR 2 "register_operand")))] - "" + "ISA_HAS_<D>MULT" { rtx lo; @@ -1524,7 +1514,7 @@ { if (which_alternative == 1) return "<d>mult\t%1,%2"; - if (<MODE>mode == SImode && TARGET_MIPS3900) + if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900)) return "mult\t%0,%1,%2"; return "<d>mul\t%0,%1,%2"; } @@ -1558,7 +1548,7 @@ [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d")))] - "!TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && !TARGET_FIX_R4000" "<d>mult\t%1,%2" [(set_attr "type" "imul") (set_attr "mode" "<MODE>")]) @@ -1568,11 +1558,11 @@ (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (clobber (match_scratch:GPR 3 "=l"))] - "TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && TARGET_FIX_R4000" "<d>mult\t%1,%2\;mflo\t%0" [(set_attr "type" "imul") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) ;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead ;; of "mult; mflo". They have the same latency, but the first form gives @@ -1632,7 +1622,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; The same idea applies here. The middle alternative needs one less ;; clobber than the final alternative, so we add "*?" as a counterweight. @@ -1651,7 +1641,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,4,8")]) + (set_attr "insn_count" "1,1,2")]) ;; Split *mul_acc_si if both the source and destination accumulator ;; values are GPRs. @@ -1732,7 +1722,7 @@ "" [(set_attr "type" "imadd") (set_attr "accum_in" "1") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) ;; Patterns generated by the define_peephole2 below. @@ -1868,7 +1858,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "1") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; Split *mul_sub_si if both the source and destination accumulator ;; values are GPRs. 
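A note on the mips.c hunk earlier in this diff: the new mips_can_inline_p implementation of TARGET_CAN_INLINE_P refuses to inline whenever caller and callee would use different compression modes. A minimal, hedged illustration of the user-visible effect (the function names are invented for this sketch; "mips16"/"nomips16" are the documented MIPS function attributes):

/* Sketch only: with the new hook, GCC keeps this an out-of-line call,
   because the callee must be compiled as standard (nomips16) code while
   the caller is MIPS16 code.  */
static inline int __attribute__ ((nomips16))
uncompressed_helper (int x)
{
  return x + 1;
}

int __attribute__ ((mips16))
compressed_caller (int x)
{
  /* mips_can_inline_p compares the two compression modes and returns
     false, so the "inline" hint is ignored here.  */
  return uncompressed_helper (x);
}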
@@ -1949,7 +1939,7 @@ "mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) (define_insn_and_split "<u>mulsidi3_64bit" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1968,10 +1958,10 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "ISA_HAS_EXT_INS") - (const_int 16) - (const_int 28)))]) + (const_int 4) + (const_int 7)))]) (define_expand "<u>mulsidi3_64bit_mips16" [(set (match_operand:DI 0 "register_operand") @@ -2035,7 +2025,7 @@ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) (sign_extend:DI (match_operand:SI 2 "register_operand" "d")))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT && ISA_HAS_DMUL3" + "ISA_HAS_DMUL3" "dmul\t%0,%1,%2" [(set_attr "type" "imul3") (set_attr "mode" "DI")]) @@ -2122,7 +2112,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>mulsi3_highpart_split" [(set (match_operand:SI 0 "register_operand") @@ -2189,7 +2179,7 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))) (const_int 64))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { if (TARGET_MIPS16) emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1], @@ -2208,7 +2198,7 @@ (any_extend:TI (match_operand:DI 2 "register_operand" "d"))) (const_int 64)))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_MIPS16 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; } @@ -2221,7 +2211,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>muldi3_highpart_split" [(set (match_operand:DI 0 "register_operand") @@ -2244,7 +2234,7 @@ [(set (match_operand:TI 0 "register_operand") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { rtx hilo; @@ -2266,7 +2256,7 @@ [(set (match_operand:TI 0 "muldiv_target_operand" "=x") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2" @@ -2278,13 +2268,13 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d")))) (clobber (match_scratch:TI 3 "=x"))] - "TARGET_64BIT + "ISA_HAS_DMULT && TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) ;; The R4650 supports a 32-bit multiply/ 64-bit accumulate ;; instruction. The HI/LO registers are used as a 64-bit accumulator. 
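Before the division hunks that follow: the reworked divmod<mode>4 and udivmod<mode>4 patterns below expand "/" and "%" to individual divmod operations and rely on CSE to merge duplicates, as their new comment explains. A small C illustration of that intent (illustrative only, not part of the patch):

/* Both results come from divmod patterns emitted at expand time; CSE is
   expected to fold them so that a single division instruction feeds both
   the quotient and the remainder.  */
int
quotient_and_remainder (int a, int b, int *rem)
{
  *rem = a % b;   /* first divmod */
  return a / b;   /* reuses the divmod above after CSE */
}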
@@ -2535,10 +2525,10 @@ } [(set_attr "type" "fdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*recip<mode>3" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2553,92 +2543,64 @@ } [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; VR4120 errata MD(A1): signed division instructions do not work correctly ;; with negative operands. We use special libgcc functions instead. -(define_expand "divmod<mode>4" - [(set (match_operand:GPR 0 "register_operand") - (div:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "register_operand"))) - (set (match_operand:GPR 3 "register_operand") - (mod:GPR (match_dup 1) - (match_dup 2)))] - "!TARGET_FIX_VR4120" -{ - if (TARGET_MIPS16) - { - emit_insn (gen_divmod<mode>4_split (operands[3], operands[1], - operands[2])); - emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); - } - else - emit_insn (gen_divmod<mode>4_internal (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -(define_insn_and_split "divmod<mode>4_internal" - [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") +;; +;; Expand generates divmod instructions for individual division and modulus +;; operations. We then rely on CSE to reuse earlier divmods where possible. +;; This means that, when generating MIPS16 code, it is better not to expose +;; the fixed LO register until after CSE has finished. However, it's still +;; better to split before register allocation, so that we don't allocate +;; one of the scarce MIPS16 registers to an unused result. +(define_insn_and_split "divmod<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=kl") (div:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (set (match_operand:GPR 3 "register_operand" "=d") (mod:GPR (match_dup 1) (match_dup 2)))] - "!TARGET_FIX_VR4120 && !TARGET_MIPS16" + "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120" "#" - "&& reload_completed" + "&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)" [(const_int 0)] { emit_insn (gen_divmod<mode>4_split (operands[3], operands[1], operands[2])); + if (TARGET_MIPS16) + emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); DONE; } [(set_attr "type" "idiv") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + ;; Worst case for MIPS16. + (set_attr "insn_count" "3")]) -(define_expand "udivmod<mode>4" - [(set (match_operand:GPR 0 "register_operand") - (udiv:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "register_operand"))) - (set (match_operand:GPR 3 "register_operand") - (umod:GPR (match_dup 1) - (match_dup 2)))] - "" -{ - if (TARGET_MIPS16) - { - emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1], - operands[2])); - emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); - } - else - emit_insn (gen_udivmod<mode>4_internal (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -(define_insn_and_split "udivmod<mode>4_internal" - [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") +;; See the comment above "divmod<mode>4" for the MIPS16 handling. 
+(define_insn_and_split "udivmod<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=kl") (udiv:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (set (match_operand:GPR 3 "register_operand" "=d") (umod:GPR (match_dup 1) (match_dup 2)))] - "!TARGET_MIPS16" + "ISA_HAS_<D>DIV" "#" - "reload_completed" + "(TARGET_MIPS16 && cse_not_expected) || reload_completed" [(const_int 0)] { emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1], operands[2])); + if (TARGET_MIPS16) + emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); DONE; } - [(set_attr "type" "idiv") - (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>") + ;; Worst case for MIPS16. + (set_attr "insn_count" "3")]) (define_expand "<u>divmod<mode>4_split" [(set (match_operand:GPR 0 "register_operand") @@ -2671,7 +2633,7 @@ [(any_div:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))] UNSPEC_SET_HILO))] - "" + "ISA_HAS_<GPR:D>DIV" { return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); } [(set_attr "type" "idiv") (set_attr "mode" "<GPR:MODE>")]) @@ -2698,10 +2660,10 @@ } [(set_attr "type" "fsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>a" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2716,10 +2678,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>b" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2734,10 +2696,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; ;; .................... @@ -3530,7 +3492,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "DF") (set_attr "cnv_mode" "D2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "register_operand") @@ -3567,7 +3529,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "SF") (set_attr "cnv_mode" "S2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_insn "fix_truncdfdi2" @@ -4045,7 +4007,7 @@ operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID); } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Use a scratch register to reduce the latency of the above pattern ;; on superscalar machines. The optimized sequence is: @@ -4100,7 +4062,7 @@ operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW); } - [(set_attr "length" "24")]) + [(set_attr "insn_count" "6")]) ;; Split HIGHs into: ;; @@ -5110,7 +5072,7 @@ return ".cprestore\t%1"; } [(set_attr "type" "store") - (set_attr "length" "4,12")]) + (set_attr "insn_count" "1,3")]) (define_insn "use_cprestore_<mode>" [(set (reg:P CPRESTORE_SLOT_REGNUM) @@ -5171,7 +5133,7 @@ "\tjr.hb\t$31\n" "\tnop%>%)"; } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Cache operations for R4000-style caches. 
(define_insn "mips_cache" @@ -5364,8 +5326,7 @@ ;; not have and immediate). We recognize a shift of a load in order ;; to make it simple enough for combine to understand. ;; -;; The length here is the worst case: the length of the split version -;; will be more accurate. +;; The instruction count here is the worst case. (define_insn_and_split "" [(set (match_operand:SI 0 "register_operand" "=d") (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m") @@ -5378,7 +5339,8 @@ "" [(set_attr "type" "load") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set (attr "insn_count") + (symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))]) (define_insn "rotr<mode>3" [(set (match_operand:GPR 0 "register_operand" "=d") @@ -5986,7 +5948,7 @@ (clobber (reg:SI MIPS16_T_REGNUM))] "TARGET_MIPS16_SHORT_JUMP_TABLES" { - rtx diff_vec = PATTERN (next_real_insn (operands[2])); + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); @@ -6017,7 +5979,7 @@ return "j\t%4"; } - [(set_attr "length" "32")]) + [(set_attr "insn_count" "16")]) ;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well. ;; While it is possible to either pull it off the stack (in the @@ -6908,11 +6870,8 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "unknown") - ; Since rdhwr always generates a trap for now, putting it in a delay - ; slot would make the kernel's emulation of it much slower. - (set_attr "can_delay" "no") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "*tls_get_tp_<mode>_split" [(set (reg:P TLS_GET_TP_REGNUM) @@ -6920,7 +6879,8 @@ "HAVE_AS_TLS && !TARGET_MIPS16" ".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop" [(set_attr "type" "unknown") - ; See tls_get_tp_<mode> + ; Since rdhwr always generates a trap for now, putting it in a delay + ; slot would make the kernel's emulation of it much slower. (set_attr "can_delay" "no") (set_attr "mode" "<MODE>")]) @@ -6952,7 +6912,7 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "multi") - (set_attr "length" "8") + (set_attr "insn_count" "4") (set_attr "mode" "<MODE>")]) (define_insn "*tls_get_tp_mips16_call_<mode>" @@ -6964,7 +6924,7 @@ "HAVE_AS_TLS && TARGET_MIPS16" { return MIPS_CALL ("jal", operands, 0, -1); } [(set_attr "type" "call") - (set_attr "length" "6") + (set_attr "insn_count" "3") (set_attr "mode" "<MODE>")]) ;; Named pattern for expanding thread pointer reference. diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index e11710db3c0..08ab29b1810 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -141,6 +141,10 @@ membedded-data Target Report Var(TARGET_EMBEDDED_DATA) Use ROM instead of RAM +meva +Target Report Var(TARGET_EVA) +Use Enhanced Virtual Addressing instructions + mexplicit-relocs Target Report Mask(EXPLICIT_RELOCS) Use NewABI-style %reloc() assembly operators diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h index a3fb48976bd..45bc0b88107 100644 --- a/gcc/config/mips/mti-linux.h +++ b/gcc/config/mips/mti-linux.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see /* This target is a multilib target, specify the sysroot paths. 
*/ #undef SYSROOT_SUFFIX_SPEC #define SYSROOT_SUFFIX_SPEC \ - "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" + "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ diff --git a/gcc/config/mips/n32-elf.h b/gcc/config/mips/n32-elf.h new file mode 100644 index 00000000000..0f41a6e9fc7 --- /dev/null +++ b/gcc/config/mips/n32-elf.h @@ -0,0 +1,35 @@ +/* Definitions of target machine for GNU compiler. + n32 for embedded systems. + Copyright (C) 2003-2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Use standard ELF-style local labels (not '$' as on early Irix). */ +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Use periods rather than dollar signs in special g++ assembler names. */ +#define NO_DOLLAR_IN_LABEL + +/* Force n32 to use 64-bit long doubles. */ +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 64 + +#ifdef IN_LIBGCC2 +#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif diff --git a/gcc/config/mips/sde.h b/gcc/config/mips/sde.h index d42fee6309f..d35f79f25be 100644 --- a/gcc/config/mips/sde.h +++ b/gcc/config/mips/sde.h @@ -89,23 +89,6 @@ along with GCC; see the file COPYING3. If not see #undef PTRDIFF_TYPE #define PTRDIFF_TYPE "long int" -/* Use standard ELF-style local labels (not '$' as on early Irix). */ -#undef LOCAL_LABEL_PREFIX -#define LOCAL_LABEL_PREFIX "." - -/* Use periods rather than dollar signs in special g++ assembler names. */ -#define NO_DOLLAR_IN_LABEL - -/* Currently we don't support 128bit long doubles, so for now we force - n32 to be 64bit. */ -#undef LONG_DOUBLE_TYPE_SIZE -#define LONG_DOUBLE_TYPE_SIZE 64 - -#ifdef IN_LIBGCC2 -#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE -#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 -#endif - /* Force all .init and .fini entries to be 32-bit, not mips16, so that in a mixed environment they are all the same mode. The crti.asm and crtn.asm files will also be compiled as 32-bit due to the diff --git a/gcc/config/mips/t-mti-elf b/gcc/config/mips/t-mti-elf index 3f0868fb856..bce8f063452 100644 --- a/gcc/config/mips/t-mti-elf +++ b/gcc/config/mips/t-mti-elf @@ -16,20 +16,29 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 64 el sof -MULTILIB_MATCHES = EL=mel EB=meb +# The default build is mips32r2, hard-float big-endian. Add mips32, +# soft-float, and little-endian variations. -# We do not want to build mips16 versions of mips64* architectures. 
-MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_MATCHES = EL=mel EB=meb -# 64 bit ABI is not supported on mips32 architecture. +# The 64 bit ABI is not supported on the mips32 architecture. MULTILIB_EXCEPTIONS += *mips32*/*mabi=64* -# The 64 bit ABI is not supported on the mips32r2 bit architecture. -# Because mips32r2 is the default the exception list is a little messy. -# Basically we are saying any list that doesn't specify mips32, mips64, -# or mips64r2 but does specify mabi=64 is not allowed because that -# would be defaulting to the mips32r2 architecture. +# The 64 bit ABI is not supported on the mips32r2 architecture. +# Because mips32r2 is the default we can't use that flag to trigger +# the exception so we check for mabi=64 with no specific mips +# architecture flag instead. MULTILIB_EXCEPTIONS += mabi=64* -MULTILIB_EXCEPTIONS += mips16/mabi=64* + +# We do not want to build mips16 versions of mips64* architectures. +MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_EXCEPTIONS += *mips16/mabi=64* + +# We only want micromips for mips32r2 architecture and we do not want +# it used in conjunction with -mips16. +MULTILIB_EXCEPTIONS += *mips16/mmicromips* +MULTILIB_EXCEPTIONS += *mips64*/mmicromips* +MULTILIB_EXCEPTIONS += *mips32/mmicromips* +MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mips/t-mti-linux b/gcc/config/mips/t-mti-linux index 775a68d9dae..bce8f063452 100644 --- a/gcc/config/mips/t-mti-linux +++ b/gcc/config/mips/t-mti-linux @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. @@ -28,6 +28,17 @@ MULTILIB_EXCEPTIONS += *mips32*/*mabi=64* # The 64 bit ABI is not supported on the mips32r2 architecture. # Because mips32r2 is the default we can't use that flag to trigger -# the exception so we check for mabi=64 with no specific mips flag -# instead. +# the exception so we check for mabi=64 with no specific mips +# architecture flag instead. MULTILIB_EXCEPTIONS += mabi=64* + +# We do not want to build mips16 versions of mips64* architectures. +MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_EXCEPTIONS += *mips16/mabi=64* + +# We only want micromips for mips32r2 architecture and we do not want +# it used in conjunction with -mips16. +MULTILIB_EXCEPTIONS += *mips16/mmicromips* +MULTILIB_EXCEPTIONS += *mips64*/mmicromips* +MULTILIB_EXCEPTIONS += *mips32/mmicromips* +MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index 1af09e559b0..bd37067dfc4 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -313,7 +313,7 @@ mmix_init_machine_status (void) return ggc_alloc_cleared_machine_function (); } -/* DATA_ALIGNMENT. +/* DATA_ABI_ALIGNMENT. We have trouble getting the address of stuff that is located at other than 32-bit alignments (GETA requirements), so try to give everything at least 32-bit alignment. 
*/ diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h index 4ca1a2b8c86..c5edc5777a9 100644 --- a/gcc/config/mmix/mmix.h +++ b/gcc/config/mmix/mmix.h @@ -164,7 +164,7 @@ struct GTY(()) machine_function /* Copied from elfos.h. */ #define MAX_OFILE_ALIGNMENT (32768 * 8) -#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \ mmix_data_alignment (TYPE, BASIC_ALIGN) #define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index 2e18bebf3d8..c2ed7389bc4 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace) } } +/* Returns TRUE for valid addresses. */ +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode +static bool +rl78_valid_pointer_mode (enum machine_mode m) +{ + return (m == HImode || m == SImode); +} + /* Return the appropriate mode for a named address address. */ #undef TARGET_ADDR_SPACE_ADDRESS_MODE #define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode @@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) } + +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode + +static enum machine_mode +rl78_unwind_word_mode (void) +{ + return HImode; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rl78.h" diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md index b3cfe6d1bbc..efc26210498 100644 --- a/gcc/config/rl78/rl78.md +++ b/gcc/config/rl78/rl78.md @@ -235,6 +235,24 @@ [(set_attr "valloc" "macax")] ) +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "general_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + ] + "" ; mulu supported by all targets + "" +) + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "general_operand" "") + (match_operand:HI 2 "nonmemory_operand" ""))) + ] + "! RL78_MUL_NONE" + "" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "register_operand" "=&v") (mult:SI (match_operand:SI 1 "nonmemory_operand" "vi") @@ -244,6 +262,58 @@ "" ) +(define_insn "*mulqi3_rl78" + [(set (match_operand:QI 0 "register_operand" "=&v") + (mult:QI (match_operand:QI 1 "general_operand" "+viU") + (match_operand:QI 2 "general_operand" "vi"))) + ] + "" ; mulu supported by all targets + "; mulqi macro %0 = %1 * %2 + mov a, %h1 + mov x, a + mov a, %h2 + mulu x ; ax = a * x + mov a, x + mov %h0, a + ; end of mulqi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_rl78" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_RL78" + "; mulhi macro %0 = %1 * %2 + movw ax, %h1 + movw bc, %h2 + mulhu ; bcax = bc * ax + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_g13" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_G13" + "; mulhi macro %0 = %1 * %2 + mov a, #0x00 + mov !0xf00e8, a ; MDUC + movw ax, %h1 + movw 0xffff0, ax ; MDAL + movw ax, %h2 + movw 0xffff2, ax ; MDAH + nop ; mdb = mdal * mdah + movw ax, 0xffff6 ; MDBL + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "umul")] +) + ;; 0xFFFF0 is MACR(L). 
0xFFFF2 is MACR(H) but we don't care about it ;; because we're only using the lower 16 bits (which is the upper 16 ;; bits of the result). diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index fd6d07f50ff..4b91c5c5e24 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -321,6 +321,42 @@ #define vec_vsx_st __builtin_vec_vsx_st #endif +#ifdef _ARCH_PWR8 +/* Vector additions added in ISA 2.07. */ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc +#define vec_vaddudm __builtin_vec_vaddudm +#define vec_vclz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vgbbd __builtin_vec_vgbbd +#define vec_vmaxsd __builtin_vec_vmaxsd +#define vec_vmaxud __builtin_vec_vmaxud +#define vec_vminsd __builtin_vec_vminsd +#define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow +#define vec_vpksdss __builtin_vec_vpksdss +#define vec_vpksdus __builtin_vec_vpksdus +#define vec_vpkudum __builtin_vec_vpkudum +#define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw +#define vec_vrld __builtin_vec_vrld +#define vec_vsld __builtin_vec_vsld +#define vec_vsrad __builtin_vec_vsrad +#define vec_vsrd __builtin_vec_vsrd +#define vec_vsubudm __builtin_vec_vsubudm +#define vec_vupkhsw __builtin_vec_vupkhsw +#define vec_vupklsw __builtin_vec_vupklsw +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1b0b5c3fb13..6607e450be3 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -41,15 +41,11 @@ UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH - UNSPEC_VPKUHUM - UNSPEC_VPKUWUM UNSPEC_VPKPX - UNSPEC_VPKSHSS - UNSPEC_VPKSWSS - UNSPEC_VPKUHUS - UNSPEC_VPKSHUS - UNSPEC_VPKUWUS - UNSPEC_VPKSWUS + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -71,12 +67,10 @@ UNSPEC_VLOGEFP UNSPEC_VEXPTEFP UNSPEC_VLSDOI - UNSPEC_VUPKHSB + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUPKHPX - UNSPEC_VUPKHSH - UNSPEC_VUPKLSB UNSPEC_VUPKLPX - UNSPEC_VUPKLSH UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -134,6 +128,7 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD ]) (define_c_enum "unspecv" @@ -146,6 +141,8 @@ ;; Vec int modes (define_mode_iterator VI [V4SI V8HI V16QI]) +;; Like VI, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) ;; Short vec in modes (define_mode_iterator VIshort [V8HI V16QI]) ;; Vec float modes @@ -159,8 +156,18 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) ;; Vector move instructions. 
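A rough usage sketch of the new ISA 2.07 intrinsic names that the altivec.h hunk above maps onto overloaded __builtin_vec_* builtins (vec_vclz, vec_vpopcnt, and friends). This is illustrative only, not part of the patch: it assumes the overloads resolve for vector unsigned int and that the code is built with -mcpu=power8 -maltivec, so that _ARCH_PWR8 is defined:

#include <altivec.h>

/* Per-element leading-zero count and population count on a V4SI vector.
   For this type vec_vclz is expected to expand to vclzw and vec_vpopcnt
   to vpopcntw; the exact overload table lives in rs6000-c.c, not in this
   hunk, so the prototypes here are assumptions.  */
vector unsigned int
clz_then_popcount (vector unsigned int v, vector unsigned int *lz)
{
  *lz = vec_vclz (v);
  return vec_vpopcnt (v);
}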
(define_insn "*altivec_mov<mode>" @@ -378,10 +385,10 @@ ;; add (define_insn "add<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vaddu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -398,17 +405,17 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vaddu<VI_char>s" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] + (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "<VI_unit>" "vaddu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -418,16 +425,16 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vadds<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsubu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -444,7 +451,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -454,7 +461,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -464,7 +471,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubs<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -483,7 +490,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vavgs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -492,31 +499,31 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 
"altivec_register_operand" "v")))] + "<VI_unit>" "vcmpequ<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gt<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgts<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gtu<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgtu<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -744,18 +751,18 @@ ;; max (define_insn "umax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -768,18 +775,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vminu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmins<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -935,6 +942,31 @@ "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + 
(match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "vec_widen_umult_even_v16qi" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1011,10 +1043,13 @@ ;; logical ops. Have the logical ops follow the memory ops in ;; terms of whether to prefer VSX or Altivec +;; AND has a clobber to be consistant with VSX, which adds splitters for using +;; the GPR registers. (define_insn "*altivec_and<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] + (match_operand:VM 2 "register_operand" "v"))) + (clobber (match_scratch:CC 3 "=X"))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vand %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1044,8 +1079,8 @@ (define_insn "*altivec_nor<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] + (and:VM (not:VM (match_operand:VM 1 "register_operand" "v")) + (not:VM (match_operand:VM 2 "register_operand" "v"))))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vnor %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1058,24 +1093,6 @@ "vandc %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") @@ -1085,71 +1102,47 @@ "vpkpx %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" +(define_insn "altivec_vpks<VI_char>ss" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "<VI_unit>" + 
"vpks<VI_char>ss %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" +(define_insn "altivec_vpks<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "<VI_unit>" + "vpks<VI_char>us %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "<VI_unit>" + "vpku<VI_char>us %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "vpku<VI_char>um %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1172,26 +1165,26 @@ [(set_attr "type" "vecperm")]) (define_insn "*altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - 
(match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1476,12 +1469,20 @@ "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" + "vupkhs<VU_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" + "vupkls<VU_char> %0,%1" [(set_attr "type" "vecperm")]) (define_insn "altivec_vupkhpx" @@ -1492,22 +1493,6 @@ "vupkhpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] @@ -1516,49 +1501,41 @@ "vupklpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - "vupklsh %0,%1" - [(set_attr "type" "vecperm")]) - ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpequ<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgts<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgts<VI_char>. 
%0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtu<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgtu<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -1779,20 +1756,28 @@ [(set_attr "type" "vecstore")]) ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? %0,%1,SCRATCH2" (define_expand "abs<mode>2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); }) ;; Generate @@ -1950,49 +1935,19 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" + "") -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_lo_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" + "") (define_insn "vperm_v8hiv4si" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2291,29 +2246,13 @@ DONE; }") -(define_expand 
"vec_pack_trunc_v8hi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); - DONE; -}") +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "") (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) @@ -2460,3 +2399,34 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 463d69c6ba4..fa53cbb9de7 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -79,12 +79,31 @@ (define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register if -mpower8-vector is used or NO_REGS.") + (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" "Floating point register if the STFIWX instruction is enabled or NO_REGS.") (define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf can't to move between the register sets. 
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS") + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) + ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" "Indexed or indirect memory operand, ignoring the bottom 4 bits" diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md new file mode 100644 index 00000000000..9f7e4a1b255 --- /dev/null +++ b/gcc/config/rs6000/crypto.md @@ -0,0 +1,101 @@ +;; Cryptographic instructions added in ISA 2.07 +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_c_enum "unspec" + [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST + UNSPEC_VSBOX + UNSPEC_VSHASIGMA + UNSPEC_VPERMXOR + UNSPEC_VPMSUM]) + +;; Iterator for VPMSUM/VPERMXOR +(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI]) + +(define_mode_attr CR_char [(V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d")]) + +;; Iterator for VSHASIGMAD/VSHASIGMAW +(define_mode_iterator CR_hash [V4SI V2DI]) + +;; Iterator for the other crypto functions +(define_int_iterator CR_code [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST]) + +(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher") + (UNSPEC_VNCIPHER "vncipher") + (UNSPEC_VCIPHERLAST "vcipherlast") + (UNSPEC_VNCIPHERLAST "vncipherlast")]) + +;; 2 operand crypto instructions +(define_insn "crypto_<CR_insn>" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + CR_code))] + "TARGET_CRYPTO" + "<CR_insn> %0,%1,%2" + [(set_attr "type" "crypto")]) + +(define_insn "crypto_vpmsum<CR_char>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v")] + UNSPEC_VPMSUM))] + "TARGET_CRYPTO" + "vpmsum<CR_char> %0,%1,%2" + [(set_attr "type" "crypto")]) + +;; 3 operand crypto instructions +(define_insn "crypto_vpermxor_<mode>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v") + (match_operand:CR_mode 3 "register_operand" "v")] + UNSPEC_VPERMXOR))] + "TARGET_CRYPTO" + "vpermxor %0,%1,%2,%3" + [(set_attr "type" "crypto")]) + +;; 1 operand crypto instruction +(define_insn "crypto_vsbox" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] + UNSPEC_VSBOX))] + "TARGET_CRYPTO" + "vsbox %0,%1" + 
[(set_attr "type" "crypto")]) + +;; Hash crypto instructions +(define_insn "crypto_vshasigma<CR_char>" + [(set (match_operand:CR_hash 0 "register_operand" "=v") + (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_1_operand" "n") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_VSHASIGMA))] + "TARGET_CRYPTO" + "vshasigma<CR_char> %0,%1,%2,%3" + [(set_attr "type" "crypto")]) diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c index e608dce184c..1a173d0b1cc 100644 --- a/gcc/config/rs6000/driver-rs6000.c +++ b/gcc/config/rs6000/driver-rs6000.c @@ -167,7 +167,7 @@ elf_platform (void) if (fd != -1) { - char buf[1024]; + static char buf[1024]; ElfW(auxv_t) *av; ssize_t n; diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 3f280581feb..79f0f0b5f00 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -136,8 +136,11 @@ extern int dot_symbols; SET_CMODEL (CMODEL_MEDIUM); \ if (rs6000_current_cmodel != CMODEL_SMALL) \ { \ - TARGET_NO_FP_IN_TOC = 0; \ - TARGET_NO_SUM_IN_TOC = 0; \ + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ + TARGET_NO_FP_IN_TOC \ + = rs6000_current_cmodel == CMODEL_MEDIUM; \ + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ + TARGET_NO_SUM_IN_TOC = 0; \ } \ } \ } \ diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md new file mode 100644 index 00000000000..83bf7197483 --- /dev/null +++ b/gcc/config/rs6000/power8.md @@ -0,0 +1,373 @@ +;; Scheduling description for IBM POWER8 processor. +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "power8fxu,power8lsu,power8vsu,power8misc") + +(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu") +(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu") +(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu") +(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu") +(define_cpu_unit "bpu_power8,cru_power8" "power8misc") +(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8" "power8misc") + + +; Dispatch group reservations +(define_reservation "DU_any_power8" + "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\ + du5_power8") + +; 2-way Cracked instructions go in slots 0-1 +; (can also have a second in slots 3-4 if insns are adjacent) +(define_reservation "DU_cracked_power8" + "du0_power8+du1_power8") + +; Insns that are first in group +(define_reservation "DU_first_power8" + "du0_power8") + +; Insns that are first and last in group +(define_reservation "DU_both_power8" + "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\ + du5_power8+du6_power8") + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8") +(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\ + du6_power8") +(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8") +(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8") +(absence_set "du4_power8" "du5_power8,du6_power8") +(absence_set "du5_power8" "du6_power8") + + +; Execution unit reservations +(define_reservation "FXU_power8" + "fxu0_power8|fxu1_power8") + +(define_reservation "LU_power8" + "lu0_power8|lu1_power8") + +(define_reservation "LSU_power8" + "lsu0_power8|lsu1_power8") + +(define_reservation "LU_or_LSU_power8" + "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8") + +(define_reservation "VSU_power8" + "vsu0_power8|vsu1_power8") + + +; LS Unit +(define_insn_reservation "power8-load" 3 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-load-update" 3 + (and (eq_attr "type" "load_u,load_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8+FXU_power8") + +(define_insn_reservation "power8-load-ext" 3 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8,FXU_power8") + +(define_insn_reservation "power8-load-ext-update" 3 + (and (eq_attr "type" "load_ext_u,load_ext_ux") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8") + +(define_insn_reservation "power8-fpload" 5 + (and (eq_attr "type" "fpload,vecload") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_power8") + +(define_insn_reservation "power8-fpload-update" 5 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_power8+FXU_power8") + +(define_insn_reservation "power8-store" 5 ; store-forwarding latency + (and (eq_attr "type" "store,store_u") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-store-update-indexed" 5 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-fpstore" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-fpstore-update" 5 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-vecstore" 5 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-sync" 1 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8") + + +; FX Unit +(define_insn_reservation "power8-1cyc" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,exts,isel") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 2 "power8-1cyc" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") +; "power8-load,power8-load-update,power8-load-ext,\ +; power8-load-ext-update,power8-fpload,power8-fpload-update,\ +; 
power8-store,power8-store-update,power8-store-update-indexed,\ +; power8-fpstore,power8-fpstore-update,power8-vecstore,\ +; power8-larx,power8-stcx") + +(define_insn_reservation "power8-2cyc" 2 + (and (eq_attr "type" "cntlz,popcnt") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8") + +(define_insn_reservation "power8-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8") + +; cmp - Normal compare insns +(define_insn_reservation "power8-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; fast_compare : add./and./nor./etc +(define_insn_reservation "power8-fast-compare" 2 + (and (eq_attr "type" "fast_compare") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; compare : rldicl./exts./etc +; delayed_compare : rlwinm./slwi./etc +; var_delayed_compare : rlwnm./slw./etc +(define_insn_reservation "power8-compare" 2 + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 3 "power8-fast-compare,power8-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 5 cycle CR latency +(define_bypass 5 "power8-fast-compare,power8-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +(define_insn_reservation "power8-mul" 4 + (and (eq_attr "type" "imul,imul2,imul3,lmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-mul-compare" 4 + (and (eq_attr "type" "imul_compare,lmul_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 5 "power8-mul,power8-mul-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 7 cycle CR latency +(define_bypass 7 "power8-mul,power8-mul-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +; FXU divides are not pipelined +(define_insn_reservation "power8-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*37|fxu1_power8*37") + +(define_insn_reservation "power8-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*68|fxu1_power8*68") + +(define_insn_reservation "power8-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,FXU_power8") + +; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode +(define_insn_reservation "power8-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,FXU_power8") + + +; CR Unit +(define_insn_reservation "power8-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8+FXU_power8") + +(define_insn_reservation "power8-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + +(define_insn_reservation "power8-mfcr" 5 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,cru_power8") + +(define_insn_reservation "power8-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + + +; BR Unit +; Branches 
take dispatch slot 7, but reserve any remaining prior slots to +; prevent other insns from grabbing them once this is assigned. +(define_insn_reservation "power8-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power8")) + "(du6_power8\ + |du5_power8+du6_power8\ + |du4_power8+du5_power8+du6_power8\ + |du3_power8+du4_power8+du5_power8+du6_power8\ + |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\ + du6_power8),bpu_power8") + +; Branch updating LR/CTR feeding mf[lr|ctr] +(define_bypass 4 "power8-branch" "power8-mfjmpr") + + +; VS Unit (includes FP/VSX/VMX/DFP/Crypto) +(define_insn_reservation "power8-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +; Additional 3 cycles for any CR result +(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch") + +(define_insn_reservation "power8-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecsimple" 2 + (and (eq_attr "type" "vecperm,vecsimple,veccmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecnormal" 6 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_bypass 7 "power8-vecnormal" + "power8-vecsimple,power8-veccomplex,power8-fpstore*,\ + power8-vecstore") + +(define_insn_reservation "power8-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecfdiv" 25 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecdiv" 31 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mffgpr" 5 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mftgpr" 6 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-crypto" 7 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 78ec1b20913..f47967a48aa 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -166,6 +166,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + ;; Return 1 if op is a register that is not special. 
(define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") @@ -182,9 +187,68 @@ if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) return 1; + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. +(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which suitable for a load/store quad operation +(define_predicate "quad_int_reg_operand" + (match_operand 0 "register_operand") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + ;; Return 1 if op is a register that is a condition register field. (define_predicate "cc_reg_operand" (match_operand 0 "register_operand") @@ -302,6 +366,11 @@ & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)") (match_operand 0 "gpc_reg_operand"))) +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register ;; with no more than one instruction per word. (define_predicate "easy_fp_constant" @@ -458,9 +527,11 @@ (match_test "easy_altivec_constant (op, mode)"))) { HOST_WIDE_INT val; + int elt; if (mode == V2DImode || mode == V2DFmode) return 0; - val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1); + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); val = ((val & 0xff) ^ 0x80) - 0x80; return EASY_VECTOR_15_ADD_SELF (val); }) @@ -472,9 +543,11 @@ (match_test "easy_altivec_constant (op, mode)"))) { HOST_WIDE_INT val; + int elt; if (mode == V2DImode || mode == V2DFmode) return 0; - val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1); + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); }) @@ -507,6 +580,54 @@ (and (match_operand 0 "memory_operand") (match_test "offsettable_nonstrict_memref_p (op)"))) +;; Return 1 if the operand is suitable for load/store quad memory. 
+(define_predicate "quad_memory_operand" + (match_code "mem") +{ + rtx addr, op0, op1; + int ret; + + if (!TARGET_QUAD_MEMORY) + ret = 0; + + else if (!memory_operand (op, mode)) + ret = 0; + + else if (GET_MODE_SIZE (GET_MODE (op)) != 16) + ret = 0; + + else if (MEM_ALIGN (op) < 128) + ret = 0; + + else + { + addr = XEXP (op, 0); + if (int_reg_operand (addr, Pmode)) + ret = 1; + + else if (GET_CODE (addr) != PLUS) + ret = 0; + + else + { + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + ret = (int_reg_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT + && IN_RANGE (INTVAL (op1), -32768, 32767) + && (INTVAL (op1) & 15) == 0); + } + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); + debug_rtx (op); + } + + return ret; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand. (define_predicate "indexed_or_indirect_operand" (match_code "mem") @@ -521,6 +642,19 @@ return indexed_or_indirect_address (op, mode); }) +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. +(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand with an ;; AND -16 in it, used to recognize when we need to switch to Altivec loads ;; to realign loops instead of VSX (altivec silently ignores the bottom bits, @@ -991,9 +1125,16 @@ GET_MODE (XEXP (op, 0))), 1")))) +;; Return 1 if OP is a valid comparison operator for "cbranch" instructions. +;; If we're assuming that FP operations cannot generate user-visible traps, +;; then on e500 we can use the ordered-signaling instructions to implement +;; the unordered-quiet FP comparison predicates modulo a reversal. (define_predicate "rs6000_cbranch_operator" (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS") - (match_operand 0 "ordered_comparison_operator") + (if_then_else (match_test "flag_trapping_math") + (match_operand 0 "ordered_comparison_operator") + (ior (match_operand 0 "ordered_comparison_operator") + (match_code ("unlt,unle,ungt,unge")))) (match_operand 0 "comparison_operator"))) ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index a545fe3e448..1a5a709751d 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -30,7 +30,7 @@ RS6000_BUILTIN_A -- ABS builtins RS6000_BUILTIN_D -- DST builtins RS6000_BUILTIN_E -- SPE EVSEL builtins. - RS6000_BUILTIN_P -- Altivec and VSX predicate builtins + RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins RS6000_BUILTIN_S -- SPE predicate builtins RS6000_BUILTIN_X -- special builtins @@ -301,6 +301,108 @@ | RS6000_BTC_SPECIAL), \ CODE_FOR_nothing) /* ICODE */ +/* ISA 2.07 (power8) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. 
*/ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. */ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* SPE convenience macros. 
*/ #define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) -BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) @@ -1132,6 +1234,139 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") +/* 1 argument VSX instructions added in ISA 2.07. */ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + +/* 1 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) +BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) +BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) + +/* 2 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) +BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) +BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) +BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) +BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) +BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) +BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus) +BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) +BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) +BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) +BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) + +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + +/* Vector comparison instructions added in ISA 2.07. 
*/ +BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) +BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) +BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di) + +/* Vector comparison predicate instructions added in ISA 2.07. */ +BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) +BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) +BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) + +/* ISA 2.07 vector overloaded 1 argument functions. */ +BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") +BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") + +/* ISA 2.07 vector overloaded 2 argument functions. */ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") +BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") +BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") +BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") +BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") +BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") +BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") +BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VRLD, "vrld") +BU_P8V_OVERLOAD_2 (VSLD, "vsld") +BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") +BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") + + +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + + /* 3 argument paired floating point builtins. 
*/ BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a4f66ba8f1b..593b772ebd1 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) @@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, } if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); /* options from the builtin masks. */ if ((bu_mask & RS6000_BTM_SPE) != 0) @@ -505,6 +511,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, @@ -577,12 +585,24 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, @@ -601,6 +621,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, @@ -651,6 +675,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, @@ -937,6 +973,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -975,6 +1015,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -1021,6 +1065,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP, @@ -1418,6 +1466,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, 
RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, @@ -1604,6 +1664,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, @@ -1786,6 +1858,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, @@ -1812,6 +1890,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHSS, 
ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS, @@ -1824,6 +1906,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, @@ -1844,6 +1928,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -1868,6 +1956,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2032,6 +2124,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2056,6 +2152,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -2196,6 +2296,18 @@ const struct 
altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, @@ -3327,6 +3439,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -3372,11 +3498,455 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { 
ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Power8 vector overloaded functions. */ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, 
P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 
}, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, 
P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, 0, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3650,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + 
SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); @@ -3729,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); @@ -3824,7 +4412,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && (desc->op2 == RS6000_BTI_NOT_OPAQUE || rs6000_builtin_type_compatible (types[1], desc->op2)) && (desc->op3 == RS6000_BTI_NOT_OPAQUE - || rs6000_builtin_type_compatible (types[2], desc->op3))) + || rs6000_builtin_type_compatible (types[2], desc->op3)) + && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) return altivec_build_resolved_builtin (args, n, desc); bad: diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 0564018b3f0..08346b61d17 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -28,7 +28,7 @@ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, fre, fsqrt, etc. were no longer documented as optional. Group masks by server and embedded. */ -#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ | OPTION_MASK_CMPB \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_PPC_GFXOPT \ @@ -45,6 +45,14 @@ | OPTION_MASK_VSX \ | OPTION_MASK_VSX_TIMODE) +/* For now, don't provide an embedded version of ISA 2.07. */ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_QUAD_MEMORY) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -61,7 +69,9 @@ /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. 
*/ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ | OPTION_MASK_FPRND \ | OPTION_MASK_ISEL \ @@ -69,11 +79,14 @@ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ | OPTION_MASK_PPC_GFXOPT \ | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ @@ -168,10 +181,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) -RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) +RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index fc843fd19ca..d528a4fd87a 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -30,21 +30,22 @@ /* Processor type. Order must match cpu attribute in MD file. */ enum processor_type { - PROCESSOR_RS64A, - PROCESSOR_MPCCORE, - PROCESSOR_PPC403, - PROCESSOR_PPC405, - PROCESSOR_PPC440, - PROCESSOR_PPC476, PROCESSOR_PPC601, PROCESSOR_PPC603, PROCESSOR_PPC604, PROCESSOR_PPC604e, PROCESSOR_PPC620, PROCESSOR_PPC630, + PROCESSOR_PPC750, PROCESSOR_PPC7400, PROCESSOR_PPC7450, + + PROCESSOR_PPC403, + PROCESSOR_PPC405, + PROCESSOR_PPC440, + PROCESSOR_PPC476, + PROCESSOR_PPC8540, PROCESSOR_PPC8548, PROCESSOR_PPCE300C2, @@ -53,15 +54,21 @@ enum processor_type PROCESSOR_PPCE500MC64, PROCESSOR_PPCE5500, PROCESSOR_PPCE6500, + PROCESSOR_POWER4, PROCESSOR_POWER5, PROCESSOR_POWER6, PROCESSOR_POWER7, + PROCESSOR_POWER8, + + PROCESSOR_RS64A, + PROCESSOR_MPCCORE, PROCESSOR_CELL, PROCESSOR_PPCA2, PROCESSOR_TITAN }; + /* FP processor type. */ enum fpu_type_t { @@ -131,11 +138,14 @@ enum rs6000_cmodel { CMODEL_LARGE }; -/* Describe which vector unit to use for a given machine mode. */ +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. 
*/ enum rs6000_vector { VECTOR_NONE, /* Type is not a vector or not supported */ VECTOR_ALTIVEC, /* Use altivec for vector processing */ VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ VECTOR_PAIRED, /* Use paired floating point for vectors */ VECTOR_SPE, /* Use SPE for vector processing */ VECTOR_OTHER /* Some other vector unit */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a41ed..25bad1bfb68 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, @@ -138,6 +141,7 @@ extern int rs6000_loop_align (rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE +extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align); extern unsigned int rs6000_special_round_type_align (tree, unsigned int, unsigned int); extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e82b24e22ce..2331c5029c2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); don't link in rs6000-c.c, so we can't call it directly. */ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + +/* Direct moves to/from vsx/gpr registers that need an additional register to + do the move. */ +static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES]; +static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES]; +static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES]; + /* Target cpu costs. */ @@ -831,6 +864,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER8 processors. 
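The new rs6000_reg_type enum and the IS_STD_REG_TYPE / IS_FP_VECT_REG_TYPE macros above only work because GPR_REG_TYPE through FPR_REG_TYPE are declared consecutively, letting IN_RANGE fold several equality tests into a single unsigned comparison. A rough standalone sketch of that idea (illustrative names only, not code from the patch):

/* Sketch: range check over consecutive enumerators, as IN_RANGE does.  */
enum reg_type_sketch { NO_T, PSEUDO_T, GPR_T, VSX_T, ALTIVEC_T, FPR_T, SPR_T };

static int
is_std_reg_type_sketch (enum reg_type_sketch t)
{
  /* One unsigned compare instead of four equality tests.  */
  return (unsigned) t - (unsigned) GPR_T <= (unsigned) FPR_T - (unsigned) GPR_T;
}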
*/ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -1023,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1547,6 +1606,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register and an Altivec register. */ @@ -1559,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return ALTIVEC_REGNO_P (last_regno); } - /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode - can only go in GPRs. */ + /* Allow TImode in all VSX registers if the user asked for it. 
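The PTImode case added above amounts to requiring an even/odd GPR pair that lies entirely in r0..r31, since the quad-word memory instructions operate on such pairs and the argument/frame pointer registers are excluded. A simplified restatement with a hypothetical helper (illustration only, assuming 64-bit mode where PTImode occupies two GPRs; not code from the patch):

/* Sketch of the even/odd pairing rule enforced for quad-word GPR access.  */
static int
ptimode_gpr_pair_ok (int first_regno)
{
  int last_regno = first_regno + 1;   /* second register of the pair */
  return first_regno >= 0 && last_regno <= 31 && (first_regno & 1) == 0;
}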
*/ if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) return 1; @@ -1678,6 +1745,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) comma = ""; } + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + fprintf (stderr, "%sregno = %d\n", comma, r); } } @@ -1710,6 +1787,7 @@ rs6000_debug_reg_global (void) "none", "altivec", "vsx", + "p8_vector", "paired", "spe", "other" @@ -1802,8 +1880,11 @@ rs6000_debug_reg_global (void) "wf reg_class = %s\n" "wg reg_class = %s\n" "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wr reg_class = %s\n" "ws reg_class = %s\n" "wt reg_class = %s\n" + "wv reg_class = %s\n" "wx reg_class = %s\n" "wz reg_class = %s\n" "\n", @@ -1815,8 +1896,11 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); @@ -2050,6 +2134,10 @@ rs6000_debug_reg_global (void) if (targetm.lra_p ()) fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + if (TARGET_P8_FUSION) + fprintf (stderr, DEBUG_FMT_S, "p8 fusion", + (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); fprintf (stderr, DEBUG_FMT_S, "struct-return", @@ -2105,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else + { + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; + } + /* Precalculate vector information, this must be set up before the rs6000_hard_regno_nregs_internal below. 
*/ for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -2177,12 +2295,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract. - Altivec doesn't have 64-bit support. */ + /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to + do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ if (TARGET_VSX) { rs6000_vector_mem[V2DImode] = VECTOR_VSX; - rs6000_vector_unit[V2DImode] = VECTOR_NONE; + rs6000_vector_unit[V2DImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; } @@ -2240,13 +2359,30 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWAX) rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; + if (TARGET_DIRECT_MOVE) + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + + if (TARGET_POWERPC64) + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + + if (TARGET_P8_VECTOR) + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + if (TARGET_STFIWX) rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; if (TARGET_LFIWZX) rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; - /* Set up the reload helper functions. */ + /* Setup the direct move combinations. */ + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + reload_fpr_gpr[m] = CODE_FOR_nothing; + reload_gpr_vsx[m] = CODE_FOR_nothing; + reload_vsx_gpr[m] = CODE_FOR_nothing; + } + + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) @@ -2270,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store; rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load; } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti; + reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df; + reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di; + reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf; + reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si; + reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi; + reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi; + reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf; + + reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti; + reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df; + reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di; + reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf; + reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si; + reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi; + reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi; + reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf; + } + else + { + reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi; + reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd; + reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf; + } + } } else { @@ -2297,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) 
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store; @@ -2520,16 +2699,18 @@ darwin_rs6000_override_options (void) HOST_WIDE_INT rs6000_builtin_mask_calculate (void) { - return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) - | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) - | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) - | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) - | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) - | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) - | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) - | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) - | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) - | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)); + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)); } /* Override command line options. Mostly we process the processor type and @@ -2803,7 +2984,9 @@ rs6000_option_override_internal (bool global_init_p) /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-<option> to disable the code. 
*/ - if (TARGET_VSX) + if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) + rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit); + else if (TARGET_VSX) rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit); else if (TARGET_POPCNTD) rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit); @@ -2818,6 +3001,34 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_ALTIVEC) rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit); + if (TARGET_CRYPTO && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) + error ("-mcrypto requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; + } + + if (TARGET_DIRECT_MOVE && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) + error ("-mdirect-move requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; + } + + if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + + if (TARGET_P8_VECTOR && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + if (TARGET_VSX_TIMODE && !TARGET_VSX) { if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) @@ -2825,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; } + /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, + silently turn off quad memory mode. */ + if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) + warning (0, N_("-mquad-memory requires 64-bit mode")); + + rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -2951,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p) /* Place FP constants in the constant pool instead of TOC if section anchors enabled. 
*/ - if (flag_section_anchors) + if (flag_section_anchors + && !global_options_set.x_TARGET_NO_FP_IN_TOC) TARGET_NO_FP_IN_TOC = 1; if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) @@ -3019,16 +3241,19 @@ rs6000_option_override_internal (bool global_init_p) && rs6000_cpu != PROCESSOR_POWER5 && rs6000_cpu != PROCESSOR_POWER6 && rs6000_cpu != PROCESSOR_POWER7 + && rs6000_cpu != PROCESSOR_POWER8 && rs6000_cpu != PROCESSOR_PPCA2 && rs6000_cpu != PROCESSOR_CELL && rs6000_cpu != PROCESSOR_PPC476); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 - || rs6000_cpu == PROCESSOR_POWER7); + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8); rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 || rs6000_cpu == PROCESSOR_PPCE5500 @@ -3272,6 +3497,10 @@ rs6000_option_override_internal (bool global_init_p) rs6000_cost = &power7_cost; break; + case PROCESSOR_POWER8: + rs6000_cost = &power8_cost; + break; + case PROCESSOR_PPCA2: rs6000_cost = &ppca2_cost; break; @@ -3444,7 +3673,8 @@ rs6000_loop_align (rtx label) && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7)) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8)) return 5; else return align_loops_log; @@ -3983,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); switch (fn) { + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZLL: + case BUILT_IN_CLZL: + case BUILT_IN_CLZ: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZD]; + } + break; case BUILT_IN_COPYSIGN: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -3998,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; break; + case BUILT_IN_POPCOUNTIMAX: + case BUILT_IN_POPCOUNTLL: + case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNT: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD]; + } + break; case BUILT_IN_SQRT: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -4395,7 +4657,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) bitsize = GET_MODE_BITSIZE (inner); mask = GET_MODE_MASK (inner); - val = const_vector_elt_as_int (op, nunits - 1); + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); splat_val = val; msb_val = val > 0 ? 
0 : -1; @@ -4435,7 +4697,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) for (i = 0; i < nunits - 1; ++i) { HOST_WIDE_INT desired_val; - if (((i + 1) & (step - 1)) == 0) + if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0) desired_val = val; else desired_val = msb_val; @@ -4520,13 +4782,13 @@ gen_easy_altivec_constant (rtx op) { enum machine_mode mode = GET_MODE (op); int nunits = GET_MODE_NUNITS (mode); - rtx last = CONST_VECTOR_ELT (op, nunits - 1); + rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); unsigned step = nunits / 4; unsigned copies = 1; /* Start with a vspltisw. */ if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last)); + return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); /* Then try with a vspltish. */ if (step == 1) @@ -4535,7 +4797,7 @@ gen_easy_altivec_constant (rtx op) step >>= 1; if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last)); + return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); /* And finally a vspltisb. */ if (step == 1) @@ -4544,7 +4806,7 @@ gen_easy_altivec_constant (rtx op) step >>= 1; if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last)); + return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); gcc_unreachable (); } @@ -4856,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals) { rtx freg = gen_reg_rtx (V4SFmode); rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0)); + rtx cvt = ((TARGET_XSCVDPSPN) + ? gen_vsx_xscvdpspn_scalar (freg, sreg) + : gen_vsx_xscvdpsp_scalar (freg, sreg)); - emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg)); + emit_insn (cvt); emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx)); } else @@ -5119,6 +5384,48 @@ invalid_e500_subreg (rtx op, enum machine_mode mode) return false; } +/* Return alignment of TYPE. Existing alignment is ALIGN. HOW + selects whether the alignment is abi mandated, optional, or + both abi and optional alignment. */ + +unsigned int +rs6000_data_alignment (tree type, unsigned int align, enum data_align how) +{ + if (how != align_opt) + { + if (TREE_CODE (type) == VECTOR_TYPE) + { + if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type))) + || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))) + { + if (align < 64) + align = 64; + } + else if (align < 128) + align = 128; + } + else if (TARGET_E500_DOUBLE + && TREE_CODE (type) == REAL_TYPE + && TYPE_MODE (type) == DFmode) + { + if (align < 64) + align = 64; + } + } + + if (how != align_abi) + { + if (TREE_CODE (type) == ARRAY_TYPE + && TYPE_MODE (TREE_TYPE (type)) == QImode) + { + if (align < BITS_PER_WORD) + align = BITS_PER_WORD; + } + } + + return align; +} + /* AIX increases natural record alignment to doubleword if the first field is an FP double while the FP fields remain word aligned. */ @@ -5240,6 +5547,72 @@ gpr_or_gpr_p (rtx op0, rtx op1) || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); } +/* Return true if this is a move direct operation between GPR registers and + floating point/VSX registers. */ + +bool +direct_move_p (rtx op0, rtx op1) +{ + int regno0, regno1; + + if (!REG_P (op0) || !REG_P (op1)) + return false; + + if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) + return false; + + regno0 = REGNO (op0); + regno1 = REGNO (op1); + if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) + return false; + + if (INT_REGNO_P (regno0)) + return (TARGET_DIRECT_MOVE) ? 
VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); + + else if (INT_REGNO_P (regno1)) + { + if (TARGET_MFPGPR && FP_REGNO_P (regno0)) + return true; + + else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) + return true; + } + + return false; +} + +/* Return true if this is a load or store quad operation. */ + +bool +quad_load_store_p (rtx op0, rtx op1) +{ + bool ret; + + if (!TARGET_QUAD_MEMORY) + ret = false; + + else if (REG_P (op0) && MEM_P (op1)) + ret = (quad_int_reg_operand (op0, GET_MODE (op0)) + && quad_memory_operand (op1, GET_MODE (op1)) + && !reg_overlap_mentioned_p (op0, op1)); + + else if (MEM_P (op0) && REG_P (op1)) + ret = (quad_memory_operand (op0, GET_MODE (op0)) + && quad_int_reg_operand (op1, GET_MODE (op1))); + + else + ret = false; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n========== quad_load_store, return %s\n", + ret ? "true" : "false"); + debug_rtx (gen_rtx_SET (VOIDmode, op0, op1)); + } + + return ret; +} + /* Given an address, return a constant offset term if one exists. */ static rtx @@ -5375,91 +5748,102 @@ virtual_stack_registers_memory_p (rtx op) && regnum <= LAST_VIRTUAL_POINTER_REGISTER); } -/* Return true if memory accesses to OP are known to never straddle - a 32k boundary. */ +/* Return true if a MODE sized memory accesses to OP plus OFFSET + is known to not straddle a 32k boundary. */ static bool offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, enum machine_mode mode) { tree decl, type; - unsigned HOST_WIDE_INT dsize, dalign; + unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; if (GET_CODE (op) != SYMBOL_REF) return false; + dsize = GET_MODE_SIZE (mode); decl = SYMBOL_REF_DECL (op); if (!decl) { - if (GET_MODE_SIZE (mode) == 0) + if (dsize == 0) return false; /* -fsection-anchors loses the original SYMBOL_REF_DECL when replacing memory addresses with an anchor plus offset. We could find the decl by rummaging around in the block->objects VEC for the given offset but that seems like too much work. */ - dalign = 1; + dalign = BITS_PER_UNIT; if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) && SYMBOL_REF_ANCHOR_P (op) && SYMBOL_REF_BLOCK (op) != NULL) { struct object_block *block = SYMBOL_REF_BLOCK (op); - HOST_WIDE_INT lsb, mask; - /* Given the alignment of the block.. */ dalign = block->alignment; - mask = dalign / BITS_PER_UNIT - 1; - - /* ..and the combined offset of the anchor and any offset - to this block object.. */ offset += SYMBOL_REF_BLOCK_OFFSET (op); - lsb = offset & -offset; + } + else if (CONSTANT_POOL_ADDRESS_P (op)) + { + /* It would be nice to have get_pool_align().. */ + enum machine_mode cmode = get_pool_mode (op); - /* ..find how many bits of the alignment we know for the - object. */ - mask &= lsb - 1; - dalign = mask + 1; + dalign = GET_MODE_ALIGNMENT (cmode); } - return dalign >= GET_MODE_SIZE (mode); } - - if (DECL_P (decl)) + else if (DECL_P (decl)) { - if (TREE_CODE (decl) == FUNCTION_DECL) - return true; + dalign = DECL_ALIGN (decl); - if (!DECL_SIZE_UNIT (decl)) - return false; + if (dsize == 0) + { + /* Allow BLKmode when the entire object is known to not + cross a 32k boundary. 
*/ + if (!DECL_SIZE_UNIT (decl)) + return false; - if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) - return false; + if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) + return false; - dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); - if (dsize > 32768) - return false; + dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + if (dsize > 32768) + return false; - dalign = DECL_ALIGN_UNIT (decl); - return dalign >= dsize; + return dalign / BITS_PER_UNIT >= dsize; + } } + else + { + type = TREE_TYPE (decl); - type = TREE_TYPE (decl); + dalign = TYPE_ALIGN (type); + if (CONSTANT_CLASS_P (decl)) + dalign = CONSTANT_ALIGNMENT (decl, dalign); + else + dalign = DATA_ALIGNMENT (decl, dalign); - if (TREE_CODE (decl) == STRING_CST) - dsize = TREE_STRING_LENGTH (decl); - else if (TYPE_SIZE_UNIT (type) - && host_integerp (TYPE_SIZE_UNIT (type), 1)) - dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); - else - return false; - if (dsize > 32768) - return false; + if (dsize == 0) + { + /* BLKmode, check the entire object. */ + if (TREE_CODE (decl) == STRING_CST) + dsize = TREE_STRING_LENGTH (decl); + else if (TYPE_SIZE_UNIT (type) + && host_integerp (TYPE_SIZE_UNIT (type), 1)) + dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + else + return false; + if (dsize > 32768) + return false; + + return dalign / BITS_PER_UNIT >= dsize; + } + } + + /* Find how many bits of the alignment we know for this access. */ + mask = dalign / BITS_PER_UNIT - 1; + lsb = offset & -offset; + mask &= lsb - 1; + dalign = mask + 1; - dalign = TYPE_ALIGN (type); - if (CONSTANT_CLASS_P (decl)) - dalign = CONSTANT_ALIGNMENT (decl, dalign); - else - dalign = DATA_ALIGNMENT (decl, dalign); - dalign /= BITS_PER_UNIT; return dalign >= dsize; } @@ -5747,8 +6131,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) return force_reg (Pmode, XEXP (x, 0)); + /* For TImode with load/store quad, restrict addresses to just a single + pointer, so it works with both GPRs and VSX registers. */ /* Make sure both operands are registers. */ - else if (GET_CODE (x) == PLUS) + else if (GET_CODE (x) == PLUS + && (mode != TImode || !TARGET_QUAD_MEMORY)) return gen_rtx_PLUS (Pmode, force_reg (Pmode, XEXP (x, 0)), force_reg (Pmode, XEXP (x, 1))); @@ -6405,7 +6792,6 @@ use_toc_relative_ref (rtx sym) && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), get_pool_mode (sym))) || (TARGET_CMODEL == CMODEL_MEDIUM - && !CONSTANT_POOL_ADDRESS_P (sym) && SYMBOL_REF_LOCAL_P (sym))); } @@ -6703,6 +7089,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; + /* For TImode, if we have load/store quad, only allow register indirect + addresses. This will allow the values to go in either GPRs or VSX + registers without reloading. The vector types would tend to go into VSX + registers, so we allow REG+REG, while TImode seems somewhat split, in that + some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY) + return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! reg_ok_strict && reg_offset_p @@ -9215,20 +9608,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode, if (! 
no_rtl && first_reg_offset < GP_ARG_NUM_REG && cfun->va_list_gpr_size) { - int nregs = GP_ARG_NUM_REG - first_reg_offset; + int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset; if (va_list_gpr_counter_field) - { - /* V4 va_list_gpr_size counts number of registers needed. */ - if (nregs > cfun->va_list_gpr_size) - nregs = cfun->va_list_gpr_size; - } + /* V4 va_list_gpr_size counts number of registers needed. */ + n_gpr = cfun->va_list_gpr_size; else - { - /* char * va_list instead counts number of bytes needed. */ - if (nregs > cfun->va_list_gpr_size / reg_size) - nregs = cfun->va_list_gpr_size / reg_size; - } + /* char * va_list instead counts number of bytes needed. */ + n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size; + + if (nregs > n_gpr) + nregs = n_gpr; mem = gen_rtx_MEM (BLKmode, plus_constant (Pmode, save_area, @@ -10578,6 +10968,27 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) return const0_rtx; } } + else if (icode == CODE_FOR_crypto_vshasigmaw + || icode == CODE_FOR_crypto_vshasigmad) + { + /* Check whether the 2nd and 3rd arguments are integer constants and in + range and prepare arguments. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1)) + { + error ("argument 2 must be 0 or 1"); + return const0_rtx; + } + + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15)) + { + error ("argument 3 must be in the range 0..15"); + return const0_rtx; + } + } if (target == 0 || GET_MODE (target) != tmode @@ -12268,6 +12679,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE); + tree int_ftype_int_v2di_v2di + = build_function_type_list (integer_type_node, + integer_type_node, V2DI_type_node, + V2DI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -12350,6 +12765,8 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); tree v4si_ftype_v4si = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_v8hi @@ -12485,6 +12902,9 @@ altivec_init_builtins (void) case VOIDmode: type = int_ftype_int_opaque_opaque; break; + case V2DImode: + type = int_ftype_int_v2di_v2di; + break; case V4SImode: type = int_ftype_int_v4si_v4si; break; @@ -12518,6 +12938,9 @@ altivec_init_builtins (void) switch (mode0) { + case V2DImode: + type = v2di_ftype_v2di; + break; case V4SImode: type = v4si_ftype_v4si; break; @@ -12723,11 +13146,27 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, are type correct. */ switch (builtin) { + /* unsigned 1 argument functions. */ + case CRYPTO_BUILTIN_VSBOX: + case P8V_BUILTIN_VGBBD: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + break; + /* unsigned 2 argument functions. 
*/ case ALTIVEC_BUILTIN_VMULEUB_UNS: case ALTIVEC_BUILTIN_VMULEUH_UNS: case ALTIVEC_BUILTIN_VMULOUB_UNS: case ALTIVEC_BUILTIN_VMULOUH_UNS: + case CRYPTO_BUILTIN_VCIPHER: + case CRYPTO_BUILTIN_VCIPHERLAST: + case CRYPTO_BUILTIN_VNCIPHER: + case CRYPTO_BUILTIN_VNCIPHERLAST: + case CRYPTO_BUILTIN_VPMSUMB: + case CRYPTO_BUILTIN_VPMSUMH: + case CRYPTO_BUILTIN_VPMSUMW: + case CRYPTO_BUILTIN_VPMSUMD: + case CRYPTO_BUILTIN_VPMSUM: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12750,6 +13189,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, case VSX_BUILTIN_XXSEL_8HI_UNS: case VSX_BUILTIN_XXSEL_4SI_UNS: case VSX_BUILTIN_XXSEL_2DI_UNS: + case CRYPTO_BUILTIN_VPERMXOR: + case CRYPTO_BUILTIN_VPERMXOR_V2DI: + case CRYPTO_BUILTIN_VPERMXOR_V4SI: + case CRYPTO_BUILTIN_VPERMXOR_V8HI: + case CRYPTO_BUILTIN_VPERMXOR_V16QI: + case CRYPTO_BUILTIN_VSHASIGMAW: + case CRYPTO_BUILTIN_VSHASIGMAD: + case CRYPTO_BUILTIN_VSHASIGMA: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12891,8 +13338,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", + d->name); + + continue; + } type = builtin_function_type (insn_data[icode].operand[0].mode, insn_data[icode].operand[1].mode, @@ -12931,8 +13393,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -12993,8 +13470,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -13747,29 +14239,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) return NULL_TREE; } -enum reload_reg_type { - GPR_REGISTER_TYPE, - VECTOR_REGISTER_TYPE, - OTHER_REGISTER_TYPE -}; +/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work + on traditional floating point registers, and the VMRGOW/VMRGEW instructions + only work on the traditional altivec registers, note if an altivec register + was choosen. 
*/ -static enum reload_reg_type -rs6000_reload_register_type (enum reg_class rclass) +static enum rs6000_reg_type +register_to_reg_type (rtx reg, bool *is_altivec) { - switch (rclass) + HOST_WIDE_INT regno; + enum reg_class rclass; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (!REG_P (reg)) + return NO_REG_TYPE; + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER) { - case GENERAL_REGS: - case BASE_REGS: - return GPR_REGISTER_TYPE; + if (!lra_in_progress && !reload_in_progress && !reload_completed) + return PSEUDO_REG_TYPE; - case FLOAT_REGS: - case ALTIVEC_REGS: - case VSX_REGS: - return VECTOR_REGISTER_TYPE; + regno = true_regnum (reg); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + return PSEUDO_REG_TYPE; + } - default: - return OTHER_REGISTER_TYPE; + gcc_assert (regno >= 0); + + if (is_altivec && ALTIVEC_REGNO_P (regno)) + *is_altivec = true; + + rclass = rs6000_regno_regclass[regno]; + return reg_class_to_reg_type[(int)rclass]; +} + +/* Helper function for rs6000_secondary_reload to return true if a move to a + different register classe is really a simple move. */ + +static bool +rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode) +{ + int size; + + /* Add support for various direct moves available. In this function, we only + look at cases where we don't need any extra registers, and one or more + simple move insns are issued. At present, 32-bit integers are not allowed + in FPR/VSX registers. Single precision binary floating is not a simple + move because we need to convert to the single precision memory layout. + The 4-byte SDmode can be moved. */ + size = GET_MODE_SIZE (mode); + if (TARGET_DIRECT_MOVE + && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 + && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) + || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) + || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + return false; +} + +/* Power8 helper function for rs6000_secondary_reload, handle all of the + special direct moves that involve allocating an extra register, return the + insn code of the helper function if there is such a function or + CODE_FOR_nothing if not. */ + +static bool +rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + bool ret = false; + enum insn_code icode = CODE_FOR_nothing; + int cost = 0; + int size = GET_MODE_SIZE (mode); + + if (TARGET_POWERPC64) + { + if (size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. 
*/ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (mode == SFmode) + { + if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* xscvdpspn, mfvsrd, and. */ + icode = reload_gpr_vsx[(int)mode]; + } + + else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 2; /* mtvsrz, xscvspdpn. */ + icode = reload_vsx_gpr[(int)mode]; + } + } } + + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (!TARGET_POWERPC64 && size == 8) + { + /* Handle moving 64-bit values from GPRs to floating point registers on + power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit + values back together. Altivec register classes must be handled + specially since a different instruction is used, and the secondary + reload support requires a single instruction class in the scratch + register constraint. However, right now TFmode is not allowed in + Altivec registers, so the pattern will never match. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) + { + cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ + icode = reload_fpr_gpr[(int)mode]; + } + } + + if (icode != CODE_FOR_nothing) + { + ret = true; + if (sri) + { + sri->icode = icode; + sri->extra_cost = cost; + } + } + + return ret; +} + +/* Return whether a move between two register classes can be done either + directly (simple move) or via a pattern that uses a single extra temporary + (using power8's direct move in this case. */ + +static bool +rs6000_secondary_reload_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + /* Fall back to load/store reloads if either type is not a register. */ + if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) + return false; + + /* If we haven't allocated registers yet, assume the move can be done for the + standard register types. */ + if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) + || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) + || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) + return true; + + /* Moves to the same set of registers is a simple move for non-specialized + registers. */ + if (to_type == from_type && IS_STD_REG_TYPE (to_type)) + return true; + + /* Check whether a simple move can be done directly. */ + if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) + { + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + return true; + } + + /* Now check if we can do it in a few steps. 
*/ + return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, + altivec_p); } /* Inform reload about cases where moving X with a mode MODE to a register in @@ -13795,11 +14484,32 @@ rs6000_secondary_reload (bool in_p, bool default_p = false; sri->icode = CODE_FOR_nothing; - - /* Convert vector loads and stores into gprs to use an additional base - register. */ icode = rs6000_vector_reload[mode][in_p != false]; - if (icode != CODE_FOR_nothing) + + if (REG_P (x) || register_operand (x, mode)) + { + enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; + bool altivec_p = (rclass == ALTIVEC_REGS); + enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); + + if (!in_p) + { + enum rs6000_reg_type exchange = to_type; + to_type = from_type; + from_type = exchange; + } + + if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, + altivec_p)) + { + icode = (enum insn_code)sri->icode; + default_p = false; + ret = NO_REGS; + } + } + + /* Handle vector moves with reload helper functions. */ + if (ret == ALL_REGS && icode != CODE_FOR_nothing) { ret = NO_REGS; sri->icode = CODE_FOR_nothing; @@ -13811,12 +14521,21 @@ rs6000_secondary_reload (bool in_p, /* Loads to and stores from gprs can do reg+offset, and wouldn't need an extra register in that case, but it would need an extra - register if the addressing is reg+reg or (reg+reg)&(-16). */ + register if the addressing is reg+reg or (reg+reg)&(-16). Special + case load/store quad. */ if (rclass == GENERAL_REGS || rclass == BASE_REGS) { - if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) + if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY + && GET_MODE_SIZE (mode) == 16 + && quad_memory_operand (x, mode)) + { + sri->icode = icode; + sri->extra_cost = 2; + } + + else if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (PTImode, addr, + false, true)) { sri->icode = icode; /* account for splitting the loads, and converting the @@ -13830,7 +14549,7 @@ rs6000_secondary_reload (bool in_p, else if ((rclass == FLOAT_REGS || rclass == NO_REGS) && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && (legitimate_indirect_address_p (addr, false) - || legitimate_indirect_address_p (XEXP (addr, 0), false) + || legitimate_indirect_address_p (addr, false) || rs6000_legitimate_offset_address_p (mode, addr, false, true))) @@ -13882,12 +14601,12 @@ rs6000_secondary_reload (bool in_p, else { enum reg_class xclass = REGNO_REG_CLASS (regno); - enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass); - enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass); + enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass]; + enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass]; /* If memory is needed, use default_secondary_reload to create the stack slot. 
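The simple-move/direct-move classification above is what lets 64-bit scalar data travel between GPRs and VSX registers without a trip through memory on power8 (the mtvsrd/mfvsrd sequences mentioned in the comments). As a usage-level illustration, ordinary code like the following bit-pattern extraction can then compile to a direct register move at -O2 -mcpu=power8 instead of a stack store and reload; this example is not from the patch itself:

#include <stdint.h>
#include <string.h>

/* Return the raw IEEE-754 bits of a double.  */
uint64_t
double_bits (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);   /* well-defined bit copy */
  return u;
}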
*/ - if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE) + if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1)) default_p = true; else ret = NO_REGS; @@ -13897,7 +14616,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { @@ -13936,7 +14655,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (!TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { @@ -14499,42 +15218,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) set and vice versa. */ static bool -rs6000_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - if (class1 == class2) - return false; - - /* Under VSX, there are 3 register classes that values could be in (VSX_REGS, - ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy - between these classes. But we need memory for other things that can go in - FLOAT_REGS like SFmode. */ - if (TARGET_VSX - && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode)) - && (class1 == VSX_REGS || class1 == ALTIVEC_REGS - || class1 == FLOAT_REGS)) - return (class2 != VSX_REGS && class2 != ALTIVEC_REGS - && class2 != FLOAT_REGS); + enum rs6000_reg_type from_type, to_type; + bool altivec_p = ((from_class == ALTIVEC_REGS) + || (to_class == ALTIVEC_REGS)); - if (class1 == VSX_REGS || class2 == VSX_REGS) - return true; + /* If a simple/direct move is available, we don't need secondary memory */ + from_type = reg_class_to_reg_type[(int)from_class]; + to_type = reg_class_to_reg_type[(int)to_class]; - if (class1 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; - - if (class2 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + if (rs6000_secondary_reload_move (to_type, from_type, mode, + (secondary_reload_info *)0, altivec_p)) + return false; - if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS) + /* If we have a floating point or vector register class, we need to use + memory to transfer the data. */ + if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) return true; return false; @@ -14542,17 +15244,19 @@ rs6000_secondary_memory_needed (enum reg_class class1, /* Debug version of rs6000_secondary_memory_needed. */ static bool -rs6000_debug_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_debug_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - bool ret = rs6000_secondary_memory_needed (class1, class2, mode); + bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); fprintf (stderr, - "rs6000_secondary_memory_needed, return: %s, class1 = %s, " - "class2 = %s, mode = %s\n", - ret ? "true" : "false", reg_class_names[class1], - reg_class_names[class2], GET_MODE_NAME (mode)); + "rs6000_secondary_memory_needed, return: %s, from_class = %s, " + "to_class = %s, mode = %s\n", + ret ? 
"true" : "false", + reg_class_names[from_class], + reg_class_names[to_class], + GET_MODE_NAME (mode)); return ret; } @@ -14758,6 +15462,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from, return ret; } +/* Return a string to do a move operation of 128 bits of data. */ + +const char * +rs6000_output_move_128bit (rtx operands[]) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + enum machine_mode mode = GET_MODE (dest); + int dest_regno; + int src_regno; + bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p; + + if (REG_P (dest)) + { + dest_regno = REGNO (dest); + dest_gpr_p = INT_REGNO_P (dest_regno); + dest_fp_p = FP_REGNO_P (dest_regno); + dest_av_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_av_p; + } + else + { + dest_regno = -1; + dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false; + } + + if (REG_P (src)) + { + src_regno = REGNO (src); + src_gpr_p = INT_REGNO_P (src_regno); + src_fp_p = FP_REGNO_P (src_regno); + src_av_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_av_p; + } + else + { + src_regno = -1; + src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false; + } + + /* Register moves. */ + if (dest_regno >= 0 && src_regno >= 0) + { + if (dest_gpr_p) + { + if (src_gpr_p) + return "#"; + + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) + return "#"; + } + + else if (TARGET_VSX && dest_vsx_p) + { + if (src_vsx_p) + return "xxlor %x0,%x1,%x1"; + + else if (TARGET_DIRECT_MOVE && src_gpr_p) + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p && src_av_p) + return "vor %0,%1,%1"; + + else if (dest_fp_p && src_fp_p) + return "#"; + } + + /* Loads. */ + else if (dest_regno >= 0 && MEM_P (src)) + { + if (dest_gpr_p) + { + if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 + && quad_memory_operand (src, mode) + && !reg_overlap_mentioned_p (dest, src)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "lvx %0,%y1"; + + else if (TARGET_VSX && dest_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "lxvw4x %x0,%y1"; + else + return "lxvd2x %x0,%y1"; + } + + else if (TARGET_ALTIVEC && dest_av_p) + return "lvx %0,%y1"; + + else if (dest_fp_p) + return "#"; + } + + /* Stores. */ + else if (src_regno >= 0 && MEM_P (dest)) + { + if (src_gpr_p) + { + if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 + && quad_memory_operand (dest, mode)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && src_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "stvx %1,%y0"; + + else if (TARGET_VSX && src_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "stxvw4x %x1,%y0"; + else + return "stxvd2x %x1,%y0"; + } + + else if (TARGET_ALTIVEC && src_av_p) + return "stvx %1,%y0"; + + else if (src_fp_p) + return "#"; + } + + /* Constants. 
*/ + else if (dest_regno >= 0 + && (GET_CODE (src) == CONST_INT + || GET_CODE (src) == CONST_DOUBLE + || GET_CODE (src) == CONST_VECTOR)) + { + if (dest_gpr_p) + return "#"; + + else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) + return "xxlxor %x0,%x0,%x0"; + + else if (TARGET_ALTIVEC && dest_av_p) + return output_vec_const_move (operands); + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n===== Bad 128 bit move:\n"); + debug_rtx (gen_rtx_SET (VOIDmode, dest, src)); + } + + gcc_unreachable (); +} + + /* Given a comparison operation, return the bit number in CCR to test. We know this is a valid comparison. @@ -15474,11 +16342,6 @@ print_operand (FILE *file, rtx x, int code) TOCs and the like. */ gcc_assert (GET_CODE (x) == SYMBOL_REF); - /* Mark the decl as referenced so that cgraph will output the - function. */ - if (SYMBOL_REF_DECL (x)) - mark_decl_referenced (SYMBOL_REF_DECL (x)); - /* For macho, check to see if we need a stub. */ if (TARGET_MACHO) { @@ -15887,16 +16750,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) { rtx cmp, or_result, compare_result2; enum machine_mode op_mode = GET_MODE (op0); + bool reverse_p; if (op_mode == VOIDmode) op_mode = GET_MODE (op1); + /* First reverse the condition codes that aren't directly supported. */ + switch (code) + { + case NE: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + code = reverse_condition_maybe_unordered (code); + reverse_p = true; + break; + + case EQ: + case LT: + case LE: + case GT: + case GE: + reverse_p = false; + break; + + default: + gcc_unreachable (); + } + /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only. This explains the following mess. */ switch (code) { - case EQ: case UNEQ: case NE: case LTGT: + case EQ: switch (op_mode) { case SFmode: @@ -15922,7 +16810,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case GT: case GTU: case UNGT: case UNGE: case GE: case GEU: + case GT: + case GE: switch (op_mode) { case SFmode: @@ -15948,7 +16837,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case LT: case LTU: case UNLT: case UNLE: case LE: case LEU: + case LT: + case LE: switch (op_mode) { case SFmode: @@ -15973,24 +16863,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) gcc_unreachable (); } break; + default: gcc_unreachable (); } /* Synthesize LE and GE from LT/GT || EQ. */ - if (code == LE || code == GE || code == LEU || code == GEU) + if (code == LE || code == GE) { emit_insn (cmp); - switch (code) - { - case LE: code = LT; break; - case GE: code = GT; break; - case LEU: code = LT; break; - case GEU: code = GT; break; - default: gcc_unreachable (); - } - compare_result2 = gen_reg_rtx (CCFPmode); /* Do the EQ. */ @@ -16017,23 +16899,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) default: gcc_unreachable (); } + emit_insn (cmp); /* OR them together. */ or_result = gen_reg_rtx (CCFPmode); cmp = gen_e500_cr_ior_compare (or_result, compare_result, - compare_result2); + compare_result2); compare_result = or_result; - code = EQ; - } - else - { - if (code == NE || code == LTGT) - code = NE; - else - code = EQ; } + code = reverse_p ? NE : EQ; + emit_insn (cmp); } else @@ -16913,7 +17790,8 @@ emit_unlikely_jump (rtx cond, rtx label) } /* A subroutine of the atomic operation splitters. Emit a load-locked - instruction in MODE. */ + instruction in MODE. For QI/HImode, possibly use a pattern than includes + the zero_extend operation. 
*/ static void emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) @@ -16922,12 +17800,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) switch (mode) { + case QImode: + fn = gen_load_lockedqi; + break; + case HImode: + fn = gen_load_lockedhi; + break; case SImode: - fn = gen_load_lockedsi; + if (GET_MODE (mem) == QImode) + fn = gen_load_lockedqi_si; + else if (GET_MODE (mem) == HImode) + fn = gen_load_lockedhi_si; + else + fn = gen_load_lockedsi; break; case DImode: fn = gen_load_lockeddi; break; + case TImode: + fn = gen_load_lockedti; + break; default: gcc_unreachable (); } @@ -16944,12 +17836,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) switch (mode) { + case QImode: + fn = gen_store_conditionalqi; + break; + case HImode: + fn = gen_store_conditionalhi; + break; case SImode: fn = gen_store_conditionalsi; break; case DImode: fn = gen_store_conditionaldi; break; + case TImode: + fn = gen_store_conditionalti; + break; default: gcc_unreachable (); } @@ -17046,8 +17947,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) shift = gen_reg_rtx (SImode); addr = gen_lowpart (SImode, addr); emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask))); - shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), - shift, 1, OPTAB_LIB_WIDEN); + if (WORDS_BIG_ENDIAN) + shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), + shift, 1, OPTAB_LIB_WIDEN); *pshift = shift; /* Mask for insertion. */ @@ -17095,7 +17997,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) { rtx boolval, retval, mem, oldval, newval, cond; rtx label1, label2, x, mask, shift; - enum machine_mode mode; + enum machine_mode mode, orig_mode; enum memmodel mod_s, mod_f; bool is_weak; @@ -17107,22 +18009,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) is_weak = (INTVAL (operands[5]) != 0); mod_s = (enum memmodel) INTVAL (operands[6]); mod_f = (enum memmodel) INTVAL (operands[7]); - mode = GET_MODE (mem); + orig_mode = mode = GET_MODE (mem); mask = shift = NULL_RTX; if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask OLDVAL into position with the word. */ + /* Before power8, we didn't have access to lbarx/lharx, so generate a + lwarx and shift/mask operations. With power8, we need to do the + comparison in SImode, but the store is still done in QI/HImode. */ oldval = convert_modes (SImode, mode, oldval, 1); - oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); - /* Shift and mask NEWVAL into position within the word. */ - newval = convert_modes (SImode, mode, newval, 1); - newval = expand_simple_binop (SImode, ASHIFT, newval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (!TARGET_SYNC_HI_QI) + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask OLDVAL into position with the word. */ + oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + /* Shift and mask NEWVAL into position within the word. */ + newval = convert_modes (SImode, mode, newval, 1); + newval = expand_simple_binop (SImode, ASHIFT, newval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + } /* Prepare to adjust the return value. 
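The new TImode variants of emit_load_locked and emit_store_conditional above back 128-bit atomic sequences using the quad-word reservation instructions (lqarx/stqcx.). A usage-level illustration, not from the patch; whether it expands inline or falls back to a library call depends on the selected CPU and options:

#include <stdbool.h>

/* 128-bit compare-and-swap.  */
bool
cas_u128 (unsigned __int128 *p, unsigned __int128 *expected,
          unsigned __int128 desired)
{
  return __atomic_compare_exchange_n (p, expected, desired, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}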
*/ retval = gen_reg_rtx (SImode); @@ -17151,7 +18060,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) } cond = gen_reg_rtx (CCmode); - x = gen_rtx_COMPARE (CCmode, x, oldval); + /* If we have TImode, synthesize a comparison. */ + if (mode != TImode) + x = gen_rtx_COMPARE (CCmode, x, oldval); + else + { + rtx xor1_result = gen_reg_rtx (DImode); + rtx xor2_result = gen_reg_rtx (DImode); + rtx or_result = gen_reg_rtx (DImode); + rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); + rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); + rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); + rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); + + emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); + emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); + emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); + x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); + } + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); @@ -17161,7 +18088,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (mask) x = rs6000_mask_atomic_subword (retval, newval, mask); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (orig_mode, cond, mem, x); if (!is_weak) { @@ -17179,6 +18106,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (shift) rs6000_finish_atomic_subword (operands[1], retval, shift); + else if (mode != GET_MODE (operands[1])) + convert_move (operands[1], retval, 1); /* In all cases, CR0 contains EQ on success, and NE on failure. */ x = gen_rtx_EQ (SImode, cond, const0_rtx); @@ -17202,7 +18131,7 @@ rs6000_expand_atomic_exchange (rtx operands[]) mode = GET_MODE (mem); mask = shift = NULL_RTX; - if (mode == QImode || mode == HImode) + if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) { mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); @@ -17251,53 +18180,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, { enum memmodel model = (enum memmodel) INTVAL (model_rtx); enum machine_mode mode = GET_MODE (mem); + enum machine_mode store_mode = mode; rtx label, x, cond, mask, shift; rtx before = orig_before, after = orig_after; mask = shift = NULL_RTX; + /* On power8, we want to use SImode for the operation. On previous systems, + use the operation in a subword and shift/mask to get the proper byte or + halfword. */ if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask VAL into position with the word. */ - val = convert_modes (SImode, mode, val, 1); - val = expand_simple_binop (SImode, ASHIFT, val, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (TARGET_SYNC_HI_QI) + { + val = convert_modes (SImode, mode, val, 1); - switch (code) + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + mode = SImode; + } + else { - case IOR: - case XOR: - /* We've already zero-extended VAL. That is sufficient to - make certain that it does not affect other bits. */ - mask = NULL; - break; + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - case AND: - /* If we make certain that all of the other bits in VAL are - set, that will be sufficient to not affect other bits. */ - x = gen_rtx_NOT (SImode, mask); - x = gen_rtx_IOR (SImode, x, val); - emit_insn (gen_rtx_SET (VOIDmode, val, x)); - mask = NULL; - break; + /* Shift and mask VAL into position with the word. 
*/ + val = convert_modes (SImode, mode, val, 1); + val = expand_simple_binop (SImode, ASHIFT, val, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); - case NOT: - case PLUS: - case MINUS: - /* These will all affect bits outside the field and need - adjustment via MASK within the loop. */ - break; + switch (code) + { + case IOR: + case XOR: + /* We've already zero-extended VAL. That is sufficient to + make certain that it does not affect other bits. */ + mask = NULL; + break; - default: - gcc_unreachable (); - } + case AND: + /* If we make certain that all of the other bits in VAL are + set, that will be sufficient to not affect other bits. */ + x = gen_rtx_NOT (SImode, mask); + x = gen_rtx_IOR (SImode, x, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + mask = NULL; + break; - /* Prepare to adjust the return value. */ - before = gen_reg_rtx (SImode); - if (after) - after = gen_reg_rtx (SImode); - mode = SImode; + case NOT: + case PLUS: + case MINUS: + /* These will all affect bits outside the field and need + adjustment via MASK within the loop. */ + break; + + default: + gcc_unreachable (); + } + + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + store_mode = mode = SImode; + } } mem = rs6000_pre_atomic_barrier (mem, model); @@ -17330,9 +18276,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, NULL_RTX, 1, OPTAB_LIB_WIDEN); x = rs6000_mask_atomic_subword (before, x, mask); } + else if (store_mode != mode) + x = convert_modes (store_mode, mode, x, 1); cond = gen_reg_rtx (CCmode); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (store_mode, cond, mem, x); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); emit_unlikely_jump (x, label); @@ -17341,11 +18289,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, if (shift) { + /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and + then do the calcuations in a SImode register. */ if (orig_before) rs6000_finish_atomic_subword (orig_before, before, shift); if (orig_after) rs6000_finish_atomic_subword (orig_after, after, shift); } + else if (store_mode != mode) + { + /* QImode/HImode on machines with lbarx/lharx where we do the native + operation and then do the calcuations in a SImode register. 
*/ + if (orig_before) + convert_move (orig_before, before, 1); + if (orig_after) + convert_move (orig_after, after, 1); + } else if (orig_after && after != orig_after) emit_move_insn (orig_after, after); } @@ -22360,7 +23319,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode) fputs (DOUBLE_INT_ASM_OP, file); else fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); - fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + if (WORDS_BIG_ENDIAN) + fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + else + fprintf (file, "0x%lx\n", l & 0xffffffff); return; } else @@ -22951,6 +23913,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) || rs6000_cpu_attr == CPU_POWER4 || rs6000_cpu_attr == CPU_POWER5 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_CELL) && recog_memoized (dep_insn) && (INSN_CODE (dep_insn) >= 0)) @@ -23233,7 +24196,8 @@ is_microcoded_insn (rtx insn) if (rs6000_cpu_attr == CPU_CELL) return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; - if (rs6000_sched_groups) + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) { enum attr_type type = get_attr_type (insn); if (type == TYPE_LOAD_EXT_U @@ -23258,7 +24222,8 @@ is_cracked_insn (rtx insn) || GET_CODE (PATTERN (insn)) == CLOBBER) return false; - if (rs6000_sched_groups) + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) { enum attr_type type = get_attr_type (insn); if (type == TYPE_LOAD_U || type == TYPE_STORE_U @@ -23537,6 +24502,8 @@ rs6000_issue_rate (void) case CPU_POWER6: case CPU_POWER7: return 5; + case CPU_POWER8: + return 7; default: return 1; } @@ -24164,6 +25131,39 @@ insn_must_be_first_in_group (rtx insn) break; } break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_COMPARE: + case TYPE_DELAYED_COMPARE: + case TYPE_VAR_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_SYNC: + case TYPE_ISYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_UX: + case TYPE_VECSTORE: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + return true; + default: + break; + } + break; default: break; } @@ -24242,6 +25242,25 @@ insn_must_be_last_in_group (rtx insn) break; } break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_UX: + return true; + default: + break; + } + break; default: break; } @@ -24331,8 +25350,9 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, if (can_issue_more && !is_branch_slot_insn (next_insn)) can_issue_more--; - /* Power6 and Power7 have special group ending nop. */ - if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7) + /* Do we have a special group ending nop? 
*/ + if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8) { nop = gen_group_ending_nop (); emit_insn_before (nop, next_insn); @@ -26513,7 +27533,8 @@ rs6000_register_move_cost (enum machine_mode mode, /* For those processors that have slow LR/CTR moves, make them more expensive than memory in order to bias spills to memory .*/ else if ((rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8) && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) ret = 6 * hard_regno_nregs[0][mode]; @@ -26990,26 +28011,31 @@ bool altivec_expand_vec_perm_const (rtx operands[4]) { struct altivec_perm_insn { + HOST_WIDE_INT mask; enum insn_code impl; unsigned char perm[16]; }; static const struct altivec_perm_insn patterns[] = { - { CODE_FOR_altivec_vpkuhum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum, { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, - { CODE_FOR_altivec_vpkuwum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum, { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, - { CODE_FOR_altivec_vmrghb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb, { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, - { CODE_FOR_altivec_vmrghh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh, { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, - { CODE_FOR_altivec_vmrghw, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw, { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, - { CODE_FOR_altivec_vmrglb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb, { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, - { CODE_FOR_altivec_vmrglh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh, { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, - { CODE_FOR_altivec_vmrglw, - { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } } + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, + { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, + { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } }; unsigned int i, j, elt, which; @@ -27109,6 +28135,9 @@ altivec_expand_vec_perm_const (rtx operands[4]) { bool swapped; + if ((patterns[j].mask & rs6000_isa_flags) == 0) + continue; + elt = patterns[j].perm[0]; if (perm[0] == elt) swapped = false; @@ -27742,6 +28771,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { { "altivec", OPTION_MASK_ALTIVEC, false, true }, { "cmpb", OPTION_MASK_CMPB, false, true }, + { "crypto", OPTION_MASK_CRYPTO, false, true }, + { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, @@ -27750,13 +28781,17 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, { "mulhw", OPTION_MASK_MULHW, false, true }, { "multiple", OPTION_MASK_MULTIPLE, false, true }, - { "update", OPTION_MASK_NO_UPDATE, true , true }, { "popcntb", OPTION_MASK_POPCNTB, false, true }, { "popcntd", OPTION_MASK_POPCNTD, false, true }, + { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, + { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, + { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, { 
"powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, + { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, { "string", OPTION_MASK_STRING, false, true }, + { "update", OPTION_MASK_NO_UPDATE, true , true }, { "vsx", OPTION_MASK_VSX, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT @@ -27798,6 +28833,8 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, { "popcntd", RS6000_BTM_POPCNTD, false, false }, { "cell", RS6000_BTM_CELL, false, false }, + { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, + { "crypto", RS6000_BTM_CRYPTO, false, false }, }; /* Option variables that we want to support inside attribute((target)) and diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 6549347b9b7..633d7891157 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -92,7 +92,7 @@ #ifdef HAVE_AS_POWER8 #define ASM_CPU_POWER8_SPEC "-mpower8" #else -#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec" +#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC #endif #ifdef HAVE_AS_DCI @@ -164,6 +164,7 @@ %{mcpu=e6500: -me6500} \ %{maltivec: -maltivec} \ %{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \ +%{mpower8-vector|mcrypto|mdirect-move: %{!mcpu*: %(asm_cpu_power8)}} \ -many" #define CPP_DEFAULT_SPEC "" @@ -277,6 +278,19 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_POPCNTD 0 #endif +/* Define the ISA 2.07 flags as 0 if the target assembler does not support the + waitasecond instruction. Allow -mpower8-fusion, since it does not add new + instructions. */ + +#ifndef HAVE_AS_POWER8 +#undef TARGET_DIRECT_MOVE +#undef TARGET_CRYPTO +#undef TARGET_P8_VECTOR +#define TARGET_DIRECT_MOVE 0 +#define TARGET_CRYPTO 0 +#define TARGET_P8_VECTOR 0 +#endif + /* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If not, generate the lwsync code as an integer constant. */ #ifdef HAVE_AS_LWSYNC @@ -386,6 +400,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET) #define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN) +/* Describe the vector unit used for arithmetic operations. */ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_NONE_P(MODE) \ @@ -394,12 +409,25 @@ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_VSX_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_VSX) +#define VECTOR_UNIT_P8_VECTOR_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR) + #define VECTOR_UNIT_ALTIVEC_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + +/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either + altivec (VMX) or VSX vector instructions. P8 vector support is upwards + compatible, so allow it as well, rather than changing all of the uses of the + macro. */ #define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_unit[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Describe whether to use VSX loads or Altivec loads. 
For now, just use the same unit as the vector unit we are using, but we may want to migrate to @@ -412,12 +440,21 @@ extern enum rs6000_vector rs6000_vector_mem[]; #define VECTOR_MEM_VSX_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_VSX) +#define VECTOR_MEM_P8_VECTOR_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_VSX) + #define VECTOR_MEM_ALTIVEC_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + #define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_mem[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Return the alignment of a given vector type, which is set based on the vector unit use. VSX for instance can load 32 or 64 bit aligned words @@ -479,6 +516,15 @@ extern int rs6000_vector_align[]; #define TARGET_FCTIDUZ TARGET_POPCNTD #define TARGET_FCTIWUZ TARGET_POPCNTD +#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) + +/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present + in power7, so conditionalize them on p8 features. TImode syncs need quad + memory support. */ +#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE) +#define TARGET_SYNC_TI TARGET_QUAD_MEMORY + /* Power7 has both 32-bit load and store integer for the FPRs, so we don't need to allocate the SDmode stack slot to get the value into the proper location in the register. */ @@ -489,10 +535,13 @@ extern int rs6000_vector_align[]; OPTION_MASK_<xxx> back into MASK_<xxx>. */ #define MASK_ALTIVEC OPTION_MASK_ALTIVEC #define MASK_CMPB OPTION_MASK_CMPB +#define MASK_CRYPTO OPTION_MASK_CRYPTO #define MASK_DFP OPTION_MASK_DFP +#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE #define MASK_DLMZB OPTION_MASK_DLMZB #define MASK_EABI OPTION_MASK_EABI #define MASK_FPRND OPTION_MASK_FPRND +#define MASK_P8_FUSION OPTION_MASK_P8_FUSION #define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT #define MASK_ISEL OPTION_MASK_ISEL #define MASK_MFCRF OPTION_MASK_MFCRF @@ -500,6 +549,7 @@ extern int rs6000_vector_align[]; #define MASK_MULHW OPTION_MASK_MULHW #define MASK_MULTIPLE OPTION_MASK_MULTIPLE #define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE +#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR #define MASK_POPCNTB OPTION_MASK_POPCNTB #define MASK_POPCNTD OPTION_MASK_POPCNTD #define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT @@ -665,6 +715,11 @@ extern unsigned char rs6000_recip_bits[]; instructions for them. Might as well be consistent with bits and bytes. */ #define WORDS_BIG_ENDIAN 1 +/* This says that for the IBM long double the larger magnitude double + comes first. It's really a two element double array, and arrays + don't index differently between little- and big-endian. */ +#define LONG_DOUBLE_LARGE_FIRST 1 + #define MAX_BITS_PER_WORD 64 /* Width of a word, in units (bytes). */ @@ -758,12 +813,6 @@ extern unsigned rs6000_pointer_size; /* No data type wants to be aligned rounder than this. */ #define BIGGEST_ALIGNMENT 128 -/* A C expression to compute the alignment for a variables in the - local store. TYPE is the data type, and ALIGN is the alignment - that the object would ordinarily have. */ -#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ - DATA_ALIGNMENT (TYPE, ALIGN) - /* Alignment of field after `int : 0' in a structure. 
*/ #define EMPTY_FIELD_BOUNDARY 32 @@ -773,8 +822,15 @@ extern unsigned rs6000_pointer_size; /* A bit-field declared as `int' forces `int' alignment for the struct. */ #define PCC_BITFIELD_TYPE_MATTERS 1 -/* Make strings word-aligned so strcpy from constants will be faster. - Make vector constants quadword aligned. */ +enum data_align { align_abi, align_opt, align_both }; + +/* A C expression to compute the alignment for a variables in the + local store. TYPE is the data type, and ALIGN is the alignment + that the object would ordinarily have. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_both) + +/* Make strings word-aligned so strcpy from constants will be faster. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) \ (TREE_CODE (EXP) == STRING_CST \ && (STRICT_ALIGNMENT || !optimize_size) \ @@ -782,21 +838,14 @@ extern unsigned rs6000_pointer_size; ? BITS_PER_WORD \ : (ALIGN)) -/* Make arrays of chars word-aligned for the same reasons. - Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_opt) + +/* Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to 64 bits. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) \ - (TREE_CODE (TYPE) == VECTOR_TYPE \ - ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \ - || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \ - ? 64 : 128) \ - : ((TARGET_E500_DOUBLE \ - && TREE_CODE (TYPE) == REAL_TYPE \ - && TYPE_MODE (TYPE) == DFmode) \ - ? 64 \ - : (TREE_CODE (TYPE) == ARRAY_TYPE \ - && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ - && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN))) +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_abi) /* Nonzero if move instructions will actually fail to work when given unaligned data. */ @@ -1002,7 +1051,9 @@ extern unsigned rs6000_pointer_size; #define REG_ALLOC_ORDER \ {32, \ - 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \ + /* move fr13 (ie 45) later, so if we need TFmode, it does */ \ + /* not use fr14 which is a saved register. */ \ + 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \ 33, \ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 50, 49, 48, 47, 46, \ @@ -1062,8 +1113,14 @@ extern unsigned rs6000_pointer_size; #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N) /* Alternate name for any vector register supporting logical operations, no - matter which instruction set(s) are available. */ -#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N) + matter which instruction set(s) are available. For 64-bit mode, we also + allow logical operations in the GPRS. This is to allow atomic quad word + builtins not to need the VSX registers for lqarx/stqcx. It also helps with + __int128_t arguments that are passed in GPRs. */ +#define VLOGICAL_REGNO_P(N) \ + (ALTIVEC_REGNO_P (N) \ + || (TARGET_VSX && FP_REGNO_P (N)) \ + || (TARGET_VSX && TARGET_POWERPC64 && INT_REGNO_P (N))) /* Return number of consecutive hard regs needed starting at reg REGNO to hold something of mode MODE. */ @@ -1124,7 +1181,7 @@ extern unsigned rs6000_pointer_size; when one has mode MODE1 and one has mode MODE2. If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, for any hard reg, then this must be 0 for correct output. */ -#define MODES_TIEABLE_P(MODE1, MODE2) \ +#define MODES_TIEABLE_P(MODE1, MODE2) \ (SCALAR_FLOAT_MODE_P (MODE1) \ ? 
SCALAR_FLOAT_MODE_P (MODE2) \ : SCALAR_FLOAT_MODE_P (MODE2) \ @@ -1137,14 +1194,14 @@ extern unsigned rs6000_pointer_size; ? SPE_VECTOR_MODE (MODE2) \ : SPE_VECTOR_MODE (MODE2) \ ? SPE_VECTOR_MODE (MODE1) \ - : ALTIVEC_VECTOR_MODE (MODE1) \ - ? ALTIVEC_VECTOR_MODE (MODE2) \ - : ALTIVEC_VECTOR_MODE (MODE2) \ - ? ALTIVEC_VECTOR_MODE (MODE1) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ + : ALTIVEC_VECTOR_MODE (MODE1) \ + ? ALTIVEC_VECTOR_MODE (MODE2) \ + : ALTIVEC_VECTOR_MODE (MODE2) \ + ? ALTIVEC_VECTOR_MODE (MODE1) \ : 1) /* Post-reload, we can't use any new AltiVec registers, as we already @@ -1337,8 +1394,11 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */ RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */ + RS6000_CONSTRAINT_wm, /* VSX register for direct move */ + RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_ws, /* VSX register for DF */ RS6000_CONSTRAINT_wt, /* VSX register for TImode */ + RS6000_CONSTRAINT_wv, /* Altivec register for power8 vector */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */ RS6000_CONSTRAINT_MAX @@ -2297,6 +2357,13 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */ /* How to align the given loop. */ #define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL) +/* Alignment guaranteed by __builtin_malloc. */ +/* FIXME: 128-bit alignment is guaranteed by glibc for TARGET_64BIT. + However, specifying the stronger guarantee currently leads to + a regression in SPEC CPU2006 437.leslie3d. The stronger + guarantee should be implemented here once that's fixed. */ +#define MALLOC_ABI_ALIGNMENT (64) + /* Pick up the return address upon entry to a procedure. Used for dwarf2 unwind information. This also enables the table driven mechanism. */ @@ -2365,6 +2432,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_ALWAYS 0 /* Always enabled. */ #define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ #define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ +#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */ +#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */ #define RS6000_BTM_SPE MASK_STRING /* E500 */ #define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */ #define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */ @@ -2376,6 +2445,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ + | RS6000_BTM_P8_VECTOR \ + | RS6000_BTM_CRYPTO \ | RS6000_BTM_FRE \ | RS6000_BTM_FRES \ | RS6000_BTM_FRSQRTE \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 1e65ac1cde0..010e21f7413 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -127,6 +127,13 @@ UNSPEC_LFIWZX UNSPEC_FCTIWUZ UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_FUSION_GPR ]) ;; @@ -146,7 +153,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. 
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md @@ -166,9 +173,14 @@ (const_int 4))) ;; Processor type -- this attribute must exactly match the processor_type -;; enumeration in rs6000.h. - -(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan" +;; enumeration in rs6000-opts.h. +(define_attr "cpu" + "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630, + ppc750,ppc7400,ppc7450, + ppc403,ppc405,ppc440,ppc476, + ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500, + power4,power5,power6,power7,power8, + rs64a,mpccore,cell,ppca2,titan" (const (symbol_ref "rs6000_cpu_attr"))) @@ -201,6 +213,7 @@ (include "power5.md") (include "power6.md") (include "power7.md") +(include "power8.md") (include "cell.md") (include "xfpu.md") (include "a2.md") @@ -227,6 +240,12 @@ ; extend modes for DImode (define_mode_iterator QHSI [QI HI SI]) +; QImode or HImode for small atomic ops +(define_mode_iterator QHI [QI HI]) + +; HImode or SImode for sign extended fusion ops +(define_mode_iterator HSI [HI SI]) + ; SImode or DImode, even if DImode doesn't fit in GPRs. 
(define_mode_iterator SDI [SI DI]) @@ -268,6 +287,15 @@ (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "")]) + ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") (DF "") @@ -284,11 +312,16 @@ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) +(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) + +; Definitions for 32-bit fpr direct move +(define_mode_attr f32_dm [(SF "wn") (SD "wm")]) ; These modes do not fit in integer registers in 32-bit mode. ; but on e500v2, the gpr are 64 bit registers @@ -368,7 +401,7 @@ (define_insn "*zero_extend<mode>di2_internal1" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)" "@ l<wd>z%U1%X1 %0,%1 rldicl %0,%1,0,<dbits>" @@ -434,6 +467,29 @@ (const_int 0)))] "") +(define_insn "*zero_extendsidi2_lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWZX" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + mtvsrwz %x0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_u") + (const_string "load"))) + (const_string "*") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + (define_insn "extendqidi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -581,10 +637,33 @@ "TARGET_POWERPC64" "") -(define_insn "" +(define_insn "*extendsidi2_lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWAX" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + mtvsrwa %x0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_u") + (const_string "load_ext"))) + (const_string "exts") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] - "TARGET_POWERPC64 && rs6000_gen_cell_microcode" + "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 extsw %0,%1" @@ -598,7 +677,7 @@ (const_string "load_ext"))) (const_string "exts")])]) 
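As a rough illustration of what the new sign- and zero-extension alternatives above buy (the *extendsidi2_lfiwax and *zero_extendsidi2_lfiwzx patterns with their lfiwax/lxsiwax, lfiwzx/lxsiwzx and mtvsrwa/mtvsrwz alternatives), here is a hypothetical C fragment, not part of the patch, whose widened values feed floating-point code and can therefore stay in FPR/VSX registers instead of bouncing through the stack:

/* Hypothetical illustration, not part of this patch: SImode -> DImode
   extensions that the new *extendsidi2_lfiwax / *zero_extendsidi2_lfiwzx
   alternatives can keep in FPR/VSX registers when the result feeds a
   floating-point conversion.  */
double
widen_and_convert (int *p, unsigned int *q)
{
  long long s = *p;            /* sign_extend:DI of an SImode load  */
  unsigned long long z = *q;   /* zero_extend:DI of an SImode load  */
  return (double) s + (double) z;
}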
-(define_insn "" +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64 && !rs6000_gen_cell_microcode" @@ -2035,7 +2114,9 @@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] "TARGET_CMPB && TARGET_POPCNTB" - "prty<wd> %0,%1") + "prty<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "parity<mode>2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -4316,7 +4397,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -4348,7 +4429,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -5104,6 +5185,41 @@ "frsqrtes %0,%1" [(set_attr "type" "fp")]) +;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in +;; builtins.c and optabs.c that are not correct for IBM long double +;; when little-endian. +(define_expand "signbittf2" + [(set (match_dup 2) + (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (set (match_dup 3) + (subreg:DI (match_dup 2) 0)) + (set (match_dup 4) + (match_dup 5)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (match_dup 6))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64) + { + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63)); + operands[6] = gen_rtx_SUBREG (SImode, operands[4], + WORDS_BIG_ENDIAN ? 4 : 0); + } + else + { + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_rtx_SUBREG (SImode, operands[3], + WORDS_BIG_ENDIAN ? 0 : 4); + operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31)); + } +}) + (define_expand "copysign<mode>3" [(set (match_dup 3) (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) @@ -5553,12 +5669,15 @@ ; We don't define lfiwax/lfiwzx with the normal definition, because we ; don't want to support putting SImode in FPR registers. (define_insn "lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWAX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" - "lfiwax %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1" + [(set_attr "type" "fpload,fpload,mffgpr")]) ; This split must be run before register allocation because it allocates the ; memory slot that is needed to move values to/from the FPR. 
We don't allocate @@ -5580,7 +5699,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, false); else { @@ -5629,12 +5749,15 @@ (set_attr "type" "fpload")]) (define_insn "lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWZX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" - "lfiwzx %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1" + [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") @@ -5651,7 +5774,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, true); else { @@ -5942,7 +6066,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -6036,7 +6160,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -8285,6 +8409,18 @@ (compare:CC (match_dup 0) (const_int 0)))] "") + +;; Eqv operation. +(define_insn "*eqv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR + (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] + "" + "eqv %0,%1,%2" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + ;; Now define ways of moving data around. @@ -8490,7 +8626,7 @@ cmp<wd>i %2,%0,0 mr. 
%0,%1 #" - [(set_attr "type" "cmp,compare,cmp") + [(set_attr "type" "cmp,fast_compare,cmp") (set_attr "length" "4,4,8")]) (define_split @@ -8680,8 +8816,8 @@ }") (define_insn "mov<mode>_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -8694,6 +8830,10 @@ xxlxor %x0,%x0,%x0 <f32_li> <f32_si> + <f32_lv> + <f32_sv> + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 mt%0 %1 mf%1 %0 nop @@ -8732,16 +8872,20 @@ (match_test "update_address_mem (operands[0], VOIDmode)") (const_string "fpstore_u") (const_string "fpstore"))) + (const_string "fpload") + (const_string "fpstore") + (const_string "mftgpr") + (const_string "mffgpr") (const_string "mtjmpr") (const_string "mfjmpr") (const_string "*") (const_string "*") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")]) (define_insn "*mov<mode>_softfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") - (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))] + (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" @@ -8954,8 +9098,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov<mode>_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -8980,7 +9124,9 @@ # # mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9038,8 +9184,10 @@ (const_string "*") (const_string "*") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov<mode>_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") @@ -9154,8 +9302,8 @@ "&& reload_completed" [(pc)] { - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word), operands[1]); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word), @@ -9384,8 +9532,8 @@ && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -9419,6 +9567,216 @@ }) +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is setup for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependancy between the two, but we could potentially +;; schedule other instructions between the two instructions. TFmode is +;; currently limited to traditional FPR registers. If/when this is changed, we +;; will need to revist %L to make sure it works with VSX registers, or add an +;; %x version of %L. + +(define_insn "p8_fmrgow_<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%L1" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_mtvsrwz_1" + [(set (match_operand:TF 0 "register_operand" "=d") + (unspec:TF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrwz_2" + [(set (match_operand:TF 0 "register_operand" "+d") + (unspec:TF [(match_dup 0) + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=ws") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_<mode> (dest, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_1" + [(set (match_operand:TF 0 "register_operand" "=ws") + (unspec:TF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrd_2" + [(set (match_operand:TF 0 "register_operand" "+ws") + (unspec:TF [(match_dup 0) + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && 
TARGET_DIRECT_MOVE" + "mtvsrd %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_xxpermdi_<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")] + UNSPEC_P8V_XXPERMDI))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "xxpermdi %x0,%1,%L1,0" + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "reload_vsx_from_gpr<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=ws"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DImode, src); + rtx gpr_lo_reg = gen_lowpart (DImode, src); + + emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a VSX from a GPR register. Because scalar floating point +;; type is stored internally as double precision in the VSX registers, we have +;; to convert it from the vector format. + +(define_insn_and_split "reload_vsx_from_gprsf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0); + rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_move_insn (op0_di, op2); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "two")]) + +;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a +;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value, +;; and then doing a move of that. +(define_insn "p8_mfvsrd_3_<mode>" + [(set (match_operand:DF 0 "register_operand" "=r") + (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_gpr_from_vsx<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DFmode, dest); + rtx gpr_lo_reg = gen_lowpart (DFmode, dest); + + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src)); + emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3))); + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a GPR from a VSX register. 
Because scalar floating point +;; type is stored internally as double precision, we have to convert it to the +;; vector format. + +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Next come the multi-word integer load and store and the load and store ;; multiple insns. @@ -9467,7 +9825,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "") (match_operand:DI 1 "const_int_operand" ""))] "! TARGET_POWERPC64 && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 1))] " @@ -9485,13 +9844,14 @@ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") (match_operand:DIFD 1 "input_operand" ""))] "reload_completed && !TARGET_POWERPC64 - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -9513,7 +9873,9 @@ nop xxlxor %x0,%x0,%x0 mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9562,8 +9924,10 @@ (const_string "*") (const_string "vecsimple") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
@@ -9652,19 +10016,23 @@ (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r"))] - "(TARGET_POWERPC64 - && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode)) + [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") + (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" - "#" - [(set_attr "type" "store,load,*")]) +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "store,load,*,*") + (set_attr "length" "8")]) (define_split - [(set (match_operand:TI2 0 "gpc_reg_operand" "") + [(set (match_operand:TI2 0 "int_reg_operand" "") (match_operand:TI2 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (<MODE>mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9691,7 +10059,9 @@ [(set (match_operand:TI2 0 "nonimmediate_operand" "") (match_operand:TI2 1 "input_operand" ""))] "reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12554,8 +12924,8 @@ (match_dup 13)] { REAL_VALUE_TYPE rv; - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word); @@ -14788,7 +15158,7 @@ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] "TARGET_POPCNTD" "bpermd %0,%1,%2" - [(set_attr "type" "integer")]) + [(set_attr "type" "popcnt")]) ;; Builtin fma support. Handle @@ -14931,3 +15301,4 @@ (include "spe.md") (include "dfp.md") (include "paired.md") +(include "crypto.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 17b77629fa1..9a078198130 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -517,4 +517,28 @@ Control whether we save the TOC in the prologue for indirect calls or generate t mvsx-timode Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags) -; Allow/disallow TImode in VSX registers +Allow 128-bit integers in VSX registers + +mpower8-fusion +Target Report Mask(P8_FUSION) Var(rs6000_isa_flags) +Fuse certain integer operations together for better performance on power8 + +mpower8-fusion-sign +Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags) +Allow sign extension in fusion operations + +mpower8-vector +Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) +Use/do not use vector and scalar instructions added in ISA 2.07. + +mcrypto +Target Report Mask(CRYPTO) Var(rs6000_isa_flags) +Use ISA 2.07 crypto instructions + +mdirect-move +Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags) +Use ISA 2.07 direct move between GPR & VSX register instructions + +mquad-memory +Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) +Generate the quad word memory instructions (lq/stq/lqarx/stqcx). 
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md index cec2b430b82..bf10a5dc180 100644 --- a/gcc/config/rs6000/spe.md +++ b/gcc/config/rs6000/spe.md @@ -2604,8 +2604,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -2627,8 +2627,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md index 252e2690a98..8616b3eca5f 100644 --- a/gcc/config/rs6000/sync.md +++ b/gcc/config/rs6000/sync.md @@ -18,14 +18,23 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_mode_attr larx [(SI "lwarx") (DI "ldarx")]) -(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")]) +(define_mode_attr larx [(QI "lbarx") + (HI "lharx") + (SI "lwarx") + (DI "ldarx") + (TI "lqarx")]) + +(define_mode_attr stcx [(QI "stbcx.") + (HI "sthcx.") + (SI "stwcx.") + (DI "stdcx.") + (TI "stqcx.")]) (define_code_iterator FETCHOP [plus minus ior xor and]) (define_code_attr fetchop_name [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) (define_code_attr fetchop_pred - [(plus "add_operand") (minus "gpc_reg_operand") + [(plus "add_operand") (minus "int_reg_operand") (ior "logical_operand") (xor "logical_operand") (and "and_operand")]) (define_expand "mem_thread_fence" @@ -129,16 +138,7 @@ case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: case MEMMODEL_SEQ_CST: - if (GET_MODE (operands[0]) == QImode) - emit_insn (gen_loadsync_qi (operands[0])); - else if (GET_MODE (operands[0]) == HImode) - emit_insn (gen_loadsync_hi (operands[0])); - else if (GET_MODE (operands[0]) == SImode) - emit_insn (gen_loadsync_si (operands[0])); - else if (GET_MODE (operands[0]) == DImode) - emit_insn (gen_loadsync_di (operands[0])); - else - gcc_unreachable (); + emit_insn (gen_loadsync_<mode> (operands[0])); break; default: gcc_unreachable (); @@ -170,35 +170,109 @@ DONE; }) -;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve -;; opcode that is "phased-in". Not implemented as of Power7, so not yet used, -;; but let's prepare the macros anyway. +;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instrucitons +;; other than the quad memory operations, which have special restrictions. +;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased +;; in and did not show up until power8. TImode atomic lqarx/stqcx. require +;; special handling due to even/odd register requirements. 
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI") + (HI "TARGET_SYNC_HI_QI") + SI + (DI "TARGET_POWERPC64")]) + +;; Types that we should provide atomic instructions for. -(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")]) +(define_mode_iterator AINT [QI + HI + SI + (DI "TARGET_POWERPC64") + (TI "TARGET_SYNC_TI")]) (define_insn "load_locked<mode>" - [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r") + [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r") (unspec_volatile:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))] "" "<larx> %0,%y1" [(set_attr "type" "load_l")]) +(define_insn "load_locked<QHI:mode>_si" + [(set (match_operand:SI 0 "int_reg_operand" "=r") + (unspec_volatile:SI + [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_HI_QI" + "<QHI:larx> %0,%y1" + [(set_attr "type" "load_l")]) + +;; Use PTImode to get even/odd register pairs +(define_expand "load_lockedti" + [(use (match_operand:TI 0 "quad_int_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" ""))] + "TARGET_SYNC_TI" +{ + /* Use a temporary register to force getting an even register for the + lqarx/stqcrx. instructions. Normal optimizations will eliminate this + extra copy. */ + rtx pti = gen_reg_rtx (PTImode); + emit_insn (gen_load_lockedpti (pti, operands[1])); + emit_move_insn (operands[0], gen_lowpart (TImode, pti)); + DONE; +}) + +(define_insn "load_lockedpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec_volatile:PTI + [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1]) + && quad_int_reg_operand (operands[0], PTImode)" + "lqarx %0,%y1" + [(set_attr "type" "load_l")]) + (define_insn "store_conditional<mode>" [(set (match_operand:CC 0 "cc_reg_operand" "=x") (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) (set (match_operand:ATOMIC 1 "memory_operand" "=Z") - (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))] + (match_operand:ATOMIC 2 "int_reg_operand" "r"))] "" "<stcx> %2,%y1" [(set_attr "type" "store_c")]) +(define_expand "store_conditionalti" + [(use (match_operand:CC 0 "cc_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" "")) + (use (match_operand:TI 2 "quad_int_reg_operand" ""))] + "TARGET_SYNC_TI" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0)); + rtx pti_op2 = gen_reg_rtx (PTImode); + + /* Use a temporary register to force getting an even register for the + lqarx/stqcrx. instructions. Normal optimizations will eliminate this + extra copy. */ + emit_move_insn (pti_op2, gen_lowpart (PTImode, op2)); + emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2)); + DONE; +}) + +(define_insn "store_conditionalpti" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) + (set (match_operand:PTI 1 "memory_operand" "=Z") + (match_operand:PTI 2 "quad_int_reg_operand" "r"))] + "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)" + "stqcx. 
%2,%y1" + [(set_attr "type" "store_c")]) + (define_expand "atomic_compare_and_swap<mode>" - [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out - (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out - (match_operand:INT1 2 "memory_operand" "") ;; memory - (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected - (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired + [(match_operand:SI 0 "int_reg_operand" "") ;; bool out + (match_operand:AINT 1 "int_reg_operand" "") ;; val out + (match_operand:AINT 2 "memory_operand" "") ;; memory + (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected + (match_operand:AINT 4 "int_reg_operand" "") ;; desired (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; model succ (match_operand:SI 7 "const_int_operand" "")] ;; model fail @@ -209,9 +283,9 @@ }) (define_expand "atomic_exchange<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; input + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; input (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -220,9 +294,9 @@ }) (define_expand "atomic_<fetchop_name><mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 0) - (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 0) + (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand (match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -232,8 +306,8 @@ }) (define_expand "atomic_nand<mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "int_reg_operand" "") ;; operand (match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -243,10 +317,10 @@ }) (define_expand "atomic_fetch_<fetchop_name><mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -256,9 +330,9 @@ }) (define_expand "atomic_fetch_nand<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -268,10 +342,10 @@ }) (define_expand "atomic_<fetchop_name>_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 
"const_int_operand" "")] ;; model "" { @@ -281,9 +355,9 @@ }) (define_expand "atomic_nand_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux index 017a293cde3..62a5b941389 100644 --- a/gcc/config/rs6000/t-linux +++ b/gcc/config/rs6000/t-linux @@ -2,7 +2,7 @@ # or soft-float. ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float))) ifneq (,$(findstring spe,$(target))) -MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1) +MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1) else MULTIARCH_DIRNAME = powerpc-linux-gnu endif diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 52c18391556..5889d6d82d4 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -60,6 +60,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/power5.md \ $(srcdir)/config/rs6000/power6.md \ $(srcdir)/config/rs6000/power7.md \ + $(srcdir)/config/rs6000/power8.md \ $(srcdir)/config/rs6000/cell.md \ $(srcdir)/config/rs6000/xfpu.md \ $(srcdir)/config/rs6000/a2.md \ @@ -70,6 +71,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/vector.md \ $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ + $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/spe.md \ $(srcdir)/config/rs6000/dfp.md \ $(srcdir)/config/rs6000/paired.md diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index c1d00ca2a9b..6cfebdeebdc 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -24,13 +24,13 @@ ;; Vector int modes -(define_mode_iterator VEC_I [V16QI V8HI V4SI]) +(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) ;; Vector float modes (define_mode_iterator VEC_F [V4SF V2DF]) ;; Vector arithmetic modes -(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector modes that need alginment via permutes (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) @@ -45,7 +45,7 @@ (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF]) ;; Vector comparison modes -(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector init/extract modes (define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -126,7 +126,9 @@ (match_operand:VEC_L 1 "input_operand" ""))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); @@ -730,9 +732,10 @@ "") (define_expand "and<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] + [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" 
""))) + (clobber (match_scratch:CC 3 ""))])] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -746,8 +749,8 @@ (define_expand "nor<mode>3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" ""))))] + (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")) + (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -760,6 +763,47 @@ && (<MODE>mode != TImode || TARGET_POWERPC64)" "") +;; Power8 vector logical instructions. +(define_expand "eqv<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (not:VEC_L + (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "") + (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Rewrite nand into canonical form +(define_expand "nand<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; The canonical form is to have the negated elment first, so we need to +;; reverse arguments. +(define_expand "orc<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (match_operand:VEC_L 2 "register_operand" "")))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Vector count leading zeros +(define_expand "clz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + +;; Vector population count +(define_expand "popcount<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + + ;; Same size conversions (define_expand "float<VEC_int><mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -1074,7 +1118,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift left on each vector element @@ -1082,7 +1126,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for logical shift right on each vector element @@ -1090,7 +1134,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift right on each vector element @@ -1098,7 +1142,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Vector reduction expanders for VSX diff --git a/gcc/config/rs6000/vsx.md 
b/gcc/config/rs6000/vsx.md index 4adf6e5ac55..b87da826a95 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -36,6 +36,10 @@ ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +;; Like VSX_L, but don't support TImode for doing logical instructions in +;; 32-bit +(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF]) + ;; Iterator for memory move. Handle TImode specially to allow ;; it to use gprs as well as vsx registers. (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -191,6 +195,8 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVSPDPN + UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP UNSPEC_VSX_CVUXWDP UNSPEC_VSX_CVSXDSP @@ -207,112 +213,31 @@ ;; VSX moves (define_insn "*vsx_mov<mode>" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v") + (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" { - switch (which_alternative) - { - case 0: - case 3: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stx<VSm>x %x1,%y0"; - - case 1: - case 4: - gcc_assert (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) != PRE_INC - && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY); - return "lx<VSm>x %x0,%y1"; - - case 2: - case 5: - return "xxlor %x0,%x1,%x1"; - - case 6: - case 7: - case 8: - case 11: - return "#"; - - case 9: - case 10: - return "xxlxor %x0,%x0,%x0"; - - case 12: - return output_vec_const_move (operands); - - case 13: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stvx %1,%y0"; - - case 14: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "lvx %0,%y1"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal ;; use of TImode is for unions. 
However for plain data movement, slightly ;; favor the vector loads (define_insn "*vsx_movti_64bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r") - (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") + (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" { - switch (which_alternative) - { - case 0: - return "stxvd2x %x1,%y0"; - - case 1: - return "lxvd2x %x0,%y1"; - - case 2: - return "xxlor %x0,%x1,%x1"; - - case 3: - return "xxlxor %x0,%x0,%x0"; - - case 4: - return output_vec_const_move (operands); - - case 5: - return "stvx %1,%y0"; - - case 6: - return "lvx %0,%y1"; - - case 7: - case 8: - case 9: - case 10: - return "#"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*") - (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") + (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) (define_insn "*vsx_movti_32bit" [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r") @@ -1003,6 +928,40 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs +(define_insn "vsx_xscvdpspn" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa") + (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvspdpn" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvdpspn_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Used by direct move to move a SFmode value from GPR to VSX register +(define_insn "vsx_xscvspdpn_directmove" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1088,70 +1047,368 @@ (set_attr "fp_type" "<VSfptype_simple>")]) -;; Logical operations -;; Do not support TImode logical instructions on 32-bit at present, because the -;; compiler will see that we have a TImode and when it wanted DImode, and -;; convert the DImode to TImode, store it on the stack, and load it in a VSX -;; register. 
-(define_insn "*vsx_and<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (and:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +;; Logical operations. Do not support TImode logical instructions on 32-bit at +;; present, because the compiler will see that we have a TImode and when it +;; wanted DImode, and convert the DImode to TImode, store it on the stack, and +;; load it in a VSX register or generate extra logical instructions in GPR +;; registers. + +;; When we are splitting the operations to GPRs, we use three alternatives, two +;; where the first/second inputs and output are in the same register, and the +;; third where the output specifies an early clobber so that we don't have to +;; worry about overlapping registers. + +(define_insn "*vsx_and<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))) + (clobber (match_scratch:CC 3 "X"))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxland %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_ior<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_and<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))) + (clobber (match_scratch:CC 3 "X,X,X,X"))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxland %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6))) + (clobber (match_dup 3))]) + (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9))) + (clobber (match_dup 3))])] +{ + operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_ior<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_ior<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (ior:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && 
VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); -(define_insn "*vsx_xor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (xor:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" + if (operands[5] == constm1_rtx) + emit_move_insn (operands[3], constm1_rtx); + + else if (operands[5] == const0_rtx) + { + if (!rtx_equal_p (operands[3], operands[4])) + emit_move_insn (operands[3], operands[4]); + } + else + emit_insn (gen_iordi3 (operands[3], operands[4], operands[5])); + + if (operands[8] == constm1_rtx) + emit_move_insn (operands[8], constm1_rtx); + + else if (operands[8] == const0_rtx) + { + if (!rtx_equal_p (operands[6], operands[7])) + emit_move_insn (operands[6], operands[7]); + } + else + emit_insn (gen_iordi3 (operands[6], operands[7], operands[8])); + DONE; +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_xor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64" "xxlxor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_one_cmpl<mode>2" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_xor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (xor:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlxor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5))) + (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_one_cmpl<mode>2_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 
(match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x1" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r") + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x1 + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 2) (not:DI (match_dup 3))) + (set (match_dup 4) (not:DI (match_dup 5)))] +{ + operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two") + (set_attr "length" "4,8,8")]) -(define_insn "*vsx_nor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (ior:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn "*vsx_nor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L 1 "vlogical_operand" "%wa")) + (not:VSX_L2 (match_operand:VSX_L 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_nor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_andc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 + (match_operand:VSX_L2 2 "vlogical_operand" "wa")) + (match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_andc<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") +(define_insn_and_split "*vsx_andc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") 
(and:VSX_L (not:VSX_L - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")) - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" - "xxlandc %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlandc %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Power8 vector logical instructions. +(define_insn "*vsx_eqv<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxleqv %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_eqv<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") + (not:VSX_L + (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxleqv %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5)))) + (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite nand into canonical form +(define_insn "*vsx_nand<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlnand %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_nand<mode>3_64bit" + [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "register_operand" 
"wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnand %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite or complement into canonical form, by reversing the arguments +(define_insn "*vsx_orc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlorc %x0,%x2,%x1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_orc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlorc %x0,%x2,%x1 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) ;; Permute operations diff --git a/gcc/config/rx/rx-opts.h b/gcc/config/rx/rx-opts.h index f00de76a901..4d5455e8d8d 100644 --- a/gcc/config/rx/rx-opts.h +++ b/gcc/config/rx/rx-opts.h @@ -24,7 +24,8 @@ enum rx_cpu_types { RX600, RX610, - RX200 + RX200, + RX100 }; #endif diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index 15d5359ea1d..d781bb73d28 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -975,6 +975,8 @@ rx_gen_move_template (rtx * operands, bool is_movu) loading an immediate into a register. 
*/ extension = ".W"; break; + case DFmode: + case DImode: case SFmode: case SImode: extension = ".L"; @@ -988,19 +990,44 @@ rx_gen_move_template (rtx * operands, bool is_movu) } if (MEM_P (src) && rx_pid_data_operand (XEXP (src, 0)) == PID_UNENCODED) - src_template = "(%A1-__pid_base)[%P1]"; + { + gcc_assert (GET_MODE (src) != DImode); + gcc_assert (GET_MODE (src) != DFmode); + + src_template = "(%A1 - __pid_base)[%P1]"; + } else if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0))) - src_template = "%%gp(%A1)[%G1]"; + { + gcc_assert (GET_MODE (src) != DImode); + gcc_assert (GET_MODE (src) != DFmode); + + src_template = "%%gp(%A1)[%G1]"; + } else src_template = "%1"; if (MEM_P (dest) && rx_small_data_operand (XEXP (dest, 0))) - dst_template = "%%gp(%A0)[%G0]"; + { + gcc_assert (GET_MODE (dest) != DImode); + gcc_assert (GET_MODE (dest) != DFmode); + + dst_template = "%%gp(%A0)[%G0]"; + } else dst_template = "%0"; - sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov", - extension, src_template, dst_template); + if (GET_MODE (dest) == DImode || GET_MODE (dest) == DFmode) + { + gcc_assert (! is_movu); + + if (REG_P (src) && REG_P (dest) && (REGNO (dest) == REGNO (src) + 1)) + sprintf (out_template, "mov.L\t%H1, %H0 | mov.L\t%1, %0"); + else + sprintf (out_template, "mov.L\t%1, %0 | mov.L\t%H1, %H0"); + } + else + sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov", + extension, src_template, dst_template); return out_template; } @@ -3240,6 +3267,12 @@ rx_ok_to_inline (tree caller, tree callee) || lookup_attribute ("gnu_inline", DECL_ATTRIBUTES (callee)) != NULL_TREE; } +static bool +rx_enable_lra (void) +{ + return TARGET_ENABLE_LRA || 1; +} + #undef TARGET_NARROW_VOLATILE_BITFIELD #define TARGET_NARROW_VOLATILE_BITFIELD rx_narrow_volatile_bitfield @@ -3391,6 +3424,9 @@ rx_ok_to_inline (tree caller, tree callee) #undef TARGET_WARN_FUNC_RETURN #define TARGET_WARN_FUNC_RETURN rx_warn_func_return +#undef TARGET_LRA_P +#define TARGET_LRA_P rx_enable_lra + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rx.h" diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 092fd7659a2..72aee2fe214 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -29,9 +29,22 @@ builtin_define ("__RX610__"); \ builtin_assert ("machine=RX610"); \ } \ - else \ - builtin_assert ("machine=RX600"); \ - \ + else if (rx_cpu_type == RX100) \ + { \ + builtin_define ("__RX100__"); \ + builtin_assert ("machine=RX100"); \ + } \ + else if (rx_cpu_type == RX200) \ + { \ + builtin_define ("__RX200__"); \ + builtin_assert ("machine=RX200"); \ + } \ + else if (rx_cpu_type == RX600) \ + { \ + builtin_define ("__RX600__"); \ + builtin_assert ("machine=RX600"); \ + } \ + \ if (TARGET_BIG_ENDIAN_DATA) \ builtin_define ("__RX_BIG_ENDIAN__"); \ else \ @@ -60,6 +73,7 @@ #undef CC1_SPEC #define CC1_SPEC "\ %{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}} \ + %{mcpu=rx100:%{fpu:%erx100 cpu does not have FPU hardware}} \ %{mcpu=rx200:%{fpu:%erx200 cpu does not have FPU hardware}}" #undef STARTFILE_SPEC diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 3a95567a43f..692b7d220a3 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -30,7 +30,7 @@ ;; then all operations on doubles have to be handled by ;; library functions. 
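As a usage sketch for the 64-bit support this RX change introduces (the DImode/DFmode handling added to rx_gen_move_template above and the movdi/movdf patterns added at the end of this rx.md hunk), the C below is illustrative only; the expectation of a mov.L pair comes from reading the template code, not from verified compiler output.

/* Plain 64-bit copies; rx_gen_move_template is expected to emit each as
   two mov.L instructions, ordering the pair to avoid clobbering an
   overlapping source register.  */
void
copy_ll (long long *dst, const long long *src)
{
  *dst = *src;
}

void
copy_d (double *dst, const double *src)
{
  *dst = *src;
}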
(define_mode_iterator register_modes - [(SF "ALLOW_RX_FPU_INSNS") (SI "") (HI "") (QI "")]) + [(SF "") (SI "") (HI "") (QI "")]) (define_constants [ @@ -2621,3 +2621,21 @@ "" "" ) + +(define_insn "movdi" + [(set:DI (match_operand:DI 0 "nonimmediate_operand" "=rm") + (match_operand:DI 1 "general_operand" "rmi"))] + "TARGET_ENABLE_LRA || 1" + { return rx_gen_move_template (operands, false); } + [(set_attr "length" "16") + (set_attr "timings" "22")] +) + +(define_insn "movdf" + [(set:DF (match_operand:DF 0 "nonimmediate_operand" "=rm") + (match_operand:DF 1 "general_operand" "rmi"))] + "TARGET_ENABLE_LRA || 1" + { return rx_gen_move_template (operands, false); } + [(set_attr "length" "16") + (set_attr "timings" "22")] +) diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt index 09d93c3e5f1..12312cfef6b 100644 --- a/gcc/config/rx/rx.opt +++ b/gcc/config/rx/rx.opt @@ -61,6 +61,9 @@ Enum(rx_cpu_types) String(rx200) Value(RX200) EnumValue Enum(rx_cpu_types) String(rx600) Value(RX600) +EnumValue +Enum(rx_cpu_types) String(rx100) Value(RX100) + ;--------------------------------------------------- mbig-endian-data @@ -132,3 +135,7 @@ Enable the use of the old, broken, ABI where all stacked function arguments are mrx-abi Target RejectNegative Report InverseMask(GCC_ABI) Enable the use the standard RX ABI where all stacked function arguments are naturally aligned. This is the default. + +mlra +Target Report Mask(ENABLE_LRA) +Enable the use of the LRA register allocator. diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx index 97079859240..41a3d3a98dc 100644 --- a/gcc/config/rx/t-rx +++ b/gcc/config/rx/t-rx @@ -28,7 +28,7 @@ MULTILIB_DIRNAMES = 64-bit-double no-fpu-libs big-endian-data pid # MULTILIB_OPTIONS += mgcc-abi # MULTILIB_DIRNAMES += gcc-abi -MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 +MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 nofpu=mcpu?rx100 MULTILIB_EXCEPTIONS = MULTILIB_EXTRA_OPTS = diff --git a/gcc/config/s390/htmintrin.h b/gcc/config/s390/htmintrin.h new file mode 100644 index 00000000000..7aaa9f5bf7c --- /dev/null +++ b/gcc/config/s390/htmintrin.h @@ -0,0 +1,57 @@ +/* GNU compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _HTMINTRIN_H +#define _HTMINTRIN_H + + +/* Condition codes generated by tbegin */ +#define _HTM_TBEGIN_STARTED 0 +#define _HTM_TBEGIN_INDETERMINATE 1 +#define _HTM_TBEGIN_TRANSIENT 2 +#define _HTM_TBEGIN_PERSISTENT 3 + +/* The abort codes below this threshold are reserved for machine + use. */ +#define _HTM_FIRST_USER_ABORT_CODE 256 + +/* The transaction diagnostic block is it is defined in the Principles + of Operation chapter 5-91. 
*/ + +struct __htm_tdb { + unsigned char format; /* 0 */ + unsigned char flags; + unsigned char reserved1[4]; + unsigned short nesting_depth; + unsigned long long abort_code; /* 8 */ + unsigned long long conflict_token; /* 16 */ + unsigned long long atia; /* 24 */ + unsigned char eaid; /* 32 */ + unsigned char dxc; + unsigned char reserved2[2]; + unsigned int program_int_id; + unsigned long long exception_id; /* 40 */ + unsigned long long bea; /* 48 */ + unsigned char reserved3[72]; /* 56 */ + unsigned long long gprs[16]; /* 128 */ +} __attribute__((__packed__, __aligned__ (8))); + + +#endif /* _HTMINTRIN_H */ diff --git a/gcc/config/s390/htmxlintrin.h b/gcc/config/s390/htmxlintrin.h new file mode 100644 index 00000000000..bb142195b2b --- /dev/null +++ b/gcc/config/s390/htmxlintrin.h @@ -0,0 +1,182 @@ +/* XL compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _HTMXLINTRIN_H +#define _HTMXLINTRIN_H + +#include <stdint.h> + +#include <htmintrin.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* These intrinsics are being made available for compatibility with + the IBM XL compiler. For documentation please see the "z/OS XL + C/C++ Programming Guide" publically available on the web. 
*/ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_simple_begin () +{ + return __builtin_tbegin_nofloat (0); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_begin (void* const tdb) +{ + return __builtin_tbegin_nofloat (tdb); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_end () +{ + return __builtin_tend (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_abort () +{ + return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_named_abort (unsigned char const code) +{ + return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_non_transactional_store (void* const addr, long long const value) +{ + __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_nesting_depth (void* const tdb_ptr) +{ + int depth = __builtin_tx_nesting_depth (); + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (depth != 0) + return depth; + + if (tdb->format == 0) + return 0; + return tdb->nesting_depth; +} + +/* Transaction failure diagnostics */ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_user_abort (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) + { + *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; + return 1; + } + return 0; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_illegal (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 4 /* unfiltered program interruption */ + || tdb->abort_code == 11 /* restricted instruction */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_footprint_exceeded (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 7 /* fetch overflow */ + || tdb->abort_code == 8 /* store overflow */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_nested_too_deep (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */ +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_conflict (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 9 /* fetch conflict */ + || tdb->abort_code == 10 /* store conflict */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_failure_persistent (long const result) 
+{ + return result == _HTM_TBEGIN_PERSISTENT; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_address (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; +#ifdef __s390x__ + return tdb->atia; +#else + return tdb->atia & 0xffffffff; +#endif +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_code (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->abort_code; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _HTMXLINTRIN_H */ diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 523326e177d..069b42489a7 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -176,7 +176,11 @@ { if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; return (s390_branch_condition_mask (op) >= 0); @@ -224,7 +228,11 @@ if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; switch (GET_MODE (XEXP (op, 0))) diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def index 419108fb473..5e0b50cafa1 100644 --- a/gcc/config/s390/s390-modes.def +++ b/gcc/config/s390/s390-modes.def @@ -152,6 +152,14 @@ The compare and swap instructions sets the condition code to 0/1 if the operands were equal/unequal. The CCZ1 mode ensures the result can be effectively placed into a register. +CCRAW + +The cc mode generated by a non-compare instruction. The condition +code mask for the CC consumer is determined by the comparison operator +(only EQ and NE allowed) and the immediate value given as second +operand to the operator. For the other CC modes this value used to be +0. 
+ */ @@ -172,3 +180,4 @@ CC_MODE (CCT); CC_MODE (CCT1); CC_MODE (CCT2); CC_MODE (CCT3); +CC_MODE (CCRAW); diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 1a8205359e4..67283df4553 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -58,7 +58,7 @@ extern bool s390_match_ccmode (rtx, enum machine_mode); extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool); extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx); extern rtx s390_emit_compare (enum rtx_code, rtx, rtx); -extern void s390_emit_jump (rtx, rtx); +extern rtx s390_emit_jump (rtx, rtx); extern bool symbolic_reference_mentioned_p (rtx); extern bool tls_symbolic_reference_mentioned_p (rtx); extern bool legitimate_la_operand_p (rtx); @@ -87,6 +87,7 @@ extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic (enum machine_mode, enum rtx_code, rtx, rtx, rtx, bool); +extern void s390_expand_tbegin (rtx, rtx, rtx, bool); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); extern rtx s390_emit_call (rtx, rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 30c34901f8d..2cacf6f52ad 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -367,6 +367,10 @@ struct GTY(()) machine_function const char *some_ld_name; bool has_landing_pad_p; + + /* True if the current function may contain a tbegin clobbering + FPRs. */ + bool tbegin_p; }; /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ @@ -824,9 +828,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, *op1 = constm1_rtx; } - /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCU_TO_INT + && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT && XVECLEN (*op0, 0) == 1 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG @@ -852,25 +856,35 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } } - /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT + && XINT (*op0, 1) == UNSPEC_CC_TO_INT && XVECLEN (*op0, 0) == 1 - && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM - && *op1 == const0_rtx) + && CONST_INT_P (*op1)) { enum rtx_code new_code = UNKNOWN; - switch (*code) + switch (GET_MODE (XVECEXP (*op0, 0, 0))) { - case EQ: new_code = EQ; break; - case NE: new_code = NE; break; - default: break; + case CCZmode: + case CCRAWmode: + switch (*code) + { + case EQ: new_code = EQ; break; + case NE: new_code = NE; break; + default: break; + } + break; + default: break; } if (new_code != UNKNOWN) { + /* For CCRAWmode put the required cc mask into the second + operand. */ + if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode) + *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1))); *op0 = XVECEXP (*op0, 0, 0); *code = new_code; } @@ -942,10 +956,11 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, const0_rtx); } -/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an - unconditional jump, else a conditional jump under condition COND. */ +/* Emit a jump instruction to TARGET and return it. 
If COND is + NULL_RTX, emit an unconditional jump, else a conditional jump under + condition COND. */ -void +rtx s390_emit_jump (rtx target, rtx cond) { rtx insn; @@ -955,7 +970,7 @@ s390_emit_jump (rtx target, rtx cond) target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx); insn = gen_rtx_SET (VOIDmode, pc_rtx, target); - emit_jump_insn (insn); + return emit_jump_insn (insn); } /* Return branch condition mask to implement a branch @@ -971,7 +986,10 @@ s390_branch_condition_mask (rtx code) gcc_assert (GET_CODE (XEXP (code, 0)) == REG); gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM); - gcc_assert (XEXP (code, 1) == const0_rtx); + gcc_assert (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1)))); + switch (GET_MODE (XEXP (code, 0))) { @@ -1145,6 +1163,17 @@ s390_branch_condition_mask (rtx code) } break; + case CCRAWmode: + switch (GET_CODE (code)) + { + case EQ: + return INTVAL (XEXP (code, 1)); + case NE: + return (INTVAL (XEXP (code, 1))) ^ 0xf; + default: + gcc_unreachable (); + } + default: return -1; } @@ -1204,7 +1233,9 @@ s390_branch_condition_mnemonic (rtx code, int inv) if (GET_CODE (XEXP (code, 0)) == REG && REGNO (XEXP (code, 0)) == CC_REGNUM - && XEXP (code, 1) == const0_rtx) + && (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1))))) mask = s390_branch_condition_mask (code); else mask = s390_compare_and_branch_condition_mask (code); @@ -1602,6 +1633,11 @@ s390_option_override (void) if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP) target_flags |= MASK_HARD_DFP; + /* Enable hardware transactions if available and not explicitly + disabled by user. E.g. with -m31 -march=zEC12 -mzarch */ + if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) + target_flags |= MASK_OPT_HTM; + if (TARGET_HARD_DFP && !TARGET_DFP) { if (target_flags_explicit & MASK_HARD_DFP) @@ -2017,14 +2053,18 @@ s390_decompose_address (rtx addr, struct s390_address *out) Thus we don't check the displacement for validity here. If after elimination the displacement turns out to be invalid after all, this is fixed up by reload in any case. */ - if (base != arg_pointer_rtx - && indx != arg_pointer_rtx - && base != return_address_pointer_rtx - && indx != return_address_pointer_rtx - && base != frame_pointer_rtx - && indx != frame_pointer_rtx - && base != virtual_stack_vars_rtx - && indx != virtual_stack_vars_rtx) + /* LRA maintains always displacements up to date and we need to + know the displacement is right during all LRA not only at the + final elimination. */ + if (lra_in_progress + || (base != arg_pointer_rtx + && indx != arg_pointer_rtx + && base != return_address_pointer_rtx + && indx != return_address_pointer_rtx + && base != frame_pointer_rtx + && indx != frame_pointer_rtx + && base != virtual_stack_vars_rtx + && indx != virtual_stack_vars_rtx)) if (!DISP_IN_RANGE (offset)) return false; } @@ -3189,7 +3229,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, /* We need a scratch register when loading a PLUS expression which is not a legitimate operand of the LOAD ADDRESS instruction. */ - if (in_p && s390_plus_operand (x, mode)) + /* LRA can deal with transformation of plus op very well -- so we + don't need to prompt LRA in this case. */ + if (! lra_in_progress && in_p && s390_plus_operand (x, mode)) sri->icode = (TARGET_64BIT ? 
CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus); @@ -7017,7 +7059,7 @@ s390_chunkify_start (void) if (LABEL_P (insn) && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn))) { - rtx vec_insn = next_real_insn (insn); + rtx vec_insn = NEXT_INSN (insn); if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn)) bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn)); } @@ -7027,6 +7069,8 @@ s390_chunkify_start (void) else if (JUMP_P (insn)) { rtx pat = PATTERN (insn); + rtx table; + if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2) pat = XVECEXP (pat, 0, 0); @@ -7040,28 +7084,18 @@ s390_chunkify_start (void) bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); } } - else if (GET_CODE (pat) == PARALLEL - && XVECLEN (pat, 0) == 2 - && GET_CODE (XVECEXP (pat, 0, 0)) == SET - && GET_CODE (XVECEXP (pat, 0, 1)) == USE - && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF) - { - /* Find the jump table used by this casesi jump. */ - rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0); - rtx vec_insn = next_real_insn (vec_label); - if (vec_insn && JUMP_TABLE_DATA_P (vec_insn)) - { - rtx vec_pat = PATTERN (vec_insn); - int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC; - - for (i = 0; i < XVECLEN (vec_pat, diff_p); i++) - { - rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0); - - if (s390_find_pool (pool_list, label) - != s390_find_pool (pool_list, insn)) - bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); - } + else if (tablejump_p (insn, NULL, &table)) + { + rtx vec_pat = PATTERN (table); + int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC; + + for (i = 0; i < XVECLEN (vec_pat, diff_p); i++) + { + rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0); + + if (s390_find_pool (pool_list, label) + != s390_find_pool (pool_list, insn)) + bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); } } } @@ -7336,11 +7370,11 @@ s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *d if (GET_CODE (setreg) == SUBREG) { rtx inner = SUBREG_REG (setreg); - if (!GENERAL_REG_P (inner)) + if (!GENERAL_REG_P (inner) && !FP_REG_P (inner)) return; regno = subreg_regno (setreg); } - else if (GENERAL_REG_P (setreg)) + else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg)) regno = REGNO (setreg); else return; @@ -7363,13 +7397,13 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) rtx cur_insn; unsigned int i; - memset (regs_ever_clobbered, 0, 16 * sizeof (int)); + memset (regs_ever_clobbered, 0, 32 * sizeof (int)); /* For non-leaf functions we have to consider all call clobbered regs to be clobbered. */ if (!crtl->is_leaf) { - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) regs_ever_clobbered[i] = call_really_used_regs[i]; } @@ -7391,7 +7425,7 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) See expand_builtin_unwind_init. For regs_ever_live this is done by reload. */ if (cfun->has_nonlocal_label) - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) if (!call_really_used_regs[i]) regs_ever_clobbered[i] = 1; @@ -7457,17 +7491,6 @@ s390_register_info (int clobbered_regs[]) { int i, j; - /* fprs 8 - 15 are call saved for 64 Bit ABI. */ - cfun_frame_layout.fpr_bitmap = 0; - cfun_frame_layout.high_fprs = 0; - if (TARGET_64BIT) - for (i = 24; i < 32; i++) - if (df_regs_ever_live_p (i) && !global_regs[i]) - { - cfun_set_fpr_bit (i - 16); - cfun_frame_layout.high_fprs++; - } - /* Find first and last gpr to be saved. We trust regs_ever_live data, except that we don't save and restore global registers. 
@@ -7476,6 +7499,29 @@ s390_register_info (int clobbered_regs[]) s390_regs_ever_clobbered (clobbered_regs); + /* fprs 8 - 15 are call saved for 64 Bit ABI. */ + if (!epilogue_completed) + { + cfun_frame_layout.fpr_bitmap = 0; + cfun_frame_layout.high_fprs = 0; + if (TARGET_64BIT) + for (i = 24; i < 32; i++) + /* During reload we have to use the df_regs_ever_live infos + since reload is marking FPRs used as spill slots there as + live before actually making the code changes. Without + this we fail during elimination offset verification. */ + if ((clobbered_regs[i] + || (df_regs_ever_live_p (i) + && (lra_in_progress + || reload_in_progress + || crtl->saves_all_registers))) + && !global_regs[i]) + { + cfun_set_fpr_bit (i - 16); + cfun_frame_layout.high_fprs++; + } + } + for (i = 0; i < 16; i++) clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i]; @@ -7726,7 +7772,7 @@ s390_init_frame_layout (void) { HOST_WIDE_INT frame_size; int base_used; - int clobbered_regs[16]; + int clobbered_regs[32]; /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no @@ -7761,6 +7807,157 @@ s390_init_frame_layout (void) while (frame_size != cfun_frame_layout.frame_size); } +/* Remove the FPR clobbers from a tbegin insn if it can be proven that + the TX is nonescaping. A transaction is considered escaping if + there is at least one path from tbegin returning CC0 to the + function exit block without an tend. + + The check so far has some limitations: + - only single tbegin/tend BBs are supported + - the first cond jump after tbegin must separate the CC0 path from ~CC0 + - when CC is copied to a GPR and the CC0 check is done with the GPR + this is not supported +*/ + +static void +s390_optimize_nonescaping_tx (void) +{ + const unsigned int CC0 = 1 << 3; + basic_block tbegin_bb = NULL; + basic_block tend_bb = NULL; + basic_block bb; + rtx insn; + bool result = true; + int bb_index; + rtx tbegin_insn = NULL_RTX; + + if (!cfun->machine->tbegin_p) + return; + + for (bb_index = 0; bb_index < n_basic_blocks; bb_index++) + { + bb = BASIC_BLOCK (bb_index); + + FOR_BB_INSNS (bb, insn) + { + rtx ite, cc, pat, target; + unsigned HOST_WIDE_INT mask; + + if (!INSN_P (insn) || INSN_CODE (insn) <= 0) + continue; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) != SET + || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE) + continue; + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN) + { + rtx tmp; + + tbegin_insn = insn; + + /* Just return if the tbegin doesn't have clobbers. */ + if (GET_CODE (PATTERN (insn)) != PARALLEL) + return; + + if (tbegin_bb != NULL) + return; + + /* Find the next conditional jump. 
*/ + for (tmp = NEXT_INSN (insn); + tmp != NULL_RTX; + tmp = NEXT_INSN (tmp)) + { + if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp)) + return; + if (!JUMP_P (tmp)) + continue; + + ite = SET_SRC (PATTERN (tmp)); + if (GET_CODE (ite) != IF_THEN_ELSE) + continue; + + cc = XEXP (XEXP (ite, 0), 0); + if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)) + || GET_MODE (cc) != CCRAWmode + || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT) + return; + + if (bb->succs->length () != 2) + return; + + mask = INTVAL (XEXP (XEXP (ite, 0), 1)); + if (GET_CODE (XEXP (ite, 0)) == NE) + mask ^= 0xf; + + if (mask == CC0) + target = XEXP (ite, 1); + else if (mask == (CC0 ^ 0xf)) + target = XEXP (ite, 2); + else + return; + + { + edge_iterator ei; + edge e1, e2; + + ei = ei_start (bb->succs); + e1 = ei_safe_edge (ei); + ei_next (&ei); + e2 = ei_safe_edge (ei); + + if (e2->flags & EDGE_FALLTHRU) + { + e2 = e1; + e1 = ei_safe_edge (ei); + } + + if (!(e1->flags & EDGE_FALLTHRU)) + return; + + tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest; + } + if (tmp == BB_END (bb)) + break; + } + } + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND) + { + if (tend_bb != NULL) + return; + tend_bb = bb; + } + } + } + + /* Either we successfully remove the FPR clobbers here or we are not + able to do anything for this TX. Both cases don't qualify for + another look. */ + cfun->machine->tbegin_p = false; + + if (tbegin_bb == NULL || tend_bb == NULL) + return; + + calculate_dominance_info (CDI_POST_DOMINATORS); + result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb); + free_dominance_info (CDI_POST_DOMINATORS); + + if (!result) + return; + + PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0); + INSN_CODE (tbegin_insn) = -1; + df_insn_rescan (tbegin_insn); + + return; +} + /* Update frame layout. Recompute actual register save data based on current info and update regs_ever_live for the special registers. May be called multiple times, but may never cause *more* registers @@ -7769,7 +7966,7 @@ s390_init_frame_layout (void) static void s390_update_frame_layout (void) { - int clobbered_regs[16]; + int clobbered_regs[32]; s390_register_info (clobbered_regs); @@ -7868,6 +8065,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode) return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; } +/* Return true if we use LRA instead of reload pass. */ +static bool +s390_lra_p (void) +{ + return s390_lra_flag; +} + /* Return true if register FROM can be eliminated via register TO. */ static bool @@ -8199,8 +8403,10 @@ s390_emit_prologue (void) int offset; int next_fpr = 0; - /* Complete frame layout. */ + /* Try to get rid of the FPR clobbers. */ + s390_optimize_nonescaping_tx (); + /* Complete frame layout. */ s390_update_frame_layout (); /* Annotate all constant pool references to let the scheduler know @@ -9348,6 +9554,294 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, return build_va_arg_indirect_ref (addr); } +/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) + expanders. + DEST - Register location where CC will be stored. + TDB - Pointer to a 256 byte area where to store the transaction. + diagnostic block. NULL if TDB is not needed. + RETRY - Retry count value. If non-NULL a retry loop for CC2 + is emitted + CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part + of the tbegin instruction pattern. 
*/ + +void +s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) +{ + const int CC0 = 1 << 3; + const int CC1 = 1 << 2; + const int CC3 = 1 << 0; + rtx abort_label = gen_label_rtx (); + rtx leave_label = gen_label_rtx (); + rtx retry_reg = gen_reg_rtx (SImode); + rtx retry_label = NULL_RTX; + rtx jump; + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + + if (retry != NULL_RTX) + { + emit_move_insn (retry_reg, retry); + retry_label = gen_label_rtx (); + emit_label (retry_label); + } + + if (clobber_fprs_p) + emit_insn (gen_tbegin_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + else + emit_insn (gen_tbegin_nofloat_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + + jump = s390_emit_jump (abort_label, + gen_rtx_NE (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC0))); + + JUMP_LABEL (jump) = abort_label; + LABEL_NUSES (abort_label) = 1; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* Initialize CC return value. */ + emit_move_insn (dest, const0_rtx); + + s390_emit_jump (leave_label, NULL_RTX); + LABEL_NUSES (leave_label) = 1; + emit_barrier (); + + /* Abort handler code. */ + + emit_label (abort_label); + if (retry != NULL_RTX) + { + rtx count = gen_reg_rtx (SImode); + jump = s390_emit_jump (leave_label, + gen_rtx_EQ (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC1 | CC3))); + LABEL_NUSES (leave_label) = 2; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* CC2 - transient failure. Perform retry with ppa. */ + emit_move_insn (count, retry); + emit_insn (gen_subsi3 (count, count, retry_reg)); + emit_insn (gen_tx_assist (count)); + jump = emit_jump_insn (gen_doloop_si64 (retry_label, + retry_reg, + retry_reg)); + JUMP_LABEL (jump) = retry_label; + LABEL_NUSES (retry_label) = 1; + } + + emit_move_insn (dest, gen_rtx_UNSPEC (SImode, + gen_rtvec (1, gen_rtx_REG (CCRAWmode, + CC_REGNUM)), + UNSPEC_CC_TO_INT)); + emit_label (leave_label); +} + +/* Builtins. 
*/ + +enum s390_builtin +{ + S390_BUILTIN_TBEGIN, + S390_BUILTIN_TBEGIN_NOFLOAT, + S390_BUILTIN_TBEGIN_RETRY, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + S390_BUILTIN_TBEGINC, + S390_BUILTIN_TEND, + S390_BUILTIN_TABORT, + S390_BUILTIN_NON_TX_STORE, + S390_BUILTIN_TX_NESTING_DEPTH, + S390_BUILTIN_TX_ASSIST, + + S390_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[S390_BUILTIN_max] = { + CODE_FOR_tbegin, + CODE_FOR_tbegin_nofloat, + CODE_FOR_tbegin_retry, + CODE_FOR_tbegin_retry_nofloat, + CODE_FOR_tbeginc, + CODE_FOR_tend, + CODE_FOR_tabort, + CODE_FOR_ntstg, + CODE_FOR_etnd, + CODE_FOR_tx_assist +}; + +static void +s390_init_builtins (void) +{ + tree ftype, uint64_type; + + /* void foo (void) */ + ftype = build_function_type_list (void_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, + BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (int) */ + ftype = build_function_type_list (void_type_node, integer_type_node, + NULL_TREE); + add_builtin_function ("__builtin_tabort", ftype, + S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tx_assist", ftype, + S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_nofloat", ftype, + S390_BUILTIN_TBEGIN_NOFLOAT, + BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *, int) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, + integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry", ftype, + S390_BUILTIN_TBEGIN_RETRY, + BUILT_IN_MD, + NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + BUILT_IN_MD, + NULL, NULL_TREE); + + /* int foo (void) */ + ftype = build_function_type_list (integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tx_nesting_depth", ftype, + S390_BUILTIN_TX_NESTING_DEPTH, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tend", ftype, + S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (uint64_t *, uint64_t) */ + if (TARGET_64BIT) + uint64_type = long_unsigned_type_node; + else + uint64_type = long_long_unsigned_type_node; + + ftype = build_function_type_list (void_type_node, + build_pointer_type (uint64_type), + uint64_type, NULL_TREE); + add_builtin_function ("__builtin_non_tx_store", ftype, + S390_BUILTIN_NON_TX_STORE, + BUILT_IN_MD, NULL, NULL_TREE); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + tree arg; + call_expr_arg_iterator iter; + + if (fcode >= S390_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + if (!TARGET_ZEC12) + error ("Transactional execution builtins require zEC12 or later\n"); + + if (!TARGET_HTM && TARGET_ZEC12) + error ("Transactional execution builtins not enabled (-mtx)\n"); + + /* Set a flag in the machine specific cfun part in order to support + saving/restoring of FPRs. */ + if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) + cfun->machine->tbegin_p = true; + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity >= MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + { + if (insn_op->predicate == memory_operand) + { + /* Don't move a NULL pointer into a register. Otherwise + we have to rely on combine being able to move it back + in order to get an immediate 0 in the instruction. */ + if (op[arity] != const0_rtx) + op[arity] = copy_to_mode_reg (Pmode, op[arity]); + op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); + } + else + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + } + + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0], op[1]); + else + pat = GEN_FCN (icode) (op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + /* Output assembly code for the trampoline template to stdio stream FILE. 
@@ -11003,6 +11497,11 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY s390_return_in_memory +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS s390_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN s390_expand_builtin + #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra @@ -11105,6 +11604,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p +#undef TARGET_LRA_P +#define TARGET_LRA_P s390_lra_p + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE s390_can_eliminate diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 43e24d5d112..d53fed7a6f2 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -34,7 +34,8 @@ enum processor_flags PF_DFP = 16, PF_Z10 = 32, PF_Z196 = 64, - PF_ZEC12 = 128 + PF_ZEC12 = 128, + PF_TX = 256 }; /* This is necessary to avoid a warning about comparing different enum @@ -61,6 +62,8 @@ enum processor_flags (s390_arch_flags & PF_Z196) #define TARGET_CPU_ZEC12 \ (s390_arch_flags & PF_ZEC12) +#define TARGET_CPU_HTM \ + (s390_arch_flags & PF_TX) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility when run in @@ -78,6 +81,8 @@ enum processor_flags (TARGET_ZARCH && TARGET_CPU_Z196) #define TARGET_ZEC12 \ (TARGET_ZARCH && TARGET_CPU_ZEC12) +#define TARGET_HTM \ + (TARGET_ZARCH && TARGET_CPU_HTM && TARGET_OPT_HTM) #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196) @@ -93,23 +98,25 @@ enum processor_flags #define TARGET_TPF 0 /* Target CPU builtins. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - builtin_assert ("cpu=s390"); \ - builtin_assert ("machine=s390"); \ - builtin_define ("__s390__"); \ - if (TARGET_ZARCH) \ - builtin_define ("__zarch__"); \ - if (TARGET_64BIT) \ - builtin_define ("__s390x__"); \ - if (TARGET_LONG_DOUBLE_128) \ - builtin_define ("__LONG_DOUBLE_128__"); \ - } \ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("cpu=s390"); \ + builtin_assert ("machine=s390"); \ + builtin_define ("__s390__"); \ + if (TARGET_ZARCH) \ + builtin_define ("__zarch__"); \ + if (TARGET_64BIT) \ + builtin_define ("__s390x__"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + if (TARGET_HTM) \ + builtin_define ("__HTM__"); \ + } \ while (0) #ifdef DEFAULT_TARGET_64BIT -#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP) +#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM) #else #define TARGET_DEFAULT 0 #endif @@ -221,7 +228,7 @@ enum processor_flags /* Alignment on even addresses for LARL instruction. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) -#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) /* Alignment is not required by the hardware. 
*/ #define STRICT_ALIGNMENT 0 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index cad4f5f579a..e12d1538a50 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -59,11 +59,17 @@ (define_c_enum "unspec" [ ; Miscellaneous UNSPEC_ROUND - UNSPEC_CCU_TO_INT - UNSPEC_CCZ_TO_INT UNSPEC_ICM UNSPEC_TIE + ; Convert CC into a str comparison result and copy it into an + ; integer register + ; cc0->0, cc1->1, cc2->-1, (cc3->-1) + UNSPEC_STRCMPCC_TO_INT + + ; Copy CC as is into the lower 2 bits of an integer register + UNSPEC_CC_TO_INT + ; GOT/PLT and lt-relative accesses UNSPEC_LTREL_OFFSET UNSPEC_LTREL_BASE @@ -138,6 +144,15 @@ ; Atomic Support UNSPECV_CAS UNSPECV_ATOMIC_OP + + ; Transactional Execution support + UNSPECV_TBEGIN + UNSPECV_TBEGINC + UNSPECV_TEND + UNSPECV_TABORT + UNSPECV_ETND + UNSPECV_NTSTG + UNSPECV_PPA ]) ;; @@ -191,6 +206,9 @@ (PFPO_OP1_TYPE_SHIFT 8) ]) +; Immediate operands for tbegin and tbeginc +(define_constants [(TBEGIN_MASK 65292)]) ; 0xff0c +(define_constants [(TBEGINC_MASK 65288)]) ; 0xff08 ;; Instruction operand type as used in the Principles of Operation. ;; Used to determine defaults for length and other attribute values. @@ -277,7 +295,8 @@ (define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12" (const (symbol_ref "s390_tune_attr"))) -(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12" +(define_attr "cpu_facility" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12" (const_string "standard")) (define_attr "enabled" "" @@ -304,6 +323,10 @@ (match_test "TARGET_DFP")) (const_int 1) + (and (eq_attr "cpu_facility" "cpu_zarch") + (match_test "TARGET_CPU_ZARCH")) + (const_int 1) + (and (eq_attr "cpu_facility" "z10") (match_test "TARGET_Z10")) (const_int 1) @@ -2246,7 +2269,7 @@ (define_insn "movcc" [(set (match_operand:CC 0 "nonimmediate_operand" "=d,c,d,d,d,R,T") - (match_operand:CC 1 "nonimmediate_operand" "d,d,c,R,T,d,d"))] + (match_operand:CC 1 "nonimmediate_operand" " d,d,c,R,T,d,d"))] "" "@ lr\t%0,%1 @@ -2578,7 +2601,7 @@ (use (reg:SI 0))]) (parallel [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_CCU_TO_INT)) + (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))])] "" { @@ -2690,7 +2713,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -2820,7 +2843,7 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_SIGNBIT_SET); @@ -2832,12 +2855,21 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_INFINITY); }) +(define_insn_and_split "*cc_to_int" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand 1 "register_operand" "0")] + UNSPEC_CC_TO_INT))] + "operands != NULL" + "#" + "reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) + ; This insn is used to generate all variants of the Test Data Class ; instruction, namely tcxb, tcdb, and tceb. 
The insn's first operand ; is the register to be tested and the second one is the bit mask @@ -2853,14 +2885,6 @@ [(set_attr "op_type" "RXE") (set_attr "type" "fsimp<mode>")]) -(define_insn_and_split "*ccz_to_int" - [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_operand:CCZ 1 "register_operand" "0")] - UNSPEC_CCZ_TO_INT))] - "" - "#" - "reload_completed" - [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) ; @@ -2899,7 +2923,7 @@ "(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -3075,7 +3099,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (reg:CCU CC_REGNUM) @@ -3205,7 +3229,7 @@ (define_insn_and_split "cmpint" [(set (match_operand:SI 0 "register_operand" "=d") (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT)) + UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))] "" "#" @@ -3218,10 +3242,10 @@ (define_insn_and_split "*cmpint_cc" [(set (reg CC_REGNUM) (compare (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) + UNSPEC_STRCMPCC_TO_INT) (const_int 0))) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT))] + (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT))] "s390_match_ccmode (insn, CCSmode)" "#" "&& reload_completed" @@ -3238,7 +3262,7 @@ (define_insn_and_split "*cmpint_sign" [(set (match_operand:DI 0 "register_operand" "=d") (sign_extend:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT))) + UNSPEC_STRCMPCC_TO_INT))) (clobber (reg:CC CC_REGNUM))] "TARGET_ZARCH" "#" @@ -3252,11 +3276,11 @@ [(set (reg CC_REGNUM) (compare (ashiftrt:DI (ashift:DI (subreg:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) 0) + UNSPEC_STRCMPCC_TO_INT) 0) (const_int 32)) (const_int 32)) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=d") - (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT)))] + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT)))] "s390_match_ccmode (insn, CCSmode) && TARGET_ZARCH" "#" "&& reload_completed" @@ -5507,7 +5531,7 @@ (if_then_else:GPR (match_operator 1 "s390_comparison" [(match_operand 2 "cc_reg_operand" " c,c, c, c, c, c, c") - (const_int 0)]) + (match_operand 5 "const_int_operand" "")]) (match_operand:GPR 3 "nonimmediate_operand" " d,0,QS, 0, d, 0,QS") (match_operand:GPR 4 "nonimmediate_operand" " 0,d, 0,QS, 0, d,QS")))] "TARGET_Z196" @@ -7907,7 +7931,8 @@ (define_insn "*cjump_64" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "TARGET_CPU_ZARCH" @@ -7926,7 +7951,8 @@ (define_insn "*cjump_31" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "!TARGET_CPU_ZARCH" @@ -9795,3 +9821,217 @@ "cpsdr\t%0,%2,%1" [(set_attr "op_type" "RRF") (set_attr "type" "fsimp<mode>")]) + + +;; +;;- 
Transactional execution instructions +;; + +; This splitter helps combine to make use of CC directly when +; comparing the integer result of a tbegin builtin with a constant. +; The unspec is already removed by canonicalize_comparison. So this +; splitters only job is to turn the PARALLEL into separate insns +; again. Unfortunately this only works with the very first cc/int +; compare since combine is not able to deal with data flow across +; basic block boundaries. + +; It needs to be an insn pattern as well since combine does not apply +; the splitter directly. Combine would only use it if it actually +; would reduce the number of instructions. +(define_insn_and_split "*ccraw_to_int" + [(set (pc) + (if_then_else + (match_operator 0 "s390_eqne_operator" + [(reg:CCRAW CC_REGNUM) + (match_operand 1 "const_int_operand" "")]) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_operand:SI 3 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "" + "#" + "" + [(set (match_dup 3) + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCRAW CC_REGNUM) (match_dup 1)]) + (label_ref (match_dup 2)) + (pc)))] + "") + +; Non-constrained transaction begin + +(define_expand "tbegin" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, true); + DONE; +}) + +(define_expand "tbegin_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, false); + DONE; +}) + +(define_expand "tbegin_retry" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], true); + DONE; +}) + +(define_expand "tbegin_retry_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], false); + DONE; +}) + +(define_insn "tbegin_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN)) + (clobber (reg:DF 16)) + (clobber (reg:DF 17)) + (clobber (reg:DF 18)) + (clobber (reg:DF 19)) + (clobber (reg:DF 20)) + (clobber (reg:DF 21)) + (clobber (reg:DF 22)) + (clobber (reg:DF 23)) + (clobber (reg:DF 24)) + (clobber (reg:DF 25)) + (clobber (reg:DF 26)) + (clobber (reg:DF 27)) + (clobber (reg:DF 28)) + (clobber (reg:DF 29)) + (clobber (reg:DF 30)) + (clobber (reg:DF 31))] +; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is +; not supposed to be used for immediates (see genpreds.c). 
+ "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + +; Same as above but without the FPR clobbers +(define_insn "tbegin_nofloat_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN))] + "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + + +; Constrained transaction begin + +(define_expand "tbeginc" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int TBEGINC_MASK)] + UNSPECV_TBEGINC))] + "TARGET_HTM" + "") + +(define_insn "*tbeginc_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" " D")] + UNSPECV_TBEGINC))] + "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" + "tbeginc\t0,%x0" + [(set_attr "op_type" "SIL")]) + +; Transaction end + +(define_expand "tend" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND)) + (set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "TARGET_HTM" + "") + +(define_insn "*tend_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND))] + "TARGET_HTM" + "tend" + [(set_attr "op_type" "S")]) + +; Transaction abort + +(define_expand "tabort" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" +{ + if (CONST_INT_P (operands[0]) + && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 255) + { + error ("Invalid transaction abort code: " HOST_WIDE_INT_PRINT_DEC + ". Values in range 0 through 255 are reserved.", + INTVAL (operands[0])); + FAIL; + } +}) + +(define_insn "*tabort_1" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" + "tabort\t%Y0" + [(set_attr "op_type" "S")]) + +; Transaction extract nesting depth + +(define_insn "etnd" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_ETND))] + "TARGET_HTM" + "etnd\t%0" + [(set_attr "op_type" "RRE")]) + +; Non-transactional store + +(define_insn "ntstg" + [(set (match_operand:DI 0 "memory_operand" "=RT") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "d")] + UNSPECV_NTSTG))] + "TARGET_HTM" + "ntstg\t%1,%0" + [(set_attr "op_type" "RXY")]) + +; Transaction perform processor assist + +(define_expand "tx_assist" + [(set (match_dup 1) (const_int 0)) + (unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_dup 1) + (const_int 1)] + UNSPECV_PPA)] + "TARGET_HTM" +{ + operands[1] = gen_reg_rtx (SImode); +}) + +(define_insn "*ppa" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_operand:SI 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "I")] + UNSPECV_PPA)] + "TARGET_HTM && INTVAL (operands[2]) < 16" + "ppa\t%0,%1,1" + [(set_attr "op_type" "RRF")]) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index b326441173c..7dedb836701 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -104,6 +104,10 @@ mlong-double-64 Target Report RejectNegative Negative(mlong-double-128) InverseMask(LONG_DOUBLE_128) Use 64-bit long double +mhtm +Target Report Mask(OPT_HTM) +Use hardware transactional execution instructions + mpacked-stack Target Report Mask(PACKED_STACK) Use 
packed stack layout @@ -149,3 +153,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1) Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 1. + +mlra +Target Report Var(s390_lra_flag) Init(1) Save +Use LRA instead of reload diff --git a/gcc/config/s390/s390intrin.h b/gcc/config/s390/s390intrin.h new file mode 100644 index 00000000000..e1a00ce58e3 --- /dev/null +++ b/gcc/config/s390/s390intrin.h @@ -0,0 +1,33 @@ +/* S/390 System z specific intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _S390INTRIN_H +#define _S390INTRIN_H + +#ifndef __s390__ + #error s390intrin.h included on wrong platform/compiler +#endif + +#ifdef __HTM__ +#include <htmintrin.h> +#endif + + +#endif /* _S390INTRIN_H*/ diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index 25949c62d23..998ba7300ad 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -398,9 +398,13 @@ (define_predicate "general_extend_operand" (match_code "subreg,reg,mem,truncate") { - return (GET_CODE (op) == TRUNCATE - ? arith_operand - : nonimmediate_operand) (op, mode); + if (GET_CODE (op) == TRUNCATE) + return arith_operand (op, mode); + + if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))) + return general_movsrc_operand (op, mode); + + return nonimmediate_operand (op, mode); }) ;; Returns 1 if OP is a simple register address. @@ -468,17 +472,36 @@ return 0; } - if ((mode == QImode || mode == HImode) - && mode == GET_MODE (op) - && (MEM_P (op) - || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))) + if (mode == GET_MODE (op) + && (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))) { - rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0); + rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op); + rtx x = XEXP (mem_rtx, 0); - if (GET_CODE (x) == PLUS + if ((mode == QImode || mode == HImode) + && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false); + + /* Allow reg+reg addressing here without validating the register + numbers. Usually one of the regs must be R0 or a pseudo reg. + In some cases it can happen that arguments from hard regs are + propagated directly into address expressions. In this cases reload + will have to fix it up later. However, allow this only for native + 1, 2 or 4 byte addresses. */ + if (can_create_pseudo_p () && GET_CODE (x) == PLUS + && GET_MODE_SIZE (mode) <= 4 + && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) + return true; + + /* 'general_operand' does not allow volatile mems during RTL expansion to + avoid matching arithmetic that operates on mems, it seems. 
+ On SH this leads to redundant sign extensions for QImode or HImode + loads. Thus we mimic the behavior but allow volatile mems. */ + if (memory_address_addr_space_p (GET_MODE (mem_rtx), x, + MEM_ADDR_SPACE (mem_rtx))) + return true; } if (TARGET_SHMEDIA @@ -489,6 +512,7 @@ && GET_CODE (op) == SUBREG && GET_MODE (op) == mode && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op)) /* FIXME */ abort (); /* return 1; */ + return general_operand (op, mode); }) diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 5976206f8b4..60f45452036 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -19,12 +19,6 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* FIXME: This is a temporary hack, so that we can include <algorithm> - below. <algorithm> will try to include <cstdlib> which will reference - malloc & co, which are poisoned by "system.h". The proper solution is - to include <cstdlib> in "system.h" instead of <stdlib.h>. */ -#include <cstdlib> - #include "config.h" #include "system.h" #include "coretypes.h" diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 71ad1c1a2f6..8a140687654 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -12251,10 +12251,10 @@ label: ;; FMA (fused multiply-add) patterns (define_expand "fmasf4" - [(set (match_operand:SF 0 "fp_arith_reg_operand" "") - (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "") - (match_operand:SF 2 "fp_arith_reg_operand" "") - (match_operand:SF 3 "fp_arith_reg_operand" "")))] + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand") + (match_operand:SF 3 "fp_arith_reg_operand")))] "TARGET_SH2E || TARGET_SHMEDIA_FPU" { if (TARGET_SH2E) @@ -12285,6 +12285,43 @@ label: "fmac.s %1, %2, %0" [(set_attr "type" "fparith_media")]) +;; For some cases such as 'a * b + a' the FMA pattern is not generated by +;; previous transformations. If FMA is generally allowed, let the combine +;; pass utilize it. +(define_insn_and_split "*fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand"))] + "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac %1,%2,%0" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (fma:SF (match_dup 1) (match_dup 2) (match_dup 3))) + (use (match_dup 4))])] +{ + /* Change 'b * a + a' into 'a * b + a'. + This is better for register allocation. 
*/ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +} + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + (define_expand "divsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") (div:SF (match_operand:SF 1 "arith_reg_operand" "") diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c index 718134e2b90..7c7c429db3d 100644 --- a/gcc/config/sol2.c +++ b/gcc/config/sol2.c @@ -29,7 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "diagnostic-core.h" #include "ggc.h" -#include "hashtab.h" +#include "hash-table.h" tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis; @@ -157,10 +157,6 @@ solaris_assemble_visibility (tree decl, int vis ATTRIBUTE_UNUSED) #endif } -/* Hash table of group signature symbols. */ - -static htab_t solaris_comdat_htab; - /* Group section information entry stored in solaris_comdat_htab. */ typedef struct comdat_entry @@ -171,25 +167,34 @@ typedef struct comdat_entry const char *sig; } comdat_entry; -/* Helper routines for maintaining solaris_comdat_htab. */ +/* Helpers for maintaining solaris_comdat_htab. */ -static hashval_t -comdat_hash (const void *p) +struct comdat_entry_hasher : typed_noop_remove <comdat_entry> +{ + typedef comdat_entry value_type; + typedef comdat_entry compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline hashval_t +comdat_entry_hasher::hash (const value_type *entry) { - const comdat_entry *entry = (const comdat_entry *) p; - return htab_hash_string (entry->sig); } -static int -comdat_eq (const void *p1, const void *p2) +inline bool +comdat_entry_hasher::equal (const value_type *entry1, + const compare_type *entry2) { - const comdat_entry *entry1 = (const comdat_entry *) p1; - const comdat_entry *entry2 = (const comdat_entry *) p2; - return strcmp (entry1->sig, entry2->sig) == 0; } +/* Hash table of group signature symbols. */ + +static hash_table <comdat_entry_hasher> solaris_comdat_htab; + /* Output assembly to switch to COMDAT group section NAME with attributes FLAGS and group signature symbol DECL, using Sun as syntax. */ @@ -229,12 +234,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl) identify the missing ones without changing the affected frontents, remember the signature symbols and emit those not marked TREE_SYMBOL_REFERENCED in solaris_file_end. */ - if (solaris_comdat_htab == NULL) - solaris_comdat_htab = htab_create_alloc (37, comdat_hash, comdat_eq, NULL, - xcalloc, free); + if (!solaris_comdat_htab.is_created ()) + solaris_comdat_htab.create (37); entry.sig = signature; - slot = (comdat_entry **) htab_find_slot (solaris_comdat_htab, &entry, INSERT); + slot = solaris_comdat_htab.find_slot (&entry, INSERT); if (*slot == NULL) { @@ -250,10 +254,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl) /* Define unreferenced COMDAT group signature symbol corresponding to SLOT. 
*/ -static int -solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED) +int +solaris_define_comdat_signature (comdat_entry **slot, + void *aux ATTRIBUTE_UNUSED) { - comdat_entry *entry = *(comdat_entry **) slot; + comdat_entry *entry = *slot; tree decl = entry->decl; if (TREE_CODE (decl) != IDENTIFIER_NODE) @@ -277,10 +282,10 @@ solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED) void solaris_file_end (void) { - if (solaris_comdat_htab == NULL) + if (!solaris_comdat_htab.is_created ()) return; - htab_traverse (solaris_comdat_htab, solaris_define_comdat_signature, NULL); + solaris_comdat_htab.traverse <void *, solaris_define_comdat_signature> (NULL); } void diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 1dc4e3600a8..d473d6fdd7f 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -11527,7 +11527,7 @@ sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel) } /* Always perform the final addition/merge within the bmask insn. */ - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1)); } /* Implement TARGET_FRAME_POINTER_REQUIRED. */ @@ -11766,7 +11766,7 @@ static void vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode, enum machine_mode inner_mode) { - rtx t1, final_insn; + rtx t1, final_insn, sel; int bmask; t1 = gen_reg_rtx (mode); @@ -11792,8 +11792,8 @@ vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode, gcc_unreachable (); } - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode), - force_reg (SImode, GEN_INT (bmask)))); + sel = force_reg (SImode, GEN_INT (bmask)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx)); emit_insn (final_insn); } diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 292cb205271..7f8d4250502 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -5499,7 +5499,7 @@ [(set (match_operand:DF 0 "register_operand" "=e") (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f")) (float_extend:DF (match_operand:SF 2 "register_operand" "f"))))] - "(TARGET_V8 || TARGET_V9) && TARGET_FPU" + "(TARGET_V8 || TARGET_V9) && TARGET_FPU && !sparc_fix_ut699" "fsmuld\t%1, %2, %0" [(set_attr "type" "fpmul") (set_attr "fptype" "double")]) @@ -5528,20 +5528,37 @@ "fdivq\t%1, %2, %0" [(set_attr "type" "fpdivd")]) -(define_insn "divdf3" +(define_expand "divdf3" [(set (match_operand:DF 0 "register_operand" "=e") (div:DF (match_operand:DF 1 "register_operand" "e") (match_operand:DF 2 "register_operand" "e")))] "TARGET_FPU" + "") + +(define_insn "*divdf3_nofix" + [(set (match_operand:DF 0 "register_operand" "=e") + (div:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU && !sparc_fix_ut699" "fdivd\t%1, %2, %0" [(set_attr "type" "fpdivd") (set_attr "fptype" "double")]) +(define_insn "*divdf3_fix" + [(set (match_operand:DF 0 "register_operand" "=e") + (div:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU && sparc_fix_ut699" + "fdivd\t%1, %2, %0\n\tstd\t%0, [%%sp-8]" + [(set_attr "type" "fpdivd") + (set_attr "fptype" "double") + (set_attr "length" "2")]) + (define_insn "divsf3" [(set (match_operand:SF 0 "register_operand" "=f") (div:SF (match_operand:SF 1 "register_operand" "f") (match_operand:SF 2 "register_operand" "f")))] - "TARGET_FPU" + "TARGET_FPU && !sparc_fix_ut699" "fdivs\t%1, %2, %0" 
[(set_attr "type" "fpdivs")]) @@ -5742,18 +5759,33 @@ "fsqrtq\t%1, %0" [(set_attr "type" "fpsqrtd")]) -(define_insn "sqrtdf2" +(define_expand "sqrtdf2" [(set (match_operand:DF 0 "register_operand" "=e") (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] "TARGET_FPU" + "") + +(define_insn "*sqrtdf2_nofix" + [(set (match_operand:DF 0 "register_operand" "=e") + (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && !sparc_fix_ut699" "fsqrtd\t%1, %0" [(set_attr "type" "fpsqrtd") (set_attr "fptype" "double")]) +(define_insn "*sqrtdf2_fix" + [(set (match_operand:DF 0 "register_operand" "=e") + (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && sparc_fix_ut699" + "fsqrtd\t%1, %0\n\tstd\t%0, [%%sp-8]" + [(set_attr "type" "fpsqrtd") + (set_attr "fptype" "double") + (set_attr "length" "2")]) + (define_insn "sqrtsf2" [(set (match_operand:SF 0 "register_operand" "=f") (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] - "TARGET_FPU" + "TARGET_FPU && !sparc_fix_ut699" "fsqrts\t%1, %0" [(set_attr "type" "fpsqrts")]) @@ -8557,7 +8589,7 @@ mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4); sel = force_reg (SImode, gen_int_mode (mask, SImode)); - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx)); emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index 764c652e837..3b50c6c21f9 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -201,6 +201,10 @@ Target Report RejectNegative Var(sparc_fix_at697f) Enable workaround for single erratum of AT697F processor (corresponding to erratum #13 of AT697E processor) +mfix-ut699 +Target Report RejectNegative Var(sparc_fix_ut699) +Enable workarounds for the FP errata of the UT699 processor + Mask(LONG_DOUBLE_128) ;; Use 128-bit long double diff --git a/gcc/config/t-sol2 b/gcc/config/t-sol2 index 25b825017f6..142de89de95 100644 --- a/gcc/config/t-sol2 +++ b/gcc/config/t-sol2 @@ -34,5 +34,5 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h # Solaris-specific attributes sol2.o: $(srcdir)/config/sol2.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H) + tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H) $(HASH_TABLE_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< |