Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/rs6000/constraints.md      |  19
-rw-r--r--  gcc/config/rs6000/predicates.md       | 130
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def  | 135
-rw-r--r--  gcc/config/rs6000/rs6000-c.c          |  43
-rw-r--r--  gcc/config/rs6000/rs6000-cpus.def     |  20
-rw-r--r--  gcc/config/rs6000/rs6000-opts.h       |   8
-rw-r--r--  gcc/config/rs6000/rs6000.c            | 258
-rw-r--r--  gcc/config/rs6000/rs6000.h            |  91
-rw-r--r--  gcc/config/rs6000/rs6000.md           |   7
-rw-r--r--  gcc/config/rs6000/rs6000.opt          |  26
-rw-r--r--  gcc/config/rs6000/t-rs6000            |   1
11 files changed, 689 insertions, 49 deletions
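The diff below adds -mcpu=power8 (ISA 2.07) support to the rs6000 back end: the new -mpower8-vector, -mcrypto, -mdirect-move, -mquad-memory and -mpower8-fusion flags, plus the predefined macros _ARCH_PWR8, __POWER8_VECTOR__ and __CRYPTO__. As a rough sketch (assuming a compiler built with this patch and invoked with -mcpu=power8; the HAVE_* names are illustrative only), user code could key off the new macros like this:

    /* Illustrative only: feature tests for the macros added in rs6000-c.c.
       -mcpu=power8 now implies -mpower8-vector, -mcrypto, -mdirect-move
       and -mquad-memory via ISA_2_7_MASKS_SERVER.  */
    #ifdef _ARCH_PWR8
    # ifdef __CRYPTO__
    #  define HAVE_VEC_CRYPTO 1   /* vpermxor, vpmsum*, vshasigma*, ...  */
    # endif
    # ifdef __POWER8_VECTOR__
    #  define HAVE_P8_VECTOR 1    /* ISA 2.07 vector/scalar additions.  */
    # endif
    #endif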
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 463d69c6ba4..fa53cbb9de7 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -79,12 +79,31 @@ (define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register if -mpower8-vector is used or NO_REGS.") + (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" "Floating point register if the STFIWX instruction is enabled or NO_REGS.") (define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf can't to move between the register sets. +;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS") + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) + ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" "Indexed or indirect memory operand, ignoring the bottom 4 bits" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 78ec1b20913..078c9387350 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -166,6 +166,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + ;; Return 1 if op is a register that is not special. (define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") @@ -182,9 +187,68 @@ if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) return 1; + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. 
+(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which suitable for a load/store quad operation +(define_predicate "quad_int_reg_operand" + (match_operand 0 "register_operand") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + ;; Return 1 if op is a register that is a condition register field. (define_predicate "cc_reg_operand" (match_operand 0 "register_operand") @@ -302,6 +366,11 @@ & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)") (match_operand 0 "gpc_reg_operand"))) +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register ;; with no more than one instruction per word. (define_predicate "easy_fp_constant" @@ -507,6 +576,54 @@ (and (match_operand 0 "memory_operand") (match_test "offsettable_nonstrict_memref_p (op)"))) +;; Return 1 if the operand is suitable for load/store quad memory. +(define_predicate "quad_memory_operand" + (match_code "mem") +{ + rtx addr, op0, op1; + int ret; + + if (!TARGET_QUAD_MEMORY) + ret = 0; + + else if (!memory_operand (op, mode)) + ret = 0; + + else if (GET_MODE_SIZE (GET_MODE (op)) != 16) + ret = 0; + + else if (MEM_ALIGN (op) < 128) + ret = 0; + + else + { + addr = XEXP (op, 0); + if (int_reg_operand (addr, Pmode)) + ret = 1; + + else if (GET_CODE (addr) != PLUS) + ret = 0; + + else + { + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + ret = (int_reg_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT + && IN_RANGE (INTVAL (op1), -32768, 32767) + && (INTVAL (op1) & 15) == 0); + } + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); + debug_rtx (op); + } + + return ret; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand. (define_predicate "indexed_or_indirect_operand" (match_code "mem") @@ -521,6 +638,19 @@ return indexed_or_indirect_address (op, mode); }) +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. 
+(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand with an ;; AND -16 in it, used to recognize when we need to switch to Altivec loads ;; to realign loops instead of VSX (altivec silently ignores the bottom bits, diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index a545fe3e448..b41f53314c6 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -30,7 +30,7 @@ RS6000_BUILTIN_A -- ABS builtins RS6000_BUILTIN_D -- DST builtins RS6000_BUILTIN_E -- SPE EVSEL builtins. - RS6000_BUILTIN_P -- Altivec and VSX predicate builtins + RS6000_BUILTIN_P -- Altivec, VSX, Power8 vector predicate builtins RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins RS6000_BUILTIN_S -- SPE predicate builtins RS6000_BUILTIN_X -- special builtins @@ -301,6 +301,108 @@ | RS6000_BTC_SPECIAL), \ CODE_FOR_nothing) /* ICODE */ +/* Power8 vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. 
*/ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* SPE convenience macros. */ #define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) -BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) @@ -1132,6 +1234,35 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. 
*/ +BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + + /* 3 argument paired floating point builtins. */ BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a4f66ba8f1b..c6c584d8183 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) @@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, } if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); /* options from the builtin masks. */ if ((bu_mask & RS6000_BTM_SPE) != 0) @@ -3377,6 +3383,40 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3824,7 +3864,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && (desc->op2 == RS6000_BTI_NOT_OPAQUE || rs6000_builtin_type_compatible (types[1], desc->op2)) && (desc->op3 == RS6000_BTI_NOT_OPAQUE - || 
rs6000_builtin_type_compatible (types[2], desc->op3))) + || rs6000_builtin_type_compatible (types[2], desc->op3)) + && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) return altivec_build_resolved_builtin (args, n, desc); bad: diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 0564018b3f0..25a0ed27fe7 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -28,7 +28,7 @@ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, fre, fsqrt, etc. were no longer documented as optional. Group masks by server and embedded. */ -#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ | OPTION_MASK_CMPB \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_PPC_GFXOPT \ @@ -45,6 +45,14 @@ | OPTION_MASK_VSX \ | OPTION_MASK_VSX_TIMODE) +/* For now, don't provide an embedded version of ISA 2.07. */ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_QUAD_MEMORY) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -61,7 +69,9 @@ /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ | OPTION_MASK_FPRND \ | OPTION_MASK_ISEL \ @@ -69,11 +79,14 @@ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ | OPTION_MASK_PPC_GFXOPT \ | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ @@ -168,10 +181,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) -RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) +RS6000_CPU ("power8", PROCESSOR_POWER7, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index fc843fd19ca..e143a4ca203 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -59,7 +59,8 @@ enum processor_type PROCESSOR_POWER7, PROCESSOR_CELL, PROCESSOR_PPCA2, - PROCESSOR_TITAN + PROCESSOR_TITAN, + PROCESSOR_POWER8 }; /* FP processor type. */ @@ -131,11 +132,14 @@ enum rs6000_cmodel { CMODEL_LARGE }; -/* Describe which vector unit to use for a given machine mode. */ +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. 
*/ enum rs6000_vector { VECTOR_NONE, /* Type is not a vector or not supported */ VECTOR_ALTIVEC, /* Use altivec for vector processing */ VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ VECTOR_PAIRED, /* Use paired floating point for vectors */ VECTOR_SPE, /* Use SPE for vector processing */ VECTOR_OTHER /* Some other vector unit */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e82b24e22ce..7963bf4d5c2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -831,6 +831,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER8 processors. */ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -1547,6 +1566,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register and an Altivec register. 
*/ @@ -1678,6 +1706,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) comma = ""; } + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + fprintf (stderr, "%sregno = %d\n", comma, r); } } @@ -1710,6 +1748,7 @@ rs6000_debug_reg_global (void) "none", "altivec", "vsx", + "p8_vector", "paired", "spe", "other" @@ -1802,8 +1841,11 @@ rs6000_debug_reg_global (void) "wf reg_class = %s\n" "wg reg_class = %s\n" "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wr reg_class = %s\n" "ws reg_class = %s\n" "wt reg_class = %s\n" + "wv reg_class = %s\n" "wx reg_class = %s\n" "wz reg_class = %s\n" "\n", @@ -1815,8 +1857,11 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); @@ -2050,6 +2095,10 @@ rs6000_debug_reg_global (void) if (targetm.lra_p ()) fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + if (TARGET_P8_FUSION) + fprintf (stderr, DEBUG_FMT_S, "p8 fusion", + (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); fprintf (stderr, DEBUG_FMT_S, "struct-return", @@ -2240,6 +2289,15 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWAX) rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; + if (TARGET_DIRECT_MOVE) + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + + if (TARGET_POWERPC64) + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + + if (TARGET_P8_VECTOR) + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + if (TARGET_STFIWX) rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; @@ -2520,16 +2578,18 @@ darwin_rs6000_override_options (void) HOST_WIDE_INT rs6000_builtin_mask_calculate (void) { - return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) - | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) - | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) - | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) - | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) - | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) - | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) - | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) - | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) - | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)); + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)); } /* Override command line options. 
Mostly we process the processor type and @@ -2803,7 +2863,9 @@ rs6000_option_override_internal (bool global_init_p) /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-<option> to disable the code. */ - if (TARGET_VSX) + if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) + rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit); + else if (TARGET_VSX) rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit); else if (TARGET_POPCNTD) rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit); @@ -2818,6 +2880,34 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_ALTIVEC) rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit); + if (TARGET_CRYPTO && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) + error ("-mcrypto requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; + } + + if (TARGET_DIRECT_MOVE && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) + error ("-mdirect-move requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; + } + + if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + + if (TARGET_P8_VECTOR && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + if (TARGET_VSX_TIMODE && !TARGET_VSX) { if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) @@ -3019,16 +3109,19 @@ rs6000_option_override_internal (bool global_init_p) && rs6000_cpu != PROCESSOR_POWER5 && rs6000_cpu != PROCESSOR_POWER6 && rs6000_cpu != PROCESSOR_POWER7 + && rs6000_cpu != PROCESSOR_POWER8 && rs6000_cpu != PROCESSOR_PPCA2 && rs6000_cpu != PROCESSOR_CELL && rs6000_cpu != PROCESSOR_PPC476); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 - || rs6000_cpu == PROCESSOR_POWER7); + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8); rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 || rs6000_cpu == PROCESSOR_PPCE5500 @@ -3272,6 +3365,10 @@ rs6000_option_override_internal (bool global_init_p) rs6000_cost = &power7_cost; break; + case PROCESSOR_POWER8: + rs6000_cost = &power8_cost; + break; + case PROCESSOR_PPCA2: rs6000_cost = &ppca2_cost; break; @@ -3444,7 +3541,8 @@ rs6000_loop_align (rtx label) && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7)) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8)) return 5; else return align_loops_log; @@ -10578,6 +10676,27 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) return const0_rtx; } } + else if (icode == CODE_FOR_crypto_vshasigmaw + || icode == CODE_FOR_crypto_vshasigmad) + { + /* Check whether the 2nd and 3rd arguments are integer constants and in + range and prepare arguments. 
*/ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1)) + { + error ("argument 2 must be 0 or 1"); + return const0_rtx; + } + + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15)) + { + error ("argument 3 must be in the range 0..15"); + return const0_rtx; + } + } if (target == 0 || GET_MODE (target) != tmode @@ -12268,6 +12387,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE); + tree int_ftype_int_v2di_v2di + = build_function_type_list (integer_type_node, + integer_type_node, V2DI_type_node, + V2DI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -12350,6 +12473,8 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); tree v4si_ftype_v4si = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_v8hi @@ -12485,6 +12610,9 @@ altivec_init_builtins (void) case VOIDmode: type = int_ftype_int_opaque_opaque; break; + case V2DImode: + type = int_ftype_int_v2di_v2di; + break; case V4SImode: type = int_ftype_int_v4si_v4si; break; @@ -12518,6 +12646,9 @@ altivec_init_builtins (void) switch (mode0) { + case V2DImode: + type = v2di_ftype_v2di; + break; case V4SImode: type = v4si_ftype_v4si; break; @@ -12723,11 +12854,26 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, are type correct. */ switch (builtin) { + /* unsigned 1 argument functions. */ + case CRYPTO_BUILTIN_VSBOX: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + break; + /* unsigned 2 argument functions. 
*/ case ALTIVEC_BUILTIN_VMULEUB_UNS: case ALTIVEC_BUILTIN_VMULEUH_UNS: case ALTIVEC_BUILTIN_VMULOUB_UNS: case ALTIVEC_BUILTIN_VMULOUH_UNS: + case CRYPTO_BUILTIN_VCIPHER: + case CRYPTO_BUILTIN_VCIPHERLAST: + case CRYPTO_BUILTIN_VNCIPHER: + case CRYPTO_BUILTIN_VNCIPHERLAST: + case CRYPTO_BUILTIN_VPMSUMB: + case CRYPTO_BUILTIN_VPMSUMH: + case CRYPTO_BUILTIN_VPMSUMW: + case CRYPTO_BUILTIN_VPMSUMD: + case CRYPTO_BUILTIN_VPMSUM: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12750,6 +12896,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, case VSX_BUILTIN_XXSEL_8HI_UNS: case VSX_BUILTIN_XXSEL_4SI_UNS: case VSX_BUILTIN_XXSEL_2DI_UNS: + case CRYPTO_BUILTIN_VPERMXOR: + case CRYPTO_BUILTIN_VPERMXOR_V2DI: + case CRYPTO_BUILTIN_VPERMXOR_V4SI: + case CRYPTO_BUILTIN_VPERMXOR_V8HI: + case CRYPTO_BUILTIN_VPERMXOR_V16QI: + case CRYPTO_BUILTIN_VSHASIGMAW: + case CRYPTO_BUILTIN_VSHASIGMAD: + case CRYPTO_BUILTIN_VSHASIGMA: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12891,8 +13045,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", + d->name); + + continue; + } type = builtin_function_type (insn_data[icode].operand[0].mode, insn_data[icode].operand[1].mode, @@ -12931,8 +13100,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -12993,8 +13177,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -22951,6 +23150,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) || rs6000_cpu_attr == CPU_POWER4 || rs6000_cpu_attr == CPU_POWER5 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_CELL) && recog_memoized (dep_insn) && (INSN_CODE (dep_insn) >= 0)) @@ -23537,6 +23737,8 @@ rs6000_issue_rate (void) case CPU_POWER6: case CPU_POWER7: return 5; + case CPU_POWER8: + return 7; default: return 1; } @@ -24130,6 +24332,7 @@ insn_must_be_first_in_group (rtx insn) } break; case PROCESSOR_POWER7: + case PROCESSOR_POWER8: /* FIXME */ type = get_attr_type (insn); switch (type) @@ -24226,6 +24429,7 @@ insn_must_be_last_in_group (rtx insn) } break; case PROCESSOR_POWER7: + case PROCESSOR_POWER8: /* FIXME */ type = get_attr_type (insn); switch (type) @@ -24332,7 +24536,8 @@ force_new_group 
(int sched_verbose, FILE *dump, rtx *group_insns, can_issue_more--; /* Power6 and Power7 have special group ending nop. */ - if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7) + if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8) { nop = gen_group_ending_nop (); emit_insn_before (nop, next_insn); @@ -26513,7 +26718,8 @@ rs6000_register_move_cost (enum machine_mode mode, /* For those processors that have slow LR/CTR moves, make them more expensive than memory in order to bias spills to memory .*/ else if ((rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8) && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) ret = 6 * hard_regno_nregs[0][mode]; @@ -27742,6 +27948,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { { "altivec", OPTION_MASK_ALTIVEC, false, true }, { "cmpb", OPTION_MASK_CMPB, false, true }, + { "crypto", OPTION_MASK_CRYPTO, false, true }, + { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, @@ -27750,13 +27958,17 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, { "mulhw", OPTION_MASK_MULHW, false, true }, { "multiple", OPTION_MASK_MULTIPLE, false, true }, - { "update", OPTION_MASK_NO_UPDATE, true , true }, { "popcntb", OPTION_MASK_POPCNTB, false, true }, { "popcntd", OPTION_MASK_POPCNTD, false, true }, + { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, + { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, + { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, + { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, { "string", OPTION_MASK_STRING, false, true }, + { "update", OPTION_MASK_NO_UPDATE, true , true }, { "vsx", OPTION_MASK_VSX, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT @@ -27798,6 +28010,8 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, { "popcntd", RS6000_BTM_POPCNTD, false, false }, { "cell", RS6000_BTM_CELL, false, false }, + { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, + { "crypto", RS6000_BTM_CRYPTO, false, false }, }; /* Option variables that we want to support inside attribute((target)) and diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 4c24b56924d..76f3bf99250 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -92,7 +92,7 @@ #ifdef HAVE_AS_POWER8 #define ASM_CPU_POWER8_SPEC "-mpower8" #else -#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec" +#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC #endif #ifdef HAVE_AS_DCI @@ -164,6 +164,7 @@ %{mcpu=e6500: -me6500} \ %{maltivec: -maltivec} \ %{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \ +%{mpower8-vector|mcrypto|mdirect-move: %{!mcpu*: %(asm_cpu_power8)}} \ -many" #define CPP_DEFAULT_SPEC "" @@ -277,6 +278,19 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_POPCNTD 0 #endif +/* Define the ISA 2.07 flags as 0 if the target assembler does not support the + waitasecond instruction. 
Allow -mpower8-fusion, since it does not add new + instructions. */ + +#ifndef HAVE_AS_POWER8 +#undef TARGET_DIRECT_MOVE +#undef TARGET_CRYPTO +#undef TARGET_P8_VECTOR +#define TARGET_DIRECT_MOVE 0 +#define TARGET_CRYPTO 0 +#define TARGET_P8_VECTOR 0 +#endif + /* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If not, generate the lwsync code as an integer constant. */ #ifdef HAVE_AS_LWSYNC @@ -386,6 +400,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET) #define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN) +/* Describe the vector unit used for arithmetic operations. */ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_NONE_P(MODE) \ @@ -394,12 +409,25 @@ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_VSX_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_VSX) +#define VECTOR_UNIT_P8_VECTOR_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR) + #define VECTOR_UNIT_ALTIVEC_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + +/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either + altivec (VMX) or VSX vector instructions. P8 vector support is upwards + compatible, so allow it as well, rather than changing all of the uses of the + macro. */ #define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_unit[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Describe whether to use VSX loads or Altivec loads. For now, just use the same unit as the vector unit we are using, but we may want to migrate to @@ -412,12 +440,21 @@ extern enum rs6000_vector rs6000_vector_mem[]; #define VECTOR_MEM_VSX_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_VSX) +#define VECTOR_MEM_P8_VECTOR_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_VSX) + #define VECTOR_MEM_ALTIVEC_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + #define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_mem[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Return the alignment of a given vector type, which is set based on the vector unit use. VSX for instance can load 32 or 64 bit aligned words @@ -479,6 +516,15 @@ extern int rs6000_vector_align[]; #define TARGET_FCTIDUZ TARGET_POPCNTD #define TARGET_FCTIWUZ TARGET_POPCNTD +#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) + +/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present + in power7, so conditionalize them on p8 features. TImode syncs need quad + memory support. */ +#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE) +#define TARGET_SYNC_TI TARGET_QUAD_MEMORY + /* Power7 has both 32-bit load and store integer for the FPRs, so we don't need to allocate the SDmode stack slot to get the value into the proper location in the register. */ @@ -489,10 +535,13 @@ extern int rs6000_vector_align[]; OPTION_MASK_<xxx> back into MASK_<xxx>. 
*/ #define MASK_ALTIVEC OPTION_MASK_ALTIVEC #define MASK_CMPB OPTION_MASK_CMPB +#define MASK_CRYPTO OPTION_MASK_CRYPTO #define MASK_DFP OPTION_MASK_DFP +#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE #define MASK_DLMZB OPTION_MASK_DLMZB #define MASK_EABI OPTION_MASK_EABI #define MASK_FPRND OPTION_MASK_FPRND +#define MASK_P8_FUSION OPTION_MASK_P8_FUSION #define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT #define MASK_ISEL OPTION_MASK_ISEL #define MASK_MFCRF OPTION_MASK_MFCRF @@ -500,6 +549,7 @@ extern int rs6000_vector_align[]; #define MASK_MULHW OPTION_MASK_MULHW #define MASK_MULTIPLE OPTION_MASK_MULTIPLE #define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE +#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR #define MASK_POPCNTB OPTION_MASK_POPCNTB #define MASK_POPCNTD OPTION_MASK_POPCNTD #define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT @@ -1002,7 +1052,9 @@ extern unsigned rs6000_pointer_size; #define REG_ALLOC_ORDER \ {32, \ - 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \ + /* move fr13 (ie 45) later, so if we need TFmode, it does */ \ + /* not use fr14 which is a saved register. */ \ + 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \ 33, \ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 50, 49, 48, 47, 46, \ @@ -1062,8 +1114,14 @@ extern unsigned rs6000_pointer_size; #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N) /* Alternate name for any vector register supporting logical operations, no - matter which instruction set(s) are available. */ -#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N) + matter which instruction set(s) are available. Under VSX, we allow GPRs as + well as vector registers on 64-bit systems. We don't allow 32-bit systems, + due to the number of registers involved, and the number of instructions to + load/store the values.. */ +#define VLOGICAL_REGNO_P(N) \ + (ALTIVEC_REGNO_P (N) \ + || (TARGET_VSX && FP_REGNO_P (N)) \ + || (TARGET_VSX && TARGET_POWERPC64 && INT_REGNO_P (N))) /* Return number of consecutive hard regs needed starting at reg REGNO to hold something of mode MODE. */ @@ -1124,7 +1182,7 @@ extern unsigned rs6000_pointer_size; when one has mode MODE1 and one has mode MODE2. If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, for any hard reg, then this must be 0 for correct output. */ -#define MODES_TIEABLE_P(MODE1, MODE2) \ +#define MODES_TIEABLE_P(MODE1, MODE2) \ (SCALAR_FLOAT_MODE_P (MODE1) \ ? SCALAR_FLOAT_MODE_P (MODE2) \ : SCALAR_FLOAT_MODE_P (MODE2) \ @@ -1137,14 +1195,14 @@ extern unsigned rs6000_pointer_size; ? SPE_VECTOR_MODE (MODE2) \ : SPE_VECTOR_MODE (MODE2) \ ? SPE_VECTOR_MODE (MODE1) \ - : ALTIVEC_VECTOR_MODE (MODE1) \ - ? ALTIVEC_VECTOR_MODE (MODE2) \ - : ALTIVEC_VECTOR_MODE (MODE2) \ - ? ALTIVEC_VECTOR_MODE (MODE1) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ + : ALTIVEC_VECTOR_MODE (MODE1) \ + ? ALTIVEC_VECTOR_MODE (MODE2) \ + : ALTIVEC_VECTOR_MODE (MODE2) \ + ? 
ALTIVEC_VECTOR_MODE (MODE1) \ : 1) /* Post-reload, we can't use any new AltiVec registers, as we already @@ -1337,8 +1395,11 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */ RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */ + RS6000_CONSTRAINT_wm, /* VSX register for direct move */ + RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_ws, /* VSX register for DF */ RS6000_CONSTRAINT_wt, /* VSX register for TImode */ + RS6000_CONSTRAINT_wv, /* Altivec register for power8 vector */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */ RS6000_CONSTRAINT_MAX @@ -2372,6 +2433,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_ALWAYS 0 /* Always enabled. */ #define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ #define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ +#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */ +#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */ #define RS6000_BTM_SPE MASK_STRING /* E500 */ #define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */ #define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */ @@ -2383,6 +2446,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ + | RS6000_BTM_P8_VECTOR \ + | RS6000_BTM_CRYPTO \ | RS6000_BTM_FRE \ | RS6000_BTM_FRES \ | RS6000_BTM_FRSQRTE \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 1e65ac1cde0..013a0e38551 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -146,7 +146,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. -(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md @@ -168,7 +168,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. 
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan" +(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8" (const (symbol_ref "rs6000_cpu_attr"))) @@ -14788,7 +14788,7 @@ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] "TARGET_POPCNTD" "bpermd %0,%1,%2" - [(set_attr "type" "integer")]) + [(set_attr "type" "popcnt")]) ;; Builtin fma support. Handle @@ -14931,3 +14931,4 @@ (include "spe.md") (include "dfp.md") (include "paired.md") +(include "crypto.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 17b77629fa1..9a078198130 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -517,4 +517,28 @@ Control whether we save the TOC in the prologue for indirect calls or generate t mvsx-timode Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags) -; Allow/disallow TImode in VSX registers +Allow 128-bit integers in VSX registers + +mpower8-fusion +Target Report Mask(P8_FUSION) Var(rs6000_isa_flags) +Fuse certain integer operations together for better performance on power8 + +mpower8-fusion-sign +Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags) +Allow sign extension in fusion operations + +mpower8-vector +Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) +Use/do not use vector and scalar instructions added in ISA 2.07. + +mcrypto +Target Report Mask(CRYPTO) Var(rs6000_isa_flags) +Use ISA 2.07 crypto instructions + +mdirect-move +Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags) +Use ISA 2.07 direct move between GPR & VSX register instructions + +mquad-memory +Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) +Generate the quad word memory instructions (lq/stq/lqarx/stqcx). diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 52c18391556..ecfdf0eee62 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -70,6 +70,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/vector.md \ $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ + $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/spe.md \ $(srcdir)/config/rs6000/dfp.md \ $(srcdir)/config/rs6000/paired.md |