Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r--  gcc/config/rs6000/aix43.h            |    2
-rw-r--r--  gcc/config/rs6000/aix51.h            |    2
-rw-r--r--  gcc/config/rs6000/aix52.h            |    2
-rw-r--r--  gcc/config/rs6000/aix53.h            |    2
-rw-r--r--  gcc/config/rs6000/aix61.h            |    2
-rw-r--r--  gcc/config/rs6000/aix71.h            |    2
-rw-r--r--  gcc/config/rs6000/altivec.h          |    8
-rw-r--r--  gcc/config/rs6000/altivec.md         |   47
-rw-r--r--  gcc/config/rs6000/power9.md          |    8
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def |  192
-rw-r--r--  gcc/config/rs6000/rs6000-c.c         |  339
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h    |    2
-rw-r--r--  gcc/config/rs6000/rs6000.c           |  432
-rw-r--r--  gcc/config/rs6000/rs6000.md          |  198
-rw-r--r--  gcc/config/rs6000/vsx.md             |  469
-rw-r--r--  gcc/config/rs6000/xcoff.h            |    2
16 files changed, 1211 insertions, 498 deletions
diff --git a/gcc/config/rs6000/aix43.h b/gcc/config/rs6000/aix43.h index d61956d3b28..bd8a2c08da2 100644 --- a/gcc/config/rs6000/aix43.h +++ b/gcc/config/rs6000/aix43.h @@ -39,7 +39,7 @@ do { \ { \ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/aix51.h b/gcc/config/rs6000/aix51.h index af7e38e1775..e9f88df23da 100644 --- a/gcc/config/rs6000/aix51.h +++ b/gcc/config/rs6000/aix51.h @@ -33,7 +33,7 @@ do { \ { \ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/aix52.h b/gcc/config/rs6000/aix52.h index 35d2286e5b3..eade64c5f64 100644 --- a/gcc/config/rs6000/aix52.h +++ b/gcc/config/rs6000/aix52.h @@ -39,7 +39,7 @@ do { \ { \ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/aix53.h b/gcc/config/rs6000/aix53.h index a1fbd834d47..b16488b3ba3 100644 --- a/gcc/config/rs6000/aix53.h +++ b/gcc/config/rs6000/aix53.h @@ -39,7 +39,7 @@ do { \ { \ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/aix61.h b/gcc/config/rs6000/aix61.h index 0b615057087..25fcb62ea95 100644 --- a/gcc/config/rs6000/aix61.h +++ b/gcc/config/rs6000/aix61.h @@ -56,7 +56,7 @@ do { \ { \ rs6000_current_cmodel = CMODEL_LARGE; \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h index 4b986d6a818..3b8de897ede 100644 --- a/gcc/config/rs6000/aix71.h +++ b/gcc/config/rs6000/aix71.h @@ -56,7 +56,7 @@ do { \ { \ rs6000_current_cmodel = CMODEL_LARGE; \ } \ -} while (0); +} while (0) #undef ASM_SPEC #define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 94a4db24a78..068dfef2e00 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -357,6 +357,7 @@ #define vec_xl __builtin_vec_vsx_ld #define vec_xl_be __builtin_vec_xl_be #define vec_xst __builtin_vec_vsx_st +#define vec_xst_be __builtin_vec_xst_be /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions instead of __builtin_vec_<xxx> */ @@ -415,10 +416,15 @@ #define vec_vsubuqm __builtin_vec_vsubuqm #define vec_vupkhsw __builtin_vec_vupkhsw #define vec_vupklsw __builtin_vec_vupklsw +#define vec_revb __builtin_vec_revb #endif #ifdef __POWER9_VECTOR__ /* Vector additions added in ISA 3.0. 
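   The vec_first_match_index family mapped just below returns the index
   of the first matching (or mismatching) element pair, or the element
   count when there is none.  A minimal usage sketch, assuming
   <altivec.h> and -mcpu=power9:

     vector unsigned char a, b;
     unsigned int idx = vec_first_match_index (a, b);   0 <= idx <= 16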
*/ +#define vec_first_match_index __builtin_vec_first_match_index +#define vec_first_match_or_eos_index __builtin_vec_first_match_or_eos_index +#define vec_first_mismatch_index __builtin_vec_first_mismatch_index +#define vec_first_mismatch_or_eos_index __builtin_vec_first_mismatch_or_eos_index #define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16 #define vec_parity_lsbb __builtin_vec_vparity_lsbb #define vec_vctz __builtin_vec_vctz @@ -478,8 +484,6 @@ #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx - -#define vec_revb __builtin_vec_revb #endif /* Predicates. diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index d0fcd1c3d8a..7122f99bffd 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2130,7 +2130,7 @@ }) ;; Slightly prefer vperm, since the target does not overlap the source -(define_insn "*altivec_vperm_<mode>_internal" +(define_insn "altivec_vperm_<mode>_direct" [(set (match_operand:VM 0 "register_operand" "=v,?wo") (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") (match_operand:VM 2 "register_operand" "v,0") @@ -4020,7 +4020,7 @@ "TARGET_P9_VECTOR") ;; Vector absolute difference unsigned -(define_insn "*p9_vadu<mode>3" +(define_insn "p9_vadu<mode>3" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] @@ -4184,6 +4184,49 @@ "vbpermd %0,%1,%2" [(set_attr "type" "vecsimple")]) +;; Support for SAD (sum of absolute differences). + +;; Due to saturating semantics, we can't combine the sum-across +;; with the vector accumulate in vsum4ubs. A vadduwm is needed. +(define_expand "usadv16qi" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V16QI 1 "register_operand")) + (use (match_operand:V16QI 2 "register_operand")) + (use (match_operand:V4SI 3 "register_operand"))] + "TARGET_P9_VECTOR" +{ + rtx absd = gen_reg_rtx (V16QImode); + rtx zero = gen_reg_rtx (V4SImode); + rtx psum = gen_reg_rtx (V4SImode); + + emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2])); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + emit_insn (gen_altivec_vsum4ubs (psum, absd, zero)); + emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); + DONE; +}) + +;; Since vsum4shs is saturating and further performs signed +;; arithmetic, we can't combine the sum-across with the vector +;; accumulate in vsum4shs. A vadduwm is needed. 
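;; As a rough scalar sketch (an illustration, not the RTL actually
;; generated), the halfword variant below computes:
;;   for (i = 0; i < 8; i++)
;;     psum[i/2] += abs (a[i] - b[i]);   -- vabsduh, then vsum4shs
;;   result[j] = psum[j] + acc[j];       -- vadduwm
;; with the final add kept separate to sidestep the saturation noted
;; above.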
+(define_expand "usadv8hi" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V8HI 1 "register_operand")) + (use (match_operand:V8HI 2 "register_operand")) + (use (match_operand:V4SI 3 "register_operand"))] + "TARGET_P9_VECTOR" +{ + rtx absd = gen_reg_rtx (V8HImode); + rtx zero = gen_reg_rtx (V4SImode); + rtx psum = gen_reg_rtx (V4SImode); + + emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2])); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + emit_insn (gen_altivec_vsum4shs (psum, absd, zero)); + emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); + DONE; +}) + ;; Decimal Integer operations (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) diff --git a/gcc/config/rs6000/power9.md b/gcc/config/rs6000/power9.md index 217864faaed..82e4b1cf65c 100644 --- a/gcc/config/rs6000/power9.md +++ b/gcc/config/rs6000/power9.md @@ -434,7 +434,13 @@ (and (eq_attr "type" "vecdiv") (eq_attr "size" "128") (eq_attr "cpu" "power9")) - "DU_super_power9,dfu_power9") + "DU_super_power9,dfu_power9*44") + +(define_insn_reservation "power9-qpmul" 24 + (and (eq_attr "type" "qmul") + (eq_attr "size" "128") + (eq_attr "cpu" "power9")) + "DU_super_power9,dfu_power9*12") (define_insn_reservation "power9-mffgpr" 2 (and (eq_attr "type" "mffgpr") diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index c8a425cba7e..cfb6e55edc0 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -660,48 +660,6 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ -/* IEEE 128-bit floating-point builtins. */ -#define BU_FLOAT128_2(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_FLOAT128, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_BINARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_FLOAT128_1(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_FLOAT128, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_UNARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -/* IEEE 128-bit floating-point builtins that need the ISA 3.0 hardware. */ -#define BU_FLOAT128_1_HW(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_FLOAT128_HW, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_UNARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_FLOAT128_2_HW(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_FLOAT128_HW, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_BINARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_FLOAT128_3_HW(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_FLOAT128_HW, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_TERNARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - /* Miscellaneous builtins for instructions added in ISA 3.0. These instructions don't require either the DFP or VSX options, just the basic ISA 3.0 enablement since they operate on general purpose registers. */ @@ -951,6 +909,51 @@ | RS6000_BTC_BINARY), \ CODE_FOR_nothing) /* ICODE */ +/* Built-in functions for IEEE 128-bit hardware floating point. IEEE 128-bit + hardware requires p9-vector and 64-bit operation. 
These functions use just + __builtin_ as the prefix. */ +#define BU_FLOAT128_HW_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (FLOAT128_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_FLOAT128_HW_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (FLOAT128_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_FLOAT128_HW_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (FLOAT128_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Built-in functions for IEEE 128-bit hardware floating point. These + functions use __builtin_vsx_ as the prefix. */ +#define BU_FLOAT128_HW_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_FLOAT128_HW_VSX_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128_HW, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #endif @@ -1121,6 +1124,7 @@ BU_ALTIVEC_2 (VSUM4SBS, "vsum4sbs", CONST, altivec_vsum4sbs) BU_ALTIVEC_2 (VSUM4SHS, "vsum4shs", CONST, altivec_vsum4shs) BU_ALTIVEC_2 (VSUM2SWS, "vsum2sws", CONST, altivec_vsum2sws) BU_ALTIVEC_2 (VSUMSWS, "vsumsws", CONST, altivec_vsumsws) +BU_ALTIVEC_2 (VSUMSWS_BE, "vsumsws_be", CONST, altivec_vsumsws_direct) BU_ALTIVEC_2 (VXOR, "vxor", CONST, xorv4si3) BU_ALTIVEC_2 (COPYSIGN_V4SF, "copysignfp", CONST, vector_copysignv4sf3) @@ -1770,14 +1774,6 @@ BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM) BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) - -BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM) -BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM) -BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM) -BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM) -BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM) -BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM) - BU_VSX_X (STXSDX, "stxsdx", MEM) BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM) BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM) @@ -1880,6 +1876,7 @@ BU_VSX_OVERLOAD_X (ST, "st") BU_VSX_OVERLOAD_X (XL, "xl") BU_VSX_OVERLOAD_X (XL_BE, "xl_be") BU_VSX_OVERLOAD_X (XST, "xst") +BU_VSX_OVERLOAD_X (XST_BE, "xst_be") /* 1 argument builtins pre ISA 2.04. */ BU_FP_MISC_1 (FCTID, "fctid", CONST, lrintdfdi2) @@ -1892,6 +1889,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST, cmpbdi3) /* 1 argument VSX instructions added in ISA 2.07. 
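   The revb_<mode> entries below back the overloaded vec_revb intrinsic,
   which reverses the bytes within each vector element.  A minimal usage
   sketch, assuming <altivec.h>:

     vector unsigned int v = { 0x11223344, 0, 0, 0 };
     vector unsigned int r = vec_revb (v);    r[0] == 0x44332211

   On ISA 3.0 this can use the xxbr* byte-reverse instructions; on
   earlier VSX hardware it expands to a vperm with a byte-swap selector.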
*/ BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) +BU_P8V_VSX_1 (REVB_V1TI, "revb_v1ti", CONST, revb_v1ti) +BU_P8V_VSX_1 (REVB_V2DI, "revb_v2di", CONST, revb_v2di) +BU_P8V_VSX_1 (REVB_V4SI, "revb_v4si", CONST, revb_v4si) +BU_P8V_VSX_1 (REVB_V8HI, "revb_v8hi", CONST, revb_v8hi) +BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi", CONST, revb_v16qi) +BU_P8V_VSX_1 (REVB_V2DF, "revb_v2df", CONST, revb_v2df) +BU_P8V_VSX_1 (REVB_V4SF, "revb_v4sf", CONST, revb_v4sf) /* 1 argument altivec instructions added in ISA 2.07. */ BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) @@ -2001,6 +2005,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh") BU_P8V_OVERLOAD_1 (VPOPCNTUW, "vpopcntuw") BU_P8V_OVERLOAD_1 (VPOPCNTUD, "vpopcntud") BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") +BU_P8V_OVERLOAD_1 (REVB, "revb") /* ISA 2.07 vector overloaded 2 argument functions. */ BU_P8V_OVERLOAD_2 (EQV, "eqv") @@ -2040,6 +2045,31 @@ BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv) BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, convert_4f32_8i16) +BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi", + CONST, first_match_index_v16qi) +BU_P9V_AV_2 (VFIRSTMATCHINDEX_V8HI, "first_match_index_v8hi", + CONST, first_match_index_v8hi) +BU_P9V_AV_2 (VFIRSTMATCHINDEX_V4SI, "first_match_index_v4si", + CONST, first_match_index_v4si) +BU_P9V_AV_2 (VFIRSTMATCHOREOSINDEX_V16QI, "first_match_or_eos_index_v16qi", + CONST, first_match_or_eos_index_v16qi) +BU_P9V_AV_2 (VFIRSTMATCHOREOSINDEX_V8HI, "first_match_or_eos_index_v8hi", + CONST, first_match_or_eos_index_v8hi) +BU_P9V_AV_2 (VFIRSTMATCHOREOSINDEX_V4SI, "first_match_or_eos_index_v4si", + CONST, first_match_or_eos_index_v4si) +BU_P9V_AV_2 (VFIRSTMISMATCHINDEX_V16QI, "first_mismatch_index_v16qi", + CONST, first_mismatch_index_v16qi) +BU_P9V_AV_2 (VFIRSTMISMATCHINDEX_V8HI, "first_mismatch_index_v8hi", + CONST, first_mismatch_index_v8hi) +BU_P9V_AV_2 (VFIRSTMISMATCHINDEX_V4SI, "first_mismatch_index_v4si", + CONST, first_mismatch_index_v4si) +BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V16QI, "first_mismatch_or_eos_index_v16qi", + CONST, first_mismatch_or_eos_index_v16qi) +BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V8HI, "first_mismatch_or_eos_index_v8hi", + CONST, first_mismatch_or_eos_index_v8hi) +BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, "first_mismatch_or_eos_index_v4si", + CONST, first_mismatch_or_eos_index_v4si) + /* ISA 3.0 vector overloaded 2-argument functions. 
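   Of the entries above, the *_or_eos ("end of string") variants also
   treat a zero element in either operand as a terminator, so a call
   such as vec_first_match_or_eos_index (a, b) yields the lesser of the
   first match index and the first zero-element index.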
*/ BU_P9V_OVERLOAD_2 (VSLV, "vslv") BU_P9V_OVERLOAD_2 (VSRV, "vsrv") @@ -2072,10 +2102,10 @@ BU_P9V_OVERLOAD_3 (RLMI, "rlmi") BU_P9V_64BIT_VSX_1 (VSEEDP, "scalar_extract_exp", CONST, xsxexpdp) BU_P9V_64BIT_VSX_1 (VSESDP, "scalar_extract_sig", CONST, xsxsigdp) -BU_P9V_64BIT_VSX_1 (VSEEQP, "scalar_extract_expq", CONST, xsxexpqp) -BU_P9V_64BIT_VSX_1 (VSESQP, "scalar_extract_sigq", CONST, xsxsigqp) +BU_FLOAT128_HW_VSX_1 (VSEEQP, "scalar_extract_expq", CONST, xsxexpqp_kf) +BU_FLOAT128_HW_VSX_1 (VSESQP, "scalar_extract_sigq", CONST, xsxsigqp_kf) -BU_P9V_VSX_1 (VSTDCNQP, "scalar_test_neg_qp", CONST, xststdcnegqp) +BU_FLOAT128_HW_VSX_1 (VSTDCNQP, "scalar_test_neg_qp", CONST, xststdcnegqp_kf) BU_P9V_VSX_1 (VSTDCNDP, "scalar_test_neg_dp", CONST, xststdcnegdp) BU_P9V_VSX_1 (VSTDCNSP, "scalar_test_neg_sp", CONST, xststdcnegsp) @@ -2091,15 +2121,15 @@ BU_P9V_VSX_1 (XXBRH_V8HI, "xxbrh_v8hi", CONST, p9_xxbrh_v8hi) BU_P9V_64BIT_VSX_2 (VSIEDP, "scalar_insert_exp", CONST, xsiexpdp) BU_P9V_64BIT_VSX_2 (VSIEDPF, "scalar_insert_exp_dp", CONST, xsiexpdpf) -BU_P9V_64BIT_VSX_2 (VSIEQP, "scalar_insert_exp_q", CONST, xsiexpqp) -BU_P9V_64BIT_VSX_2 (VSIEQPF, "scalar_insert_exp_qp", CONST, xsiexpqpf) +BU_FLOAT128_HW_VSX_2 (VSIEQP, "scalar_insert_exp_q", CONST, xsiexpqp_kf) +BU_FLOAT128_HW_VSX_2 (VSIEQPF, "scalar_insert_exp_qp", CONST, xsiexpqpf_kf) BU_P9V_VSX_2 (VSCEDPGT, "scalar_cmp_exp_dp_gt", CONST, xscmpexpdp_gt) BU_P9V_VSX_2 (VSCEDPLT, "scalar_cmp_exp_dp_lt", CONST, xscmpexpdp_lt) BU_P9V_VSX_2 (VSCEDPEQ, "scalar_cmp_exp_dp_eq", CONST, xscmpexpdp_eq) BU_P9V_VSX_2 (VSCEDPUO, "scalar_cmp_exp_dp_unordered", CONST, xscmpexpdp_unordered) -BU_P9V_VSX_2 (VSTDCQP, "scalar_test_data_class_qp", CONST, xststdcqp) +BU_FLOAT128_HW_VSX_2 (VSTDCQP, "scalar_test_data_class_qp", CONST, xststdcqp_kf) BU_P9V_VSX_2 (VSTDCDP, "scalar_test_data_class_dp", CONST, xststdcdp) BU_P9V_VSX_2 (VSTDCSP, "scalar_test_data_class_sp", CONST, xststdcsp) @@ -2112,12 +2142,15 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP, "scalar_test_neg_qp") BU_P9V_OVERLOAD_1 (VSTDCNDP, "scalar_test_neg_dp") BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") -BU_P9V_OVERLOAD_1 (REVB, "revb") - BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth") BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl") /* ISA 3.0 vector scalar overloaded 2 argument functions. */ +BU_P9V_OVERLOAD_2 (VFIRSTMATCHINDEX, "first_match_index") +BU_P9V_OVERLOAD_2 (VFIRSTMISMATCHINDEX, "first_mismatch_index") +BU_P9V_OVERLOAD_2 (VFIRSTMATCHOREOSINDEX, "first_match_or_eos_index") +BU_P9V_OVERLOAD_2 (VFIRSTMISMATCHOREOSINDEX, "first_mismatch_or_eos_index") + BU_P9V_OVERLOAD_2 (VSIEDP, "scalar_insert_exp") BU_P9V_OVERLOAD_2 (VSTDC, "scalar_test_data_class") @@ -2178,6 +2211,16 @@ BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b) BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) +/* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA + 3.0 (power9). 
*/ +BU_FLOAT128_HW_1 (SQRTF128_ODD, "sqrtf128_round_to_odd", FP, sqrtkf2_odd) +BU_FLOAT128_HW_1 (TRUNCF128_ODD, "truncf128_round_to_odd", FP, trunckfdf2_odd) +BU_FLOAT128_HW_2 (ADDF128_ODD, "addf128_round_to_odd", FP, addkf3_odd) +BU_FLOAT128_HW_2 (SUBF128_ODD, "subf128_round_to_odd", FP, subkf3_odd) +BU_FLOAT128_HW_2 (MULF128_ODD, "mulf128_round_to_odd", FP, mulkf3_odd) +BU_FLOAT128_HW_2 (DIVF128_ODD, "divf128_round_to_odd", FP, divkf3_odd) +BU_FLOAT128_HW_3 (FMAF128_ODD, "fmaf128_round_to_odd", FP, fmakf4_odd) + /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) @@ -2185,7 +2228,9 @@ BU_P9V_64BIT_AV_X (XST_LEN_R, "xst_len_r", MISC) /* 1 argument vector functions added in ISA 3.0 (power9). */ BU_P9V_AV_1 (VCLZLSBB, "vclzlsbb", CONST, vclzlsbb) -BU_P9V_AV_1 (VCTZLSBB, "vctzlsbb", CONST, vctzlsbb) +BU_P9V_AV_1 (VCTZLSBB_V16QI, "vctzlsbb_v16qi", CONST, vctzlsbb_v16qi) +BU_P9V_AV_1 (VCTZLSBB_V8HI, "vctzlsbb_v8hi", CONST, vctzlsbb_v8hi) +BU_P9V_AV_1 (VCTZLSBB_V4SI, "vctzlsbb_v4si", CONST, vctzlsbb_v4si) /* Built-in support for Power9 "VSU option" string operations includes new awareness of the "vector compare not equal" (vcmpneb, vcmpneb., @@ -2365,23 +2410,6 @@ BU_P9_64BIT_2 (CMPEQB, "byte_in_set", CONST, cmpeqb) BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range") BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") - -/* 1 and 2 argument IEEE 128-bit floating-point functions. These functions use - the older 'q' suffix from libquadmath. The standard built-in functions - support fabsf128 and copysignf128, but older code used these 'q' versions, - so keep them around. */ -BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) -BU_FLOAT128_2 (COPYSIGNQ, "copysignq", CONST, copysignkf3) - -/* 1, 2, and 3 argument IEEE 128-bit floating point functions that require ISA - 3.0 hardware. These functions use the new 'f128' suffix. */ -BU_FLOAT128_1_HW (SQRTF128_ODD, "sqrtf128_round_to_odd", CONST, sqrtkf2_odd) -BU_FLOAT128_1_HW (TRUNCF128_ODD, "truncf128_round_to_odd", CONST, trunckfdf2_odd) -BU_FLOAT128_2_HW (ADDF128_ODD, "addf128_round_to_odd", CONST, addkf3_odd) -BU_FLOAT128_2_HW (SUBF128_ODD, "subf128_round_to_odd", CONST, subkf3_odd) -BU_FLOAT128_2_HW (MULF128_ODD, "mulf128_round_to_odd", CONST, mulkf3_odd) -BU_FLOAT128_2_HW (DIVF128_ODD, "divf128_round_to_odd", CONST, divkf3_odd) -BU_FLOAT128_3_HW (FMAF128_ODD, "fmaf128_round_to_odd", CONST, fmakf4_odd) /* 1 argument crypto functions. */ BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) @@ -2517,18 +2545,6 @@ BU_SPECIAL_X (RS6000_BUILTIN_CPU_IS, "__builtin_cpu_is", BU_SPECIAL_X (RS6000_BUILTIN_CPU_SUPPORTS, "__builtin_cpu_supports", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) -BU_SPECIAL_X (RS6000_BUILTIN_NANQ, "__builtin_nanq", - RS6000_BTM_FLOAT128, RS6000_BTC_CONST) - -BU_SPECIAL_X (RS6000_BUILTIN_NANSQ, "__builtin_nansq", - RS6000_BTM_FLOAT128, RS6000_BTC_CONST) - -BU_SPECIAL_X (RS6000_BUILTIN_INFQ, "__builtin_infq", - RS6000_BTM_FLOAT128, RS6000_BTC_CONST) - -BU_SPECIAL_X (RS6000_BUILTIN_HUGE_VALQ, "__builtin_huge_valq", - RS6000_BTM_FLOAT128, RS6000_BTC_CONST) - /* Darwin CfString builtin. 
*/ BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 5c4b7664430..301ca172207 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -685,6 +685,17 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp"); } + /* Map the old _Float128 'q' builtins into the new 'f128' builtins. */ + if (TARGET_FLOAT128_TYPE) + { + builtin_define ("__builtin_fabsq=__builtin_fabsf128"); + builtin_define ("__builtin_copysignq=__builtin_copysignf128"); + builtin_define ("__builtin_nanq=__builtin_nanf128"); + builtin_define ("__builtin_nansq=__builtin_nansf128"); + builtin_define ("__builtin_infq=__builtin_inff128"); + builtin_define ("__builtin_huge_valq=__builtin_huge_valf128"); + } + /* Tell users they can use __builtin_bswap{16,64}. */ builtin_define ("__HAVE_BSWAP__"); @@ -2402,6 +2413,62 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMATCHOREOSINDEX, P9V_BUILTIN_VFIRSTMATCHOREOSINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHINDEX, P9V_BUILTIN_VFIRSTMISMATCHINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V16QI, + RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V16QI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V8HI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VFIRSTMISMATCHOREOSINDEX, + P9V_BUILTIN_VFIRSTMISMATCHOREOSINDEX_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, @@ -3046,69 +3113,94 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, - 
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, - RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, - RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, - RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, - RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, - RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, - RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, + + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, - RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF, - RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 
}, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -3884,53 +3976,111 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, 
RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + { 
VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, @@ -5180,10 +5330,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCLZLSBB, P9V_BUILTIN_VCLZLSBB, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB_V16QI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB_V16QI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB_V8HI, + RS6000_BTI_INTSI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_V4SI, 0, 0 }, { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, @@ -5553,36 +5707,38 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DF, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SF, - RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, @@ -5688,12 +5844,22 @@ rs6000_builtin_type (int id) return id < 0 ? build_pointer_type (t) : t; } -/* Check whether the type of an argument, T, is compatible with a - type ID stored into a struct altivec_builtin_types. Integer - types are considered compatible; otherwise, the language hook - lang_hooks.types_compatible_p makes the decision. */ +/* Check whether the type of an argument, T, is compatible with a type ID + stored into a struct altivec_builtin_types. Integer types are considered + compatible; otherwise, the language hook lang_hooks.types_compatible_p makes + the decision. Also allow long double and _Float128 to be compatible if + -mabi=ieeelongdouble. 
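   For example, a hypothetical snippet such as

     long double ld = 2.0L;
     __float128 q = __builtin_sqrtf128_round_to_odd (ld);

   is accepted when long double is IEEE 128-bit, because both types
   satisfy is_float128_p below.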
*/ static inline bool +is_float128_p (tree t) +{ + return (t == float128_type_node + || (TARGET_IEEEQUAD + && TARGET_LONG_DOUBLE_128 + && t == long_double_type_node)); +} + +static inline bool rs6000_builtin_type_compatible (tree t, int id) { tree builtin_type; @@ -5702,6 +5868,9 @@ rs6000_builtin_type_compatible (tree t, int id) return false; if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (builtin_type)) return true; + else if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 + && is_float128_p (t) && is_float128_p (builtin_type)) + return true; else return lang_hooks.types_compatible_p (t, builtin_type); } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 721b906ee65..07288000705 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -136,6 +136,8 @@ extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); extern void rs6000_split_signbit (rtx, rtx); extern void rs6000_expand_atomic_compare_and_swap (rtx op[]); +extern rtx swap_endian_selector_for_mode (machine_mode mode); + extern void rs6000_expand_atomic_exchange (rtx op[]); extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6402c0386a6..2d739fef57c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -173,6 +173,7 @@ typedef struct GTY(()) machine_function bool gpr_is_wrapped_separately[32]; bool fpr_is_wrapped_separately[32]; bool lr_is_wrapped_separately; + bool toc_is_wrapped_separately; } machine_function; /* Support targetm.vectorize.builtin_mask_for_load. */ @@ -4428,6 +4429,13 @@ rs6000_option_override_internal (bool global_init_p) && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; + /* If we can shrink-wrap the TOC register save separately, then use + -msave-toc-indirect unless explicitly disabled. */ + if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0 + && flag_shrink_wrap_separate + && optimize_function_for_speed_p (cfun)) + rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT; + /* Enable power8 fusion if we are tuning for power8, even if we aren't generating power8 instructions. */ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) @@ -4800,10 +4808,7 @@ rs6000_option_override_internal (bool global_init_p) /* For the E500 family of cores, reset the single/double FP flags to let us check that they remain constant across attributes or pragmas. Also, clear a possible request for string instructions, not supported and which - we might have silently queried above for -Os. - - For other families, clear ISEL in case it was set implicitly. - */ + we might have silently queried above for -Os. 
*/ switch (rs6000_cpu) { @@ -4813,19 +4818,12 @@ rs6000_option_override_internal (bool global_init_p) case PROCESSOR_PPCE500MC64: case PROCESSOR_PPCE5500: case PROCESSOR_PPCE6500: - rs6000_single_float = 0; rs6000_double_float = 0; - rs6000_isa_flags &= ~OPTION_MASK_STRING; - break; default: - - if (cpu_index >= 0 && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) - rs6000_isa_flags &= ~OPTION_MASK_ISEL; - break; } @@ -14088,7 +14086,8 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) return CONST0_RTX (tmode); } } - else if (icode == CODE_FOR_xststdcqp + else if (icode == CODE_FOR_xststdcqp_kf + || icode == CODE_FOR_xststdcqp_tf || icode == CODE_FOR_xststdcdp || icode == CODE_FOR_xststdcsp || icode == CODE_FOR_xvtstdcdp @@ -14305,6 +14304,44 @@ swap_selector_for_mode (machine_mode mode) return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); } +rtx +swap_endian_selector_for_mode (machine_mode mode) +{ + unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; + unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12}; + unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}; + + unsigned int *swaparray, i; + rtx perm[16]; + + switch (mode) + { + case E_V1TImode: + swaparray = swap1; + break; + case E_V2DFmode: + case E_V2DImode: + swaparray = swap2; + break; + case E_V4SFmode: + case E_V4SImode: + swaparray = swap4; + break; + case E_V8HImode: + swaparray = swap8; + break; + default: + gcc_unreachable (); + } + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (swaparray[i]); + + return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); +} + /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target with -maltivec=be specified. Issue the load followed by an element- reversing permute. */ @@ -14443,58 +14480,6 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) } static rtx -altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk) -{ - rtx pat, addr; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode0 = Pmode; - machine_mode mode1 = Pmode; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - - if (icode == CODE_FOR_nothing) - /* Builtin not supported on this processor. */ - return 0; - - /* If we got invalid arguments bail out before generating bad rtl. */ - if (arg0 == error_mark_node || arg1 == error_mark_node) - return const0_rtx; - - if (target == 0 - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - op1 = copy_to_mode_reg (mode1, op1); - - if (op0 == const0_rtx) - addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); - else - { - op0 = copy_to_mode_reg (mode0, op0); - addr = gen_rtx_MEM (blk ? 
BLKmode : tmode, - gen_rtx_PLUS (Pmode, op1, op0)); - } - - pat = GEN_FCN (icode) (target, addr); - if (!pat) - return 0; - - emit_insn (pat); - /* Reverse element order of elements if in LE mode */ - if (!VECTOR_ELT_ORDER_BIG) - { - rtx sel = swap_selector_for_mode (tmode); - rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel), - UNSPEC_VPERM); - emit_insn (gen_rtx_SET (target, vperm)); - } - return target; -} - -static rtx paired_expand_stv_builtin (enum insn_code icode, tree exp) { tree arg0 = CALL_EXPR_ARG (exp, 0); @@ -15890,50 +15875,6 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) /* Fall through. */ } - /* XL_BE We initialized them to always load in big endian order. */ - switch (fcode) - { - case VSX_BUILTIN_XL_BE_V2DI: - { - enum insn_code code = CODE_FOR_vsx_load_v2di; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V4SI: - { - enum insn_code code = CODE_FOR_vsx_load_v4si; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V8HI: - { - enum insn_code code = CODE_FOR_vsx_load_v8hi; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V16QI: - { - enum insn_code code = CODE_FOR_vsx_load_v16qi; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V2DF: - { - enum insn_code code = CODE_FOR_vsx_load_v2df; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V4SF: - { - enum insn_code code = CODE_FOR_vsx_load_v4sf; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - default: - break; - /* Fall through. */ - } - *expandedp = false; return NULL_RTX; } @@ -16110,39 +16051,11 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode) from ia64.c. */ static tree -rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, - tree *args, bool ignore ATTRIBUTE_UNUSED) +rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED, + int n_args ATTRIBUTE_UNUSED, + tree *args ATTRIBUTE_UNUSED, + bool ignore ATTRIBUTE_UNUSED) { - if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) - { - enum rs6000_builtins fn_code - = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); - switch (fn_code) - { - case RS6000_BUILTIN_NANQ: - case RS6000_BUILTIN_NANSQ: - { - tree type = TREE_TYPE (TREE_TYPE (fndecl)); - const char *str = c_getstr (*args); - int quiet = fn_code == RS6000_BUILTIN_NANQ; - REAL_VALUE_TYPE real; - - if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) - return build_real (type, real); - return NULL_TREE; - } - case RS6000_BUILTIN_INFQ: - case RS6000_BUILTIN_HUGE_VALQ: - { - tree type = TREE_TYPE (TREE_TYPE (fndecl)); - REAL_VALUE_TYPE inf; - real_inf (&inf); - return build_real (type, inf); - } - default: - break; - } - } #ifdef SUBTARGET_FOLD_BUILTIN return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); #else @@ -16169,6 +16082,36 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code) } } +/* Helper function to handle the gimple folding of a vector compare + operation. This sets up true/false vectors, and uses the + VEC_COND_EXPR operation. + CODE indicates which comparison is to be made. (EQ, GT, ...). + TYPE indicates the type of the result. 
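   As an illustrative sketch, folding vec_cmpeq (a, b) for V4SI builds
   GIMPLE along the lines of:

     cmp = a == b;                                  (boolean vector)
     lhs = VEC_COND_EXPR <cmp, { -1,-1,-1,-1 }, { 0,0,0,0 }>;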
*/ +static tree +fold_build_vec_cmp (tree_code code, tree type, + tree arg0, tree arg1) +{ + tree cmp_type = build_same_sized_truth_vector_type (type); + tree zero_vec = build_zero_cst (type); + tree minus_one_vec = build_minus_one_cst (type); + tree cmp = fold_build2 (code, cmp_type, arg0, arg1); + return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec); +} + +/* Helper function to handle the in-between steps for the + vector compare built-ins. */ +static void +fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt) +{ + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + tree lhs = gimple_call_lhs (stmt); + tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1); + gimple *g = gimple_build_assign (lhs, cmp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); +} + /* Fold a machine-dependent built-in in GIMPLE. (For folding into a constant, use rs6000_fold_builtin.) */ @@ -16664,6 +16607,53 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) return true; } + /* Vector compares; EQ, NE, GE, GT, LE. */ + case ALTIVEC_BUILTIN_VCMPEQUB: + case ALTIVEC_BUILTIN_VCMPEQUH: + case ALTIVEC_BUILTIN_VCMPEQUW: + case P8V_BUILTIN_VCMPEQUD: + fold_compare_helper (gsi, EQ_EXPR, stmt); + return true; + + case P9V_BUILTIN_CMPNEB: + case P9V_BUILTIN_CMPNEH: + case P9V_BUILTIN_CMPNEW: + fold_compare_helper (gsi, NE_EXPR, stmt); + return true; + + case VSX_BUILTIN_CMPGE_16QI: + case VSX_BUILTIN_CMPGE_U16QI: + case VSX_BUILTIN_CMPGE_8HI: + case VSX_BUILTIN_CMPGE_U8HI: + case VSX_BUILTIN_CMPGE_4SI: + case VSX_BUILTIN_CMPGE_U4SI: + case VSX_BUILTIN_CMPGE_2DI: + case VSX_BUILTIN_CMPGE_U2DI: + fold_compare_helper (gsi, GE_EXPR, stmt); + return true; + + case ALTIVEC_BUILTIN_VCMPGTSB: + case ALTIVEC_BUILTIN_VCMPGTUB: + case ALTIVEC_BUILTIN_VCMPGTSH: + case ALTIVEC_BUILTIN_VCMPGTUH: + case ALTIVEC_BUILTIN_VCMPGTSW: + case ALTIVEC_BUILTIN_VCMPGTUW: + case P8V_BUILTIN_VCMPGTUD: + case P8V_BUILTIN_VCMPGTSD: + fold_compare_helper (gsi, GT_EXPR, stmt); + return true; + + case VSX_BUILTIN_CMPLE_16QI: + case VSX_BUILTIN_CMPLE_U16QI: + case VSX_BUILTIN_CMPLE_8HI: + case VSX_BUILTIN_CMPLE_U8HI: + case VSX_BUILTIN_CMPLE_4SI: + case VSX_BUILTIN_CMPLE_U4SI: + case VSX_BUILTIN_CMPLE_2DI: + case VSX_BUILTIN_CMPLE_U2DI: + fold_compare_helper (gsi, LE_EXPR, stmt); + return true; + default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", @@ -16695,10 +16685,37 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, bool success; HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask; bool func_valid_p = ((rs6000_builtin_mask & mask) == mask); + enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; + + /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit + floating point type, depending on whether long double is the IBM extended + double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if + we only define one variant of the built-in function, and switch the code + when defining it, rather than defining two built-ins and using the + overload table in rs6000-c.c to switch between the two. 
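   For example, the single __builtin_addf128_round_to_odd decl is
   registered with CODE_FOR_addkf3_odd, and the switch below simply
   rewrites that to CODE_FOR_addtf3_odd when long double is IEEE
   128-bit, instead of registering a second built-in.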
*/ + if (FLOAT128_IEEE_P (TFmode)) + switch (icode) + { + default: + break; + + case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break; + case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break; + case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break; + case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break; + case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break; + case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break; + case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break; + case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break; + case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break; + case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break; + case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break; + case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break; + case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break; + } if (TARGET_DEBUG_BUILTIN) { - enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; const char *name1 = rs6000_builtin_info[uns_fcode].name; const char *name2 = (icode != CODE_FOR_nothing) ? get_insn_name ((int) icode) @@ -16777,10 +16794,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case ALTIVEC_BUILTIN_MASK_FOR_LOAD: case ALTIVEC_BUILTIN_MASK_FOR_STORE: { - int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct : (int) CODE_FOR_altivec_lvsl_direct); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode = insn_data[icode].operand[1].mode; + machine_mode tmode = insn_data[icode2].operand[0].mode; + machine_mode mode = insn_data[icode2].operand[1].mode; tree arg; rtx op, addr, pat; @@ -16802,10 +16819,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (target == 0 || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + || ! (*insn_data[icode2].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op); + pat = GEN_FCN (icode2) (target, op); if (!pat) return 0; emit_insn (pat); @@ -16863,25 +16880,25 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, d = bdesc_1arg; for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) if (d->code == fcode) - return rs6000_expand_unop_builtin (d->icode, exp, target); + return rs6000_expand_unop_builtin (icode, exp, target); /* Handle simple binary operations. */ d = bdesc_2arg; for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) if (d->code == fcode) - return rs6000_expand_binop_builtin (d->icode, exp, target); + return rs6000_expand_binop_builtin (icode, exp, target); /* Handle simple ternary operations. */ d = bdesc_3arg; for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) if (d->code == fcode) - return rs6000_expand_ternop_builtin (d->icode, exp, target); + return rs6000_expand_ternop_builtin (icode, exp, target); /* Handle simple no-argument operations. 
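     (e.g. __builtin_darn, the ISA 3.0 deliver-a-random-number built-in,
     which takes no arguments).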
*/ d = bdesc_0arg; for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) if (d->code == fcode) - return rs6000_expand_zeroop_builtin (d->icode, target); + return rs6000_expand_zeroop_builtin (icode, target); gcc_unreachable (); } @@ -17103,15 +17120,6 @@ rs6000_init_builtins (void) if (TARGET_EXTRA_BUILTINS || TARGET_PAIRED_FLOAT) rs6000_common_init_builtins (); - ftype = build_function_type_list (ieee128_float_type_node, - const_str_type_node, NULL_TREE); - def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ); - def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ); - - ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE); - def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ); - def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ); - ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode, RS6000_BUILTIN_RECIP, "__builtin_recipdiv"); def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP); @@ -17527,6 +17535,10 @@ altivec_init_builtins (void) VSX_BUILTIN_LD_ELEMREV_V4SF); def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid, VSX_BUILTIN_LD_ELEMREV_V4SI); + def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V16QI); def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V2DF); def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid, @@ -17535,42 +17547,10 @@ altivec_init_builtins (void) VSX_BUILTIN_ST_ELEMREV_V4SF); def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V4SI); - - def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V8HI); - def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V4SI); - def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V2DI); - def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V4SF); - def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V2DF); - def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V16QI); - - if (TARGET_P9_VECTOR) - { - def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, - VSX_BUILTIN_LD_ELEMREV_V8HI); - def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, - VSX_BUILTIN_LD_ELEMREV_V16QI); - def_builtin ("__builtin_vsx_st_elemrev_v8hi", - void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI); - def_builtin ("__builtin_vsx_st_elemrev_v16qi", - void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI); - } - else - { - rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI]; - } + def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V16QI); def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, VSX_BUILTIN_VEC_LD); @@ 
-17582,6 +17562,8 @@ altivec_init_builtins (void) VSX_BUILTIN_VEC_XL_BE); def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, VSX_BUILTIN_VEC_XST); + def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid, + VSX_BUILTIN_VEC_XST_BE); def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP); def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS); @@ -18065,7 +18047,7 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, are type correct. */ switch (builtin) { - /* unsigned 1 argument functions. */ + /* unsigned 1 argument functions. */ case CRYPTO_BUILTIN_VSBOX: case P8V_BUILTIN_VGBBD: case MISC_BUILTIN_CDTBCD: @@ -18074,7 +18056,7 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[1] = 1; break; - /* unsigned 2 argument functions. */ + /* unsigned 2 argument functions. */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: case ALTIVEC_BUILTIN_VMULEUW: @@ -18109,7 +18091,7 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[2] = 1; break; - /* unsigned 3 argument functions. */ + /* unsigned 3 argument functions. */ case ALTIVEC_BUILTIN_VPERM_16QI_UNS: case ALTIVEC_BUILTIN_VPERM_8HI_UNS: case ALTIVEC_BUILTIN_VPERM_4SI_UNS: @@ -18140,7 +18122,7 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[3] = 1; break; - /* signed permute functions with unsigned char mask. */ + /* signed permute functions with unsigned char mask. */ case ALTIVEC_BUILTIN_VPERM_16QI: case ALTIVEC_BUILTIN_VPERM_8HI: case ALTIVEC_BUILTIN_VPERM_4SI: @@ -18156,14 +18138,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[3] = 1; break; - /* unsigned args, signed return. */ + /* unsigned args, signed return. */ case VSX_BUILTIN_XVCVUXDSP: case VSX_BUILTIN_XVCVUXDDP_UNS: case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF: h.uns_p[1] = 1; break; - /* signed args, unsigned return. */ + /* signed args, unsigned return. */ case VSX_BUILTIN_XVCVDPUXDS_UNS: case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI: case MISC_BUILTIN_UNPACK_TD: @@ -18171,14 +18153,31 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, h.uns_p[0] = 1; break; - /* unsigned arguments for 128-bit pack instructions. */ + /* unsigned arguments, bool return (compares). */ + case ALTIVEC_BUILTIN_VCMPEQUB: + case ALTIVEC_BUILTIN_VCMPEQUH: + case ALTIVEC_BUILTIN_VCMPEQUW: + case P8V_BUILTIN_VCMPEQUD: + case VSX_BUILTIN_CMPGE_U16QI: + case VSX_BUILTIN_CMPGE_U8HI: + case VSX_BUILTIN_CMPGE_U4SI: + case VSX_BUILTIN_CMPGE_U2DI: + case ALTIVEC_BUILTIN_VCMPGTUB: + case ALTIVEC_BUILTIN_VCMPGTUH: + case ALTIVEC_BUILTIN_VCMPGTUW: + case P8V_BUILTIN_VCMPGTUD: + h.uns_p[1] = 1; + h.uns_p[2] = 1; + break; + + /* unsigned arguments for 128-bit pack instructions. */ case MISC_BUILTIN_PACK_TD: case MISC_BUILTIN_PACK_V1TI: h.uns_p[1] = 1; h.uns_p[2] = 1; break; - /* unsigned second arguments (vector shift right). */ + /* unsigned second arguments (vector shift right). */ case ALTIVEC_BUILTIN_VSRB: case ALTIVEC_BUILTIN_VSRH: case ALTIVEC_BUILTIN_VSRW: @@ -26638,6 +26637,7 @@ rs6000_get_separate_components (void) && !(info->savres_strategy & REST_MULTIPLE)); /* Component 0 is the save/restore of LR (done via GPR0). + Component 2 is the save of the TOC (GPR2). Components 13..31 are the save/restore of GPR13..GPR31. Components 46..63 are the save/restore of FPR14..FPR31. 
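+     So, for example, a function that must save LR, the TOC, and r31
+     exposes components {0, 2, 31}, and shrink-wrapping can then sink
+     each save/restore into just the blocks that need it.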
*/ @@ -26712,6 +26712,10 @@ rs6000_get_separate_components (void) bitmap_set_bit (components, 0); } + /* Optimize saving the TOC. This is component 2. */ + if (cfun->machine->save_toc_in_prologue) + bitmap_set_bit (components, 2); + return components; } @@ -26750,6 +26754,12 @@ rs6000_components_for_bb (basic_block bb) || bitmap_bit_p (kill, LR_REGNO)) bitmap_set_bit (components, 0); + /* The TOC save. */ + if (bitmap_bit_p (in, TOC_REGNUM) + || bitmap_bit_p (gen, TOC_REGNUM) + || bitmap_bit_p (kill, TOC_REGNUM)) + bitmap_set_bit (components, 2); + return components; } @@ -26804,6 +26814,14 @@ rs6000_emit_prologue_components (sbitmap components) add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr)); } + /* Prologue for TOC. */ + if (bitmap_bit_p (components, 2)) + { + rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); + rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT)); + } + /* Prologue for the GPRs. */ int offset = info->gp_save_offset; if (info->push_p) @@ -26928,6 +26946,9 @@ rs6000_set_handled_components (sbitmap components) if (bitmap_bit_p (components, 0)) cfun->machine->lr_is_wrapped_separately = true; + + if (bitmap_bit_p (components, 2)) + cfun->machine->toc_is_wrapped_separately = true; } /* VRSAVE is a bit vector representing which AltiVec registers @@ -27885,7 +27906,8 @@ rs6000_emit_prologue (void) unwinder to interpret it. R2 changes, apart from the calls_eh_return case earlier in this function, are handled by linux-unwind.h frob_update_context. */ - if (rs6000_save_toc_in_prologue_p ()) + if (rs6000_save_toc_in_prologue_p () + && !cfun->machine->toc_is_wrapped_separately) { rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT)); @@ -35040,6 +35062,8 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) case TYPE_SYNC: case TYPE_LOAD_L: + case TYPE_MFCR: + case TYPE_MFCRF: cost = COSTS_N_INSNS (n + 2); break; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ed5ff397e07..276ad8a32e8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -182,7 +182,7 @@ cmp, branch,jmpreg,mfjmpr,mtjmpr,trap,isync,sync,load_l,store_c, cr_logical,delayed_cr,mfcr,mfcrf,mtcr, - fpcompare,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt, + fpcompare,fp,fpsimple,dmul,qmul,sdiv,ddiv,ssqrt,dsqrt, vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm, vecfloat,vecfdiv,vecdouble,mffgpr,mftgpr,crypto, veclogical,veccmpfx,vecexts,vecmove, @@ -323,6 +323,9 @@ ; of whole values in GPRs. (define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")]) +; And again, for patterns that need two (potentially) different integer modes. +(define_mode_iterator GPR2 [SI (DI "TARGET_POWERPC64")]) + ; Any supported integer mode. 
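+; (For instance, the <code><GPR:mode><GPR2:mode>2_isel patterns use GPR
+; for the result and GPR2 for the comparison, so eqdisi2_isel can
+; produce a DImode 0/1 result from an SImode compare.)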
(define_mode_iterator INT [QI HI SI DI TI PTI]) @@ -2429,13 +2432,15 @@ [(set_attr "type" "store")]) (define_insn_and_split "bswaphi2_reg" - [(set (match_operand:HI 0 "gpc_reg_operand" "=&r") + [(set (match_operand:HI 0 "gpc_reg_operand" "=&r,wo") (bswap:HI - (match_operand:HI 1 "gpc_reg_operand" "r"))) - (clobber (match_scratch:SI 2 "=&r"))] + (match_operand:HI 1 "gpc_reg_operand" "r,wo"))) + (clobber (match_scratch:SI 2 "=&r,X"))] "" - "#" - "reload_completed" + "@ + # + xxbrh %x0,%x1" + "reload_completed && int_reg_operand (operands[0], HImode)" [(set (match_dup 3) (and:SI (lshiftrt:SI (match_dup 4) (const_int 8)) @@ -2451,18 +2456,20 @@ operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0); operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0); } - [(set_attr "length" "12") - (set_attr "type" "*")]) + [(set_attr "length" "12,4") + (set_attr "type" "*,vecperm")]) ;; We are always BITS_BIG_ENDIAN, so the bit positions below in ;; zero_extract insns do not change for -mlittle. (define_insn_and_split "bswapsi2_reg" - [(set (match_operand:SI 0 "gpc_reg_operand" "=&r") + [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,wo") (bswap:SI - (match_operand:SI 1 "gpc_reg_operand" "r")))] + (match_operand:SI 1 "gpc_reg_operand" "r,wo")))] "" - "#" - "reload_completed" + "@ + # + xxbrw %x0,%x1" + "reload_completed && int_reg_operand (operands[0], SImode)" [(set (match_dup 0) ; DABC (rotate:SI (match_dup 1) (const_int 24))) @@ -2478,7 +2485,9 @@ (const_int 255)) (and:SI (match_dup 0) (const_int -256))))] - "") + "" + [(set_attr "length" "12,4") + (set_attr "type" "*,vecperm")]) ;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like ;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more @@ -2504,6 +2513,8 @@ emit_insn (gen_bswapdi2_load (dest, src)); else if (MEM_P (dest)) emit_insn (gen_bswapdi2_store (dest, src)); + else if (TARGET_P9_VECTOR) + emit_insn (gen_bswapdi2_xxbrd (dest, src)); else emit_insn (gen_bswapdi2_reg (dest, src)); DONE; @@ -2534,12 +2545,19 @@ "stdbrx %1,%y0" [(set_attr "type" "store")]) +(define_insn "bswapdi2_xxbrd" + [(set (match_operand:DI 0 "gpc_reg_operand" "=wo") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "wo")))] + "TARGET_P9_VECTOR" + "xxbrd %x0,%x1" + [(set_attr "type" "vecperm")]) + (define_insn "bswapdi2_reg" [(set (match_operand:DI 0 "gpc_reg_operand" "=&r") (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r"))) (clobber (match_scratch:DI 2 "=&r")) (clobber (match_scratch:DI 3 "=&r"))] - "TARGET_POWERPC64 && TARGET_LDBRX" + "TARGET_POWERPC64 && TARGET_LDBRX && !TARGET_P9_VECTOR" "#" [(set_attr "length" "36")]) @@ -2688,7 +2706,7 @@ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" ""))) (clobber (match_operand:DI 2 "gpc_reg_operand" "")) (clobber (match_operand:DI 3 "gpc_reg_operand" ""))] - "TARGET_POWERPC64 && reload_completed" + "TARGET_POWERPC64 && !TARGET_P9_VECTOR && reload_completed" [(const_int 0)] " { @@ -11780,13 +11798,9 @@ (clobber (match_operand:GPR 0 "gpc_reg_operand"))] "" { - /* Use ISEL if the user asked for it. */ - if (TARGET_ISEL) - rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); - /* Expanding EQ and NE directly to some machine instructions does not help but does hurt combine. So don't. 
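     (What eq<mode>3 expands to is roughly
        tmp = x ^ y;  result = clz (tmp) >> 5;
      for SImode, with >> 6 for DImode, which combine handles well.)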
*/ - else if (GET_CODE (operands[1]) == EQ) + if (GET_CODE (operands[1]) == EQ) emit_insn (gen_eq<mode>3 (operands[0], operands[2], operands[3])); else if (<MODE>mode == Pmode && GET_CODE (operands[1]) == NE) @@ -11798,7 +11812,11 @@ emit_insn (gen_xor<mode>3 (operands[0], tmp, const1_rtx)); } - /* Expanding the unsigned comparisons however helps a lot: all the neg_ltu + /* If ISEL is fast, expand to it. */ + else if (TARGET_ISEL) + rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); + + /* Expanding the unsigned comparisons helps a lot: all the neg_ltu etc. combinations magically work out just right. */ else if (<MODE>mode == Pmode && unsigned_comparison_operator (operands[1], VOIDmode)) @@ -12280,18 +12298,102 @@ "") +(define_code_iterator cmp [eq ne lt ltu gt gtu le leu ge geu]) +(define_code_attr UNS [(eq "CC") + (ne "CC") + (lt "CC") (ltu "CCUNS") + (gt "CC") (gtu "CCUNS") + (le "CC") (leu "CCUNS") + (ge "CC") (geu "CCUNS")]) +(define_code_attr UNSu_ [(eq "") + (ne "") + (lt "") (ltu "u_") + (gt "") (gtu "u_") + (le "") (leu "u_") + (ge "") (geu "u_")]) +(define_code_attr UNSIK [(eq "I") + (ne "I") + (lt "I") (ltu "K") + (gt "I") (gtu "K") + (le "I") (leu "K") + (ge "I") (geu "K")]) + +(define_insn_and_split "<code><GPR:mode><GPR2:mode>2_isel" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (cmp:GPR (match_operand:GPR2 1 "gpc_reg_operand" "r") + (match_operand:GPR2 2 "reg_or_<cmp:UNSu_>short_operand" "r<cmp:UNSIK>"))) + (clobber (match_scratch:GPR 3 "=r")) + (clobber (match_scratch:GPR 4 "=r")) + (clobber (match_scratch:<UNS> 5 "=y"))] + "TARGET_ISEL + && !(<CODE> == EQ && operands[2] == const0_rtx) + && !(<CODE> == NE && operands[2] == const0_rtx + && <GPR:MODE>mode == Pmode && <GPR2:MODE>mode == Pmode)" + "#" + "&& 1" + [(pc)] +{ + if (<CODE> == NE || <CODE> == LE || <CODE> == GE + || <CODE> == LEU || <CODE> == GEU) + operands[3] = const0_rtx; + else + { + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<GPR:MODE>mode); + emit_move_insn (operands[3], const0_rtx); + } + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (<GPR:MODE>mode); + emit_move_insn (operands[4], const1_rtx); + + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<UNS>mode); + + rtx c1 = gen_rtx_COMPARE (<UNS>mode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (operands[5], c1)); + + rtx c2 = gen_rtx_fmt_ee (<CODE>, <GPR:MODE>mode, operands[5], const0_rtx); + rtx x = gen_rtx_IF_THEN_ELSE (<GPR:MODE>mode, c2, operands[4], operands[3]); + emit_move_insn (operands[0], x); + + DONE; +} + [(set (attr "cost") + (if_then_else (match_test "<CODE> == NE || <CODE> == LE || <CODE> == GE + || <CODE> == LEU || <CODE> == GEU") + (const_string "9") + (const_string "10")))]) + (define_mode_attr scc_eq_op2 [(SI "rKLI") (DI "rKJI")]) -(define_insn_and_split "eq<mode>3" +(define_expand "eq<mode>3" + [(parallel [ + (set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (eq:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "scc_eq_operand" "<scc_eq_op2>"))) + (clobber (match_scratch:GPR 3 "=r")) + (clobber (match_scratch:GPR 4 "=r"))])] + "" +{ + if (TARGET_ISEL && operands[2] != const0_rtx) + { + emit_insn (gen_eq<mode><mode>2_isel (operands[0], operands[1], + operands[2])); + DONE; + } +}) + +(define_insn_and_split "*eq<mode>3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (eq:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand:GPR 2 "scc_eq_operand" "<scc_eq_op2>"))) (clobber (match_scratch:GPR 3 "=r")) 
(clobber (match_scratch:GPR 4 "=r"))] - "" + "!(TARGET_ISEL && operands[2] != const0_rtx)" "#" - "" + "&& 1" [(set (match_dup 4) (clz:GPR (match_dup 3))) (set (match_dup 0) @@ -12311,16 +12413,34 @@ (const_string "8") (const_string "12")))]) -(define_insn_and_split "ne<mode>3" +(define_expand "ne<mode>3" + [(parallel [ + (set (match_operand:P 0 "gpc_reg_operand" "=r") + (ne:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>"))) + (clobber (match_scratch:P 3 "=r")) + (clobber (match_scratch:P 4 "=r")) + (clobber (reg:P CA_REGNO))])] + "" +{ + if (TARGET_ISEL && operands[2] != const0_rtx) + { + emit_insn (gen_ne<mode><mode>2_isel (operands[0], operands[1], + operands[2])); + DONE; + } +}) + +(define_insn_and_split "*ne<mode>3" [(set (match_operand:P 0 "gpc_reg_operand" "=r") (ne:P (match_operand:P 1 "gpc_reg_operand" "r") (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>"))) (clobber (match_scratch:P 3 "=r")) (clobber (match_scratch:P 4 "=r")) (clobber (reg:P CA_REGNO))] - "!TARGET_ISEL" + "!(TARGET_ISEL && operands[2] != const0_rtx)" "#" - "" + "&& 1" [(parallel [(set (match_dup 4) (plus:P (match_dup 3) (const_int -1))) @@ -12573,9 +12693,9 @@ (clobber (match_scratch:SI 3 "=r")) (clobber (match_scratch:SI 4 "=r")) (clobber (match_scratch:EXTSI 5 "=r"))] - "" + "!TARGET_ISEL" "#" - "" + "&& 1" [(set (match_dup 4) (clz:SI (match_dup 3))) (set (match_dup 5) @@ -14230,7 +14350,7 @@ (match_operand:IEEE128 2 "altivec_register_operand" "v")))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmulqp %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "div<mode>3" @@ -14332,7 +14452,7 @@ (match_operand:IEEE128 3 "altivec_register_operand" "0")))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmaddqp %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*fms<mode>4_hw" @@ -14344,7 +14464,7 @@ (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmsubqp %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*nfma<mode>4_hw" @@ -14356,7 +14476,7 @@ (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsnmaddqp %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*nfms<mode>4_hw" @@ -14369,7 +14489,7 @@ (match_operand:IEEE128 3 "altivec_register_operand" "0")))))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsnmsubqp %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "extend<SFDF:mode><IEEE128:mode>2_hw" @@ -14644,7 +14764,7 @@ UNSPEC_MUL_ROUND_TO_ODD))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmulqpo %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "div<mode>3_odd" @@ -14677,7 +14797,7 @@ UNSPEC_FMA_ROUND_TO_ODD))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmaddqpo %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*fms<mode>4_odd" @@ -14690,7 +14810,7 @@ UNSPEC_FMA_ROUND_TO_ODD))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsmsubqpo %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*nfma<mode>4_odd" @@ -14703,7 
+14823,7 @@ UNSPEC_FMA_ROUND_TO_ODD)))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsnmaddqpo %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "*nfms<mode>4_odd" @@ -14717,7 +14837,7 @@ UNSPEC_FMA_ROUND_TO_ODD)))] "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" "xsnmsubqpo %0,%1,%2" - [(set_attr "type" "vecfloat") + [(set_attr "type" "qmul") (set_attr "size" "128")]) (define_insn "trunc<mode>df2_odd" diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 35be5dead64..00d76563f37 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -73,6 +73,13 @@ (TF "FLOAT128_VECTOR_P (TFmode)") TI]) +(define_mode_attr VSX_XXBR [(V8HI "h") + (V4SI "w") + (V4SF "w") + (V2DF "d") + (V2DI "d") + (V1TI "q")]) + ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") @@ -273,6 +280,9 @@ (define_mode_iterator VSINT_84 [V4SI V2DI DI SI]) (define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) +;; Vector reverse byte modes +(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI]) + ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors. ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be ;; done on ISA 2.07 and not just ISA 3.0. @@ -408,6 +418,10 @@ UNSPEC_VCMPNEZW UNSPEC_XXEXTRACTUW UNSPEC_XXINSERTW + UNSPEC_VSX_FIRST_MATCH_INDEX + UNSPEC_VSX_FIRST_MATCH_EOS_INDEX + UNSPEC_VSX_FIRST_MISMATCH_INDEX + UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX ]) ;; VSX moves @@ -1108,7 +1122,7 @@ "lxvw4x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "vsx_ld_elemrev_v8hi" +(define_expand "vsx_ld_elemrev_v8hi" [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") (vec_select:V8HI (match_operand:V8HI 1 "memory_operand" "Z") @@ -1116,22 +1130,94 @@ (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx tmp = gen_reg_rtx (V4SImode); + rtx subreg, subreg2, perm[16], pcv; + /* 2 is leftmost element in register */ + unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; + int i; + + subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); + emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); + subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, + subreg2, pcv)); + DONE; + } +}) + +(define_insn "*vsx_ld_elemrev_v8hi_internal" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "memory_operand" "Z") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvh8x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "vsx_ld_elemrev_v16qi" +(define_expand "vsx_ld_elemrev_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + 
"VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx tmp = gen_reg_rtx (V4SImode); + rtx subreg, subreg2, perm[16], pcv; + /* 3 is leftmost element in register */ + unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; + int i; + + subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); + emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); + subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, + subreg2, pcv)); + DONE; + } +}) + +(define_insn "*vsx_ld_elemrev_v16qi_internal" [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") (vec_select:V16QI - (match_operand:V16QI 1 "memory_operand" "Z") - (parallel [(const_int 15) (const_int 14) - (const_int 13) (const_int 12) - (const_int 11) (const_int 10) - (const_int 9) (const_int 8) - (const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvb16x %x0,%y1" [(set_attr "type" "vecload")]) @@ -1139,8 +1225,8 @@ (define_insn "vsx_st_elemrev_v2df" [(set (match_operand:V2DF 0 "memory_operand" "=Z") (vec_select:V2DF - (match_operand:V2DF 1 "vsx_register_operand" "wa") - (parallel [(const_int 1) (const_int 0)])))] + (match_operand:V2DF 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1148,8 +1234,8 @@ (define_insn "vsx_st_elemrev_v2di" [(set (match_operand:V2DI 0 "memory_operand" "=Z") (vec_select:V2DI - (match_operand:V2DI 1 "vsx_register_operand" "wa") - (parallel [(const_int 1) (const_int 0)])))] + (match_operand:V2DI 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1157,9 +1243,9 @@ (define_insn "vsx_st_elemrev_v4sf" [(set (match_operand:V4SF 0 "memory_operand" "=Z") (vec_select:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "wa") - (parallel [(const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V4SF 1 "vsx_register_operand" "wa") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1174,30 +1260,98 @@ "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "vsx_st_elemrev_v8hi" +(define_expand "vsx_st_elemrev_v8hi" [(set (match_operand:V8HI 0 "memory_operand" "=Z") (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") - (parallel [(const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && 
!BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx subreg, perm[16], pcv; + rtx tmp = gen_reg_rtx (V8HImode); + /* 2 is leftmost element in register */ + unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; + int i; + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); + emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0])); + DONE; + } +}) + +(define_insn "*vsx_st_elemrev_v8hi_internal" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvh8x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "vsx_st_elemrev_v16qi" +(define_expand "vsx_st_elemrev_v16qi" [(set (match_operand:V16QI 0 "memory_operand" "=Z") (vec_select:V16QI - (match_operand:V16QI 1 "vsx_register_operand" "wa") - (parallel [(const_int 15) (const_int 14) - (const_int 13) (const_int 12) - (const_int 11) (const_int 10) - (const_int 9) (const_int 8) - (const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx subreg, perm[16], pcv; + rtx tmp = gen_reg_rtx (V16QImode); + /* 3 is leftmost element in register */ + unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; + int i; + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); + emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0])); + DONE; + } +}) + +(define_insn "*vsx_st_elemrev_v16qi_internal" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvb16x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -4054,9 +4208,9 @@ ;; ISA 3.0 Binary Floating-Point Support ;; VSX Scalar Extract Exponent Quad-Precision -(define_insn "xsxexpqp" +(define_insn "xsxexpqp_<mode>" [(set (match_operand:DI 0 "altivec_register_operand" "=v") - (unspec:DI [(match_operand:KF 1 "altivec_register_operand" "v")] + (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] UNSPEC_VSX_SXEXPDP))] "TARGET_P9_VECTOR" "xsxexpqp %0,%1" @@ -4072,9 +4226,9 @@ [(set_attr "type" "integer")]) ;; VSX Scalar Extract Significand 
Quad-Precision
-(define_insn "xsxsigqp"
+(define_insn "xsxsigqp_<mode>"
   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
-	(unspec:TI [(match_operand:KF 1 "altivec_register_operand" "v")]
+	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXSIG))]
   "TARGET_P9_VECTOR"
   "xsxsigqp %0,%1"
@@ -4090,20 +4244,21 @@
   [(set_attr "type" "integer")])

 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
-(define_insn "xsiexpqpf"
-  [(set (match_operand:KF 0 "altivec_register_operand" "=v")
-	(unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
-		    (match_operand:DI 2 "altivec_register_operand" "v")]
+(define_insn "xsiexpqpf_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
+	  (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
   "TARGET_P9_VECTOR"
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Insert Exponent Quad-Precision
-(define_insn "xsiexpqp"
-  [(set (match_operand:KF 0 "altivec_register_operand" "=v")
-	(unspec:KF [(match_operand:TI 1 "altivec_register_operand" "v")
-		    (match_operand:DI 2 "altivec_register_operand" "v")]
+(define_insn "xsiexpqp_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
+			 (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
   "TARGET_P9_VECTOR"
   "xsiexpqp %0,%1,%2"
@@ -4162,11 +4317,11 @@
 ;; (Has side effect of setting the lt bit if operand 1 is negative,
 ;; setting the eq bit if any of the conditions tested by operand 2
 ;; are satisfied, and clearing the gt and unordered bits to zero.)
-(define_expand "xststdcqp"
+(define_expand "xststdcqp_<mode>"
   [(set (match_dup 3)
	(compare:CCFP
-	 (unspec:KF
-	  [(match_operand:KF 1 "altivec_register_operand" "v")
+	 (unspec:IEEE128
+	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
@@ -4200,11 +4355,11 @@
 })

 ;; The VSX Scalar Test Negative Quad-Precision
-(define_expand "xststdcnegqp"
+(define_expand "xststdcnegqp_<mode>"
   [(set (match_dup 2)
	(compare:CCFP
-	 (unspec:KF
-	  [(match_operand:KF 1 "altivec_register_operand" "v")
+	 (unspec:IEEE128
+	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
@@ -4234,11 +4389,12 @@
   operands[3] = CONST0_RTX (SImode);
 })

-(define_insn "*xststdcqp"
+(define_insn "*xststdcqp_<mode>"
   [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
-	 (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
-		     (match_operand:SI 2 "u7bit_cint_operand" "n")]
+	 (unspec:IEEE128
+	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
+	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
   "TARGET_P9_VECTOR"
@@ -4335,6 +4491,149 @@
   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
   [(set_attr "type" "vecsimple")])

+;; Return first position of match between vectors
+(define_expand "first_match_index_<mode>"
+  [(match_operand:SI 0 "register_operand")
+   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+  UNSPEC_VSX_FIRST_MATCH_INDEX)]
+  "TARGET_P9_VECTOR"
+{
+  int sh;
+
+  rtx cmp_result = gen_reg_rtx (<MODE>mode);
+  rtx not_result = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
+					     operands[2]));
+  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
+
+  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+  if (<MODE>mode == V16QImode)
+    emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
+  else
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
+      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+    }
+  DONE;
+})

+;; Return first position of match between vectors or end of string (EOS)
+(define_expand "first_match_or_eos_index_<mode>"
+  [(match_operand:SI 0 "register_operand")
+   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+  UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
+  "TARGET_P9_VECTOR"
+{
+  int sh;
+  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
+  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
+  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
+  rtx and_result = gen_reg_rtx (<MODE>mode);
+  rtx result = gen_reg_rtx (<MODE>mode);
+  rtx vzero = gen_reg_rtx (<MODE>mode);
+
+  /* Vector with zeros in elements that correspond to zeros in operands.  */
+  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
+  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
+  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
+  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+
+  /* Vector with ones in elements that do not match.  */
+  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+					     operands[2]));
+
+  /* Create vector with ones in elements where there was a zero in one of
+     the source elements, or where the elements match.
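+     For example, with V16QI operands whose first bytes are
+     { 'a', 'b', 0, ... } and { 'c', 'd', 0, ... }, no leading bytes
+     match, but byte 2 is zero in both operands, so the combined mask
+     first has a one at element 2 and the vctzlsbb below returns 2.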
+     */
+  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
+  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+  if (<MODE>mode == V16QImode)
+    emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
+  else
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_vctzlsbb_<mode> (tmp, result));
+      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+    }
+  DONE;
+})

+;; Return first position of mismatch between vectors
+(define_expand "first_mismatch_index_<mode>"
+  [(match_operand:SI 0 "register_operand")
+   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+  UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
+  "TARGET_P9_VECTOR"
+{
+  int sh;
+  rtx cmp_result = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
+					    operands[2]));
+  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+  if (<MODE>mode == V16QImode)
+    emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
+  else
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
+      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+    }
+  DONE;
+})

+;; Return first position of mismatch between vectors or end of string (EOS)
+(define_expand "first_mismatch_or_eos_index_<mode>"
+  [(match_operand:SI 0 "register_operand")
+   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
+	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
+  UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
+  "TARGET_P9_VECTOR"
+{
+  int sh;
+  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
+  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
+  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
+  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
+  rtx and_result = gen_reg_rtx (<MODE>mode);
+  rtx result = gen_reg_rtx (<MODE>mode);
+  rtx vzero = gen_reg_rtx (<MODE>mode);
+
+  /* Vector with zeros in elements that correspond to zeros in operands.  */
+  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
+
+  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
+  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
+  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+
+  /* Vector with ones in elements that match.  */
+  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+					     operands[2]));
+  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
+
+  /* Create vector with ones in elements where there was a zero in one of
+     the source elements, or where the elements did not match.
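+     For example, with V16QI operands whose first bytes are
+     { 'a', 'b', 0, ... } and { 'a', 'x', 0, ... }, element 0 matches
+     and is nonzero while element 1 differs, so the mask first has a
+     one at element 1 and the vctzlsbb below returns 1.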
+     */
+  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
+  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
+
+  if (<MODE>mode == V16QImode)
+    emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
+  else
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_vctzlsbb_<mode> (tmp, result));
+      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
+    }
+  DONE;
+})
+
 ;; Load VSX Vector with Length
 (define_expand "lxvl"
   [(set (match_dup 3)
@@ -4441,12 +4740,12 @@
   DONE;
 })

-;; Vector Compare Not Equal Byte
+;; Vector Compare Not Equal Byte (specified as not + eq, not an unspec)
 (define_insn "vcmpneb"
   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
-	(unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
-		       (match_operand:V16QI 2 "altivec_register_operand" "v")]
-	 UNSPEC_VCMPNEB))]
+	(not:V16QI
+	  (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
+		    (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
   "TARGET_P9_VECTOR"
   "vcmpneb %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4462,12 +4761,12 @@
   "vcmpnezb %0,%1,%2"
   [(set_attr "type" "vecsimple")])

-;; Vector Compare Not Equal Half Word
+;; Vector Compare Not Equal Half Word (specified as not + eq, not an unspec)
 (define_insn "vcmpneh"
   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
-	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
-		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
-	 UNSPEC_VCMPNEH))]
+	(not:V8HI
+	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
+		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
   "TARGET_P9_VECTOR"
   "vcmpneh %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4482,13 +4781,12 @@
   "vcmpnezh %0,%1,%2"
   [(set_attr "type" "vecsimple")])

-;; Vector Compare Not Equal Word
+;; Vector Compare Not Equal Word (specified as not + eq, not an unspec)
 (define_insn "vcmpnew"
   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
-	(unspec:V4SI
-	 [(match_operand:V4SI 1 "altivec_register_operand" "v")
-	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
-	 UNSPEC_VCMPNEH))]
+	(not:V4SI
+	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
+		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
   "TARGET_P9_VECTOR"
   "vcmpnew %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4514,10 +4812,10 @@
   [(set_attr "type" "vecsimple")])

 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb"
+(define_insn "vctzlsbb_<mode>"
   [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
-	 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
+	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
@@ -4776,6 +5074,37 @@
   "xxbrw %x0,%x1"
   [(set_attr "type" "vecperm")])

+;; Swap all bytes in each element of vector
+(define_expand "revb_<mode>"
+  [(set (match_operand:VEC_REVB 0 "vsx_register_operand")
+	(bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))]
+  ""
+{
+  if (TARGET_P9_VECTOR)
+    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
+  else
+    {
+      /* Want to have the elements in reverse order relative
+	 to the endian mode in use, i.e. in LE mode, put elements
+	 in BE order.  */
+      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
+      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
+					   operands[1], sel));
+    }
+
+  DONE;
+})
+
+;; Reversing bytes in vector char is just a NOP.
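+;; (Each element of a V16QI is a single byte, so the per-element byte
+;; swap is the identity and the expander just copies its operand.)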
+(define_expand "revb_v16qi"
+  [(set (match_operand:V16QI 0 "vsx_register_operand")
+	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
+  ""
+{
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
 ;; Swap all bytes in each 16-bit element
 (define_insn "p9_xxbrh_v8hi"
   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
diff --git a/gcc/config/rs6000/xcoff.h b/gcc/config/rs6000/xcoff.h
index 36f40f4b11e..1eeb75c3e6c 100644
--- a/gcc/config/rs6000/xcoff.h
+++ b/gcc/config/rs6000/xcoff.h
@@ -179,7 +179,7 @@
    `assemble_name' uses this.  */

 #define ASM_OUTPUT_LABELREF(FILE,NAME)	\
-  asm_fprintf ((FILE), "%U%s", rs6000_xcoff_strip_dollar (NAME));
+  asm_fprintf ((FILE), "%U%s", rs6000_xcoff_strip_dollar (NAME))

 /* This is how to output an internal label prefix.  rs6000.c uses this
    when generating traceback tables.  */